; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
; RUN: opt < %s -cost-model -costmodel-reduxcost=true -mtriple=x86_64-apple-darwin -analyze -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
; RUN: opt < %s -cost-model -costmodel-reduxcost=true -mtriple=x86_64-apple-darwin -analyze -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,SSE,SSSE3
; RUN: opt < %s -cost-model -costmodel-reduxcost=true -mtriple=x86_64-apple-darwin -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE42
; RUN: opt < %s -cost-model -costmodel-reduxcost=true -mtriple=x86_64-apple-darwin -analyze -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
; RUN: opt < %s -cost-model -costmodel-reduxcost=true -mtriple=x86_64-apple-darwin -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
; RUN: opt < %s -cost-model -costmodel-reduxcost=true -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
; RUN: opt < %s -cost-model -costmodel-reduxcost=true -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW
; RUN: opt < %s -cost-model -costmodel-reduxcost=true -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512DQ

; Cost-model expectations for signed-max reductions over i64 vectors of
; 1, 2, 4, 8 and 16 elements. The function body only issues the reduction
; calls on undef operands; the check lines below pin the estimated cost
; printed for each call under every subtarget prefix.
define i32 @reduce_i64(i32 %arg) {
; SSE2-LABEL: 'reduce_i64'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i64'
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i64'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i64'
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i64'
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'reduce_i64'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
  %V1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64> undef)
  %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef)
  %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef)
  %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef)
  %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef)
  ret i32 undef
}

; Cost-model expectations for signed-max reductions over i32 vectors of
; 2, 4, 8, 16 and 32 elements. The function body only issues the reduction
; calls on undef operands; the check lines below pin the estimated cost
; printed for each call under every subtarget prefix.
define i32 @reduce_i32(i32 %arg) {
; SSE2-LABEL: 'reduce_i32'
; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i32'
; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i32'
; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i32'
; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i32'
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'reduce_i32'
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
  %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef)
  %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef)
  %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef)
  %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef)
  %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef)
  ret i32 undef
}

; Cost-model expectations for signed-max reductions over i16 vectors of
; 4, 8, 16, 32 and 64 elements. The function body only issues the reduction
; calls on undef operands; the check lines below pin the estimated cost
; printed for each call under every subtarget prefix.
define i32 @reduce_i16(i32 %arg) {
; SSE2-LABEL: 'reduce_i16'
; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i16'
; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i16'
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i16'
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i16'
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'reduce_i16'
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'reduce_i16'
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'reduce_i16'
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
  %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef)
  %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef)
  %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef)
  %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef)
  %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef)
  ret i32 undef
}

; Cost-model expectations for signed-max reductions over i8 vectors of
; 8, 16, 32, 64 and 128 elements. The function body only issues the reduction
; calls on undef operands; the check lines below pin the estimated cost
; printed for each call under every subtarget prefix.
define i32 @reduce_i8(i32 %arg) {
; SSE2-LABEL: 'reduce_i8'
; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i8'
; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i8'
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i8'
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i8'
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'reduce_i8'
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'reduce_i8'
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'reduce_i8'
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
  %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
  %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
  %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef)
  %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef)
  %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef)
  ret i32 undef
}

; Declarations of the signed-max reduction intrinsics called by the test
; functions in this file, grouped by element type (i64, i32, i16, i8).
declare i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64>)
declare i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64>)
declare i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64>)
declare i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64>)
declare i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64>)

declare i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32>)
declare i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32>)
declare i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32>)
declare i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32>)
declare i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32>)

declare i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16>)
declare i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16>)
declare i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16>)
declare i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16>)
declare i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16>)

declare i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8>)
declare i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8>)
declare i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8>)
declare i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8>)
declare i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8>)