; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
; Make sure that the ARM backend with NEON handles vselect.

; Element-wise signed maximum of two <4 x i32> vectors, written as
; icmp sgt + select, with the result stored to %m. The llc expectations below
; require a vcgt.s32 whose result register is then the destination of a vbsl
; taking the same two source registers.
define void @vmax_v4i32(<4 x i32>* %m, <4 x i32> %a, <4 x i32> %b) {
; CHECK: vcgt.s32 [[QR:q[0-9]+]], [[Q1:q[0-9]+]], [[Q2:q[0-9]+]]
; CHECK: vbsl [[QR]], [[Q1]], [[Q2]]
  %cmpres = icmp sgt <4 x i32> %a, %b
  %maxres = select <4 x i1> %cmpres, <4 x i32> %a, <4 x i32> %b
  store <4 x i32> %maxres, <4 x i32>* %m
  ret void
}

; We adjusted the cost model of the following selects. When we improve code
; lowering, we also need to adjust the cost.
; RUN: opt < %s -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -march=arm -mcpu=cortex-a8 | FileCheck %s --check-prefix=COST
; Blend of two <4 x i8> vectors under a <4 x i1> mask; all three operands are
; loaded from memory and the result is stored to %storeaddr.
; llc run: expects four strh stores followed by a vldr.
; Cost-model run: expects the select to be reported with cost 10.
%T0_3 = type <4 x i8>
%T1_3 = type <4 x i1>
; CHECK: func_blend3:
define void @func_blend3(%T0_3* %loadaddr, %T0_3* %loadaddr2,
                         %T1_3* %blend, %T0_3* %storeaddr) {
; CHECK: strh
; CHECK: strh
; CHECK: strh
; CHECK: strh
; CHECK: vldr
  %v0 = load %T0_3* %loadaddr
  %v1 = load %T0_3* %loadaddr2
  %c = load %T1_3* %blend
; COST: func_blend3
; COST: cost of 10 {{.*}} select
  %r = select %T1_3 %c, %T0_3 %v0, %T0_3 %v1
  store %T0_3 %r, %T0_3* %storeaddr
  ret void
}
; Blend of two <8 x i8> vectors under an <8 x i1> mask; all three operands are
; loaded from memory and the result is stored to %storeaddr.
; Cost-model run: expects the select to be reported with cost 17.
%T0_4 = type <8 x i8>
%T1_4 = type <8 x i1>
; CHECK: func_blend4:
define void @func_blend4(%T0_4* %loadaddr, %T0_4* %loadaddr2,
                         %T1_4* %blend, %T0_4* %storeaddr) {
  %v0 = load %T0_4* %loadaddr
  %v1 = load %T0_4* %loadaddr2
  %c = load %T1_4* %blend
; NOTE(review): the five lowercase directives below are not recognized by
; FileCheck (its prefix match is case-sensitive), so for the llc run only the
; function label is verified here. Confirm whether they were disabled on
; purpose before upper-casing them.
; check: strb
; check: strb
; check: strb
; check: strb
; check: vldr
; COST: func_blend4
; COST: cost of 17 {{.*}} select
  %r = select %T1_4 %c, %T0_4 %v0, %T0_4 %v1
  store %T0_4 %r, %T0_4* %storeaddr
  ret void
}
; Blend of two <16 x i8> vectors under a <16 x i1> mask; all three operands
; are loaded from memory and the result is stored to %storeaddr.
; llc run: expects four strb stores followed by a vld.
; Cost-model run: expects the select to be reported with cost 33.
%T0_5 = type <16 x i8>
%T1_5 = type <16 x i1>
; CHECK: func_blend5:
define void @func_blend5(%T0_5* %loadaddr, %T0_5* %loadaddr2,
                         %T1_5* %blend, %T0_5* %storeaddr) {
  %v0 = load %T0_5* %loadaddr
  %v1 = load %T0_5* %loadaddr2
  %c = load %T1_5* %blend
; CHECK: strb
; CHECK: strb
; CHECK: strb
; CHECK: strb
; CHECK: vld
; COST: func_blend5
; COST: cost of 33 {{.*}} select
  %r = select %T1_5 %c, %T0_5 %v0, %T0_5 %v1
  store %T0_5 %r, %T0_5* %storeaddr
  ret void
}
; Blend of two <4 x i16> vectors under a <4 x i1> mask; all three operands are
; loaded from memory and the result is stored to %storeaddr.
; llc run: expects four strh stores followed by a vld.
; Cost-model run: expects the select to be reported with cost 9.
%T0_8 = type <4 x i16>
%T1_8 = type <4 x i1>
; CHECK: func_blend8:
define void @func_blend8(%T0_8* %loadaddr, %T0_8* %loadaddr2,
                         %T1_8* %blend, %T0_8* %storeaddr) {
  %v0 = load %T0_8* %loadaddr
  %v1 = load %T0_8* %loadaddr2
  %c = load %T1_8* %blend
; CHECK: strh
; CHECK: strh
; CHECK: strh
; CHECK: strh
; CHECK: vld
; COST: func_blend8
; COST: cost of 9 {{.*}} select
  %r = select %T1_8 %c, %T0_8 %v0, %T0_8 %v1
  store %T0_8 %r, %T0_8* %storeaddr
  ret void
}
; Blend of two <8 x i16> vectors under an <8 x i1> mask; all three operands
; are loaded from memory and the result is stored to %storeaddr.
; llc run: expects four strh stores followed by a vld.
; Cost-model run: expects the select to be reported with cost 17.
%T0_9 = type <8 x i16>
%T1_9 = type <8 x i1>
; CHECK: func_blend9:
define void @func_blend9(%T0_9* %loadaddr, %T0_9* %loadaddr2,
                         %T1_9* %blend, %T0_9* %storeaddr) {
  %v0 = load %T0_9* %loadaddr
  %v1 = load %T0_9* %loadaddr2
  %c = load %T1_9* %blend
; CHECK: strh
; CHECK: strh
; CHECK: strh
; CHECK: strh
; CHECK: vld
; COST: func_blend9
; COST: cost of 17 {{.*}} select
  %r = select %T1_9 %c, %T0_9 %v0, %T0_9 %v1
  store %T0_9 %r, %T0_9* %storeaddr
  ret void
}
; Blend of two <16 x i16> vectors under a <16 x i1> mask; all three operands
; are loaded from memory and the result is stored to %storeaddr.
; llc run: expects four strb stores followed by a vld.
; Cost-model run: expects the select to be reported with cost 40.
%T0_10 = type <16 x i16>
%T1_10 = type <16 x i1>
; CHECK: func_blend10:
define void @func_blend10(%T0_10* %loadaddr, %T0_10* %loadaddr2,
                          %T1_10* %blend, %T0_10* %storeaddr) {
  %v0 = load %T0_10* %loadaddr
  %v1 = load %T0_10* %loadaddr2
  %c = load %T1_10* %blend
; CHECK: strb
; CHECK: strb
; CHECK: strb
; CHECK: strb
; CHECK: vld
; COST: func_blend10
; COST: cost of 40 {{.*}} select
  %r = select %T1_10 %c, %T0_10 %v0, %T0_10 %v1
  store %T0_10 %r, %T0_10* %storeaddr
  ret void
}
; Blend of two <8 x i32> vectors under an <8 x i1> mask; all three operands
; are loaded from memory and the result is stored to %storeaddr.
; llc run: expects four strb stores.
; Cost-model run: expects the select to be reported with cost 41.
%T0_14 = type <8 x i32>
%T1_14 = type <8 x i1>
; CHECK: func_blend14:
define void @func_blend14(%T0_14* %loadaddr, %T0_14* %loadaddr2,
                          %T1_14* %blend, %T0_14* %storeaddr) {
  %v0 = load %T0_14* %loadaddr
  %v1 = load %T0_14* %loadaddr2
  %c = load %T1_14* %blend
; CHECK: strb
; CHECK: strb
; CHECK: strb
; CHECK: strb
; COST: func_blend14
; COST: cost of 41 {{.*}} select
  %r = select %T1_14 %c, %T0_14 %v0, %T0_14 %v1
  store %T0_14 %r, %T0_14* %storeaddr
  ret void
}
; Blend of two <16 x i32> vectors under a <16 x i1> mask; all three operands
; are loaded from memory and the result is stored to %storeaddr.
; llc run: expects four strb stores.
; Cost-model run: expects the select to be reported with cost 82.
%T0_15 = type <16 x i32>
%T1_15 = type <16 x i1>
; CHECK: func_blend15:
define void @func_blend15(%T0_15* %loadaddr, %T0_15* %loadaddr2,
                          %T1_15* %blend, %T0_15* %storeaddr) {
  %v0 = load %T0_15* %loadaddr
  %v1 = load %T0_15* %loadaddr2
  %c = load %T1_15* %blend
; CHECK: strb
; CHECK: strb
; CHECK: strb
; CHECK: strb
; COST: func_blend15
; COST: cost of 82 {{.*}} select
  %r = select %T1_15 %c, %T0_15 %v0, %T0_15 %v1
  store %T0_15 %r, %T0_15* %storeaddr
  ret void
}
; Blend of two <4 x i64> vectors under a <4 x i1> mask; all three operands are
; loaded from memory and the result is stored to %storeaddr.
; llc run: expects four strh stores.
; Cost-model run: expects the select to be reported with cost 19.
%T0_18 = type <4 x i64>
%T1_18 = type <4 x i1>
; CHECK: func_blend18:
define void @func_blend18(%T0_18* %loadaddr, %T0_18* %loadaddr2,
                          %T1_18* %blend, %T0_18* %storeaddr) {
  %v0 = load %T0_18* %loadaddr
  %v1 = load %T0_18* %loadaddr2
  %c = load %T1_18* %blend
; CHECK: strh
; CHECK: strh
; CHECK: strh
; CHECK: strh
; COST: func_blend18
; COST: cost of 19 {{.*}} select
  %r = select %T1_18 %c, %T0_18 %v0, %T0_18 %v1
  store %T0_18 %r, %T0_18* %storeaddr
  ret void
}
; Blend of two <8 x i64> vectors under an <8 x i1> mask; all three operands
; are loaded from memory and the result is stored to %storeaddr.
; llc run: expects four strb stores.
; Cost-model run: expects the select to be reported with cost 50.
%T0_19 = type <8 x i64>
%T1_19 = type <8 x i1>
; CHECK: func_blend19:
define void @func_blend19(%T0_19* %loadaddr, %T0_19* %loadaddr2,
                          %T1_19* %blend, %T0_19* %storeaddr) {
  %v0 = load %T0_19* %loadaddr
  %v1 = load %T0_19* %loadaddr2
  %c = load %T1_19* %blend
; CHECK: strb
; CHECK: strb
; CHECK: strb
; CHECK: strb
; COST: func_blend19
; COST: cost of 50 {{.*}} select
  %r = select %T1_19 %c, %T0_19 %v0, %T0_19 %v1
  store %T0_19 %r, %T0_19* %storeaddr
  ret void
}
; Blend of two <16 x i64> vectors under a <16 x i1> mask; all three operands
; are loaded from memory and the result is stored to %storeaddr.
; llc run: expects four strb stores.
; Cost-model run: expects the select to be reported with cost 100.
%T0_20 = type <16 x i64>
%T1_20 = type <16 x i1>
; CHECK: func_blend20:
define void @func_blend20(%T0_20* %loadaddr, %T0_20* %loadaddr2,
                          %T1_20* %blend, %T0_20* %storeaddr) {
  %v0 = load %T0_20* %loadaddr
  %v1 = load %T0_20* %loadaddr2
  %c = load %T1_20* %blend
; CHECK: strb
; CHECK: strb
; CHECK: strb
; CHECK: strb
; COST: func_blend20
; COST: cost of 100 {{.*}} select
  %r = select %T1_20 %c, %T0_20 %v0, %T0_20 %v1
  store %T0_20 %r, %T0_20* %storeaddr
  ret void
}