; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-- -mattr=sse2 | FileCheck %s --check-prefixes=ANY,X32-SSE2
; RUN: llc < %s -mtriple=x86_64-- -mattr=avx2 | FileCheck %s --check-prefixes=ANY,X64-AVX2

declare i8 @llvm.fshl.i8(i8, i8, i8)
declare i16 @llvm.fshl.i16(i16, i16, i16)
declare i32 @llvm.fshl.i32(i32, i32, i32)
declare i64 @llvm.fshl.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

declare i8 @llvm.fshr.i8(i8, i8, i8)
declare i16 @llvm.fshr.i16(i16, i16, i16)
declare i32 @llvm.fshr.i32(i32, i32, i32)
declare i64 @llvm.fshr.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

; When first 2 operands match, it's a rotate.
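; As a sketch of the identity being matched (assuming a power-of-two width N;
; not part of the checked output):
;   rotl(x, z) = (x << (z & (N - 1))) | (x >> (-z & (N - 1)))
; so fshl(x, x, z) can be selected as the target's rol instruction.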

define i8 @rotl_i8_const_shift(i8 %x) nounwind {
; X32-SSE2-LABEL: rotl_i8_const_shift:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-SSE2-NEXT: rolb $3, %al
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: rotl_i8_const_shift:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: movl %edi, %eax
; X64-AVX2-NEXT: rolb $3, %al
; X64-AVX2-NEXT: # kill: def $al killed $al killed $eax
; X64-AVX2-NEXT: retq
  %f = call i8 @llvm.fshl.i8(i8 %x, i8 %x, i8 3)
  ret i8 %f
}

define i64 @rotl_i64_const_shift(i64 %x) nounwind {
; X32-SSE2-LABEL: rotl_i64_const_shift:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-SSE2-NEXT: movl %ecx, %eax
; X32-SSE2-NEXT: shldl $3, %edx, %eax
; X32-SSE2-NEXT: shldl $3, %ecx, %edx
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: rotl_i64_const_shift:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: movq %rdi, %rax
; X64-AVX2-NEXT: rolq $3, %rax
; X64-AVX2-NEXT: retq
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 3)
  ret i64 %f
}

define i16 @rotl_i16(i16 %x, i16 %z) nounwind {
; X32-SSE2-LABEL: rotl_i16:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X32-SSE2-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT: rolw %cl, %ax
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: rotl_i16:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: movl %esi, %ecx
; X64-AVX2-NEXT: movl %edi, %eax
; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT: rolw %cl, %ax
; X64-AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; X64-AVX2-NEXT: retq
  %f = call i16 @llvm.fshl.i16(i16 %x, i16 %x, i16 %z)
  ret i16 %f
}

define i32 @rotl_i32(i32 %x, i32 %z) nounwind {
; X32-SSE2-LABEL: rotl_i32:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT: roll %cl, %eax
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: rotl_i32:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: movl %esi, %ecx
; X64-AVX2-NEXT: movl %edi, %eax
; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT: roll %cl, %eax
; X64-AVX2-NEXT: retq
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %z)
  ret i32 %f
}

; Vector rotate.
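; A rough reading of the checks below: SSE2 has no per-element variable shifts,
; so the right-shift half is done lane-by-lane with psrld on shuffled amounts,
; and the left-shift half multiplies by 2^amount via the pslld $23 / paddd /
; cvttps2dq / pmuludq trick; AVX2 can use vpsllvd/vpsrlvd directly.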

define <4 x i32> @rotl_v4i32(<4 x i32> %x, <4 x i32> %z) nounwind {
; X32-SSE2-LABEL: rotl_v4i32:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: pxor %xmm3, %xmm3
; X32-SSE2-NEXT: psubd %xmm1, %xmm3
; X32-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [31,31,31,31]
; X32-SSE2-NEXT: pand %xmm4, %xmm3
; X32-SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm3[2,3,3,3,4,5,6,7]
; X32-SSE2-NEXT: movdqa %xmm0, %xmm5
; X32-SSE2-NEXT: psrld %xmm2, %xmm5
; X32-SSE2-NEXT: pshuflw {{.*#+}} xmm6 = xmm3[0,1,1,1,4,5,6,7]
; X32-SSE2-NEXT: movdqa %xmm0, %xmm2
; X32-SSE2-NEXT: psrld %xmm6, %xmm2
; X32-SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm5[0]
; X32-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
; X32-SSE2-NEXT: pshuflw {{.*#+}} xmm5 = xmm3[2,3,3,3,4,5,6,7]
; X32-SSE2-NEXT: movdqa %xmm0, %xmm6
; X32-SSE2-NEXT: psrld %xmm5, %xmm6
; X32-SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,1,1,1,4,5,6,7]
; X32-SSE2-NEXT: movdqa %xmm0, %xmm5
; X32-SSE2-NEXT: psrld %xmm3, %xmm5
; X32-SSE2-NEXT: punpckhqdq {{.*#+}} xmm5 = xmm5[1],xmm6[1]
; X32-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,3],xmm5[0,3]
; X32-SSE2-NEXT: pand %xmm4, %xmm1
; X32-SSE2-NEXT: pslld $23, %xmm1
; X32-SSE2-NEXT: paddd {{\.LCPI.*}}, %xmm1
; X32-SSE2-NEXT: cvttps2dq %xmm1, %xmm1
; X32-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; X32-SSE2-NEXT: pmuludq %xmm1, %xmm0
; X32-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X32-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; X32-SSE2-NEXT: pmuludq %xmm3, %xmm1
; X32-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X32-SSE2-NEXT: orps %xmm0, %xmm2
; X32-SSE2-NEXT: movaps %xmm2, %xmm0
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: rotl_v4i32:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
; X64-AVX2-NEXT: vpand %xmm2, %xmm1, %xmm3
; X64-AVX2-NEXT: vpsllvd %xmm3, %xmm0, %xmm3
; X64-AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
; X64-AVX2-NEXT: vpsubd %xmm1, %xmm4, %xmm1
; X64-AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; X64-AVX2-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpor %xmm0, %xmm3, %xmm0
; X64-AVX2-NEXT: retq
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z)
  ret <4 x i32> %f
}

; Vector rotate by constant splat amount.
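; With a constant splat amount there is no per-element work: the rotate lowers to
; an immediate left shift, an immediate right shift by (32 - amount), and an OR.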

define <4 x i32> @rotl_v4i32_const_shift(<4 x i32> %x) nounwind {
; X32-SSE2-LABEL: rotl_v4i32_const_shift:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
; X32-SSE2-NEXT: psrld $29, %xmm1
; X32-SSE2-NEXT: pslld $3, %xmm0
; X32-SSE2-NEXT: por %xmm1, %xmm0
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: rotl_v4i32_const_shift:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vpsrld $29, %xmm0, %xmm1
; X64-AVX2-NEXT: vpslld $3, %xmm0, %xmm0
; X64-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: retq
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
  ret <4 x i32> %f
}

; Repeat everything for funnel shift right.

define i8 @rotr_i8_const_shift(i8 %x) nounwind {
; X32-SSE2-LABEL: rotr_i8_const_shift:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-SSE2-NEXT: rorb $3, %al
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: rotr_i8_const_shift:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: movl %edi, %eax
; X64-AVX2-NEXT: rorb $3, %al
; X64-AVX2-NEXT: # kill: def $al killed $al killed $eax
; X64-AVX2-NEXT: retq
  %f = call i8 @llvm.fshr.i8(i8 %x, i8 %x, i8 3)
  ret i8 %f
}

define i32 @rotr_i32_const_shift(i32 %x) nounwind {
; X32-SSE2-LABEL: rotr_i32_const_shift:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT: rorl $3, %eax
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: rotr_i32_const_shift:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: movl %edi, %eax
; X64-AVX2-NEXT: rorl $3, %eax
; X64-AVX2-NEXT: retq
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 3)
  ret i32 %f
}

; When first 2 operands match, it's a rotate (by variable amount).
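; As a sketch, the mirrored identity for the right-rotate cases below (again
; assuming a power-of-two width N):
;   rotr(x, z) = (x >> (z & (N - 1))) | (x << (-z & (N - 1)))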

define i16 @rotr_i16(i16 %x, i16 %z) nounwind {
; X32-SSE2-LABEL: rotr_i16:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X32-SSE2-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT: rorw %cl, %ax
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: rotr_i16:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: movl %esi, %ecx
; X64-AVX2-NEXT: movl %edi, %eax
; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT: rorw %cl, %ax
; X64-AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; X64-AVX2-NEXT: retq
  %f = call i16 @llvm.fshr.i16(i16 %x, i16 %x, i16 %z)
  ret i16 %f
}

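; The 32-bit target has no 64-bit rotate instruction, so the checks below show the
; generic expansion: shrdl/shrl produce the right-shifted halves, shll/shldl the
; left-shifted halves, and cmovs pick the correct pieces when the amount is >= 32.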
define i64 @rotr_i64(i64 %x, i64 %z) nounwind {
; X32-SSE2-LABEL: rotr_i64:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: pushl %ebp
; X32-SSE2-NEXT: pushl %ebx
; X32-SSE2-NEXT: pushl %edi
; X32-SSE2-NEXT: pushl %esi
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X32-SSE2-NEXT: movl %ebx, %ecx
; X32-SSE2-NEXT: andl $63, %ecx
; X32-SSE2-NEXT: movl %edx, %edi
; X32-SSE2-NEXT: shrl %cl, %edi
; X32-SSE2-NEXT: movl %esi, %ebp
; X32-SSE2-NEXT: shrdl %cl, %edx, %ebp
; X32-SSE2-NEXT: xorl %eax, %eax
; X32-SSE2-NEXT: testb $32, %cl
; X32-SSE2-NEXT: cmovnel %edi, %ebp
; X32-SSE2-NEXT: cmovnel %eax, %edi
; X32-SSE2-NEXT: negl %ebx
; X32-SSE2-NEXT: andl $63, %ebx
; X32-SSE2-NEXT: movl %esi, %eax
; X32-SSE2-NEXT: movl %ebx, %ecx
; X32-SSE2-NEXT: shll %cl, %eax
; X32-SSE2-NEXT: shldl %cl, %esi, %edx
; X32-SSE2-NEXT: testb $32, %bl
; X32-SSE2-NEXT: cmovnel %eax, %edx
; X32-SSE2-NEXT: movl $0, %ecx
; X32-SSE2-NEXT: cmovnel %ecx, %eax
; X32-SSE2-NEXT: orl %ebp, %eax
; X32-SSE2-NEXT: orl %edi, %edx
; X32-SSE2-NEXT: popl %esi
; X32-SSE2-NEXT: popl %edi
; X32-SSE2-NEXT: popl %ebx
; X32-SSE2-NEXT: popl %ebp
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: rotr_i64:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: movq %rsi, %rcx
; X64-AVX2-NEXT: movq %rdi, %rax
; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-AVX2-NEXT: rorq %cl, %rax
; X64-AVX2-NEXT: retq
  %f = call i64 @llvm.fshr.i64(i64 %x, i64 %x, i64 %z)
  ret i64 %f
}

; Vector rotate.

define <4 x i32> @rotr_v4i32(<4 x i32> %x, <4 x i32> %z) nounwind {
; X32-SSE2-LABEL: rotr_v4i32:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [31,31,31,31]
; X32-SSE2-NEXT: pxor %xmm3, %xmm3
; X32-SSE2-NEXT: psubd %xmm1, %xmm3
; X32-SSE2-NEXT: movdqa %xmm1, %xmm4
; X32-SSE2-NEXT: pand %xmm2, %xmm4
; X32-SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm4[2,3,3,3,4,5,6,7]
; X32-SSE2-NEXT: movdqa %xmm0, %xmm5
; X32-SSE2-NEXT: psrld %xmm1, %xmm5
; X32-SSE2-NEXT: pshuflw {{.*#+}} xmm6 = xmm4[0,1,1,1,4,5,6,7]
; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
; X32-SSE2-NEXT: psrld %xmm6, %xmm1
; X32-SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm5[0]
; X32-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[2,3,0,1]
; X32-SSE2-NEXT: pshuflw {{.*#+}} xmm5 = xmm4[2,3,3,3,4,5,6,7]
; X32-SSE2-NEXT: movdqa %xmm0, %xmm6
; X32-SSE2-NEXT: psrld %xmm5, %xmm6
; X32-SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[0,1,1,1,4,5,6,7]
; X32-SSE2-NEXT: movdqa %xmm0, %xmm5
; X32-SSE2-NEXT: psrld %xmm4, %xmm5
; X32-SSE2-NEXT: punpckhqdq {{.*#+}} xmm5 = xmm5[1],xmm6[1]
; X32-SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],xmm5[0,3]
; X32-SSE2-NEXT: pand %xmm2, %xmm3
; X32-SSE2-NEXT: pslld $23, %xmm3
; X32-SSE2-NEXT: paddd {{\.LCPI.*}}, %xmm3
; X32-SSE2-NEXT: cvttps2dq %xmm3, %xmm2
; X32-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; X32-SSE2-NEXT: pmuludq %xmm2, %xmm0
; X32-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X32-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; X32-SSE2-NEXT: pmuludq %xmm3, %xmm2
; X32-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X32-SSE2-NEXT: orps %xmm0, %xmm1
; X32-SSE2-NEXT: movaps %xmm1, %xmm0
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: rotr_v4i32:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
; X64-AVX2-NEXT: vpand %xmm2, %xmm1, %xmm3
; X64-AVX2-NEXT: vpsrlvd %xmm3, %xmm0, %xmm3
; X64-AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
; X64-AVX2-NEXT: vpsubd %xmm1, %xmm4, %xmm1
; X64-AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; X64-AVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpor %xmm3, %xmm0, %xmm0
; X64-AVX2-NEXT: retq
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z)
  ret <4 x i32> %f
}

; Vector rotate by constant splat amount.

define <4 x i32> @rotr_v4i32_const_shift(<4 x i32> %x) nounwind {
; X32-SSE2-LABEL: rotr_v4i32_const_shift:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
; X32-SSE2-NEXT: psrld $3, %xmm1
; X32-SSE2-NEXT: pslld $29, %xmm0
; X32-SSE2-NEXT: por %xmm1, %xmm0
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: rotr_v4i32_const_shift:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vpsrld $3, %xmm0, %xmm1
; X64-AVX2-NEXT: vpslld $29, %xmm0, %xmm0
; X64-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: retq
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
  ret <4 x i32> %f
}

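; Rotating by the full bitwidth is a no-op: the funnel-shift amount is taken
; modulo the bitwidth, so these fold to a plain move of the input (or to nothing
; at all for the vector cases).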
define i32 @rotl_i32_shift_by_bitwidth(i32 %x) nounwind {
; X32-SSE2-LABEL: rotl_i32_shift_by_bitwidth:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: rotl_i32_shift_by_bitwidth:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: movl %edi, %eax
; X64-AVX2-NEXT: retq
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 32)
  ret i32 %f
}

define i32 @rotr_i32_shift_by_bitwidth(i32 %x) nounwind {
; X32-SSE2-LABEL: rotr_i32_shift_by_bitwidth:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: rotr_i32_shift_by_bitwidth:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: movl %edi, %eax
; X64-AVX2-NEXT: retq
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 32)
  ret i32 %f
}

define <4 x i32> @rotl_v4i32_shift_by_bitwidth(<4 x i32> %x) nounwind {
; ANY-LABEL: rotl_v4i32_shift_by_bitwidth:
; ANY: # %bb.0:
; ANY-NEXT: ret{{[l|q]}}
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}

define <4 x i32> @rotr_v4i32_shift_by_bitwidth(<4 x i32> %x) nounwind {
; ANY-LABEL: rotr_v4i32_shift_by_bitwidth:
; ANY: # %bb.0:
; ANY-NEXT: ret{{[l|q]}}
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}

; Non power-of-2 types can't use the negated shift amount to avoid a select.
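; A sketch of the reasoning: with a power-of-two width N the rotate can use the
; amounts (z & (N - 1)) and (-z & (N - 1)), both of which stay in range even when
; z % N == 0. For i7, the negated amount would have to be 7 - (z % 7), which is an
; out-of-range shift when z % 7 == 0, so a select (or similar guard) is needed.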

declare i7 @llvm.fshl.i7(i7, i7, i7)
declare i7 @llvm.fshr.i7(i7, i7, i7)

; extract(concat(0b1110000, 0b1110000) << 9) = 0b1000011
; Try an oversized shift to test modulo functionality.
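; Worked out: 9 urem 7 = 2, and ((0b1110000 << 2) | (0b1110000 >> 5)) masked to
; 7 bits is 0b1000011 = 67, matching the constant materialized below.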

define i7 @fshl_i7() {
; ANY-LABEL: fshl_i7:
; ANY: # %bb.0:
; ANY-NEXT: movb $67, %al
; ANY-NEXT: ret{{[l|q]}}
  %f = call i7 @llvm.fshl.i7(i7 112, i7 112, i7 9)
  ret i7 %f
}

; extract(concat(0b1110001, 0b1110001) >> 16) = 0b0111100
; Try an oversized shift to test modulo functionality.
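; Worked out: 16 urem 7 = 2, and ((0b1110001 >> 2) | (0b1110001 << 5)) masked to
; 7 bits is 0b0111100 = 60, matching the constant materialized below.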

define i7 @fshr_i7() {
; ANY-LABEL: fshr_i7:
; ANY: # %bb.0:
; ANY-NEXT: movb $60, %al
; ANY-NEXT: ret{{[l|q]}}
  %f = call i7 @llvm.fshr.i7(i7 113, i7 113, i7 16)
  ret i7 %f
}