; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-- -mattr=sse2 | FileCheck %s --check-prefixes=ANY,X32-SSE2
; RUN: llc < %s -mtriple=x86_64-- -mattr=avx2 | FileCheck %s --check-prefixes=ANY,X64-AVX2

declare i8 @llvm.fshl.i8(i8, i8, i8)
declare i16 @llvm.fshl.i16(i16, i16, i16)
declare i32 @llvm.fshl.i32(i32, i32, i32)
declare i64 @llvm.fshl.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

declare i8 @llvm.fshr.i8(i8, i8, i8)
declare i16 @llvm.fshr.i16(i16, i16, i16)
declare i32 @llvm.fshr.i32(i32, i32, i32)
declare i64 @llvm.fshr.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

; When first 2 operands match, it's a rotate.
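;
; For reference (paraphrasing the LangRef): fshl(x, y, z) concatenates x as the
; high bits with y as the low bits, shifts the double-wide value left by z
; modulo the bit width, and returns the high half; fshr shifts right and
; returns the low half. When both value operands are the same, this reduces to
; a rotate, and a rough branch-free expansion for a power-of-2 width BW is:
;   rotl(x, z) = (x << (z & (BW-1))) | (x >> (-z & (BW-1)))
;   rotr(x, z) = (x >> (z & (BW-1))) | (x << (-z & (BW-1)))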

define i8 @rotl_i8_const_shift(i8 %x) nounwind {
; X32-SSE2-LABEL: rotl_i8_const_shift:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-SSE2-NEXT: rolb $3, %al
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: rotl_i8_const_shift:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: movl %edi, %eax
; X64-AVX2-NEXT: rolb $3, %al
; X64-AVX2-NEXT: # kill: def $al killed $al killed $eax
; X64-AVX2-NEXT: retq
  %f = call i8 @llvm.fshl.i8(i8 %x, i8 %x, i8 3)
  ret i8 %f
}

define i64 @rotl_i64_const_shift(i64 %x) nounwind {
; X32-SSE2-LABEL: rotl_i64_const_shift:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-SSE2-NEXT: movl %ecx, %eax
; X32-SSE2-NEXT: shldl $3, %edx, %eax
; X32-SSE2-NEXT: shldl $3, %ecx, %edx
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: rotl_i64_const_shift:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: movq %rdi, %rax
; X64-AVX2-NEXT: rolq $3, %rax
; X64-AVX2-NEXT: retq
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 3)
  ret i64 %f
}

define i16 @rotl_i16(i16 %x, i16 %z) nounwind {
; X32-SSE2-LABEL: rotl_i16:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X32-SSE2-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT: rolw %cl, %ax
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: rotl_i16:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: movl %esi, %ecx
; X64-AVX2-NEXT: movl %edi, %eax
; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT: rolw %cl, %ax
; X64-AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; X64-AVX2-NEXT: retq
  %f = call i16 @llvm.fshl.i16(i16 %x, i16 %x, i16 %z)
  ret i16 %f
}

define i32 @rotl_i32(i32 %x, i32 %z) nounwind {
; X32-SSE2-LABEL: rotl_i32:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT: roll %cl, %eax
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: rotl_i32:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: movl %esi, %ecx
; X64-AVX2-NEXT: movl %edi, %eax
; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT: roll %cl, %eax
; X64-AVX2-NEXT: retq
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %z)
  ret i32 %f
}

; Vector rotate.
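; (Note: SSE2 has no per-element variable shift, so the X32-SSE2 output below
; does the four right shifts one lane at a time and synthesizes the variable
; left shift as a multiply by a power of two via the pslld $23 / cvttps2dq /
; pmuludq trick; AVX2 can use vpsllvd/vpsrlvd directly.)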

define <4 x i32> @rotl_v4i32(<4 x i32> %x, <4 x i32> %z) nounwind {
; X32-SSE2-LABEL: rotl_v4i32:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: pxor %xmm3, %xmm3
; X32-SSE2-NEXT: psubd %xmm1, %xmm3
; X32-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [31,31,31,31]
; X32-SSE2-NEXT: pand %xmm4, %xmm3
; X32-SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm3[2,3,3,3,4,5,6,7]
; X32-SSE2-NEXT: movdqa %xmm0, %xmm5
; X32-SSE2-NEXT: psrld %xmm2, %xmm5
; X32-SSE2-NEXT: pshuflw {{.*#+}} xmm6 = xmm3[0,1,1,1,4,5,6,7]
; X32-SSE2-NEXT: movdqa %xmm0, %xmm2
; X32-SSE2-NEXT: psrld %xmm6, %xmm2
; X32-SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm5[0]
; X32-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
; X32-SSE2-NEXT: pshuflw {{.*#+}} xmm5 = xmm3[2,3,3,3,4,5,6,7]
; X32-SSE2-NEXT: movdqa %xmm0, %xmm6
; X32-SSE2-NEXT: psrld %xmm5, %xmm6
; X32-SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,1,1,1,4,5,6,7]
; X32-SSE2-NEXT: movdqa %xmm0, %xmm5
; X32-SSE2-NEXT: psrld %xmm3, %xmm5
; X32-SSE2-NEXT: punpckhqdq {{.*#+}} xmm5 = xmm5[1],xmm6[1]
; X32-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,3],xmm5[0,3]
; X32-SSE2-NEXT: pand %xmm4, %xmm1
; X32-SSE2-NEXT: pslld $23, %xmm1
; X32-SSE2-NEXT: paddd {{\.LCPI.*}}, %xmm1
; X32-SSE2-NEXT: cvttps2dq %xmm1, %xmm1
; X32-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; X32-SSE2-NEXT: pmuludq %xmm1, %xmm0
; X32-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X32-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; X32-SSE2-NEXT: pmuludq %xmm3, %xmm1
; X32-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X32-SSE2-NEXT: orps %xmm0, %xmm2
; X32-SSE2-NEXT: movaps %xmm2, %xmm0
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: rotl_v4i32:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
; X64-AVX2-NEXT: vpand %xmm2, %xmm1, %xmm3
; X64-AVX2-NEXT: vpsllvd %xmm3, %xmm0, %xmm3
; X64-AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
; X64-AVX2-NEXT: vpsubd %xmm1, %xmm4, %xmm1
; X64-AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; X64-AVX2-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpor %xmm0, %xmm3, %xmm0
; X64-AVX2-NEXT: retq
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z)
  ret <4 x i32> %f
}

; Vector rotate by constant splat amount.
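; (A splat rotate-left by 3 on i32 lanes is just (x << 3) | (x >> 29), so both
; targets emit a shift pair plus an OR.)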

define <4 x i32> @rotl_v4i32_const_shift(<4 x i32> %x) nounwind {
; X32-SSE2-LABEL: rotl_v4i32_const_shift:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
; X32-SSE2-NEXT: psrld $29, %xmm1
; X32-SSE2-NEXT: pslld $3, %xmm0
; X32-SSE2-NEXT: por %xmm1, %xmm0
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: rotl_v4i32_const_shift:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vpsrld $29, %xmm0, %xmm1
; X64-AVX2-NEXT: vpslld $3, %xmm0, %xmm0
; X64-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: retq
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
  ret <4 x i32> %f
}

; Repeat everything for funnel shift right.

define i8 @rotr_i8_const_shift(i8 %x) nounwind {
; X32-SSE2-LABEL: rotr_i8_const_shift:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-SSE2-NEXT: rorb $3, %al
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: rotr_i8_const_shift:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: movl %edi, %eax
; X64-AVX2-NEXT: rorb $3, %al
; X64-AVX2-NEXT: # kill: def $al killed $al killed $eax
; X64-AVX2-NEXT: retq
  %f = call i8 @llvm.fshr.i8(i8 %x, i8 %x, i8 3)
  ret i8 %f
}

define i32 @rotr_i32_const_shift(i32 %x) nounwind {
; X32-SSE2-LABEL: rotr_i32_const_shift:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT: rorl $3, %eax
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: rotr_i32_const_shift:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: movl %edi, %eax
; X64-AVX2-NEXT: rorl $3, %eax
; X64-AVX2-NEXT: retq
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 3)
  ret i32 %f
}

; When first 2 operands match, it's a rotate (by variable amount).
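; (There is no 64-bit rotate instruction on the 32-bit target, so the i64 case
; below is expanded into shrdl/shldl pairs with cmovs selecting between the
; halves when the amount crosses 32; x86-64 can use rorq directly.)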

define i16 @rotr_i16(i16 %x, i16 %z) nounwind {
; X32-SSE2-LABEL: rotr_i16:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X32-SSE2-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT: rorw %cl, %ax
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: rotr_i16:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: movl %esi, %ecx
; X64-AVX2-NEXT: movl %edi, %eax
; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT: rorw %cl, %ax
; X64-AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; X64-AVX2-NEXT: retq
  %f = call i16 @llvm.fshr.i16(i16 %x, i16 %x, i16 %z)
  ret i16 %f
}

define i64 @rotr_i64(i64 %x, i64 %z) nounwind {
; X32-SSE2-LABEL: rotr_i64:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: pushl %ebp
; X32-SSE2-NEXT: pushl %ebx
; X32-SSE2-NEXT: pushl %edi
; X32-SSE2-NEXT: pushl %esi
; X32-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-SSE2-NEXT: movl %edx, %edi
; X32-SSE2-NEXT: shrl %cl, %edi
; X32-SSE2-NEXT: movl %esi, %ebx
; X32-SSE2-NEXT: shrdl %cl, %edx, %ebx
; X32-SSE2-NEXT: xorl %ebp, %ebp
; X32-SSE2-NEXT: testb $32, %cl
; X32-SSE2-NEXT: cmovnel %edi, %ebx
; X32-SSE2-NEXT: cmovnel %ebp, %edi
; X32-SSE2-NEXT: negb %cl
; X32-SSE2-NEXT: movl %esi, %eax
; X32-SSE2-NEXT: shll %cl, %eax
; X32-SSE2-NEXT: shldl %cl, %esi, %edx
; X32-SSE2-NEXT: testb $32, %cl
; X32-SSE2-NEXT: cmovnel %eax, %edx
; X32-SSE2-NEXT: cmovnel %ebp, %eax
; X32-SSE2-NEXT: orl %ebx, %eax
; X32-SSE2-NEXT: orl %edi, %edx
; X32-SSE2-NEXT: popl %esi
; X32-SSE2-NEXT: popl %edi
; X32-SSE2-NEXT: popl %ebx
; X32-SSE2-NEXT: popl %ebp
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: rotr_i64:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: movq %rsi, %rcx
; X64-AVX2-NEXT: movq %rdi, %rax
; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-AVX2-NEXT: rorq %cl, %rax
; X64-AVX2-NEXT: retq
  %f = call i64 @llvm.fshr.i64(i64 %x, i64 %x, i64 %z)
  ret i64 %f
}

; Vector rotate.

define <4 x i32> @rotr_v4i32(<4 x i32> %x, <4 x i32> %z) nounwind {
; X32-SSE2-LABEL: rotr_v4i32:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [31,31,31,31]
; X32-SSE2-NEXT: pxor %xmm3, %xmm3
; X32-SSE2-NEXT: psubd %xmm1, %xmm3
; X32-SSE2-NEXT: movdqa %xmm1, %xmm4
; X32-SSE2-NEXT: pand %xmm2, %xmm4
; X32-SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm4[2,3,3,3,4,5,6,7]
; X32-SSE2-NEXT: movdqa %xmm0, %xmm5
; X32-SSE2-NEXT: psrld %xmm1, %xmm5
; X32-SSE2-NEXT: pshuflw {{.*#+}} xmm6 = xmm4[0,1,1,1,4,5,6,7]
; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
; X32-SSE2-NEXT: psrld %xmm6, %xmm1
; X32-SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm5[0]
; X32-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[2,3,0,1]
; X32-SSE2-NEXT: pshuflw {{.*#+}} xmm5 = xmm4[2,3,3,3,4,5,6,7]
; X32-SSE2-NEXT: movdqa %xmm0, %xmm6
; X32-SSE2-NEXT: psrld %xmm5, %xmm6
; X32-SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[0,1,1,1,4,5,6,7]
; X32-SSE2-NEXT: movdqa %xmm0, %xmm5
; X32-SSE2-NEXT: psrld %xmm4, %xmm5
; X32-SSE2-NEXT: punpckhqdq {{.*#+}} xmm5 = xmm5[1],xmm6[1]
; X32-SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],xmm5[0,3]
; X32-SSE2-NEXT: pand %xmm2, %xmm3
; X32-SSE2-NEXT: pslld $23, %xmm3
; X32-SSE2-NEXT: paddd {{\.LCPI.*}}, %xmm3
; X32-SSE2-NEXT: cvttps2dq %xmm3, %xmm2
; X32-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; X32-SSE2-NEXT: pmuludq %xmm2, %xmm0
; X32-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X32-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; X32-SSE2-NEXT: pmuludq %xmm3, %xmm2
; X32-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X32-SSE2-NEXT: orps %xmm0, %xmm1
; X32-SSE2-NEXT: movaps %xmm1, %xmm0
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: rotr_v4i32:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
; X64-AVX2-NEXT: vpand %xmm2, %xmm1, %xmm3
; X64-AVX2-NEXT: vpsrlvd %xmm3, %xmm0, %xmm3
; X64-AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
; X64-AVX2-NEXT: vpsubd %xmm1, %xmm4, %xmm1
; X64-AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; X64-AVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpor %xmm3, %xmm0, %xmm0
; X64-AVX2-NEXT: retq
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z)
  ret <4 x i32> %f
}

; Vector rotate by constant splat amount.

define <4 x i32> @rotr_v4i32_const_shift(<4 x i32> %x) nounwind {
; X32-SSE2-LABEL: rotr_v4i32_const_shift:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
; X32-SSE2-NEXT: psrld $3, %xmm1
; X32-SSE2-NEXT: pslld $29, %xmm0
; X32-SSE2-NEXT: por %xmm1, %xmm0
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: rotr_v4i32_const_shift:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vpsrld $3, %xmm0, %xmm1
; X64-AVX2-NEXT: vpslld $29, %xmm0, %xmm0
; X64-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: retq
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
  ret <4 x i32> %f
}

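; Shifting by exactly the bit width is a rotate by zero: the shift amount is
; taken modulo the bit width, so these should fold to returning %x unchanged.
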
define i32 @rotl_i32_shift_by_bitwidth(i32 %x) nounwind {
; X32-SSE2-LABEL: rotl_i32_shift_by_bitwidth:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: rotl_i32_shift_by_bitwidth:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: movl %edi, %eax
; X64-AVX2-NEXT: retq
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 32)
  ret i32 %f
}

define i32 @rotr_i32_shift_by_bitwidth(i32 %x) nounwind {
; X32-SSE2-LABEL: rotr_i32_shift_by_bitwidth:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: rotr_i32_shift_by_bitwidth:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: movl %edi, %eax
; X64-AVX2-NEXT: retq
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 32)
  ret i32 %f
}

define <4 x i32> @rotl_v4i32_shift_by_bitwidth(<4 x i32> %x) nounwind {
; ANY-LABEL: rotl_v4i32_shift_by_bitwidth:
; ANY: # %bb.0:
; ANY-NEXT: ret{{[l|q]}}
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}

define <4 x i32> @rotr_v4i32_shift_by_bitwidth(<4 x i32> %x) nounwind {
; ANY-LABEL: rotr_v4i32_shift_by_bitwidth:
; ANY: # %bb.0:
; ANY-NEXT: ret{{[l|q]}}
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}

; Non power-of-2 types can't use the negated shift amount to avoid a select.
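; (For a power-of-2 width BW, the negated amount can be formed as -z & (BW-1),
; which is already reduced modulo BW; for a width like i7 there is no such
; mask, so a general expansion would need an explicit modulo/select. The tests
; below use constant operands, so everything folds away regardless.)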

declare i7 @llvm.fshl.i7(i7, i7, i7)
declare i7 @llvm.fshr.i7(i7, i7, i7)

; extract(concat(0b1110000, 0b1110000) << 9) = 0b1000011
; Try an oversized shift to test modulo functionality.
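; Worked out as a sanity check: 9 mod 7 = 2, so the 14-bit value
; 0b1110000_1110000 shifted left by 2 is 0b1000011_1000000, and the high 7
; bits give 0b1000011 = 67.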

define i7 @fshl_i7() {
; ANY-LABEL: fshl_i7:
; ANY: # %bb.0:
; ANY-NEXT: movb $67, %al
; ANY-NEXT: ret{{[l|q]}}
  %f = call i7 @llvm.fshl.i7(i7 112, i7 112, i7 9)
  ret i7 %f
}

; extract(concat(0b1110001, 0b1110001) >> 16) = 0b0111100
; Try an oversized shift to test modulo functionality.
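; Worked out as a sanity check: 16 mod 7 = 2, so the 14-bit value
; 0b1110001_1110001 shifted right by 2 is 0b0011100_0111100, and the low 7
; bits give 0b0111100 = 60.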

define i7 @fshr_i7() {
; ANY-LABEL: fshr_i7:
; ANY: # %bb.0:
; ANY-NEXT: movb $60, %al
; ANY-NEXT: ret{{[l|q]}}
  %f = call i7 @llvm.fshr.i7(i7 113, i7 113, i7 16)
  ret i7 %f
}