; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-- -mattr=sse2 | FileCheck %s --check-prefixes=ANY,X32-SSE2
; RUN: llc < %s -mtriple=x86_64-- -mattr=avx2 | FileCheck %s --check-prefixes=ANY,X64-AVX2

declare i8 @llvm.fshl.i8(i8, i8, i8)
declare i16 @llvm.fshl.i16(i16, i16, i16)
declare i32 @llvm.fshl.i32(i32, i32, i32)
declare i64 @llvm.fshl.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

declare i8 @llvm.fshr.i8(i8, i8, i8)
declare i16 @llvm.fshr.i16(i16, i16, i16)
declare i32 @llvm.fshr.i32(i32, i32, i32)
declare i64 @llvm.fshr.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

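; Funnel-shift semantics: fshl(a, b, z) concatenates a (high half) with b (low half),
; shifts the double-width value left by z modulo the bit width, and returns the high
; half; fshr shifts the concatenation right by z modulo the bit width and returns the
; low half.
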
; When first 2 operands match, it's a rotate.

define i8 @rotl_i8_const_shift(i8 %x) nounwind {
; X32-SSE2-LABEL: rotl_i8_const_shift:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-SSE2-NEXT: rolb $3, %al
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: rotl_i8_const_shift:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: rolb $3, %dil
; X64-AVX2-NEXT: movl %edi, %eax
; X64-AVX2-NEXT: retq
  %f = call i8 @llvm.fshl.i8(i8 %x, i8 %x, i8 3)
  ret i8 %f
}

define i64 @rotl_i64_const_shift(i64 %x) nounwind {
; X32-SSE2-LABEL: rotl_i64_const_shift:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-SSE2-NEXT: movl %ecx, %eax
; X32-SSE2-NEXT: shldl $3, %edx, %eax
; X32-SSE2-NEXT: shldl $3, %ecx, %edx
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: rotl_i64_const_shift:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: rolq $3, %rdi
; X64-AVX2-NEXT: movq %rdi, %rax
; X64-AVX2-NEXT: retq
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 3)
  ret i64 %f
}

define i16 @rotl_i16(i16 %x, i16 %z) nounwind {
; X32-SSE2-LABEL: rotl_i16:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X32-SSE2-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT: rolw %cl, %ax
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: rotl_i16:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: movl %esi, %ecx
; X64-AVX2-NEXT: rolw %cl, %di
; X64-AVX2-NEXT: movl %edi, %eax
; X64-AVX2-NEXT: retq
  %f = call i16 @llvm.fshl.i16(i16 %x, i16 %x, i16 %z)
  ret i16 %f
}

define i32 @rotl_i32(i32 %x, i32 %z) nounwind {
; X32-SSE2-LABEL: rotl_i32:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT: roll %cl, %eax
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: rotl_i32:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: movl %esi, %ecx
; X64-AVX2-NEXT: roll %cl, %edi
; X64-AVX2-NEXT: movl %edi, %eax
; X64-AVX2-NEXT: retq
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %z)
  ret i32 %f
}

; Vector rotate.
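; SSE2 has no per-element variable shifts, so the 32-bit lowering below extracts each
; amount and issues four psrld ops for the right-shift half, then forms the left-shift
; half as a multiply: pslld $23 plus paddd of the float bias builds 2^amt in each lane,
; cvttps2dq converts that back to integers, and pmuludq applies it.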

define <4 x i32> @rotl_v4i32(<4 x i32> %x, <4 x i32> %z) nounwind {
; X32-SSE2-LABEL: rotl_v4i32:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: pxor %xmm3, %xmm3
; X32-SSE2-NEXT: psubd %xmm1, %xmm3
; X32-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [31,31,31,31]
; X32-SSE2-NEXT: pand %xmm4, %xmm3
; X32-SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm3[2,3,3,3,4,5,6,7]
; X32-SSE2-NEXT: movdqa %xmm0, %xmm5
; X32-SSE2-NEXT: psrld %xmm2, %xmm5
; X32-SSE2-NEXT: pshuflw {{.*#+}} xmm6 = xmm3[0,1,1,1,4,5,6,7]
; X32-SSE2-NEXT: movdqa %xmm0, %xmm2
; X32-SSE2-NEXT: psrld %xmm6, %xmm2
; X32-SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm5[0]
; X32-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
; X32-SSE2-NEXT: pshuflw {{.*#+}} xmm5 = xmm3[2,3,3,3,4,5,6,7]
; X32-SSE2-NEXT: movdqa %xmm0, %xmm6
; X32-SSE2-NEXT: psrld %xmm5, %xmm6
; X32-SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,1,1,1,4,5,6,7]
; X32-SSE2-NEXT: movdqa %xmm0, %xmm5
; X32-SSE2-NEXT: psrld %xmm3, %xmm5
; X32-SSE2-NEXT: punpckhqdq {{.*#+}} xmm5 = xmm5[1],xmm6[1]
; X32-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,3],xmm5[0,3]
; X32-SSE2-NEXT: pand %xmm4, %xmm1
; X32-SSE2-NEXT: pslld $23, %xmm1
; X32-SSE2-NEXT: paddd {{\.LCPI.*}}, %xmm1
; X32-SSE2-NEXT: cvttps2dq %xmm1, %xmm1
; X32-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; X32-SSE2-NEXT: pmuludq %xmm1, %xmm0
; X32-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X32-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; X32-SSE2-NEXT: pmuludq %xmm3, %xmm1
; X32-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X32-SSE2-NEXT: orps %xmm0, %xmm2
; X32-SSE2-NEXT: movaps %xmm2, %xmm0
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: rotl_v4i32:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
; X64-AVX2-NEXT: vpand %xmm2, %xmm1, %xmm3
; X64-AVX2-NEXT: vpsllvd %xmm3, %xmm0, %xmm3
; X64-AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
; X64-AVX2-NEXT: vpsubd %xmm1, %xmm4, %xmm1
; X64-AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; X64-AVX2-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpor %xmm0, %xmm3, %xmm0
; X64-AVX2-NEXT: retq
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z)
  ret <4 x i32> %f
}

; Vector rotate by constant splat amount.

define <4 x i32> @rotl_v4i32_const_shift(<4 x i32> %x) nounwind {
; X32-SSE2-LABEL: rotl_v4i32_const_shift:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
; X32-SSE2-NEXT: psrld $29, %xmm1
; X32-SSE2-NEXT: pslld $3, %xmm0
; X32-SSE2-NEXT: por %xmm1, %xmm0
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: rotl_v4i32_const_shift:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vpsrld $29, %xmm0, %xmm1
; X64-AVX2-NEXT: vpslld $3, %xmm0, %xmm0
; X64-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: retq
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
  ret <4 x i32> %f
}

; Repeat everything for funnel shift right.

define i8 @rotr_i8_const_shift(i8 %x) nounwind {
; X32-SSE2-LABEL: rotr_i8_const_shift:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-SSE2-NEXT: rorb $3, %al
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: rotr_i8_const_shift:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: rorb $3, %dil
; X64-AVX2-NEXT: movl %edi, %eax
; X64-AVX2-NEXT: retq
  %f = call i8 @llvm.fshr.i8(i8 %x, i8 %x, i8 3)
  ret i8 %f
}

define i32 @rotr_i32_const_shift(i32 %x) nounwind {
; X32-SSE2-LABEL: rotr_i32_const_shift:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT: rorl $3, %eax
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: rotr_i32_const_shift:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: rorl $3, %edi
; X64-AVX2-NEXT: movl %edi, %eax
; X64-AVX2-NEXT: retq
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 3)
  ret i32 %f
}

; When first 2 operands match, it's a rotate (by variable amount).

define i16 @rotr_i16(i16 %x, i16 %z) nounwind {
; X32-SSE2-LABEL: rotr_i16:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X32-SSE2-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT: rorw %cl, %ax
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: rotr_i16:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: movl %esi, %ecx
; X64-AVX2-NEXT: rorw %cl, %di
; X64-AVX2-NEXT: movl %edi, %eax
; X64-AVX2-NEXT: retq
  %f = call i16 @llvm.fshr.i16(i16 %x, i16 %x, i16 %z)
  ret i16 %f
}

define i64 @rotr_i64(i64 %x, i64 %z) nounwind {
; X32-SSE2-LABEL: rotr_i64:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: pushl %ebp
; X32-SSE2-NEXT: pushl %ebx
; X32-SSE2-NEXT: pushl %edi
; X32-SSE2-NEXT: pushl %esi
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X32-SSE2-NEXT: movl %ebx, %ecx
; X32-SSE2-NEXT: andl $63, %ecx
; X32-SSE2-NEXT: movl %edx, %edi
; X32-SSE2-NEXT: shrl %cl, %edi
; X32-SSE2-NEXT: movl %esi, %ebp
; X32-SSE2-NEXT: shrdl %cl, %edx, %ebp
; X32-SSE2-NEXT: xorl %eax, %eax
; X32-SSE2-NEXT: testb $32, %cl
; X32-SSE2-NEXT: cmovnel %edi, %ebp
; X32-SSE2-NEXT: cmovnel %eax, %edi
; X32-SSE2-NEXT: negl %ebx
; X32-SSE2-NEXT: andl $63, %ebx
; X32-SSE2-NEXT: movl %esi, %eax
; X32-SSE2-NEXT: movl %ebx, %ecx
; X32-SSE2-NEXT: shll %cl, %eax
; X32-SSE2-NEXT: shldl %cl, %esi, %edx
; X32-SSE2-NEXT: testb $32, %bl
; X32-SSE2-NEXT: cmovnel %eax, %edx
; X32-SSE2-NEXT: movl $0, %ecx
; X32-SSE2-NEXT: cmovnel %ecx, %eax
; X32-SSE2-NEXT: orl %ebp, %eax
; X32-SSE2-NEXT: orl %edi, %edx
; X32-SSE2-NEXT: popl %esi
; X32-SSE2-NEXT: popl %edi
; X32-SSE2-NEXT: popl %ebx
; X32-SSE2-NEXT: popl %ebp
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: rotr_i64:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: movl %esi, %ecx
; X64-AVX2-NEXT: rorq %cl, %rdi
; X64-AVX2-NEXT: movq %rdi, %rax
; X64-AVX2-NEXT: retq
  %f = call i64 @llvm.fshr.i64(i64 %x, i64 %x, i64 %z)
  ret i64 %f
}

; Vector rotate.

define <4 x i32> @rotr_v4i32(<4 x i32> %x, <4 x i32> %z) nounwind {
; X32-SSE2-LABEL: rotr_v4i32:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [31,31,31,31]
; X32-SSE2-NEXT: pxor %xmm3, %xmm3
; X32-SSE2-NEXT: psubd %xmm1, %xmm3
; X32-SSE2-NEXT: movdqa %xmm1, %xmm4
; X32-SSE2-NEXT: pand %xmm2, %xmm4
; X32-SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm4[2,3,3,3,4,5,6,7]
; X32-SSE2-NEXT: movdqa %xmm0, %xmm5
; X32-SSE2-NEXT: psrld %xmm1, %xmm5
; X32-SSE2-NEXT: pshuflw {{.*#+}} xmm6 = xmm4[0,1,1,1,4,5,6,7]
; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
; X32-SSE2-NEXT: psrld %xmm6, %xmm1
; X32-SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm5[0]
; X32-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[2,3,0,1]
; X32-SSE2-NEXT: pshuflw {{.*#+}} xmm5 = xmm4[2,3,3,3,4,5,6,7]
; X32-SSE2-NEXT: movdqa %xmm0, %xmm6
; X32-SSE2-NEXT: psrld %xmm5, %xmm6
; X32-SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[0,1,1,1,4,5,6,7]
; X32-SSE2-NEXT: movdqa %xmm0, %xmm5
; X32-SSE2-NEXT: psrld %xmm4, %xmm5
; X32-SSE2-NEXT: punpckhqdq {{.*#+}} xmm5 = xmm5[1],xmm6[1]
; X32-SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],xmm5[0,3]
; X32-SSE2-NEXT: pand %xmm2, %xmm3
; X32-SSE2-NEXT: pslld $23, %xmm3
; X32-SSE2-NEXT: paddd {{\.LCPI.*}}, %xmm3
; X32-SSE2-NEXT: cvttps2dq %xmm3, %xmm2
; X32-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; X32-SSE2-NEXT: pmuludq %xmm2, %xmm0
; X32-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X32-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; X32-SSE2-NEXT: pmuludq %xmm3, %xmm2
; X32-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X32-SSE2-NEXT: orps %xmm0, %xmm1
; X32-SSE2-NEXT: movaps %xmm1, %xmm0
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: rotr_v4i32:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
; X64-AVX2-NEXT: vpand %xmm2, %xmm1, %xmm3
; X64-AVX2-NEXT: vpsrlvd %xmm3, %xmm0, %xmm3
; X64-AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
; X64-AVX2-NEXT: vpsubd %xmm1, %xmm4, %xmm1
; X64-AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; X64-AVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpor %xmm3, %xmm0, %xmm0
; X64-AVX2-NEXT: retq
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z)
  ret <4 x i32> %f
}

; Vector rotate by constant splat amount.

define <4 x i32> @rotr_v4i32_const_shift(<4 x i32> %x) nounwind {
; X32-SSE2-LABEL: rotr_v4i32_const_shift:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
; X32-SSE2-NEXT: psrld $3, %xmm1
; X32-SSE2-NEXT: pslld $29, %xmm0
; X32-SSE2-NEXT: por %xmm1, %xmm0
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: rotr_v4i32_const_shift:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vpsrld $3, %xmm0, %xmm1
; X64-AVX2-NEXT: vpslld $29, %xmm0, %xmm0
; X64-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: retq
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
  ret <4 x i32> %f
}

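; A funnel shift by the full bit width is a shift by zero (the amount is taken modulo
; the bit width), so these calls fold to the unshifted input.
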
define i32 @rotl_i32_shift_by_bitwidth(i32 %x) nounwind {
; X32-SSE2-LABEL: rotl_i32_shift_by_bitwidth:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: rotl_i32_shift_by_bitwidth:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: movl %edi, %eax
; X64-AVX2-NEXT: retq
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 32)
  ret i32 %f
}

define i32 @rotr_i32_shift_by_bitwidth(i32 %x) nounwind {
; X32-SSE2-LABEL: rotr_i32_shift_by_bitwidth:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: rotr_i32_shift_by_bitwidth:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: movl %edi, %eax
; X64-AVX2-NEXT: retq
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 32)
  ret i32 %f
}

define <4 x i32> @rotl_v4i32_shift_by_bitwidth(<4 x i32> %x) nounwind {
; ANY-LABEL: rotl_v4i32_shift_by_bitwidth:
; ANY: # %bb.0:
; ANY-NEXT: ret{{[l|q]}}
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}

define <4 x i32> @rotr_v4i32_shift_by_bitwidth(<4 x i32> %x) nounwind {
; ANY-LABEL: rotr_v4i32_shift_by_bitwidth:
; ANY: # %bb.0:
; ANY-NEXT: ret{{[l|q]}}
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}

; FIXME: Non power-of-2 types can't use the negated shift amount to avoid a select.
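; (For a power-of-2 width, the negated amount can simply be masked to bitwidth-1 and
; the zero-amount case still comes out right; that masking trick is not available for
; a width like i7.)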

declare i7 @llvm.fshl.i7(i7, i7, i7)
declare i7 @llvm.fshr.i7(i7, i7, i7)

; extract(concat(0b1110000, 0b1110000) << 9) = 0b1000011
; Try an oversized shift to test modulo functionality.
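; 9 mod 7 = 2, so this is rotl(0b1110000, 2) = ((112 << 2) | (112 >> 5)) & 127 = 64 | 3 = 67.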

define i7 @fshl_i7() {
; ANY-LABEL: fshl_i7:
; ANY: # %bb.0:
; ANY-NEXT: movb $67, %al
; ANY-NEXT: ret{{[l|q]}}
  %f = call i7 @llvm.fshl.i7(i7 112, i7 112, i7 9)
  ret i7 %f
}

; extract(concat(0b1110001, 0b1110001) >> 16) = 0b0111100
; Try an oversized shift to test modulo functionality.
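; 16 mod 7 = 2, so this is rotr(0b1110001, 2) = ((113 >> 2) | (113 << 5)) & 127 = 28 | 32 = 60.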

define i7 @fshr_i7() {
; ANY-LABEL: fshr_i7:
; ANY: # %bb.0:
; ANY-NEXT: movb $60, %al
; ANY-NEXT: ret{{[l|q]}}
  %f = call i7 @llvm.fshr.i7(i7 113, i7 113, i7 16)
  ret i7 %f
}