; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefix=ALL --check-prefix=XOP --check-prefix=XOPAVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=XOP --check-prefix=XOPAVX2

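; Scalar bitreverse has no dedicated x86 instruction, so the SSE and AVX runs
; below expand each llvm.bitreverse call into a sequence of single-bit
; shift/and/or steps, while the XOP runs lower it to a single VPPERM with a
; constant-pool selector.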
define i8 @test_bitreverse_i8(i8 %a) nounwind {
; SSE-LABEL: test_bitreverse_i8:
; SSE: # BB#0:
; SSE-NEXT: movb %dil, %al
; SSE-NEXT: shlb $7, %al
; SSE-NEXT: movb %dil, %cl
; SSE-NEXT: shlb $5, %cl
; SSE-NEXT: andb $64, %cl
; SSE-NEXT: movb %dil, %dl
; SSE-NEXT: shlb $3, %dl
; SSE-NEXT: andb $32, %dl
; SSE-NEXT: orb %cl, %dl
; SSE-NEXT: movb %dil, %cl
; SSE-NEXT: addb %cl, %cl
; SSE-NEXT: andb $16, %cl
; SSE-NEXT: orb %dl, %cl
; SSE-NEXT: movb %dil, %dl
; SSE-NEXT: shrb %dl
; SSE-NEXT: andb $8, %dl
; SSE-NEXT: orb %cl, %dl
; SSE-NEXT: movb %dil, %cl
; SSE-NEXT: shrb $3, %cl
; SSE-NEXT: andb $4, %cl
; SSE-NEXT: orb %dl, %cl
; SSE-NEXT: movb %dil, %dl
; SSE-NEXT: shrb $5, %dl
; SSE-NEXT: andb $2, %dl
; SSE-NEXT: orb %cl, %dl
; SSE-NEXT: shrb $7, %dil
; SSE-NEXT: orb %dl, %dil
; SSE-NEXT: orb %al, %dil
; SSE-NEXT: movb %dil, %al
; SSE-NEXT: retq
;
; AVX-LABEL: test_bitreverse_i8:
; AVX: # BB#0:
; AVX-NEXT: movb %dil, %al
; AVX-NEXT: shlb $7, %al
; AVX-NEXT: movb %dil, %cl
; AVX-NEXT: shlb $5, %cl
; AVX-NEXT: andb $64, %cl
; AVX-NEXT: movb %dil, %dl
; AVX-NEXT: shlb $3, %dl
; AVX-NEXT: andb $32, %dl
; AVX-NEXT: orb %cl, %dl
; AVX-NEXT: movb %dil, %cl
; AVX-NEXT: addb %cl, %cl
; AVX-NEXT: andb $16, %cl
; AVX-NEXT: orb %dl, %cl
; AVX-NEXT: movb %dil, %dl
; AVX-NEXT: shrb %dl
; AVX-NEXT: andb $8, %dl
; AVX-NEXT: orb %cl, %dl
; AVX-NEXT: movb %dil, %cl
; AVX-NEXT: shrb $3, %cl
; AVX-NEXT: andb $4, %cl
; AVX-NEXT: orb %dl, %cl
; AVX-NEXT: movb %dil, %dl
; AVX-NEXT: shrb $5, %dl
; AVX-NEXT: andb $2, %dl
; AVX-NEXT: orb %cl, %dl
; AVX-NEXT: shrb $7, %dil
; AVX-NEXT: orb %dl, %dil
; AVX-NEXT: orb %al, %dil
; AVX-NEXT: movb %dil, %al
; AVX-NEXT: retq
;
; XOP-LABEL: test_bitreverse_i8:
; XOP: # BB#0:
; XOP-NEXT: vmovd %edi, %xmm0
; XOP-NEXT: vpperm {{.*}}(%rip), %xmm0, %xmm0, %xmm0
; XOP-NEXT: vpextrb $0, %xmm0, %eax
; XOP-NEXT: retq
  %b = call i8 @llvm.bitreverse.i8(i8 %a)
  ret i8 %b
}

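; i16: the same per-bit expansion, performed in 32-bit registers; XOP again
; reduces it to one VPPERM.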
define i16 @test_bitreverse_i16(i16 %a) nounwind {
87; SSE-LABEL: test_bitreverse_i16:
88; SSE: # BB#0:
89; SSE-NEXT: movl %edi, %ecx
90; SSE-NEXT: andl $32768, %ecx # imm = 0x8000
91; SSE-NEXT: movl %edi, %eax
92; SSE-NEXT: shll $15, %eax
93; SSE-NEXT: movl %edi, %edx
94; SSE-NEXT: andl $2, %edx
95; SSE-NEXT: shll $13, %edx
96; SSE-NEXT: leal (%rdx,%rax), %eax
97; SSE-NEXT: movl %edi, %edx
98; SSE-NEXT: andl $4, %edx
99; SSE-NEXT: shll $11, %edx
100; SSE-NEXT: orl %edx, %eax
101; SSE-NEXT: movl %edi, %edx
102; SSE-NEXT: andl $8, %edx
103; SSE-NEXT: shll $9, %edx
104; SSE-NEXT: orl %edx, %eax
105; SSE-NEXT: movl %edi, %edx
106; SSE-NEXT: andl $16, %edx
107; SSE-NEXT: shll $7, %edx
108; SSE-NEXT: orl %edx, %eax
109; SSE-NEXT: movl %edi, %edx
110; SSE-NEXT: andl $32, %edx
111; SSE-NEXT: shll $5, %edx
112; SSE-NEXT: orl %edx, %eax
113; SSE-NEXT: movl %edi, %edx
114; SSE-NEXT: andl $64, %edx
115; SSE-NEXT: shll $3, %edx
116; SSE-NEXT: leal (%rdi,%rdi), %esi
117; SSE-NEXT: andl $256, %esi # imm = 0x100
118; SSE-NEXT: orl %edx, %esi
119; SSE-NEXT: movl %edi, %edx
120; SSE-NEXT: shrl %edx
121; SSE-NEXT: andl $128, %edx
122; SSE-NEXT: orl %esi, %edx
123; SSE-NEXT: movl %edi, %esi
124; SSE-NEXT: shrl $3, %esi
125; SSE-NEXT: andl $64, %esi
126; SSE-NEXT: orl %edx, %esi
127; SSE-NEXT: movl %edi, %edx
128; SSE-NEXT: shrl $5, %edx
129; SSE-NEXT: andl $32, %edx
130; SSE-NEXT: orl %esi, %edx
131; SSE-NEXT: movl %edi, %esi
132; SSE-NEXT: shrl $7, %esi
133; SSE-NEXT: andl $16, %esi
134; SSE-NEXT: orl %edx, %esi
135; SSE-NEXT: movl %edi, %edx
136; SSE-NEXT: shrl $9, %edx
137; SSE-NEXT: andl $8, %edx
138; SSE-NEXT: orl %esi, %edx
139; SSE-NEXT: movl %edi, %esi
140; SSE-NEXT: shrl $11, %esi
141; SSE-NEXT: andl $4, %esi
142; SSE-NEXT: orl %edx, %esi
143; SSE-NEXT: shrl $13, %edi
144; SSE-NEXT: andl $2, %edi
145; SSE-NEXT: orl %esi, %edi
146; SSE-NEXT: shrl $15, %ecx
147; SSE-NEXT: orl %edi, %ecx
148; SSE-NEXT: orl %ecx, %eax
149; SSE-NEXT: retq
150;
151; AVX-LABEL: test_bitreverse_i16:
152; AVX: # BB#0:
153; AVX-NEXT: movl %edi, %ecx
154; AVX-NEXT: andl $32768, %ecx # imm = 0x8000
155; AVX-NEXT: movl %edi, %eax
156; AVX-NEXT: shll $15, %eax
157; AVX-NEXT: movl %edi, %edx
158; AVX-NEXT: andl $2, %edx
159; AVX-NEXT: shll $13, %edx
160; AVX-NEXT: leal (%rdx,%rax), %eax
161; AVX-NEXT: movl %edi, %edx
162; AVX-NEXT: andl $4, %edx
163; AVX-NEXT: shll $11, %edx
164; AVX-NEXT: orl %edx, %eax
165; AVX-NEXT: movl %edi, %edx
166; AVX-NEXT: andl $8, %edx
167; AVX-NEXT: shll $9, %edx
168; AVX-NEXT: orl %edx, %eax
169; AVX-NEXT: movl %edi, %edx
170; AVX-NEXT: andl $16, %edx
171; AVX-NEXT: shll $7, %edx
172; AVX-NEXT: orl %edx, %eax
173; AVX-NEXT: movl %edi, %edx
174; AVX-NEXT: andl $32, %edx
175; AVX-NEXT: shll $5, %edx
176; AVX-NEXT: orl %edx, %eax
177; AVX-NEXT: movl %edi, %edx
178; AVX-NEXT: andl $64, %edx
179; AVX-NEXT: shll $3, %edx
180; AVX-NEXT: leal (%rdi,%rdi), %esi
181; AVX-NEXT: andl $256, %esi # imm = 0x100
182; AVX-NEXT: orl %edx, %esi
183; AVX-NEXT: movl %edi, %edx
184; AVX-NEXT: shrl %edx
185; AVX-NEXT: andl $128, %edx
186; AVX-NEXT: orl %esi, %edx
187; AVX-NEXT: movl %edi, %esi
188; AVX-NEXT: shrl $3, %esi
189; AVX-NEXT: andl $64, %esi
190; AVX-NEXT: orl %edx, %esi
191; AVX-NEXT: movl %edi, %edx
192; AVX-NEXT: shrl $5, %edx
193; AVX-NEXT: andl $32, %edx
194; AVX-NEXT: orl %esi, %edx
195; AVX-NEXT: movl %edi, %esi
196; AVX-NEXT: shrl $7, %esi
197; AVX-NEXT: andl $16, %esi
198; AVX-NEXT: orl %edx, %esi
199; AVX-NEXT: movl %edi, %edx
200; AVX-NEXT: shrl $9, %edx
201; AVX-NEXT: andl $8, %edx
202; AVX-NEXT: orl %esi, %edx
203; AVX-NEXT: movl %edi, %esi
204; AVX-NEXT: shrl $11, %esi
205; AVX-NEXT: andl $4, %esi
206; AVX-NEXT: orl %edx, %esi
207; AVX-NEXT: shrl $13, %edi
208; AVX-NEXT: andl $2, %edi
209; AVX-NEXT: orl %esi, %edi
210; AVX-NEXT: shrl $15, %ecx
211; AVX-NEXT: orl %edi, %ecx
212; AVX-NEXT: orl %ecx, %eax
213; AVX-NEXT: retq
214;
215; XOP-LABEL: test_bitreverse_i16:
216; XOP: # BB#0:
217; XOP-NEXT: vmovd %edi, %xmm0
218; XOP-NEXT: vpperm {{.*}}(%rip), %xmm0, %xmm0, %xmm0
219; XOP-NEXT: vmovd %xmm0, %eax
220; XOP-NEXT: retq
  %b = call i16 @llvm.bitreverse.i16(i16 %a)
  ret i16 %b
}

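; i32: one shift/mask/or group per bit on the SSE and AVX runs; XOP still needs
; only a vmovd/VPPERM/vmovd round trip.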
define i32 @test_bitreverse_i32(i32 %a) nounwind {
226; SSE-LABEL: test_bitreverse_i32:
227; SSE: # BB#0:
228; SSE-NEXT: movl %edi, %eax
229; SSE-NEXT: shll $31, %eax
230; SSE-NEXT: movl %edi, %ecx
231; SSE-NEXT: andl $2, %ecx
232; SSE-NEXT: shll $29, %ecx
233; SSE-NEXT: leal (%rcx,%rax), %eax
234; SSE-NEXT: movl %edi, %ecx
235; SSE-NEXT: andl $4, %ecx
236; SSE-NEXT: shll $27, %ecx
237; SSE-NEXT: orl %ecx, %eax
238; SSE-NEXT: movl %edi, %ecx
239; SSE-NEXT: andl $8, %ecx
240; SSE-NEXT: shll $25, %ecx
241; SSE-NEXT: orl %ecx, %eax
242; SSE-NEXT: movl %edi, %ecx
243; SSE-NEXT: andl $16, %ecx
244; SSE-NEXT: shll $23, %ecx
245; SSE-NEXT: orl %ecx, %eax
246; SSE-NEXT: movl %edi, %ecx
247; SSE-NEXT: andl $32, %ecx
248; SSE-NEXT: shll $21, %ecx
249; SSE-NEXT: orl %ecx, %eax
250; SSE-NEXT: movl %edi, %ecx
251; SSE-NEXT: andl $64, %ecx
252; SSE-NEXT: shll $19, %ecx
253; SSE-NEXT: movl %edi, %edx
254; SSE-NEXT: shll $17, %edx
255; SSE-NEXT: andl $16777216, %edx # imm = 0x1000000
256; SSE-NEXT: orl %ecx, %edx
257; SSE-NEXT: movl %edi, %ecx
258; SSE-NEXT: shll $15, %ecx
259; SSE-NEXT: andl $8388608, %ecx # imm = 0x800000
260; SSE-NEXT: orl %edx, %ecx
261; SSE-NEXT: movl %edi, %edx
262; SSE-NEXT: shll $13, %edx
263; SSE-NEXT: andl $4194304, %edx # imm = 0x400000
264; SSE-NEXT: orl %ecx, %edx
265; SSE-NEXT: movl %edi, %ecx
266; SSE-NEXT: shll $11, %ecx
267; SSE-NEXT: andl $2097152, %ecx # imm = 0x200000
268; SSE-NEXT: orl %edx, %ecx
269; SSE-NEXT: movl %edi, %edx
270; SSE-NEXT: shll $9, %edx
271; SSE-NEXT: andl $1048576, %edx # imm = 0x100000
272; SSE-NEXT: orl %ecx, %edx
273; SSE-NEXT: movl %edi, %ecx
274; SSE-NEXT: shll $7, %ecx
275; SSE-NEXT: andl $524288, %ecx # imm = 0x80000
276; SSE-NEXT: orl %edx, %ecx
277; SSE-NEXT: movl %edi, %edx
278; SSE-NEXT: shll $5, %edx
279; SSE-NEXT: andl $262144, %edx # imm = 0x40000
280; SSE-NEXT: orl %ecx, %edx
281; SSE-NEXT: leal (,%rdi,8), %ecx
282; SSE-NEXT: andl $131072, %ecx # imm = 0x20000
283; SSE-NEXT: orl %edx, %ecx
284; SSE-NEXT: leal (%rdi,%rdi), %edx
285; SSE-NEXT: andl $65536, %edx # imm = 0x10000
286; SSE-NEXT: orl %ecx, %edx
287; SSE-NEXT: movl %edi, %ecx
288; SSE-NEXT: shrl %ecx
289; SSE-NEXT: andl $32768, %ecx # imm = 0x8000
290; SSE-NEXT: orl %edx, %ecx
291; SSE-NEXT: movl %edi, %edx
292; SSE-NEXT: shrl $3, %edx
293; SSE-NEXT: andl $16384, %edx # imm = 0x4000
294; SSE-NEXT: orl %ecx, %edx
295; SSE-NEXT: movl %edi, %ecx
296; SSE-NEXT: shrl $5, %ecx
297; SSE-NEXT: andl $8192, %ecx # imm = 0x2000
298; SSE-NEXT: orl %edx, %ecx
299; SSE-NEXT: movl %edi, %edx
300; SSE-NEXT: shrl $7, %edx
301; SSE-NEXT: andl $4096, %edx # imm = 0x1000
302; SSE-NEXT: orl %ecx, %edx
303; SSE-NEXT: movl %edi, %ecx
304; SSE-NEXT: shrl $9, %ecx
305; SSE-NEXT: andl $2048, %ecx # imm = 0x800
306; SSE-NEXT: orl %edx, %ecx
307; SSE-NEXT: movl %edi, %edx
308; SSE-NEXT: shrl $11, %edx
309; SSE-NEXT: andl $1024, %edx # imm = 0x400
310; SSE-NEXT: orl %ecx, %edx
311; SSE-NEXT: movl %edi, %ecx
312; SSE-NEXT: shrl $13, %ecx
313; SSE-NEXT: andl $512, %ecx # imm = 0x200
314; SSE-NEXT: orl %edx, %ecx
315; SSE-NEXT: movl %edi, %edx
316; SSE-NEXT: shrl $15, %edx
317; SSE-NEXT: andl $256, %edx # imm = 0x100
318; SSE-NEXT: orl %ecx, %edx
319; SSE-NEXT: movl %edi, %ecx
320; SSE-NEXT: shrl $17, %ecx
321; SSE-NEXT: andl $128, %ecx
322; SSE-NEXT: orl %edx, %ecx
323; SSE-NEXT: movl %edi, %edx
324; SSE-NEXT: shrl $19, %edx
325; SSE-NEXT: andl $64, %edx
326; SSE-NEXT: orl %ecx, %edx
327; SSE-NEXT: movl %edi, %ecx
328; SSE-NEXT: shrl $21, %ecx
329; SSE-NEXT: andl $32, %ecx
330; SSE-NEXT: orl %edx, %ecx
331; SSE-NEXT: movl %edi, %edx
332; SSE-NEXT: shrl $23, %edx
333; SSE-NEXT: andl $16, %edx
334; SSE-NEXT: orl %ecx, %edx
335; SSE-NEXT: movl %edi, %ecx
336; SSE-NEXT: shrl $25, %ecx
337; SSE-NEXT: andl $8, %ecx
338; SSE-NEXT: orl %edx, %ecx
339; SSE-NEXT: movl %edi, %edx
340; SSE-NEXT: shrl $27, %edx
341; SSE-NEXT: andl $4, %edx
342; SSE-NEXT: orl %ecx, %edx
343; SSE-NEXT: movl %edi, %ecx
344; SSE-NEXT: shrl $29, %ecx
345; SSE-NEXT: andl $2, %ecx
346; SSE-NEXT: orl %edx, %ecx
347; SSE-NEXT: shrl $31, %edi
348; SSE-NEXT: orl %ecx, %edi
349; SSE-NEXT: orl %edi, %eax
350; SSE-NEXT: retq
351;
352; AVX-LABEL: test_bitreverse_i32:
353; AVX: # BB#0:
354; AVX-NEXT: movl %edi, %eax
355; AVX-NEXT: shll $31, %eax
356; AVX-NEXT: movl %edi, %ecx
357; AVX-NEXT: andl $2, %ecx
358; AVX-NEXT: shll $29, %ecx
359; AVX-NEXT: leal (%rcx,%rax), %eax
360; AVX-NEXT: movl %edi, %ecx
361; AVX-NEXT: andl $4, %ecx
362; AVX-NEXT: shll $27, %ecx
363; AVX-NEXT: orl %ecx, %eax
364; AVX-NEXT: movl %edi, %ecx
365; AVX-NEXT: andl $8, %ecx
366; AVX-NEXT: shll $25, %ecx
367; AVX-NEXT: orl %ecx, %eax
368; AVX-NEXT: movl %edi, %ecx
369; AVX-NEXT: andl $16, %ecx
370; AVX-NEXT: shll $23, %ecx
371; AVX-NEXT: orl %ecx, %eax
372; AVX-NEXT: movl %edi, %ecx
373; AVX-NEXT: andl $32, %ecx
374; AVX-NEXT: shll $21, %ecx
375; AVX-NEXT: orl %ecx, %eax
376; AVX-NEXT: movl %edi, %ecx
377; AVX-NEXT: andl $64, %ecx
378; AVX-NEXT: shll $19, %ecx
379; AVX-NEXT: movl %edi, %edx
380; AVX-NEXT: shll $17, %edx
381; AVX-NEXT: andl $16777216, %edx # imm = 0x1000000
382; AVX-NEXT: orl %ecx, %edx
383; AVX-NEXT: movl %edi, %ecx
384; AVX-NEXT: shll $15, %ecx
385; AVX-NEXT: andl $8388608, %ecx # imm = 0x800000
386; AVX-NEXT: orl %edx, %ecx
387; AVX-NEXT: movl %edi, %edx
388; AVX-NEXT: shll $13, %edx
389; AVX-NEXT: andl $4194304, %edx # imm = 0x400000
390; AVX-NEXT: orl %ecx, %edx
391; AVX-NEXT: movl %edi, %ecx
392; AVX-NEXT: shll $11, %ecx
393; AVX-NEXT: andl $2097152, %ecx # imm = 0x200000
394; AVX-NEXT: orl %edx, %ecx
395; AVX-NEXT: movl %edi, %edx
396; AVX-NEXT: shll $9, %edx
397; AVX-NEXT: andl $1048576, %edx # imm = 0x100000
398; AVX-NEXT: orl %ecx, %edx
399; AVX-NEXT: movl %edi, %ecx
400; AVX-NEXT: shll $7, %ecx
401; AVX-NEXT: andl $524288, %ecx # imm = 0x80000
402; AVX-NEXT: orl %edx, %ecx
403; AVX-NEXT: movl %edi, %edx
404; AVX-NEXT: shll $5, %edx
405; AVX-NEXT: andl $262144, %edx # imm = 0x40000
406; AVX-NEXT: orl %ecx, %edx
407; AVX-NEXT: leal (,%rdi,8), %ecx
408; AVX-NEXT: andl $131072, %ecx # imm = 0x20000
409; AVX-NEXT: orl %edx, %ecx
410; AVX-NEXT: leal (%rdi,%rdi), %edx
411; AVX-NEXT: andl $65536, %edx # imm = 0x10000
412; AVX-NEXT: orl %ecx, %edx
413; AVX-NEXT: movl %edi, %ecx
414; AVX-NEXT: shrl %ecx
415; AVX-NEXT: andl $32768, %ecx # imm = 0x8000
416; AVX-NEXT: orl %edx, %ecx
417; AVX-NEXT: movl %edi, %edx
418; AVX-NEXT: shrl $3, %edx
419; AVX-NEXT: andl $16384, %edx # imm = 0x4000
420; AVX-NEXT: orl %ecx, %edx
421; AVX-NEXT: movl %edi, %ecx
422; AVX-NEXT: shrl $5, %ecx
423; AVX-NEXT: andl $8192, %ecx # imm = 0x2000
424; AVX-NEXT: orl %edx, %ecx
425; AVX-NEXT: movl %edi, %edx
426; AVX-NEXT: shrl $7, %edx
427; AVX-NEXT: andl $4096, %edx # imm = 0x1000
428; AVX-NEXT: orl %ecx, %edx
429; AVX-NEXT: movl %edi, %ecx
430; AVX-NEXT: shrl $9, %ecx
431; AVX-NEXT: andl $2048, %ecx # imm = 0x800
432; AVX-NEXT: orl %edx, %ecx
433; AVX-NEXT: movl %edi, %edx
434; AVX-NEXT: shrl $11, %edx
435; AVX-NEXT: andl $1024, %edx # imm = 0x400
436; AVX-NEXT: orl %ecx, %edx
437; AVX-NEXT: movl %edi, %ecx
438; AVX-NEXT: shrl $13, %ecx
439; AVX-NEXT: andl $512, %ecx # imm = 0x200
440; AVX-NEXT: orl %edx, %ecx
441; AVX-NEXT: movl %edi, %edx
442; AVX-NEXT: shrl $15, %edx
443; AVX-NEXT: andl $256, %edx # imm = 0x100
444; AVX-NEXT: orl %ecx, %edx
445; AVX-NEXT: movl %edi, %ecx
446; AVX-NEXT: shrl $17, %ecx
447; AVX-NEXT: andl $128, %ecx
448; AVX-NEXT: orl %edx, %ecx
449; AVX-NEXT: movl %edi, %edx
450; AVX-NEXT: shrl $19, %edx
451; AVX-NEXT: andl $64, %edx
452; AVX-NEXT: orl %ecx, %edx
453; AVX-NEXT: movl %edi, %ecx
454; AVX-NEXT: shrl $21, %ecx
455; AVX-NEXT: andl $32, %ecx
456; AVX-NEXT: orl %edx, %ecx
457; AVX-NEXT: movl %edi, %edx
458; AVX-NEXT: shrl $23, %edx
459; AVX-NEXT: andl $16, %edx
460; AVX-NEXT: orl %ecx, %edx
461; AVX-NEXT: movl %edi, %ecx
462; AVX-NEXT: shrl $25, %ecx
463; AVX-NEXT: andl $8, %ecx
464; AVX-NEXT: orl %edx, %ecx
465; AVX-NEXT: movl %edi, %edx
466; AVX-NEXT: shrl $27, %edx
467; AVX-NEXT: andl $4, %edx
468; AVX-NEXT: orl %ecx, %edx
469; AVX-NEXT: movl %edi, %ecx
470; AVX-NEXT: shrl $29, %ecx
471; AVX-NEXT: andl $2, %ecx
472; AVX-NEXT: orl %edx, %ecx
473; AVX-NEXT: shrl $31, %edi
474; AVX-NEXT: orl %ecx, %edi
475; AVX-NEXT: orl %edi, %eax
476; AVX-NEXT: retq
477;
478; XOP-LABEL: test_bitreverse_i32:
479; XOP: # BB#0:
480; XOP-NEXT: vmovd %edi, %xmm0
481; XOP-NEXT: vpperm {{.*}}(%rip), %xmm0, %xmm0, %xmm0
482; XOP-NEXT: vmovd %xmm0, %eax
483; XOP-NEXT: retq
  %b = call i32 @llvm.bitreverse.i32(i32 %a)
  ret i32 %b
}

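; i64: the same expansion using 64-bit shifts and masks; XOP moves the value
; into an XMM register with vmovq and reverses it with VPPERM.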
define i64 @test_bitreverse_i64(i64 %a) nounwind {
489; SSE-LABEL: test_bitreverse_i64:
490; SSE: # BB#0:
491; SSE-NEXT: leaq (%rdi,%rdi), %rax
492; SSE-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000
493; SSE-NEXT: andq %rax, %rcx
494; SSE-NEXT: movq %rdi, %rax
495; SSE-NEXT: shlq $63, %rax
496; SSE-NEXT: movq %rdi, %rdx
497; SSE-NEXT: andq $2, %rdx
498; SSE-NEXT: shlq $61, %rdx
499; SSE-NEXT: leaq (%rdx,%rax), %rax
500; SSE-NEXT: movq %rdi, %rdx
501; SSE-NEXT: andq $4, %rdx
502; SSE-NEXT: shlq $59, %rdx
503; SSE-NEXT: orq %rdx, %rax
504; SSE-NEXT: movq %rdi, %rdx
505; SSE-NEXT: andq $8, %rdx
506; SSE-NEXT: shlq $57, %rdx
507; SSE-NEXT: orq %rdx, %rax
508; SSE-NEXT: movq %rdi, %rdx
509; SSE-NEXT: andq $16, %rdx
510; SSE-NEXT: shlq $55, %rdx
511; SSE-NEXT: orq %rdx, %rax
512; SSE-NEXT: movq %rdi, %rdx
513; SSE-NEXT: andq $32, %rdx
514; SSE-NEXT: shlq $53, %rdx
515; SSE-NEXT: orq %rdx, %rax
516; SSE-NEXT: movq %rdi, %rdx
517; SSE-NEXT: andq $64, %rdx
518; SSE-NEXT: shlq $51, %rdx
519; SSE-NEXT: movq %rdi, %rsi
520; SSE-NEXT: andq $128, %rsi
521; SSE-NEXT: shlq $49, %rsi
522; SSE-NEXT: orq %rdx, %rsi
523; SSE-NEXT: movq %rdi, %rdx
524; SSE-NEXT: andq $256, %rdx # imm = 0x100
525; SSE-NEXT: shlq $47, %rdx
526; SSE-NEXT: orq %rsi, %rdx
527; SSE-NEXT: movq %rdi, %rsi
528; SSE-NEXT: andq $512, %rsi # imm = 0x200
529; SSE-NEXT: shlq $45, %rsi
530; SSE-NEXT: orq %rdx, %rsi
531; SSE-NEXT: movq %rdi, %rdx
532; SSE-NEXT: andq $1024, %rdx # imm = 0x400
533; SSE-NEXT: shlq $43, %rdx
534; SSE-NEXT: orq %rsi, %rdx
535; SSE-NEXT: movq %rdi, %rsi
536; SSE-NEXT: andq $2048, %rsi # imm = 0x800
537; SSE-NEXT: shlq $41, %rsi
538; SSE-NEXT: orq %rdx, %rsi
539; SSE-NEXT: movq %rdi, %rdx
540; SSE-NEXT: andq $4096, %rdx # imm = 0x1000
541; SSE-NEXT: shlq $39, %rdx
542; SSE-NEXT: orq %rsi, %rdx
543; SSE-NEXT: movq %rdi, %rsi
544; SSE-NEXT: andq $8192, %rsi # imm = 0x2000
545; SSE-NEXT: shlq $37, %rsi
546; SSE-NEXT: orq %rdx, %rsi
547; SSE-NEXT: movq %rdi, %rdx
548; SSE-NEXT: andq $16384, %rdx # imm = 0x4000
549; SSE-NEXT: shlq $35, %rdx
550; SSE-NEXT: orq %rsi, %rdx
551; SSE-NEXT: movq %rdi, %rsi
552; SSE-NEXT: andq $32768, %rsi # imm = 0x8000
553; SSE-NEXT: shlq $33, %rsi
554; SSE-NEXT: orq %rdx, %rsi
555; SSE-NEXT: movq %rdi, %rdx
556; SSE-NEXT: andq $65536, %rdx # imm = 0x10000
557; SSE-NEXT: shlq $31, %rdx
558; SSE-NEXT: orq %rsi, %rdx
559; SSE-NEXT: movq %rdi, %rsi
560; SSE-NEXT: andq $131072, %rsi # imm = 0x20000
561; SSE-NEXT: shlq $29, %rsi
562; SSE-NEXT: orq %rdx, %rsi
563; SSE-NEXT: movq %rdi, %rdx
564; SSE-NEXT: andq $262144, %rdx # imm = 0x40000
565; SSE-NEXT: shlq $27, %rdx
566; SSE-NEXT: orq %rsi, %rdx
567; SSE-NEXT: movq %rdi, %rsi
568; SSE-NEXT: andq $524288, %rsi # imm = 0x80000
569; SSE-NEXT: shlq $25, %rsi
570; SSE-NEXT: orq %rdx, %rsi
571; SSE-NEXT: movq %rdi, %rdx
572; SSE-NEXT: andq $1048576, %rdx # imm = 0x100000
573; SSE-NEXT: shlq $23, %rdx
574; SSE-NEXT: orq %rsi, %rdx
575; SSE-NEXT: movq %rdi, %rsi
576; SSE-NEXT: andq $2097152, %rsi # imm = 0x200000
577; SSE-NEXT: shlq $21, %rsi
578; SSE-NEXT: orq %rdx, %rsi
579; SSE-NEXT: movq %rdi, %rdx
580; SSE-NEXT: andq $4194304, %rdx # imm = 0x400000
581; SSE-NEXT: shlq $19, %rdx
582; SSE-NEXT: orq %rsi, %rdx
583; SSE-NEXT: movq %rdi, %rsi
584; SSE-NEXT: andq $8388608, %rsi # imm = 0x800000
585; SSE-NEXT: shlq $17, %rsi
586; SSE-NEXT: orq %rdx, %rsi
587; SSE-NEXT: movq %rdi, %rdx
588; SSE-NEXT: andq $16777216, %rdx # imm = 0x1000000
589; SSE-NEXT: shlq $15, %rdx
590; SSE-NEXT: orq %rsi, %rdx
591; SSE-NEXT: movq %rdi, %rsi
592; SSE-NEXT: andq $33554432, %rsi # imm = 0x2000000
593; SSE-NEXT: shlq $13, %rsi
594; SSE-NEXT: orq %rdx, %rsi
595; SSE-NEXT: movq %rdi, %rdx
596; SSE-NEXT: andq $67108864, %rdx # imm = 0x4000000
597; SSE-NEXT: shlq $11, %rdx
598; SSE-NEXT: orq %rsi, %rdx
599; SSE-NEXT: movq %rdi, %rsi
600; SSE-NEXT: andq $134217728, %rsi # imm = 0x8000000
601; SSE-NEXT: shlq $9, %rsi
602; SSE-NEXT: orq %rdx, %rsi
603; SSE-NEXT: movq %rdi, %rdx
604; SSE-NEXT: andq $268435456, %rdx # imm = 0x10000000
605; SSE-NEXT: shlq $7, %rdx
606; SSE-NEXT: orq %rsi, %rdx
607; SSE-NEXT: movq %rdi, %rsi
608; SSE-NEXT: andq $536870912, %rsi # imm = 0x20000000
609; SSE-NEXT: shlq $5, %rsi
610; SSE-NEXT: orq %rdx, %rsi
611; SSE-NEXT: movq %rdi, %rdx
612; SSE-NEXT: andq $1073741824, %rdx # imm = 0x40000000
613; SSE-NEXT: shlq $3, %rdx
614; SSE-NEXT: orq %rsi, %rdx
615; SSE-NEXT: orq %rcx, %rdx
616; SSE-NEXT: movq %rdi, %rcx
617; SSE-NEXT: shrq %rcx
618; SSE-NEXT: andl $-2147483648, %ecx # imm = 0xFFFFFFFF80000000
619; SSE-NEXT: orq %rdx, %rcx
620; SSE-NEXT: movq %rdi, %rdx
621; SSE-NEXT: shrq $3, %rdx
622; SSE-NEXT: andl $1073741824, %edx # imm = 0x40000000
623; SSE-NEXT: orq %rcx, %rdx
624; SSE-NEXT: movq %rdi, %rcx
625; SSE-NEXT: shrq $5, %rcx
626; SSE-NEXT: andl $536870912, %ecx # imm = 0x20000000
627; SSE-NEXT: orq %rdx, %rcx
628; SSE-NEXT: movq %rdi, %rdx
629; SSE-NEXT: shrq $7, %rdx
630; SSE-NEXT: andl $268435456, %edx # imm = 0x10000000
631; SSE-NEXT: orq %rcx, %rdx
632; SSE-NEXT: movq %rdi, %rcx
633; SSE-NEXT: shrq $9, %rcx
634; SSE-NEXT: andl $134217728, %ecx # imm = 0x8000000
635; SSE-NEXT: orq %rdx, %rcx
636; SSE-NEXT: movq %rdi, %rdx
637; SSE-NEXT: shrq $11, %rdx
638; SSE-NEXT: andl $67108864, %edx # imm = 0x4000000
639; SSE-NEXT: orq %rcx, %rdx
640; SSE-NEXT: movq %rdi, %rcx
641; SSE-NEXT: shrq $13, %rcx
642; SSE-NEXT: andl $33554432, %ecx # imm = 0x2000000
643; SSE-NEXT: orq %rdx, %rcx
644; SSE-NEXT: movq %rdi, %rdx
645; SSE-NEXT: shrq $15, %rdx
646; SSE-NEXT: andl $16777216, %edx # imm = 0x1000000
647; SSE-NEXT: orq %rcx, %rdx
648; SSE-NEXT: movq %rdi, %rcx
649; SSE-NEXT: shrq $17, %rcx
650; SSE-NEXT: andl $8388608, %ecx # imm = 0x800000
651; SSE-NEXT: orq %rdx, %rcx
652; SSE-NEXT: movq %rdi, %rdx
653; SSE-NEXT: shrq $19, %rdx
654; SSE-NEXT: andl $4194304, %edx # imm = 0x400000
655; SSE-NEXT: orq %rcx, %rdx
656; SSE-NEXT: movq %rdi, %rcx
657; SSE-NEXT: shrq $21, %rcx
658; SSE-NEXT: andl $2097152, %ecx # imm = 0x200000
659; SSE-NEXT: orq %rdx, %rcx
660; SSE-NEXT: movq %rdi, %rdx
661; SSE-NEXT: shrq $23, %rdx
662; SSE-NEXT: andl $1048576, %edx # imm = 0x100000
663; SSE-NEXT: orq %rcx, %rdx
664; SSE-NEXT: movq %rdi, %rcx
665; SSE-NEXT: shrq $25, %rcx
666; SSE-NEXT: andl $524288, %ecx # imm = 0x80000
667; SSE-NEXT: orq %rdx, %rcx
668; SSE-NEXT: movq %rdi, %rdx
669; SSE-NEXT: shrq $27, %rdx
670; SSE-NEXT: andl $262144, %edx # imm = 0x40000
671; SSE-NEXT: orq %rcx, %rdx
672; SSE-NEXT: movq %rdi, %rcx
673; SSE-NEXT: shrq $29, %rcx
674; SSE-NEXT: andl $131072, %ecx # imm = 0x20000
675; SSE-NEXT: orq %rdx, %rcx
676; SSE-NEXT: movq %rdi, %rdx
677; SSE-NEXT: shrq $31, %rdx
678; SSE-NEXT: andl $65536, %edx # imm = 0x10000
679; SSE-NEXT: orq %rcx, %rdx
680; SSE-NEXT: movq %rdi, %rcx
681; SSE-NEXT: shrq $33, %rcx
682; SSE-NEXT: andl $32768, %ecx # imm = 0x8000
683; SSE-NEXT: orq %rdx, %rcx
684; SSE-NEXT: movq %rdi, %rdx
685; SSE-NEXT: shrq $35, %rdx
686; SSE-NEXT: andl $16384, %edx # imm = 0x4000
687; SSE-NEXT: orq %rcx, %rdx
688; SSE-NEXT: movq %rdi, %rcx
689; SSE-NEXT: shrq $37, %rcx
690; SSE-NEXT: andl $8192, %ecx # imm = 0x2000
691; SSE-NEXT: orq %rdx, %rcx
692; SSE-NEXT: movq %rdi, %rdx
693; SSE-NEXT: shrq $39, %rdx
694; SSE-NEXT: andl $4096, %edx # imm = 0x1000
695; SSE-NEXT: orq %rcx, %rdx
696; SSE-NEXT: movq %rdi, %rcx
697; SSE-NEXT: shrq $41, %rcx
698; SSE-NEXT: andl $2048, %ecx # imm = 0x800
699; SSE-NEXT: orq %rdx, %rcx
700; SSE-NEXT: movq %rdi, %rdx
701; SSE-NEXT: shrq $43, %rdx
702; SSE-NEXT: andl $1024, %edx # imm = 0x400
703; SSE-NEXT: orq %rcx, %rdx
704; SSE-NEXT: movq %rdi, %rcx
705; SSE-NEXT: shrq $45, %rcx
706; SSE-NEXT: andl $512, %ecx # imm = 0x200
707; SSE-NEXT: orq %rdx, %rcx
708; SSE-NEXT: movq %rdi, %rdx
709; SSE-NEXT: shrq $47, %rdx
710; SSE-NEXT: andl $256, %edx # imm = 0x100
711; SSE-NEXT: orq %rcx, %rdx
712; SSE-NEXT: movq %rdi, %rcx
713; SSE-NEXT: shrq $49, %rcx
714; SSE-NEXT: andl $128, %ecx
715; SSE-NEXT: orq %rdx, %rcx
716; SSE-NEXT: movq %rdi, %rdx
717; SSE-NEXT: shrq $51, %rdx
718; SSE-NEXT: andl $64, %edx
719; SSE-NEXT: orq %rcx, %rdx
720; SSE-NEXT: movq %rdi, %rcx
721; SSE-NEXT: shrq $53, %rcx
722; SSE-NEXT: andl $32, %ecx
723; SSE-NEXT: orq %rdx, %rcx
724; SSE-NEXT: movq %rdi, %rdx
725; SSE-NEXT: shrq $55, %rdx
726; SSE-NEXT: andl $16, %edx
727; SSE-NEXT: orq %rcx, %rdx
728; SSE-NEXT: movq %rdi, %rcx
729; SSE-NEXT: shrq $57, %rcx
730; SSE-NEXT: andl $8, %ecx
731; SSE-NEXT: orq %rdx, %rcx
732; SSE-NEXT: movq %rdi, %rdx
733; SSE-NEXT: shrq $59, %rdx
734; SSE-NEXT: andl $4, %edx
735; SSE-NEXT: orq %rcx, %rdx
736; SSE-NEXT: movq %rdi, %rcx
737; SSE-NEXT: shrq $61, %rcx
738; SSE-NEXT: andl $2, %ecx
739; SSE-NEXT: orq %rdx, %rcx
740; SSE-NEXT: shrq $63, %rdi
741; SSE-NEXT: orq %rcx, %rdi
742; SSE-NEXT: orq %rdi, %rax
743; SSE-NEXT: retq
744;
745; AVX-LABEL: test_bitreverse_i64:
746; AVX: # BB#0:
747; AVX-NEXT: leaq (%rdi,%rdi), %rax
748; AVX-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000
749; AVX-NEXT: andq %rax, %rcx
750; AVX-NEXT: movq %rdi, %rax
751; AVX-NEXT: shlq $63, %rax
752; AVX-NEXT: movq %rdi, %rdx
753; AVX-NEXT: andq $2, %rdx
754; AVX-NEXT: shlq $61, %rdx
755; AVX-NEXT: leaq (%rdx,%rax), %rax
756; AVX-NEXT: movq %rdi, %rdx
757; AVX-NEXT: andq $4, %rdx
758; AVX-NEXT: shlq $59, %rdx
759; AVX-NEXT: orq %rdx, %rax
760; AVX-NEXT: movq %rdi, %rdx
761; AVX-NEXT: andq $8, %rdx
762; AVX-NEXT: shlq $57, %rdx
763; AVX-NEXT: orq %rdx, %rax
764; AVX-NEXT: movq %rdi, %rdx
765; AVX-NEXT: andq $16, %rdx
766; AVX-NEXT: shlq $55, %rdx
767; AVX-NEXT: orq %rdx, %rax
768; AVX-NEXT: movq %rdi, %rdx
769; AVX-NEXT: andq $32, %rdx
770; AVX-NEXT: shlq $53, %rdx
771; AVX-NEXT: orq %rdx, %rax
772; AVX-NEXT: movq %rdi, %rdx
773; AVX-NEXT: andq $64, %rdx
774; AVX-NEXT: shlq $51, %rdx
775; AVX-NEXT: movq %rdi, %rsi
776; AVX-NEXT: andq $128, %rsi
777; AVX-NEXT: shlq $49, %rsi
778; AVX-NEXT: orq %rdx, %rsi
779; AVX-NEXT: movq %rdi, %rdx
780; AVX-NEXT: andq $256, %rdx # imm = 0x100
781; AVX-NEXT: shlq $47, %rdx
782; AVX-NEXT: orq %rsi, %rdx
783; AVX-NEXT: movq %rdi, %rsi
784; AVX-NEXT: andq $512, %rsi # imm = 0x200
785; AVX-NEXT: shlq $45, %rsi
786; AVX-NEXT: orq %rdx, %rsi
787; AVX-NEXT: movq %rdi, %rdx
788; AVX-NEXT: andq $1024, %rdx # imm = 0x400
789; AVX-NEXT: shlq $43, %rdx
790; AVX-NEXT: orq %rsi, %rdx
791; AVX-NEXT: movq %rdi, %rsi
792; AVX-NEXT: andq $2048, %rsi # imm = 0x800
793; AVX-NEXT: shlq $41, %rsi
794; AVX-NEXT: orq %rdx, %rsi
795; AVX-NEXT: movq %rdi, %rdx
796; AVX-NEXT: andq $4096, %rdx # imm = 0x1000
797; AVX-NEXT: shlq $39, %rdx
798; AVX-NEXT: orq %rsi, %rdx
799; AVX-NEXT: movq %rdi, %rsi
800; AVX-NEXT: andq $8192, %rsi # imm = 0x2000
801; AVX-NEXT: shlq $37, %rsi
802; AVX-NEXT: orq %rdx, %rsi
803; AVX-NEXT: movq %rdi, %rdx
804; AVX-NEXT: andq $16384, %rdx # imm = 0x4000
805; AVX-NEXT: shlq $35, %rdx
806; AVX-NEXT: orq %rsi, %rdx
807; AVX-NEXT: movq %rdi, %rsi
808; AVX-NEXT: andq $32768, %rsi # imm = 0x8000
809; AVX-NEXT: shlq $33, %rsi
810; AVX-NEXT: orq %rdx, %rsi
811; AVX-NEXT: movq %rdi, %rdx
812; AVX-NEXT: andq $65536, %rdx # imm = 0x10000
813; AVX-NEXT: shlq $31, %rdx
814; AVX-NEXT: orq %rsi, %rdx
815; AVX-NEXT: movq %rdi, %rsi
816; AVX-NEXT: andq $131072, %rsi # imm = 0x20000
817; AVX-NEXT: shlq $29, %rsi
818; AVX-NEXT: orq %rdx, %rsi
819; AVX-NEXT: movq %rdi, %rdx
820; AVX-NEXT: andq $262144, %rdx # imm = 0x40000
821; AVX-NEXT: shlq $27, %rdx
822; AVX-NEXT: orq %rsi, %rdx
823; AVX-NEXT: movq %rdi, %rsi
824; AVX-NEXT: andq $524288, %rsi # imm = 0x80000
825; AVX-NEXT: shlq $25, %rsi
826; AVX-NEXT: orq %rdx, %rsi
827; AVX-NEXT: movq %rdi, %rdx
828; AVX-NEXT: andq $1048576, %rdx # imm = 0x100000
829; AVX-NEXT: shlq $23, %rdx
830; AVX-NEXT: orq %rsi, %rdx
831; AVX-NEXT: movq %rdi, %rsi
832; AVX-NEXT: andq $2097152, %rsi # imm = 0x200000
833; AVX-NEXT: shlq $21, %rsi
834; AVX-NEXT: orq %rdx, %rsi
835; AVX-NEXT: movq %rdi, %rdx
836; AVX-NEXT: andq $4194304, %rdx # imm = 0x400000
837; AVX-NEXT: shlq $19, %rdx
838; AVX-NEXT: orq %rsi, %rdx
839; AVX-NEXT: movq %rdi, %rsi
840; AVX-NEXT: andq $8388608, %rsi # imm = 0x800000
841; AVX-NEXT: shlq $17, %rsi
842; AVX-NEXT: orq %rdx, %rsi
843; AVX-NEXT: movq %rdi, %rdx
844; AVX-NEXT: andq $16777216, %rdx # imm = 0x1000000
845; AVX-NEXT: shlq $15, %rdx
846; AVX-NEXT: orq %rsi, %rdx
847; AVX-NEXT: movq %rdi, %rsi
848; AVX-NEXT: andq $33554432, %rsi # imm = 0x2000000
849; AVX-NEXT: shlq $13, %rsi
850; AVX-NEXT: orq %rdx, %rsi
851; AVX-NEXT: movq %rdi, %rdx
852; AVX-NEXT: andq $67108864, %rdx # imm = 0x4000000
853; AVX-NEXT: shlq $11, %rdx
854; AVX-NEXT: orq %rsi, %rdx
855; AVX-NEXT: movq %rdi, %rsi
856; AVX-NEXT: andq $134217728, %rsi # imm = 0x8000000
857; AVX-NEXT: shlq $9, %rsi
858; AVX-NEXT: orq %rdx, %rsi
859; AVX-NEXT: movq %rdi, %rdx
860; AVX-NEXT: andq $268435456, %rdx # imm = 0x10000000
861; AVX-NEXT: shlq $7, %rdx
862; AVX-NEXT: orq %rsi, %rdx
863; AVX-NEXT: movq %rdi, %rsi
864; AVX-NEXT: andq $536870912, %rsi # imm = 0x20000000
865; AVX-NEXT: shlq $5, %rsi
866; AVX-NEXT: orq %rdx, %rsi
867; AVX-NEXT: movq %rdi, %rdx
868; AVX-NEXT: andq $1073741824, %rdx # imm = 0x40000000
869; AVX-NEXT: shlq $3, %rdx
870; AVX-NEXT: orq %rsi, %rdx
871; AVX-NEXT: orq %rcx, %rdx
872; AVX-NEXT: movq %rdi, %rcx
873; AVX-NEXT: shrq %rcx
874; AVX-NEXT: andl $-2147483648, %ecx # imm = 0xFFFFFFFF80000000
875; AVX-NEXT: orq %rdx, %rcx
876; AVX-NEXT: movq %rdi, %rdx
877; AVX-NEXT: shrq $3, %rdx
878; AVX-NEXT: andl $1073741824, %edx # imm = 0x40000000
879; AVX-NEXT: orq %rcx, %rdx
880; AVX-NEXT: movq %rdi, %rcx
881; AVX-NEXT: shrq $5, %rcx
882; AVX-NEXT: andl $536870912, %ecx # imm = 0x20000000
883; AVX-NEXT: orq %rdx, %rcx
884; AVX-NEXT: movq %rdi, %rdx
885; AVX-NEXT: shrq $7, %rdx
886; AVX-NEXT: andl $268435456, %edx # imm = 0x10000000
887; AVX-NEXT: orq %rcx, %rdx
888; AVX-NEXT: movq %rdi, %rcx
889; AVX-NEXT: shrq $9, %rcx
890; AVX-NEXT: andl $134217728, %ecx # imm = 0x8000000
891; AVX-NEXT: orq %rdx, %rcx
892; AVX-NEXT: movq %rdi, %rdx
893; AVX-NEXT: shrq $11, %rdx
894; AVX-NEXT: andl $67108864, %edx # imm = 0x4000000
895; AVX-NEXT: orq %rcx, %rdx
896; AVX-NEXT: movq %rdi, %rcx
897; AVX-NEXT: shrq $13, %rcx
898; AVX-NEXT: andl $33554432, %ecx # imm = 0x2000000
899; AVX-NEXT: orq %rdx, %rcx
900; AVX-NEXT: movq %rdi, %rdx
901; AVX-NEXT: shrq $15, %rdx
902; AVX-NEXT: andl $16777216, %edx # imm = 0x1000000
903; AVX-NEXT: orq %rcx, %rdx
904; AVX-NEXT: movq %rdi, %rcx
905; AVX-NEXT: shrq $17, %rcx
906; AVX-NEXT: andl $8388608, %ecx # imm = 0x800000
907; AVX-NEXT: orq %rdx, %rcx
908; AVX-NEXT: movq %rdi, %rdx
909; AVX-NEXT: shrq $19, %rdx
910; AVX-NEXT: andl $4194304, %edx # imm = 0x400000
911; AVX-NEXT: orq %rcx, %rdx
912; AVX-NEXT: movq %rdi, %rcx
913; AVX-NEXT: shrq $21, %rcx
914; AVX-NEXT: andl $2097152, %ecx # imm = 0x200000
915; AVX-NEXT: orq %rdx, %rcx
916; AVX-NEXT: movq %rdi, %rdx
917; AVX-NEXT: shrq $23, %rdx
918; AVX-NEXT: andl $1048576, %edx # imm = 0x100000
919; AVX-NEXT: orq %rcx, %rdx
920; AVX-NEXT: movq %rdi, %rcx
921; AVX-NEXT: shrq $25, %rcx
922; AVX-NEXT: andl $524288, %ecx # imm = 0x80000
923; AVX-NEXT: orq %rdx, %rcx
924; AVX-NEXT: movq %rdi, %rdx
925; AVX-NEXT: shrq $27, %rdx
926; AVX-NEXT: andl $262144, %edx # imm = 0x40000
927; AVX-NEXT: orq %rcx, %rdx
928; AVX-NEXT: movq %rdi, %rcx
929; AVX-NEXT: shrq $29, %rcx
930; AVX-NEXT: andl $131072, %ecx # imm = 0x20000
931; AVX-NEXT: orq %rdx, %rcx
932; AVX-NEXT: movq %rdi, %rdx
933; AVX-NEXT: shrq $31, %rdx
934; AVX-NEXT: andl $65536, %edx # imm = 0x10000
935; AVX-NEXT: orq %rcx, %rdx
936; AVX-NEXT: movq %rdi, %rcx
937; AVX-NEXT: shrq $33, %rcx
938; AVX-NEXT: andl $32768, %ecx # imm = 0x8000
939; AVX-NEXT: orq %rdx, %rcx
940; AVX-NEXT: movq %rdi, %rdx
941; AVX-NEXT: shrq $35, %rdx
942; AVX-NEXT: andl $16384, %edx # imm = 0x4000
943; AVX-NEXT: orq %rcx, %rdx
944; AVX-NEXT: movq %rdi, %rcx
945; AVX-NEXT: shrq $37, %rcx
946; AVX-NEXT: andl $8192, %ecx # imm = 0x2000
947; AVX-NEXT: orq %rdx, %rcx
948; AVX-NEXT: movq %rdi, %rdx
949; AVX-NEXT: shrq $39, %rdx
950; AVX-NEXT: andl $4096, %edx # imm = 0x1000
951; AVX-NEXT: orq %rcx, %rdx
952; AVX-NEXT: movq %rdi, %rcx
953; AVX-NEXT: shrq $41, %rcx
954; AVX-NEXT: andl $2048, %ecx # imm = 0x800
955; AVX-NEXT: orq %rdx, %rcx
956; AVX-NEXT: movq %rdi, %rdx
957; AVX-NEXT: shrq $43, %rdx
958; AVX-NEXT: andl $1024, %edx # imm = 0x400
959; AVX-NEXT: orq %rcx, %rdx
960; AVX-NEXT: movq %rdi, %rcx
961; AVX-NEXT: shrq $45, %rcx
962; AVX-NEXT: andl $512, %ecx # imm = 0x200
963; AVX-NEXT: orq %rdx, %rcx
964; AVX-NEXT: movq %rdi, %rdx
965; AVX-NEXT: shrq $47, %rdx
966; AVX-NEXT: andl $256, %edx # imm = 0x100
967; AVX-NEXT: orq %rcx, %rdx
968; AVX-NEXT: movq %rdi, %rcx
969; AVX-NEXT: shrq $49, %rcx
970; AVX-NEXT: andl $128, %ecx
971; AVX-NEXT: orq %rdx, %rcx
972; AVX-NEXT: movq %rdi, %rdx
973; AVX-NEXT: shrq $51, %rdx
974; AVX-NEXT: andl $64, %edx
975; AVX-NEXT: orq %rcx, %rdx
976; AVX-NEXT: movq %rdi, %rcx
977; AVX-NEXT: shrq $53, %rcx
978; AVX-NEXT: andl $32, %ecx
979; AVX-NEXT: orq %rdx, %rcx
980; AVX-NEXT: movq %rdi, %rdx
981; AVX-NEXT: shrq $55, %rdx
982; AVX-NEXT: andl $16, %edx
983; AVX-NEXT: orq %rcx, %rdx
984; AVX-NEXT: movq %rdi, %rcx
985; AVX-NEXT: shrq $57, %rcx
986; AVX-NEXT: andl $8, %ecx
987; AVX-NEXT: orq %rdx, %rcx
988; AVX-NEXT: movq %rdi, %rdx
989; AVX-NEXT: shrq $59, %rdx
990; AVX-NEXT: andl $4, %edx
991; AVX-NEXT: orq %rcx, %rdx
992; AVX-NEXT: movq %rdi, %rcx
993; AVX-NEXT: shrq $61, %rcx
994; AVX-NEXT: andl $2, %ecx
995; AVX-NEXT: orq %rdx, %rcx
996; AVX-NEXT: shrq $63, %rdi
997; AVX-NEXT: orq %rcx, %rdi
998; AVX-NEXT: orq %rdi, %rax
999; AVX-NEXT: retq
1000;
1001; XOP-LABEL: test_bitreverse_i64:
1002; XOP: # BB#0:
1003; XOP-NEXT: vmovq %rdi, %xmm0
1004; XOP-NEXT: vpperm {{.*}}(%rip), %xmm0, %xmm0, %xmm0
1005; XOP-NEXT: vmovq %xmm0, %rax
1006; XOP-NEXT: retq
  %b = call i64 @llvm.bitreverse.i64(i64 %a)
  ret i64 %b
}

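; v16i8: the SSE runs spill the vector to the stack and reverse every byte in
; scalar registers before repacking with punpcklbw; the AVX runs extract and
; reinsert each byte with vpextrb/vpinsrb.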
define <16 x i8> @test_bitreverse_v16i8(<16 x i8> %a) nounwind {
1012; SSE-LABEL: test_bitreverse_v16i8:
1013; SSE: # BB#0:
1014; SSE-NEXT: pushq %rbp
1015; SSE-NEXT: pushq %r15
1016; SSE-NEXT: pushq %r14
1017; SSE-NEXT: pushq %rbx
1018; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
1019; SSE-NEXT: movb -{{[0-9]+}}(%rsp), %cl
1020; SSE-NEXT: movb %cl, %bl
1021; SSE-NEXT: shlb $7, %bl
1022; SSE-NEXT: movb %cl, %dl
1023; SSE-NEXT: shlb $5, %dl
1024; SSE-NEXT: andb $64, %dl
1025; SSE-NEXT: movb %cl, %al
1026; SSE-NEXT: shlb $3, %al
1027; SSE-NEXT: andb $32, %al
1028; SSE-NEXT: orb %dl, %al
1029; SSE-NEXT: movb %cl, %dl
1030; SSE-NEXT: addb %dl, %dl
1031; SSE-NEXT: andb $16, %dl
1032; SSE-NEXT: orb %al, %dl
1033; SSE-NEXT: movb %cl, %al
1034; SSE-NEXT: shrb %al
1035; SSE-NEXT: andb $8, %al
1036; SSE-NEXT: orb %dl, %al
1037; SSE-NEXT: movb %cl, %dl
1038; SSE-NEXT: shrb $3, %dl
1039; SSE-NEXT: andb $4, %dl
1040; SSE-NEXT: orb %al, %dl
1041; SSE-NEXT: movb %cl, %al
1042; SSE-NEXT: shrb $5, %al
1043; SSE-NEXT: andb $2, %al
1044; SSE-NEXT: orb %dl, %al
1045; SSE-NEXT: shrb $7, %cl
1046; SSE-NEXT: orb %al, %cl
1047; SSE-NEXT: orb %bl, %cl
1048; SSE-NEXT: movzbl %cl, %eax
1049; SSE-NEXT: movd %eax, %xmm0
1050; SSE-NEXT: movb -{{[0-9]+}}(%rsp), %r10b
1051; SSE-NEXT: movb -{{[0-9]+}}(%rsp), %r14b
1052; SSE-NEXT: movb -{{[0-9]+}}(%rsp), %dil
1053; SSE-NEXT: movb -{{[0-9]+}}(%rsp), %r11b
1054; SSE-NEXT: movb -{{[0-9]+}}(%rsp), %r9b
1055; SSE-NEXT: movb -{{[0-9]+}}(%rsp), %bpl
1056; SSE-NEXT: movb -{{[0-9]+}}(%rsp), %sil
1057; SSE-NEXT: movb -{{[0-9]+}}(%rsp), %dl
1058; SSE-NEXT: movb %dl, %r8b
1059; SSE-NEXT: shlb $7, %r8b
1060; SSE-NEXT: movb %dl, %bl
1061; SSE-NEXT: shlb $5, %bl
1062; SSE-NEXT: andb $64, %bl
1063; SSE-NEXT: movb %dl, %al
1064; SSE-NEXT: shlb $3, %al
1065; SSE-NEXT: andb $32, %al
1066; SSE-NEXT: orb %bl, %al
1067; SSE-NEXT: movb %dl, %cl
1068; SSE-NEXT: addb %cl, %cl
1069; SSE-NEXT: andb $16, %cl
1070; SSE-NEXT: orb %al, %cl
1071; SSE-NEXT: movb %dl, %al
1072; SSE-NEXT: shrb %al
1073; SSE-NEXT: andb $8, %al
1074; SSE-NEXT: orb %cl, %al
1075; SSE-NEXT: movb %dl, %cl
1076; SSE-NEXT: shrb $3, %cl
1077; SSE-NEXT: andb $4, %cl
1078; SSE-NEXT: orb %al, %cl
1079; SSE-NEXT: movb %dl, %al
1080; SSE-NEXT: shrb $5, %al
1081; SSE-NEXT: andb $2, %al
1082; SSE-NEXT: orb %cl, %al
1083; SSE-NEXT: shrb $7, %dl
1084; SSE-NEXT: orb %al, %dl
1085; SSE-NEXT: orb %r8b, %dl
1086; SSE-NEXT: movzbl %dl, %eax
1087; SSE-NEXT: movd %eax, %xmm1
1088; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
1089; SSE-NEXT: movb %r11b, %al
1090; SSE-NEXT: shlb $7, %al
1091; SSE-NEXT: movb %r11b, %cl
1092; SSE-NEXT: shlb $5, %cl
1093; SSE-NEXT: andb $64, %cl
1094; SSE-NEXT: movb %r11b, %dl
1095; SSE-NEXT: shlb $3, %dl
1096; SSE-NEXT: andb $32, %dl
1097; SSE-NEXT: orb %cl, %dl
1098; SSE-NEXT: movb %r11b, %cl
1099; SSE-NEXT: addb %cl, %cl
1100; SSE-NEXT: andb $16, %cl
1101; SSE-NEXT: orb %dl, %cl
1102; SSE-NEXT: movb %r11b, %dl
1103; SSE-NEXT: shrb %dl
1104; SSE-NEXT: andb $8, %dl
1105; SSE-NEXT: orb %cl, %dl
1106; SSE-NEXT: movb %r11b, %cl
1107; SSE-NEXT: shrb $3, %cl
1108; SSE-NEXT: andb $4, %cl
1109; SSE-NEXT: orb %dl, %cl
1110; SSE-NEXT: movb %r11b, %dl
1111; SSE-NEXT: shrb $5, %dl
1112; SSE-NEXT: andb $2, %dl
1113; SSE-NEXT: orb %cl, %dl
1114; SSE-NEXT: shrb $7, %r11b
1115; SSE-NEXT: orb %dl, %r11b
1116; SSE-NEXT: orb %al, %r11b
1117; SSE-NEXT: movzbl %r11b, %eax
1118; SSE-NEXT: movd %eax, %xmm2
1119; SSE-NEXT: movb -{{[0-9]+}}(%rsp), %r11b
1120; SSE-NEXT: movb -{{[0-9]+}}(%rsp), %dl
1121; SSE-NEXT: movb -{{[0-9]+}}(%rsp), %r8b
1122; SSE-NEXT: movb -{{[0-9]+}}(%rsp), %bl
1123; SSE-NEXT: movb %bl, %r15b
1124; SSE-NEXT: shlb $7, %r15b
1125; SSE-NEXT: movb %bl, %cl
1126; SSE-NEXT: shlb $5, %cl
1127; SSE-NEXT: andb $64, %cl
1128; SSE-NEXT: movb %bl, %al
1129; SSE-NEXT: shlb $3, %al
1130; SSE-NEXT: andb $32, %al
1131; SSE-NEXT: orb %cl, %al
1132; SSE-NEXT: movb %bl, %cl
1133; SSE-NEXT: addb %cl, %cl
1134; SSE-NEXT: andb $16, %cl
1135; SSE-NEXT: orb %al, %cl
1136; SSE-NEXT: movb %bl, %al
1137; SSE-NEXT: shrb %al
1138; SSE-NEXT: andb $8, %al
1139; SSE-NEXT: orb %cl, %al
1140; SSE-NEXT: movb %bl, %cl
1141; SSE-NEXT: shrb $3, %cl
1142; SSE-NEXT: andb $4, %cl
1143; SSE-NEXT: orb %al, %cl
1144; SSE-NEXT: movb %bl, %al
1145; SSE-NEXT: shrb $5, %al
1146; SSE-NEXT: andb $2, %al
1147; SSE-NEXT: orb %cl, %al
1148; SSE-NEXT: shrb $7, %bl
1149; SSE-NEXT: orb %al, %bl
1150; SSE-NEXT: orb %r15b, %bl
1151; SSE-NEXT: movzbl %bl, %eax
1152; SSE-NEXT: movd %eax, %xmm0
1153; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
1154; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1155; SSE-NEXT: movb %r14b, %al
1156; SSE-NEXT: shlb $7, %al
1157; SSE-NEXT: movb %r14b, %cl
1158; SSE-NEXT: shlb $5, %cl
1159; SSE-NEXT: andb $64, %cl
1160; SSE-NEXT: movb %r14b, %bl
1161; SSE-NEXT: shlb $3, %bl
1162; SSE-NEXT: andb $32, %bl
1163; SSE-NEXT: orb %cl, %bl
1164; SSE-NEXT: movb %r14b, %cl
1165; SSE-NEXT: addb %cl, %cl
1166; SSE-NEXT: andb $16, %cl
1167; SSE-NEXT: orb %bl, %cl
1168; SSE-NEXT: movb %r14b, %bl
1169; SSE-NEXT: shrb %bl
1170; SSE-NEXT: andb $8, %bl
1171; SSE-NEXT: orb %cl, %bl
1172; SSE-NEXT: movb %r14b, %cl
1173; SSE-NEXT: shrb $3, %cl
1174; SSE-NEXT: andb $4, %cl
1175; SSE-NEXT: orb %bl, %cl
1176; SSE-NEXT: movb %r14b, %bl
1177; SSE-NEXT: shrb $5, %bl
1178; SSE-NEXT: andb $2, %bl
1179; SSE-NEXT: orb %cl, %bl
1180; SSE-NEXT: shrb $7, %r14b
1181; SSE-NEXT: orb %bl, %r14b
1182; SSE-NEXT: orb %al, %r14b
1183; SSE-NEXT: movzbl %r14b, %eax
1184; SSE-NEXT: movd %eax, %xmm1
1185; SSE-NEXT: movb %dl, %al
1186; SSE-NEXT: shlb $7, %al
1187; SSE-NEXT: movb %dl, %cl
1188; SSE-NEXT: shlb $5, %cl
1189; SSE-NEXT: andb $64, %cl
1190; SSE-NEXT: movb %dl, %bl
1191; SSE-NEXT: shlb $3, %bl
1192; SSE-NEXT: andb $32, %bl
1193; SSE-NEXT: orb %cl, %bl
1194; SSE-NEXT: movb %dl, %cl
1195; SSE-NEXT: addb %cl, %cl
1196; SSE-NEXT: andb $16, %cl
1197; SSE-NEXT: orb %bl, %cl
1198; SSE-NEXT: movb %dl, %bl
1199; SSE-NEXT: shrb %bl
1200; SSE-NEXT: andb $8, %bl
1201; SSE-NEXT: orb %cl, %bl
1202; SSE-NEXT: movb %dl, %cl
1203; SSE-NEXT: shrb $3, %cl
1204; SSE-NEXT: andb $4, %cl
1205; SSE-NEXT: orb %bl, %cl
1206; SSE-NEXT: movb %dl, %bl
1207; SSE-NEXT: shrb $5, %bl
1208; SSE-NEXT: andb $2, %bl
1209; SSE-NEXT: orb %cl, %bl
1210; SSE-NEXT: shrb $7, %dl
1211; SSE-NEXT: orb %bl, %dl
1212; SSE-NEXT: orb %al, %dl
1213; SSE-NEXT: movzbl %dl, %eax
1214; SSE-NEXT: movd %eax, %xmm2
1215; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
1216; SSE-NEXT: movb %bpl, %al
1217; SSE-NEXT: shlb $7, %al
1218; SSE-NEXT: movb %bpl, %cl
1219; SSE-NEXT: shlb $5, %cl
1220; SSE-NEXT: andb $64, %cl
1221; SSE-NEXT: movb %bpl, %dl
1222; SSE-NEXT: shlb $3, %dl
1223; SSE-NEXT: andb $32, %dl
1224; SSE-NEXT: orb %cl, %dl
1225; SSE-NEXT: movb %bpl, %cl
1226; SSE-NEXT: addb %cl, %cl
1227; SSE-NEXT: andb $16, %cl
1228; SSE-NEXT: orb %dl, %cl
1229; SSE-NEXT: movb %bpl, %dl
1230; SSE-NEXT: shrb %dl
1231; SSE-NEXT: andb $8, %dl
1232; SSE-NEXT: orb %cl, %dl
1233; SSE-NEXT: movb %bpl, %cl
1234; SSE-NEXT: shrb $3, %cl
1235; SSE-NEXT: andb $4, %cl
1236; SSE-NEXT: orb %dl, %cl
1237; SSE-NEXT: movb %bpl, %dl
1238; SSE-NEXT: shrb $5, %dl
1239; SSE-NEXT: andb $2, %dl
1240; SSE-NEXT: orb %cl, %dl
1241; SSE-NEXT: shrb $7, %bpl
1242; SSE-NEXT: orb %dl, %bpl
1243; SSE-NEXT: orb %al, %bpl
1244; SSE-NEXT: movzbl %bpl, %eax
1245; SSE-NEXT: movd %eax, %xmm3
1246; SSE-NEXT: movb -{{[0-9]+}}(%rsp), %bpl
1247; SSE-NEXT: movb -{{[0-9]+}}(%rsp), %dl
1248; SSE-NEXT: movb %dl, %al
1249; SSE-NEXT: shlb $7, %al
1250; SSE-NEXT: movb %dl, %bl
1251; SSE-NEXT: shlb $5, %bl
1252; SSE-NEXT: andb $64, %bl
1253; SSE-NEXT: movb %dl, %cl
1254; SSE-NEXT: shlb $3, %cl
1255; SSE-NEXT: andb $32, %cl
1256; SSE-NEXT: orb %bl, %cl
1257; SSE-NEXT: movb %dl, %bl
1258; SSE-NEXT: addb %bl, %bl
1259; SSE-NEXT: andb $16, %bl
1260; SSE-NEXT: orb %cl, %bl
1261; SSE-NEXT: movb %dl, %cl
1262; SSE-NEXT: shrb %cl
1263; SSE-NEXT: andb $8, %cl
1264; SSE-NEXT: orb %bl, %cl
1265; SSE-NEXT: movb %dl, %bl
1266; SSE-NEXT: shrb $3, %bl
1267; SSE-NEXT: andb $4, %bl
1268; SSE-NEXT: orb %cl, %bl
1269; SSE-NEXT: movb %dl, %cl
1270; SSE-NEXT: shrb $5, %cl
1271; SSE-NEXT: andb $2, %cl
1272; SSE-NEXT: orb %bl, %cl
1273; SSE-NEXT: shrb $7, %dl
1274; SSE-NEXT: orb %cl, %dl
1275; SSE-NEXT: orb %al, %dl
1276; SSE-NEXT: movzbl %dl, %eax
1277; SSE-NEXT: movd %eax, %xmm1
1278; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
1279; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1280; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
1281; SSE-NEXT: movb %r10b, %cl
1282; SSE-NEXT: shlb $7, %cl
1283; SSE-NEXT: movb %r10b, %al
1284; SSE-NEXT: shlb $5, %al
1285; SSE-NEXT: andb $64, %al
1286; SSE-NEXT: movb %r10b, %dl
1287; SSE-NEXT: shlb $3, %dl
1288; SSE-NEXT: andb $32, %dl
1289; SSE-NEXT: orb %al, %dl
1290; SSE-NEXT: movb %r10b, %al
1291; SSE-NEXT: addb %al, %al
1292; SSE-NEXT: andb $16, %al
1293; SSE-NEXT: orb %dl, %al
1294; SSE-NEXT: movb %r10b, %dl
1295; SSE-NEXT: shrb %dl
1296; SSE-NEXT: andb $8, %dl
1297; SSE-NEXT: orb %al, %dl
1298; SSE-NEXT: movb %r10b, %al
1299; SSE-NEXT: shrb $3, %al
1300; SSE-NEXT: andb $4, %al
1301; SSE-NEXT: orb %dl, %al
1302; SSE-NEXT: movb %r10b, %dl
1303; SSE-NEXT: shrb $5, %dl
1304; SSE-NEXT: andb $2, %dl
1305; SSE-NEXT: orb %al, %dl
1306; SSE-NEXT: shrb $7, %r10b
1307; SSE-NEXT: orb %dl, %r10b
1308; SSE-NEXT: orb %cl, %r10b
1309; SSE-NEXT: movzbl %r10b, %eax
1310; SSE-NEXT: movd %eax, %xmm2
1311; SSE-NEXT: movb %r11b, %cl
1312; SSE-NEXT: shlb $7, %cl
1313; SSE-NEXT: movb %r11b, %al
1314; SSE-NEXT: shlb $5, %al
1315; SSE-NEXT: andb $64, %al
1316; SSE-NEXT: movb %r11b, %dl
1317; SSE-NEXT: shlb $3, %dl
1318; SSE-NEXT: andb $32, %dl
1319; SSE-NEXT: orb %al, %dl
1320; SSE-NEXT: movb %r11b, %al
1321; SSE-NEXT: addb %al, %al
1322; SSE-NEXT: andb $16, %al
1323; SSE-NEXT: orb %dl, %al
1324; SSE-NEXT: movb %r11b, %dl
1325; SSE-NEXT: shrb %dl
1326; SSE-NEXT: andb $8, %dl
1327; SSE-NEXT: orb %al, %dl
1328; SSE-NEXT: movb %r11b, %al
1329; SSE-NEXT: shrb $3, %al
1330; SSE-NEXT: andb $4, %al
1331; SSE-NEXT: orb %dl, %al
1332; SSE-NEXT: movb %r11b, %dl
1333; SSE-NEXT: shrb $5, %dl
1334; SSE-NEXT: andb $2, %dl
1335; SSE-NEXT: orb %al, %dl
1336; SSE-NEXT: shrb $7, %r11b
1337; SSE-NEXT: orb %dl, %r11b
1338; SSE-NEXT: orb %cl, %r11b
1339; SSE-NEXT: movzbl %r11b, %eax
1340; SSE-NEXT: movd %eax, %xmm0
1341; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
1342; SSE-NEXT: movb %r9b, %cl
1343; SSE-NEXT: shlb $7, %cl
1344; SSE-NEXT: movb %r9b, %al
1345; SSE-NEXT: shlb $5, %al
1346; SSE-NEXT: andb $64, %al
1347; SSE-NEXT: movb %r9b, %dl
1348; SSE-NEXT: shlb $3, %dl
1349; SSE-NEXT: andb $32, %dl
1350; SSE-NEXT: orb %al, %dl
1351; SSE-NEXT: movb %r9b, %al
1352; SSE-NEXT: addb %al, %al
1353; SSE-NEXT: andb $16, %al
1354; SSE-NEXT: orb %dl, %al
1355; SSE-NEXT: movb %r9b, %dl
1356; SSE-NEXT: shrb %dl
1357; SSE-NEXT: andb $8, %dl
1358; SSE-NEXT: orb %al, %dl
1359; SSE-NEXT: movb %r9b, %al
1360; SSE-NEXT: shrb $3, %al
1361; SSE-NEXT: andb $4, %al
1362; SSE-NEXT: orb %dl, %al
1363; SSE-NEXT: movb %r9b, %dl
1364; SSE-NEXT: shrb $5, %dl
1365; SSE-NEXT: andb $2, %dl
1366; SSE-NEXT: orb %al, %dl
1367; SSE-NEXT: shrb $7, %r9b
1368; SSE-NEXT: orb %dl, %r9b
1369; SSE-NEXT: orb %cl, %r9b
1370; SSE-NEXT: movzbl %r9b, %eax
1371; SSE-NEXT: movd %eax, %xmm3
1372; SSE-NEXT: movb %bpl, %cl
1373; SSE-NEXT: shlb $7, %cl
1374; SSE-NEXT: movb %bpl, %al
1375; SSE-NEXT: shlb $5, %al
1376; SSE-NEXT: andb $64, %al
1377; SSE-NEXT: movb %bpl, %dl
1378; SSE-NEXT: shlb $3, %dl
1379; SSE-NEXT: andb $32, %dl
1380; SSE-NEXT: orb %al, %dl
1381; SSE-NEXT: movb %bpl, %al
1382; SSE-NEXT: addb %al, %al
1383; SSE-NEXT: andb $16, %al
1384; SSE-NEXT: orb %dl, %al
1385; SSE-NEXT: movb %bpl, %dl
1386; SSE-NEXT: shrb %dl
1387; SSE-NEXT: andb $8, %dl
1388; SSE-NEXT: orb %al, %dl
1389; SSE-NEXT: movb %bpl, %al
1390; SSE-NEXT: shrb $3, %al
1391; SSE-NEXT: andb $4, %al
1392; SSE-NEXT: orb %dl, %al
1393; SSE-NEXT: movb %bpl, %dl
1394; SSE-NEXT: shrb $5, %dl
1395; SSE-NEXT: andb $2, %dl
1396; SSE-NEXT: orb %al, %dl
1397; SSE-NEXT: shrb $7, %bpl
1398; SSE-NEXT: orb %dl, %bpl
1399; SSE-NEXT: orb %cl, %bpl
1400; SSE-NEXT: movzbl %bpl, %eax
1401; SSE-NEXT: movd %eax, %xmm2
1402; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
1403; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
1404; SSE-NEXT: movb %dil, %al
1405; SSE-NEXT: shlb $7, %al
1406; SSE-NEXT: movb %dil, %cl
1407; SSE-NEXT: shlb $5, %cl
1408; SSE-NEXT: andb $64, %cl
1409; SSE-NEXT: movb %dil, %dl
1410; SSE-NEXT: shlb $3, %dl
1411; SSE-NEXT: andb $32, %dl
1412; SSE-NEXT: orb %cl, %dl
1413; SSE-NEXT: movb %dil, %cl
1414; SSE-NEXT: addb %cl, %cl
1415; SSE-NEXT: andb $16, %cl
1416; SSE-NEXT: orb %dl, %cl
1417; SSE-NEXT: movb %dil, %dl
1418; SSE-NEXT: shrb %dl
1419; SSE-NEXT: andb $8, %dl
1420; SSE-NEXT: orb %cl, %dl
1421; SSE-NEXT: movb %dil, %cl
1422; SSE-NEXT: shrb $3, %cl
1423; SSE-NEXT: andb $4, %cl
1424; SSE-NEXT: orb %dl, %cl
1425; SSE-NEXT: movb %dil, %dl
1426; SSE-NEXT: shrb $5, %dl
1427; SSE-NEXT: andb $2, %dl
1428; SSE-NEXT: orb %cl, %dl
1429; SSE-NEXT: shrb $7, %dil
1430; SSE-NEXT: orb %dl, %dil
1431; SSE-NEXT: orb %al, %dil
1432; SSE-NEXT: movzbl %dil, %eax
1433; SSE-NEXT: movd %eax, %xmm0
1434; SSE-NEXT: movb %r8b, %al
1435; SSE-NEXT: shlb $7, %al
1436; SSE-NEXT: movb %r8b, %cl
1437; SSE-NEXT: shlb $5, %cl
1438; SSE-NEXT: andb $64, %cl
1439; SSE-NEXT: movb %r8b, %dl
1440; SSE-NEXT: shlb $3, %dl
1441; SSE-NEXT: andb $32, %dl
1442; SSE-NEXT: orb %cl, %dl
1443; SSE-NEXT: movb %r8b, %cl
1444; SSE-NEXT: addb %cl, %cl
1445; SSE-NEXT: andb $16, %cl
1446; SSE-NEXT: orb %dl, %cl
1447; SSE-NEXT: movb %r8b, %dl
1448; SSE-NEXT: shrb %dl
1449; SSE-NEXT: andb $8, %dl
1450; SSE-NEXT: orb %cl, %dl
1451; SSE-NEXT: movb %r8b, %cl
1452; SSE-NEXT: shrb $3, %cl
1453; SSE-NEXT: andb $4, %cl
1454; SSE-NEXT: orb %dl, %cl
1455; SSE-NEXT: movb %r8b, %dl
1456; SSE-NEXT: shrb $5, %dl
1457; SSE-NEXT: andb $2, %dl
1458; SSE-NEXT: orb %cl, %dl
1459; SSE-NEXT: shrb $7, %r8b
1460; SSE-NEXT: orb %dl, %r8b
1461; SSE-NEXT: orb %al, %r8b
1462; SSE-NEXT: movzbl %r8b, %eax
1463; SSE-NEXT: movd %eax, %xmm3
1464; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
1465; SSE-NEXT: movb %sil, %al
1466; SSE-NEXT: shlb $7, %al
1467; SSE-NEXT: movb %sil, %cl
1468; SSE-NEXT: shlb $5, %cl
1469; SSE-NEXT: andb $64, %cl
1470; SSE-NEXT: movb %sil, %dl
1471; SSE-NEXT: shlb $3, %dl
1472; SSE-NEXT: andb $32, %dl
1473; SSE-NEXT: orb %cl, %dl
1474; SSE-NEXT: movb %sil, %cl
1475; SSE-NEXT: addb %cl, %cl
1476; SSE-NEXT: andb $16, %cl
1477; SSE-NEXT: orb %dl, %cl
1478; SSE-NEXT: movb %sil, %dl
1479; SSE-NEXT: shrb %dl
1480; SSE-NEXT: andb $8, %dl
1481; SSE-NEXT: orb %cl, %dl
1482; SSE-NEXT: movb %sil, %cl
1483; SSE-NEXT: shrb $3, %cl
1484; SSE-NEXT: andb $4, %cl
1485; SSE-NEXT: orb %dl, %cl
1486; SSE-NEXT: movb %sil, %dl
1487; SSE-NEXT: shrb $5, %dl
1488; SSE-NEXT: andb $2, %dl
1489; SSE-NEXT: orb %cl, %dl
1490; SSE-NEXT: shrb $7, %sil
1491; SSE-NEXT: orb %dl, %sil
1492; SSE-NEXT: orb %al, %sil
1493; SSE-NEXT: movzbl %sil, %eax
1494; SSE-NEXT: movd %eax, %xmm4
1495; SSE-NEXT: movb -{{[0-9]+}}(%rsp), %cl
1496; SSE-NEXT: movb %cl, %bl
1497; SSE-NEXT: shlb $7, %bl
1498; SSE-NEXT: movb %cl, %dl
1499; SSE-NEXT: shlb $5, %dl
1500; SSE-NEXT: andb $64, %dl
1501; SSE-NEXT: movb %cl, %al
1502; SSE-NEXT: shlb $3, %al
1503; SSE-NEXT: andb $32, %al
1504; SSE-NEXT: orb %dl, %al
1505; SSE-NEXT: movb %cl, %dl
1506; SSE-NEXT: addb %dl, %dl
1507; SSE-NEXT: andb $16, %dl
1508; SSE-NEXT: orb %al, %dl
1509; SSE-NEXT: movb %cl, %al
1510; SSE-NEXT: shrb %al
1511; SSE-NEXT: andb $8, %al
1512; SSE-NEXT: orb %dl, %al
1513; SSE-NEXT: movb %cl, %dl
1514; SSE-NEXT: shrb $3, %dl
1515; SSE-NEXT: andb $4, %dl
1516; SSE-NEXT: orb %al, %dl
1517; SSE-NEXT: movb %cl, %al
1518; SSE-NEXT: shrb $5, %al
1519; SSE-NEXT: andb $2, %al
1520; SSE-NEXT: orb %dl, %al
1521; SSE-NEXT: shrb $7, %cl
1522; SSE-NEXT: orb %al, %cl
1523; SSE-NEXT: orb %bl, %cl
1524; SSE-NEXT: movzbl %cl, %eax
1525; SSE-NEXT: movd %eax, %xmm0
1526; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
1527; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
1528; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
1529; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1530; SSE-NEXT: popq %rbx
1531; SSE-NEXT: popq %r14
1532; SSE-NEXT: popq %r15
1533; SSE-NEXT: popq %rbp
1534; SSE-NEXT: retq
1535;
1536; AVX-LABEL: test_bitreverse_v16i8:
1537; AVX: # BB#0:
1538; AVX-NEXT: vpextrb $1, %xmm0, %eax
1539; AVX-NEXT: movb %al, %sil
1540; AVX-NEXT: shlb $7, %sil
1541; AVX-NEXT: movb %al, %dl
1542; AVX-NEXT: shlb $5, %dl
1543; AVX-NEXT: andb $64, %dl
1544; AVX-NEXT: movb %al, %cl
1545; AVX-NEXT: shlb $3, %cl
1546; AVX-NEXT: andb $32, %cl
1547; AVX-NEXT: orb %dl, %cl
1548; AVX-NEXT: movb %al, %dl
1549; AVX-NEXT: addb %dl, %dl
1550; AVX-NEXT: andb $16, %dl
1551; AVX-NEXT: orb %cl, %dl
1552; AVX-NEXT: movb %al, %cl
1553; AVX-NEXT: shrb %cl
1554; AVX-NEXT: andb $8, %cl
1555; AVX-NEXT: orb %dl, %cl
1556; AVX-NEXT: movb %al, %dl
1557; AVX-NEXT: shrb $3, %dl
1558; AVX-NEXT: andb $4, %dl
1559; AVX-NEXT: orb %cl, %dl
1560; AVX-NEXT: movb %al, %cl
1561; AVX-NEXT: shrb $5, %cl
1562; AVX-NEXT: andb $2, %cl
1563; AVX-NEXT: orb %dl, %cl
1564; AVX-NEXT: shrb $7, %al
1565; AVX-NEXT: orb %cl, %al
1566; AVX-NEXT: orb %sil, %al
1567; AVX-NEXT: movzbl %al, %esi
1568; AVX-NEXT: vpextrb $0, %xmm0, %ecx
1569; AVX-NEXT: movb %cl, %dil
1570; AVX-NEXT: shlb $7, %dil
1571; AVX-NEXT: movb %cl, %al
1572; AVX-NEXT: shlb $5, %al
1573; AVX-NEXT: andb $64, %al
1574; AVX-NEXT: movb %cl, %dl
1575; AVX-NEXT: shlb $3, %dl
1576; AVX-NEXT: andb $32, %dl
1577; AVX-NEXT: orb %al, %dl
1578; AVX-NEXT: movb %cl, %al
1579; AVX-NEXT: addb %al, %al
1580; AVX-NEXT: andb $16, %al
1581; AVX-NEXT: orb %dl, %al
1582; AVX-NEXT: movb %cl, %dl
1583; AVX-NEXT: shrb %dl
1584; AVX-NEXT: andb $8, %dl
1585; AVX-NEXT: orb %al, %dl
1586; AVX-NEXT: movb %cl, %al
1587; AVX-NEXT: shrb $3, %al
1588; AVX-NEXT: andb $4, %al
1589; AVX-NEXT: orb %dl, %al
1590; AVX-NEXT: movb %cl, %dl
1591; AVX-NEXT: shrb $5, %dl
1592; AVX-NEXT: andb $2, %dl
1593; AVX-NEXT: orb %al, %dl
1594; AVX-NEXT: shrb $7, %cl
1595; AVX-NEXT: orb %dl, %cl
1596; AVX-NEXT: orb %dil, %cl
1597; AVX-NEXT: movzbl %cl, %eax
1598; AVX-NEXT: vmovd %eax, %xmm1
1599; AVX-NEXT: vpinsrb $1, %esi, %xmm1, %xmm1
1600; AVX-NEXT: vpextrb $2, %xmm0, %eax
1601; AVX-NEXT: movb %al, %sil
1602; AVX-NEXT: shlb $7, %sil
1603; AVX-NEXT: movb %al, %dl
1604; AVX-NEXT: shlb $5, %dl
1605; AVX-NEXT: andb $64, %dl
1606; AVX-NEXT: movb %al, %cl
1607; AVX-NEXT: shlb $3, %cl
1608; AVX-NEXT: andb $32, %cl
1609; AVX-NEXT: orb %dl, %cl
1610; AVX-NEXT: movb %al, %dl
1611; AVX-NEXT: addb %dl, %dl
1612; AVX-NEXT: andb $16, %dl
1613; AVX-NEXT: orb %cl, %dl
1614; AVX-NEXT: movb %al, %cl
1615; AVX-NEXT: shrb %cl
1616; AVX-NEXT: andb $8, %cl
1617; AVX-NEXT: orb %dl, %cl
1618; AVX-NEXT: movb %al, %dl
1619; AVX-NEXT: shrb $3, %dl
1620; AVX-NEXT: andb $4, %dl
1621; AVX-NEXT: orb %cl, %dl
1622; AVX-NEXT: movb %al, %cl
1623; AVX-NEXT: shrb $5, %cl
1624; AVX-NEXT: andb $2, %cl
1625; AVX-NEXT: orb %dl, %cl
1626; AVX-NEXT: shrb $7, %al
1627; AVX-NEXT: orb %cl, %al
1628; AVX-NEXT: orb %sil, %al
1629; AVX-NEXT: movzbl %al, %eax
1630; AVX-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
1631; AVX-NEXT: vpextrb $3, %xmm0, %eax
1632; AVX-NEXT: movb %al, %sil
1633; AVX-NEXT: shlb $7, %sil
1634; AVX-NEXT: movb %al, %dl
1635; AVX-NEXT: shlb $5, %dl
1636; AVX-NEXT: andb $64, %dl
1637; AVX-NEXT: movb %al, %cl
1638; AVX-NEXT: shlb $3, %cl
1639; AVX-NEXT: andb $32, %cl
1640; AVX-NEXT: orb %dl, %cl
1641; AVX-NEXT: movb %al, %dl
1642; AVX-NEXT: addb %dl, %dl
1643; AVX-NEXT: andb $16, %dl
1644; AVX-NEXT: orb %cl, %dl
1645; AVX-NEXT: movb %al, %cl
1646; AVX-NEXT: shrb %cl
1647; AVX-NEXT: andb $8, %cl
1648; AVX-NEXT: orb %dl, %cl
1649; AVX-NEXT: movb %al, %dl
1650; AVX-NEXT: shrb $3, %dl
1651; AVX-NEXT: andb $4, %dl
1652; AVX-NEXT: orb %cl, %dl
1653; AVX-NEXT: movb %al, %cl
1654; AVX-NEXT: shrb $5, %cl
1655; AVX-NEXT: andb $2, %cl
1656; AVX-NEXT: orb %dl, %cl
1657; AVX-NEXT: shrb $7, %al
1658; AVX-NEXT: orb %cl, %al
1659; AVX-NEXT: orb %sil, %al
1660; AVX-NEXT: movzbl %al, %eax
1661; AVX-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
1662; AVX-NEXT: vpextrb $4, %xmm0, %eax
1663; AVX-NEXT: movb %al, %sil
1664; AVX-NEXT: shlb $7, %sil
1665; AVX-NEXT: movb %al, %dl
1666; AVX-NEXT: shlb $5, %dl
1667; AVX-NEXT: andb $64, %dl
1668; AVX-NEXT: movb %al, %cl
1669; AVX-NEXT: shlb $3, %cl
1670; AVX-NEXT: andb $32, %cl
1671; AVX-NEXT: orb %dl, %cl
1672; AVX-NEXT: movb %al, %dl
1673; AVX-NEXT: addb %dl, %dl
1674; AVX-NEXT: andb $16, %dl
1675; AVX-NEXT: orb %cl, %dl
1676; AVX-NEXT: movb %al, %cl
1677; AVX-NEXT: shrb %cl
1678; AVX-NEXT: andb $8, %cl
1679; AVX-NEXT: orb %dl, %cl
1680; AVX-NEXT: movb %al, %dl
1681; AVX-NEXT: shrb $3, %dl
1682; AVX-NEXT: andb $4, %dl
1683; AVX-NEXT: orb %cl, %dl
1684; AVX-NEXT: movb %al, %cl
1685; AVX-NEXT: shrb $5, %cl
1686; AVX-NEXT: andb $2, %cl
1687; AVX-NEXT: orb %dl, %cl
1688; AVX-NEXT: shrb $7, %al
1689; AVX-NEXT: orb %cl, %al
1690; AVX-NEXT: orb %sil, %al
1691; AVX-NEXT: movzbl %al, %eax
1692; AVX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
1693; AVX-NEXT: vpextrb $5, %xmm0, %eax
1694; AVX-NEXT: movb %al, %sil
1695; AVX-NEXT: shlb $7, %sil
1696; AVX-NEXT: movb %al, %dl
1697; AVX-NEXT: shlb $5, %dl
1698; AVX-NEXT: andb $64, %dl
1699; AVX-NEXT: movb %al, %cl
1700; AVX-NEXT: shlb $3, %cl
1701; AVX-NEXT: andb $32, %cl
1702; AVX-NEXT: orb %dl, %cl
1703; AVX-NEXT: movb %al, %dl
1704; AVX-NEXT: addb %dl, %dl
1705; AVX-NEXT: andb $16, %dl
1706; AVX-NEXT: orb %cl, %dl
1707; AVX-NEXT: movb %al, %cl
1708; AVX-NEXT: shrb %cl
1709; AVX-NEXT: andb $8, %cl
1710; AVX-NEXT: orb %dl, %cl
1711; AVX-NEXT: movb %al, %dl
1712; AVX-NEXT: shrb $3, %dl
1713; AVX-NEXT: andb $4, %dl
1714; AVX-NEXT: orb %cl, %dl
1715; AVX-NEXT: movb %al, %cl
1716; AVX-NEXT: shrb $5, %cl
1717; AVX-NEXT: andb $2, %cl
1718; AVX-NEXT: orb %dl, %cl
1719; AVX-NEXT: shrb $7, %al
1720; AVX-NEXT: orb %cl, %al
1721; AVX-NEXT: orb %sil, %al
1722; AVX-NEXT: movzbl %al, %eax
1723; AVX-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
1724; AVX-NEXT: vpextrb $6, %xmm0, %eax
1725; AVX-NEXT: movb %al, %sil
1726; AVX-NEXT: shlb $7, %sil
1727; AVX-NEXT: movb %al, %dl
1728; AVX-NEXT: shlb $5, %dl
1729; AVX-NEXT: andb $64, %dl
1730; AVX-NEXT: movb %al, %cl
1731; AVX-NEXT: shlb $3, %cl
1732; AVX-NEXT: andb $32, %cl
1733; AVX-NEXT: orb %dl, %cl
1734; AVX-NEXT: movb %al, %dl
1735; AVX-NEXT: addb %dl, %dl
1736; AVX-NEXT: andb $16, %dl
1737; AVX-NEXT: orb %cl, %dl
1738; AVX-NEXT: movb %al, %cl
1739; AVX-NEXT: shrb %cl
1740; AVX-NEXT: andb $8, %cl
1741; AVX-NEXT: orb %dl, %cl
1742; AVX-NEXT: movb %al, %dl
1743; AVX-NEXT: shrb $3, %dl
1744; AVX-NEXT: andb $4, %dl
1745; AVX-NEXT: orb %cl, %dl
1746; AVX-NEXT: movb %al, %cl
1747; AVX-NEXT: shrb $5, %cl
1748; AVX-NEXT: andb $2, %cl
1749; AVX-NEXT: orb %dl, %cl
1750; AVX-NEXT: shrb $7, %al
1751; AVX-NEXT: orb %cl, %al
1752; AVX-NEXT: orb %sil, %al
1753; AVX-NEXT: movzbl %al, %eax
1754; AVX-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
1755; AVX-NEXT: vpextrb $7, %xmm0, %eax
1756; AVX-NEXT: movb %al, %sil
1757; AVX-NEXT: shlb $7, %sil
1758; AVX-NEXT: movb %al, %dl
1759; AVX-NEXT: shlb $5, %dl
1760; AVX-NEXT: andb $64, %dl
1761; AVX-NEXT: movb %al, %cl
1762; AVX-NEXT: shlb $3, %cl
1763; AVX-NEXT: andb $32, %cl
1764; AVX-NEXT: orb %dl, %cl
1765; AVX-NEXT: movb %al, %dl
1766; AVX-NEXT: addb %dl, %dl
1767; AVX-NEXT: andb $16, %dl
1768; AVX-NEXT: orb %cl, %dl
1769; AVX-NEXT: movb %al, %cl
1770; AVX-NEXT: shrb %cl
1771; AVX-NEXT: andb $8, %cl
1772; AVX-NEXT: orb %dl, %cl
1773; AVX-NEXT: movb %al, %dl
1774; AVX-NEXT: shrb $3, %dl
1775; AVX-NEXT: andb $4, %dl
1776; AVX-NEXT: orb %cl, %dl
1777; AVX-NEXT: movb %al, %cl
1778; AVX-NEXT: shrb $5, %cl
1779; AVX-NEXT: andb $2, %cl
1780; AVX-NEXT: orb %dl, %cl
1781; AVX-NEXT: shrb $7, %al
1782; AVX-NEXT: orb %cl, %al
1783; AVX-NEXT: orb %sil, %al
1784; AVX-NEXT: movzbl %al, %eax
1785; AVX-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
1786; AVX-NEXT: vpextrb $8, %xmm0, %eax
1787; AVX-NEXT: movb %al, %sil
1788; AVX-NEXT: shlb $7, %sil
1789; AVX-NEXT: movb %al, %dl
1790; AVX-NEXT: shlb $5, %dl
1791; AVX-NEXT: andb $64, %dl
1792; AVX-NEXT: movb %al, %cl
1793; AVX-NEXT: shlb $3, %cl
1794; AVX-NEXT: andb $32, %cl
1795; AVX-NEXT: orb %dl, %cl
1796; AVX-NEXT: movb %al, %dl
1797; AVX-NEXT: addb %dl, %dl
1798; AVX-NEXT: andb $16, %dl
1799; AVX-NEXT: orb %cl, %dl
1800; AVX-NEXT: movb %al, %cl
1801; AVX-NEXT: shrb %cl
1802; AVX-NEXT: andb $8, %cl
1803; AVX-NEXT: orb %dl, %cl
1804; AVX-NEXT: movb %al, %dl
1805; AVX-NEXT: shrb $3, %dl
1806; AVX-NEXT: andb $4, %dl
1807; AVX-NEXT: orb %cl, %dl
1808; AVX-NEXT: movb %al, %cl
1809; AVX-NEXT: shrb $5, %cl
1810; AVX-NEXT: andb $2, %cl
1811; AVX-NEXT: orb %dl, %cl
1812; AVX-NEXT: shrb $7, %al
1813; AVX-NEXT: orb %cl, %al
1814; AVX-NEXT: orb %sil, %al
1815; AVX-NEXT: movzbl %al, %eax
1816; AVX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
1817; AVX-NEXT: vpextrb $9, %xmm0, %eax
1818; AVX-NEXT: movb %al, %sil
1819; AVX-NEXT: shlb $7, %sil
1820; AVX-NEXT: movb %al, %dl
1821; AVX-NEXT: shlb $5, %dl
1822; AVX-NEXT: andb $64, %dl
1823; AVX-NEXT: movb %al, %cl
1824; AVX-NEXT: shlb $3, %cl
1825; AVX-NEXT: andb $32, %cl
1826; AVX-NEXT: orb %dl, %cl
1827; AVX-NEXT: movb %al, %dl
1828; AVX-NEXT: addb %dl, %dl
1829; AVX-NEXT: andb $16, %dl
1830; AVX-NEXT: orb %cl, %dl
1831; AVX-NEXT: movb %al, %cl
1832; AVX-NEXT: shrb %cl
1833; AVX-NEXT: andb $8, %cl
1834; AVX-NEXT: orb %dl, %cl
1835; AVX-NEXT: movb %al, %dl
1836; AVX-NEXT: shrb $3, %dl
1837; AVX-NEXT: andb $4, %dl
1838; AVX-NEXT: orb %cl, %dl
1839; AVX-NEXT: movb %al, %cl
1840; AVX-NEXT: shrb $5, %cl
1841; AVX-NEXT: andb $2, %cl
1842; AVX-NEXT: orb %dl, %cl
1843; AVX-NEXT: shrb $7, %al
1844; AVX-NEXT: orb %cl, %al
1845; AVX-NEXT: orb %sil, %al
1846; AVX-NEXT: movzbl %al, %eax
1847; AVX-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
1848; AVX-NEXT: vpextrb $10, %xmm0, %eax
1849; AVX-NEXT: movb %al, %sil
1850; AVX-NEXT: shlb $7, %sil
1851; AVX-NEXT: movb %al, %dl
1852; AVX-NEXT: shlb $5, %dl
1853; AVX-NEXT: andb $64, %dl
1854; AVX-NEXT: movb %al, %cl
1855; AVX-NEXT: shlb $3, %cl
1856; AVX-NEXT: andb $32, %cl
1857; AVX-NEXT: orb %dl, %cl
1858; AVX-NEXT: movb %al, %dl
1859; AVX-NEXT: addb %dl, %dl
1860; AVX-NEXT: andb $16, %dl
1861; AVX-NEXT: orb %cl, %dl
1862; AVX-NEXT: movb %al, %cl
1863; AVX-NEXT: shrb %cl
1864; AVX-NEXT: andb $8, %cl
1865; AVX-NEXT: orb %dl, %cl
1866; AVX-NEXT: movb %al, %dl
1867; AVX-NEXT: shrb $3, %dl
1868; AVX-NEXT: andb $4, %dl
1869; AVX-NEXT: orb %cl, %dl
1870; AVX-NEXT: movb %al, %cl
1871; AVX-NEXT: shrb $5, %cl
1872; AVX-NEXT: andb $2, %cl
1873; AVX-NEXT: orb %dl, %cl
1874; AVX-NEXT: shrb $7, %al
1875; AVX-NEXT: orb %cl, %al
1876; AVX-NEXT: orb %sil, %al
1877; AVX-NEXT: movzbl %al, %eax
1878; AVX-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
1879; AVX-NEXT: vpextrb $11, %xmm0, %eax
1880; AVX-NEXT: movb %al, %sil
1881; AVX-NEXT: shlb $7, %sil
1882; AVX-NEXT: movb %al, %dl
1883; AVX-NEXT: shlb $5, %dl
1884; AVX-NEXT: andb $64, %dl
1885; AVX-NEXT: movb %al, %cl
1886; AVX-NEXT: shlb $3, %cl
1887; AVX-NEXT: andb $32, %cl
1888; AVX-NEXT: orb %dl, %cl
1889; AVX-NEXT: movb %al, %dl
1890; AVX-NEXT: addb %dl, %dl
1891; AVX-NEXT: andb $16, %dl
1892; AVX-NEXT: orb %cl, %dl
1893; AVX-NEXT: movb %al, %cl
1894; AVX-NEXT: shrb %cl
1895; AVX-NEXT: andb $8, %cl
1896; AVX-NEXT: orb %dl, %cl
1897; AVX-NEXT: movb %al, %dl
1898; AVX-NEXT: shrb $3, %dl
1899; AVX-NEXT: andb $4, %dl
1900; AVX-NEXT: orb %cl, %dl
1901; AVX-NEXT: movb %al, %cl
1902; AVX-NEXT: shrb $5, %cl
1903; AVX-NEXT: andb $2, %cl
1904; AVX-NEXT: orb %dl, %cl
1905; AVX-NEXT: shrb $7, %al
1906; AVX-NEXT: orb %cl, %al
1907; AVX-NEXT: orb %sil, %al
1908; AVX-NEXT: movzbl %al, %eax
1909; AVX-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
1910; AVX-NEXT: vpextrb $12, %xmm0, %eax
1911; AVX-NEXT: movb %al, %sil
1912; AVX-NEXT: shlb $7, %sil
1913; AVX-NEXT: movb %al, %dl
1914; AVX-NEXT: shlb $5, %dl
1915; AVX-NEXT: andb $64, %dl
1916; AVX-NEXT: movb %al, %cl
1917; AVX-NEXT: shlb $3, %cl
1918; AVX-NEXT: andb $32, %cl
1919; AVX-NEXT: orb %dl, %cl
1920; AVX-NEXT: movb %al, %dl
1921; AVX-NEXT: addb %dl, %dl
1922; AVX-NEXT: andb $16, %dl
1923; AVX-NEXT: orb %cl, %dl
1924; AVX-NEXT: movb %al, %cl
1925; AVX-NEXT: shrb %cl
1926; AVX-NEXT: andb $8, %cl
1927; AVX-NEXT: orb %dl, %cl
1928; AVX-NEXT: movb %al, %dl
1929; AVX-NEXT: shrb $3, %dl
1930; AVX-NEXT: andb $4, %dl
1931; AVX-NEXT: orb %cl, %dl
1932; AVX-NEXT: movb %al, %cl
1933; AVX-NEXT: shrb $5, %cl
1934; AVX-NEXT: andb $2, %cl
1935; AVX-NEXT: orb %dl, %cl
1936; AVX-NEXT: shrb $7, %al
1937; AVX-NEXT: orb %cl, %al
1938; AVX-NEXT: orb %sil, %al
1939; AVX-NEXT: movzbl %al, %eax
1940; AVX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
1941; AVX-NEXT: vpextrb $13, %xmm0, %eax
1942; AVX-NEXT: movb %al, %sil
1943; AVX-NEXT: shlb $7, %sil
1944; AVX-NEXT: movb %al, %dl
1945; AVX-NEXT: shlb $5, %dl
1946; AVX-NEXT: andb $64, %dl
1947; AVX-NEXT: movb %al, %cl
1948; AVX-NEXT: shlb $3, %cl
1949; AVX-NEXT: andb $32, %cl
1950; AVX-NEXT: orb %dl, %cl
1951; AVX-NEXT: movb %al, %dl
1952; AVX-NEXT: addb %dl, %dl
1953; AVX-NEXT: andb $16, %dl
1954; AVX-NEXT: orb %cl, %dl
1955; AVX-NEXT: movb %al, %cl
1956; AVX-NEXT: shrb %cl
1957; AVX-NEXT: andb $8, %cl
1958; AVX-NEXT: orb %dl, %cl
1959; AVX-NEXT: movb %al, %dl
1960; AVX-NEXT: shrb $3, %dl
1961; AVX-NEXT: andb $4, %dl
1962; AVX-NEXT: orb %cl, %dl
1963; AVX-NEXT: movb %al, %cl
1964; AVX-NEXT: shrb $5, %cl
1965; AVX-NEXT: andb $2, %cl
1966; AVX-NEXT: orb %dl, %cl
1967; AVX-NEXT: shrb $7, %al
1968; AVX-NEXT: orb %cl, %al
1969; AVX-NEXT: orb %sil, %al
1970; AVX-NEXT: movzbl %al, %eax
1971; AVX-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
1972; AVX-NEXT: vpextrb $14, %xmm0, %eax
1973; AVX-NEXT: movb %al, %sil
1974; AVX-NEXT: shlb $7, %sil
1975; AVX-NEXT: movb %al, %dl
1976; AVX-NEXT: shlb $5, %dl
1977; AVX-NEXT: andb $64, %dl
1978; AVX-NEXT: movb %al, %cl
1979; AVX-NEXT: shlb $3, %cl
1980; AVX-NEXT: andb $32, %cl
1981; AVX-NEXT: orb %dl, %cl
1982; AVX-NEXT: movb %al, %dl
1983; AVX-NEXT: addb %dl, %dl
1984; AVX-NEXT: andb $16, %dl
1985; AVX-NEXT: orb %cl, %dl
1986; AVX-NEXT: movb %al, %cl
1987; AVX-NEXT: shrb %cl
1988; AVX-NEXT: andb $8, %cl
1989; AVX-NEXT: orb %dl, %cl
1990; AVX-NEXT: movb %al, %dl
1991; AVX-NEXT: shrb $3, %dl
1992; AVX-NEXT: andb $4, %dl
1993; AVX-NEXT: orb %cl, %dl
1994; AVX-NEXT: movb %al, %cl
1995; AVX-NEXT: shrb $5, %cl
1996; AVX-NEXT: andb $2, %cl
1997; AVX-NEXT: orb %dl, %cl
1998; AVX-NEXT: shrb $7, %al
1999; AVX-NEXT: orb %cl, %al
2000; AVX-NEXT: orb %sil, %al
2001; AVX-NEXT: movzbl %al, %eax
2002; AVX-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
2003; AVX-NEXT: vpextrb $15, %xmm0, %eax
2004; AVX-NEXT: movb %al, %sil
2005; AVX-NEXT: shlb $7, %sil
2006; AVX-NEXT: movb %al, %dl
2007; AVX-NEXT: shlb $5, %dl
2008; AVX-NEXT: andb $64, %dl
2009; AVX-NEXT: movb %al, %cl
2010; AVX-NEXT: shlb $3, %cl
2011; AVX-NEXT: andb $32, %cl
2012; AVX-NEXT: orb %dl, %cl
2013; AVX-NEXT: movb %al, %dl
2014; AVX-NEXT: addb %dl, %dl
2015; AVX-NEXT: andb $16, %dl
2016; AVX-NEXT: orb %cl, %dl
2017; AVX-NEXT: movb %al, %cl
2018; AVX-NEXT: shrb %cl
2019; AVX-NEXT: andb $8, %cl
2020; AVX-NEXT: orb %dl, %cl
2021; AVX-NEXT: movb %al, %dl
2022; AVX-NEXT: shrb $3, %dl
2023; AVX-NEXT: andb $4, %dl
2024; AVX-NEXT: orb %cl, %dl
2025; AVX-NEXT: movb %al, %cl
2026; AVX-NEXT: shrb $5, %cl
2027; AVX-NEXT: andb $2, %cl
2028; AVX-NEXT: orb %dl, %cl
2029; AVX-NEXT: shrb $7, %al
2030; AVX-NEXT: orb %cl, %al
2031; AVX-NEXT: orb %sil, %al
2032; AVX-NEXT: movzbl %al, %eax
2033; AVX-NEXT: vpinsrb $15, %eax, %xmm1, %xmm0
2034; AVX-NEXT: retq
2035;
2036; XOP-LABEL: test_bitreverse_v16i8:
2037; XOP: # BB#0:
2038; XOP-NEXT: vpperm {{.*}}(%rip), %xmm0, %xmm0, %xmm0
2039; XOP-NEXT: retq
2040 %b = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> %a)
2041 ret <16 x i8> %b
2042}
2043
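; Only the XOP run lines use a vector byte permute (vpperm); the SSE and AVX
; lowerings below extract each 16-bit lane and reverse it with scalar shifts,
; masks and ors. As a rough reference (an assumed equivalent formulation, not
; part of the generated checks), each lane computes
;   bitreverse16(x) = OR for i in [0,15] of (((x >> i) & 1) << (15 - i))
; which shows up in the checks as roughly one shift/and/or triple per bit of
; the extracted word.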
2044define <8 x i16> @test_bitreverse_v8i16(<8 x i16> %a) nounwind {
2045; SSE-LABEL: test_bitreverse_v8i16:
2046; SSE: # BB#0:
2047; SSE-NEXT: pextrw $7, %xmm0, %eax
2048; SSE-NEXT: movl %eax, %ecx
2049; SSE-NEXT: shll $15, %ecx
2050; SSE-NEXT: movl %eax, %edx
2051; SSE-NEXT: andl $2, %edx
2052; SSE-NEXT: shll $13, %edx
2053; SSE-NEXT: orl %ecx, %edx
2054; SSE-NEXT: movl %eax, %ecx
2055; SSE-NEXT: andl $4, %ecx
2056; SSE-NEXT: shll $11, %ecx
2057; SSE-NEXT: orl %edx, %ecx
2058; SSE-NEXT: movl %eax, %edx
2059; SSE-NEXT: andl $8, %edx
2060; SSE-NEXT: shll $9, %edx
2061; SSE-NEXT: orl %ecx, %edx
2062; SSE-NEXT: movl %eax, %esi
2063; SSE-NEXT: andl $16, %esi
2064; SSE-NEXT: shll $7, %esi
2065; SSE-NEXT: orl %edx, %esi
2066; SSE-NEXT: movl %eax, %ecx
2067; SSE-NEXT: andl $32, %ecx
2068; SSE-NEXT: shll $5, %ecx
2069; SSE-NEXT: orl %esi, %ecx
2070; SSE-NEXT: movl %eax, %edx
2071; SSE-NEXT: andl $64, %edx
2072; SSE-NEXT: shll $3, %edx
2073; SSE-NEXT: leal (%rax,%rax), %esi
2074; SSE-NEXT: andl $256, %esi # imm = 0x100
2075; SSE-NEXT: orl %edx, %esi
2076; SSE-NEXT: movl %eax, %edx
2077; SSE-NEXT: shrl %edx
2078; SSE-NEXT: andl $128, %edx
2079; SSE-NEXT: orl %esi, %edx
2080; SSE-NEXT: movl %eax, %esi
2081; SSE-NEXT: shrl $3, %esi
2082; SSE-NEXT: andl $64, %esi
2083; SSE-NEXT: orl %edx, %esi
2084; SSE-NEXT: movl %eax, %edx
2085; SSE-NEXT: shrl $5, %edx
2086; SSE-NEXT: andl $32, %edx
2087; SSE-NEXT: orl %esi, %edx
2088; SSE-NEXT: movl %eax, %esi
2089; SSE-NEXT: shrl $7, %esi
2090; SSE-NEXT: andl $16, %esi
2091; SSE-NEXT: orl %edx, %esi
2092; SSE-NEXT: movl %eax, %edx
2093; SSE-NEXT: shrl $9, %edx
2094; SSE-NEXT: andl $8, %edx
2095; SSE-NEXT: orl %esi, %edx
2096; SSE-NEXT: movl %eax, %esi
2097; SSE-NEXT: shrl $11, %esi
2098; SSE-NEXT: andl $4, %esi
2099; SSE-NEXT: orl %edx, %esi
2100; SSE-NEXT: movl %eax, %edx
2101; SSE-NEXT: shrl $13, %edx
2102; SSE-NEXT: andl $2, %edx
2103; SSE-NEXT: orl %esi, %edx
2104; SSE-NEXT: shrl $15, %eax
2105; SSE-NEXT: orl %edx, %eax
2106; SSE-NEXT: orl %ecx, %eax
2107; SSE-NEXT: movd %eax, %xmm1
2108; SSE-NEXT: pextrw $3, %xmm0, %eax
2109; SSE-NEXT: movl %eax, %ecx
2110; SSE-NEXT: shll $15, %ecx
2111; SSE-NEXT: movl %eax, %edx
2112; SSE-NEXT: andl $2, %edx
2113; SSE-NEXT: shll $13, %edx
2114; SSE-NEXT: orl %ecx, %edx
2115; SSE-NEXT: movl %eax, %ecx
2116; SSE-NEXT: andl $4, %ecx
2117; SSE-NEXT: shll $11, %ecx
2118; SSE-NEXT: orl %edx, %ecx
2119; SSE-NEXT: movl %eax, %edx
2120; SSE-NEXT: andl $8, %edx
2121; SSE-NEXT: shll $9, %edx
2122; SSE-NEXT: orl %ecx, %edx
2123; SSE-NEXT: movl %eax, %esi
2124; SSE-NEXT: andl $16, %esi
2125; SSE-NEXT: shll $7, %esi
2126; SSE-NEXT: orl %edx, %esi
2127; SSE-NEXT: movl %eax, %ecx
2128; SSE-NEXT: andl $32, %ecx
2129; SSE-NEXT: shll $5, %ecx
2130; SSE-NEXT: orl %esi, %ecx
2131; SSE-NEXT: movl %eax, %edx
2132; SSE-NEXT: andl $64, %edx
2133; SSE-NEXT: shll $3, %edx
2134; SSE-NEXT: leal (%rax,%rax), %esi
2135; SSE-NEXT: andl $256, %esi # imm = 0x100
2136; SSE-NEXT: orl %edx, %esi
2137; SSE-NEXT: movl %eax, %edx
2138; SSE-NEXT: shrl %edx
2139; SSE-NEXT: andl $128, %edx
2140; SSE-NEXT: orl %esi, %edx
2141; SSE-NEXT: movl %eax, %esi
2142; SSE-NEXT: shrl $3, %esi
2143; SSE-NEXT: andl $64, %esi
2144; SSE-NEXT: orl %edx, %esi
2145; SSE-NEXT: movl %eax, %edx
2146; SSE-NEXT: shrl $5, %edx
2147; SSE-NEXT: andl $32, %edx
2148; SSE-NEXT: orl %esi, %edx
2149; SSE-NEXT: movl %eax, %esi
2150; SSE-NEXT: shrl $7, %esi
2151; SSE-NEXT: andl $16, %esi
2152; SSE-NEXT: orl %edx, %esi
2153; SSE-NEXT: movl %eax, %edx
2154; SSE-NEXT: shrl $9, %edx
2155; SSE-NEXT: andl $8, %edx
2156; SSE-NEXT: orl %esi, %edx
2157; SSE-NEXT: movl %eax, %esi
2158; SSE-NEXT: shrl $11, %esi
2159; SSE-NEXT: andl $4, %esi
2160; SSE-NEXT: orl %edx, %esi
2161; SSE-NEXT: movl %eax, %edx
2162; SSE-NEXT: shrl $13, %edx
2163; SSE-NEXT: andl $2, %edx
2164; SSE-NEXT: orl %esi, %edx
2165; SSE-NEXT: shrl $15, %eax
2166; SSE-NEXT: orl %edx, %eax
2167; SSE-NEXT: orl %ecx, %eax
2168; SSE-NEXT: movd %eax, %xmm2
2169; SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
2170; SSE-NEXT: pextrw $5, %xmm0, %eax
2171; SSE-NEXT: movl %eax, %ecx
2172; SSE-NEXT: shll $15, %ecx
2173; SSE-NEXT: movl %eax, %edx
2174; SSE-NEXT: andl $2, %edx
2175; SSE-NEXT: shll $13, %edx
2176; SSE-NEXT: orl %ecx, %edx
2177; SSE-NEXT: movl %eax, %ecx
2178; SSE-NEXT: andl $4, %ecx
2179; SSE-NEXT: shll $11, %ecx
2180; SSE-NEXT: orl %edx, %ecx
2181; SSE-NEXT: movl %eax, %edx
2182; SSE-NEXT: andl $8, %edx
2183; SSE-NEXT: shll $9, %edx
2184; SSE-NEXT: orl %ecx, %edx
2185; SSE-NEXT: movl %eax, %esi
2186; SSE-NEXT: andl $16, %esi
2187; SSE-NEXT: shll $7, %esi
2188; SSE-NEXT: orl %edx, %esi
2189; SSE-NEXT: movl %eax, %ecx
2190; SSE-NEXT: andl $32, %ecx
2191; SSE-NEXT: shll $5, %ecx
2192; SSE-NEXT: orl %esi, %ecx
2193; SSE-NEXT: movl %eax, %edx
2194; SSE-NEXT: andl $64, %edx
2195; SSE-NEXT: shll $3, %edx
2196; SSE-NEXT: leal (%rax,%rax), %esi
2197; SSE-NEXT: andl $256, %esi # imm = 0x100
2198; SSE-NEXT: orl %edx, %esi
2199; SSE-NEXT: movl %eax, %edx
2200; SSE-NEXT: shrl %edx
2201; SSE-NEXT: andl $128, %edx
2202; SSE-NEXT: orl %esi, %edx
2203; SSE-NEXT: movl %eax, %esi
2204; SSE-NEXT: shrl $3, %esi
2205; SSE-NEXT: andl $64, %esi
2206; SSE-NEXT: orl %edx, %esi
2207; SSE-NEXT: movl %eax, %edx
2208; SSE-NEXT: shrl $5, %edx
2209; SSE-NEXT: andl $32, %edx
2210; SSE-NEXT: orl %esi, %edx
2211; SSE-NEXT: movl %eax, %esi
2212; SSE-NEXT: shrl $7, %esi
2213; SSE-NEXT: andl $16, %esi
2214; SSE-NEXT: orl %edx, %esi
2215; SSE-NEXT: movl %eax, %edx
2216; SSE-NEXT: shrl $9, %edx
2217; SSE-NEXT: andl $8, %edx
2218; SSE-NEXT: orl %esi, %edx
2219; SSE-NEXT: movl %eax, %esi
2220; SSE-NEXT: shrl $11, %esi
2221; SSE-NEXT: andl $4, %esi
2222; SSE-NEXT: orl %edx, %esi
2223; SSE-NEXT: movl %eax, %edx
2224; SSE-NEXT: shrl $13, %edx
2225; SSE-NEXT: andl $2, %edx
2226; SSE-NEXT: orl %esi, %edx
2227; SSE-NEXT: shrl $15, %eax
2228; SSE-NEXT: orl %edx, %eax
2229; SSE-NEXT: orl %ecx, %eax
2230; SSE-NEXT: movd %eax, %xmm3
2231; SSE-NEXT: pextrw $1, %xmm0, %eax
2232; SSE-NEXT: movl %eax, %ecx
2233; SSE-NEXT: shll $15, %ecx
2234; SSE-NEXT: movl %eax, %edx
2235; SSE-NEXT: andl $2, %edx
2236; SSE-NEXT: shll $13, %edx
2237; SSE-NEXT: orl %ecx, %edx
2238; SSE-NEXT: movl %eax, %ecx
2239; SSE-NEXT: andl $4, %ecx
2240; SSE-NEXT: shll $11, %ecx
2241; SSE-NEXT: orl %edx, %ecx
2242; SSE-NEXT: movl %eax, %edx
2243; SSE-NEXT: andl $8, %edx
2244; SSE-NEXT: shll $9, %edx
2245; SSE-NEXT: orl %ecx, %edx
2246; SSE-NEXT: movl %eax, %esi
2247; SSE-NEXT: andl $16, %esi
2248; SSE-NEXT: shll $7, %esi
2249; SSE-NEXT: orl %edx, %esi
2250; SSE-NEXT: movl %eax, %ecx
2251; SSE-NEXT: andl $32, %ecx
2252; SSE-NEXT: shll $5, %ecx
2253; SSE-NEXT: orl %esi, %ecx
2254; SSE-NEXT: movl %eax, %edx
2255; SSE-NEXT: andl $64, %edx
2256; SSE-NEXT: shll $3, %edx
2257; SSE-NEXT: leal (%rax,%rax), %esi
2258; SSE-NEXT: andl $256, %esi # imm = 0x100
2259; SSE-NEXT: orl %edx, %esi
2260; SSE-NEXT: movl %eax, %edx
2261; SSE-NEXT: shrl %edx
2262; SSE-NEXT: andl $128, %edx
2263; SSE-NEXT: orl %esi, %edx
2264; SSE-NEXT: movl %eax, %esi
2265; SSE-NEXT: shrl $3, %esi
2266; SSE-NEXT: andl $64, %esi
2267; SSE-NEXT: orl %edx, %esi
2268; SSE-NEXT: movl %eax, %edx
2269; SSE-NEXT: shrl $5, %edx
2270; SSE-NEXT: andl $32, %edx
2271; SSE-NEXT: orl %esi, %edx
2272; SSE-NEXT: movl %eax, %esi
2273; SSE-NEXT: shrl $7, %esi
2274; SSE-NEXT: andl $16, %esi
2275; SSE-NEXT: orl %edx, %esi
2276; SSE-NEXT: movl %eax, %edx
2277; SSE-NEXT: shrl $9, %edx
2278; SSE-NEXT: andl $8, %edx
2279; SSE-NEXT: orl %esi, %edx
2280; SSE-NEXT: movl %eax, %esi
2281; SSE-NEXT: shrl $11, %esi
2282; SSE-NEXT: andl $4, %esi
2283; SSE-NEXT: orl %edx, %esi
2284; SSE-NEXT: movl %eax, %edx
2285; SSE-NEXT: shrl $13, %edx
2286; SSE-NEXT: andl $2, %edx
2287; SSE-NEXT: orl %esi, %edx
2288; SSE-NEXT: shrl $15, %eax
2289; SSE-NEXT: orl %edx, %eax
2290; SSE-NEXT: orl %ecx, %eax
2291; SSE-NEXT: movd %eax, %xmm1
2292; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
2293; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
2294; SSE-NEXT: pextrw $6, %xmm0, %eax
2295; SSE-NEXT: movl %eax, %ecx
2296; SSE-NEXT: shll $15, %ecx
2297; SSE-NEXT: movl %eax, %edx
2298; SSE-NEXT: andl $2, %edx
2299; SSE-NEXT: shll $13, %edx
2300; SSE-NEXT: orl %ecx, %edx
2301; SSE-NEXT: movl %eax, %ecx
2302; SSE-NEXT: andl $4, %ecx
2303; SSE-NEXT: shll $11, %ecx
2304; SSE-NEXT: orl %edx, %ecx
2305; SSE-NEXT: movl %eax, %edx
2306; SSE-NEXT: andl $8, %edx
2307; SSE-NEXT: shll $9, %edx
2308; SSE-NEXT: orl %ecx, %edx
2309; SSE-NEXT: movl %eax, %esi
2310; SSE-NEXT: andl $16, %esi
2311; SSE-NEXT: shll $7, %esi
2312; SSE-NEXT: orl %edx, %esi
2313; SSE-NEXT: movl %eax, %ecx
2314; SSE-NEXT: andl $32, %ecx
2315; SSE-NEXT: shll $5, %ecx
2316; SSE-NEXT: orl %esi, %ecx
2317; SSE-NEXT: movl %eax, %edx
2318; SSE-NEXT: andl $64, %edx
2319; SSE-NEXT: shll $3, %edx
2320; SSE-NEXT: leal (%rax,%rax), %esi
2321; SSE-NEXT: andl $256, %esi # imm = 0x100
2322; SSE-NEXT: orl %edx, %esi
2323; SSE-NEXT: movl %eax, %edx
2324; SSE-NEXT: shrl %edx
2325; SSE-NEXT: andl $128, %edx
2326; SSE-NEXT: orl %esi, %edx
2327; SSE-NEXT: movl %eax, %esi
2328; SSE-NEXT: shrl $3, %esi
2329; SSE-NEXT: andl $64, %esi
2330; SSE-NEXT: orl %edx, %esi
2331; SSE-NEXT: movl %eax, %edx
2332; SSE-NEXT: shrl $5, %edx
2333; SSE-NEXT: andl $32, %edx
2334; SSE-NEXT: orl %esi, %edx
2335; SSE-NEXT: movl %eax, %esi
2336; SSE-NEXT: shrl $7, %esi
2337; SSE-NEXT: andl $16, %esi
2338; SSE-NEXT: orl %edx, %esi
2339; SSE-NEXT: movl %eax, %edx
2340; SSE-NEXT: shrl $9, %edx
2341; SSE-NEXT: andl $8, %edx
2342; SSE-NEXT: orl %esi, %edx
2343; SSE-NEXT: movl %eax, %esi
2344; SSE-NEXT: shrl $11, %esi
2345; SSE-NEXT: andl $4, %esi
2346; SSE-NEXT: orl %edx, %esi
2347; SSE-NEXT: movl %eax, %edx
2348; SSE-NEXT: shrl $13, %edx
2349; SSE-NEXT: andl $2, %edx
2350; SSE-NEXT: orl %esi, %edx
2351; SSE-NEXT: shrl $15, %eax
2352; SSE-NEXT: orl %edx, %eax
2353; SSE-NEXT: orl %ecx, %eax
2354; SSE-NEXT: movd %eax, %xmm3
2355; SSE-NEXT: pextrw $2, %xmm0, %eax
2356; SSE-NEXT: movl %eax, %ecx
2357; SSE-NEXT: shll $15, %ecx
2358; SSE-NEXT: movl %eax, %edx
2359; SSE-NEXT: andl $2, %edx
2360; SSE-NEXT: shll $13, %edx
2361; SSE-NEXT: orl %ecx, %edx
2362; SSE-NEXT: movl %eax, %ecx
2363; SSE-NEXT: andl $4, %ecx
2364; SSE-NEXT: shll $11, %ecx
2365; SSE-NEXT: orl %edx, %ecx
2366; SSE-NEXT: movl %eax, %edx
2367; SSE-NEXT: andl $8, %edx
2368; SSE-NEXT: shll $9, %edx
2369; SSE-NEXT: orl %ecx, %edx
2370; SSE-NEXT: movl %eax, %esi
2371; SSE-NEXT: andl $16, %esi
2372; SSE-NEXT: shll $7, %esi
2373; SSE-NEXT: orl %edx, %esi
2374; SSE-NEXT: movl %eax, %ecx
2375; SSE-NEXT: andl $32, %ecx
2376; SSE-NEXT: shll $5, %ecx
2377; SSE-NEXT: orl %esi, %ecx
2378; SSE-NEXT: movl %eax, %edx
2379; SSE-NEXT: andl $64, %edx
2380; SSE-NEXT: shll $3, %edx
2381; SSE-NEXT: leal (%rax,%rax), %esi
2382; SSE-NEXT: andl $256, %esi # imm = 0x100
2383; SSE-NEXT: orl %edx, %esi
2384; SSE-NEXT: movl %eax, %edx
2385; SSE-NEXT: shrl %edx
2386; SSE-NEXT: andl $128, %edx
2387; SSE-NEXT: orl %esi, %edx
2388; SSE-NEXT: movl %eax, %esi
2389; SSE-NEXT: shrl $3, %esi
2390; SSE-NEXT: andl $64, %esi
2391; SSE-NEXT: orl %edx, %esi
2392; SSE-NEXT: movl %eax, %edx
2393; SSE-NEXT: shrl $5, %edx
2394; SSE-NEXT: andl $32, %edx
2395; SSE-NEXT: orl %esi, %edx
2396; SSE-NEXT: movl %eax, %esi
2397; SSE-NEXT: shrl $7, %esi
2398; SSE-NEXT: andl $16, %esi
2399; SSE-NEXT: orl %edx, %esi
2400; SSE-NEXT: movl %eax, %edx
2401; SSE-NEXT: shrl $9, %edx
2402; SSE-NEXT: andl $8, %edx
2403; SSE-NEXT: orl %esi, %edx
2404; SSE-NEXT: movl %eax, %esi
2405; SSE-NEXT: shrl $11, %esi
2406; SSE-NEXT: andl $4, %esi
2407; SSE-NEXT: orl %edx, %esi
2408; SSE-NEXT: movl %eax, %edx
2409; SSE-NEXT: shrl $13, %edx
2410; SSE-NEXT: andl $2, %edx
2411; SSE-NEXT: orl %esi, %edx
2412; SSE-NEXT: shrl $15, %eax
2413; SSE-NEXT: orl %edx, %eax
2414; SSE-NEXT: orl %ecx, %eax
2415; SSE-NEXT: movd %eax, %xmm2
2416; SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
2417; SSE-NEXT: pextrw $4, %xmm0, %eax
2418; SSE-NEXT: movl %eax, %ecx
2419; SSE-NEXT: shll $15, %ecx
2420; SSE-NEXT: movl %eax, %edx
2421; SSE-NEXT: andl $2, %edx
2422; SSE-NEXT: shll $13, %edx
2423; SSE-NEXT: orl %ecx, %edx
2424; SSE-NEXT: movl %eax, %ecx
2425; SSE-NEXT: andl $4, %ecx
2426; SSE-NEXT: shll $11, %ecx
2427; SSE-NEXT: orl %edx, %ecx
2428; SSE-NEXT: movl %eax, %edx
2429; SSE-NEXT: andl $8, %edx
2430; SSE-NEXT: shll $9, %edx
2431; SSE-NEXT: orl %ecx, %edx
2432; SSE-NEXT: movl %eax, %esi
2433; SSE-NEXT: andl $16, %esi
2434; SSE-NEXT: shll $7, %esi
2435; SSE-NEXT: orl %edx, %esi
2436; SSE-NEXT: movl %eax, %ecx
2437; SSE-NEXT: andl $32, %ecx
2438; SSE-NEXT: shll $5, %ecx
2439; SSE-NEXT: orl %esi, %ecx
2440; SSE-NEXT: movl %eax, %edx
2441; SSE-NEXT: andl $64, %edx
2442; SSE-NEXT: shll $3, %edx
2443; SSE-NEXT: leal (%rax,%rax), %esi
2444; SSE-NEXT: andl $256, %esi # imm = 0x100
2445; SSE-NEXT: orl %edx, %esi
2446; SSE-NEXT: movl %eax, %edx
2447; SSE-NEXT: shrl %edx
2448; SSE-NEXT: andl $128, %edx
2449; SSE-NEXT: orl %esi, %edx
2450; SSE-NEXT: movl %eax, %esi
2451; SSE-NEXT: shrl $3, %esi
2452; SSE-NEXT: andl $64, %esi
2453; SSE-NEXT: orl %edx, %esi
2454; SSE-NEXT: movl %eax, %edx
2455; SSE-NEXT: shrl $5, %edx
2456; SSE-NEXT: andl $32, %edx
2457; SSE-NEXT: orl %esi, %edx
2458; SSE-NEXT: movl %eax, %esi
2459; SSE-NEXT: shrl $7, %esi
2460; SSE-NEXT: andl $16, %esi
2461; SSE-NEXT: orl %edx, %esi
2462; SSE-NEXT: movl %eax, %edx
2463; SSE-NEXT: shrl $9, %edx
2464; SSE-NEXT: andl $8, %edx
2465; SSE-NEXT: orl %esi, %edx
2466; SSE-NEXT: movl %eax, %esi
2467; SSE-NEXT: shrl $11, %esi
2468; SSE-NEXT: andl $4, %esi
2469; SSE-NEXT: orl %edx, %esi
2470; SSE-NEXT: movl %eax, %edx
2471; SSE-NEXT: shrl $13, %edx
2472; SSE-NEXT: andl $2, %edx
2473; SSE-NEXT: orl %esi, %edx
2474; SSE-NEXT: shrl $15, %eax
2475; SSE-NEXT: orl %edx, %eax
2476; SSE-NEXT: orl %ecx, %eax
2477; SSE-NEXT: movd %eax, %xmm3
2478; SSE-NEXT: movd %xmm0, %eax
2479; SSE-NEXT: movl %eax, %ecx
2480; SSE-NEXT: shll $15, %ecx
2481; SSE-NEXT: movl %eax, %edx
2482; SSE-NEXT: andl $2, %edx
2483; SSE-NEXT: shll $13, %edx
2484; SSE-NEXT: orl %ecx, %edx
2485; SSE-NEXT: movl %eax, %ecx
2486; SSE-NEXT: andl $4, %ecx
2487; SSE-NEXT: shll $11, %ecx
2488; SSE-NEXT: orl %edx, %ecx
2489; SSE-NEXT: movl %eax, %edx
2490; SSE-NEXT: andl $8, %edx
2491; SSE-NEXT: shll $9, %edx
2492; SSE-NEXT: orl %ecx, %edx
2493; SSE-NEXT: movl %eax, %esi
2494; SSE-NEXT: andl $16, %esi
2495; SSE-NEXT: shll $7, %esi
2496; SSE-NEXT: orl %edx, %esi
2497; SSE-NEXT: movl %eax, %ecx
2498; SSE-NEXT: andl $32, %ecx
2499; SSE-NEXT: shll $5, %ecx
2500; SSE-NEXT: orl %esi, %ecx
2501; SSE-NEXT: movl %eax, %edx
2502; SSE-NEXT: andl $64, %edx
2503; SSE-NEXT: shll $3, %edx
2504; SSE-NEXT: leal (%rax,%rax), %esi
2505; SSE-NEXT: andl $256, %esi # imm = 0x100
2506; SSE-NEXT: orl %edx, %esi
2507; SSE-NEXT: movl %eax, %edx
2508; SSE-NEXT: shrl %edx
2509; SSE-NEXT: andl $128, %edx
2510; SSE-NEXT: orl %esi, %edx
2511; SSE-NEXT: movl %eax, %esi
2512; SSE-NEXT: shrl $3, %esi
2513; SSE-NEXT: andl $64, %esi
2514; SSE-NEXT: orl %edx, %esi
2515; SSE-NEXT: movl %eax, %edx
2516; SSE-NEXT: shrl $5, %edx
2517; SSE-NEXT: andl $32, %edx
2518; SSE-NEXT: orl %esi, %edx
2519; SSE-NEXT: movl %eax, %esi
2520; SSE-NEXT: shrl $7, %esi
2521; SSE-NEXT: andl $16, %esi
2522; SSE-NEXT: orl %edx, %esi
2523; SSE-NEXT: movl %eax, %edx
2524; SSE-NEXT: shrl $9, %edx
2525; SSE-NEXT: andl $8, %edx
2526; SSE-NEXT: orl %esi, %edx
2527; SSE-NEXT: movl %eax, %esi
2528; SSE-NEXT: shrl $11, %esi
2529; SSE-NEXT: andl $4, %esi
2530; SSE-NEXT: orl %edx, %esi
2531; SSE-NEXT: movl %eax, %edx
2532; SSE-NEXT: shrl $13, %edx
2533; SSE-NEXT: andl $2, %edx
2534; SSE-NEXT: orl %esi, %edx
2535; SSE-NEXT: andl $32768, %eax # imm = 0x8000
2536; SSE-NEXT: shrl $15, %eax
2537; SSE-NEXT: orl %edx, %eax
2538; SSE-NEXT: orl %ecx, %eax
2539; SSE-NEXT: movd %eax, %xmm0
2540; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
2541; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
2542; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2543; SSE-NEXT: retq
2544;
2545; AVX-LABEL: test_bitreverse_v8i16:
2546; AVX: # BB#0:
2547; AVX-NEXT: vpextrw $1, %xmm0, %eax
2548; AVX-NEXT: movl %eax, %ecx
2549; AVX-NEXT: shll $15, %ecx
2550; AVX-NEXT: movl %eax, %edx
2551; AVX-NEXT: andl $2, %edx
2552; AVX-NEXT: shll $13, %edx
2553; AVX-NEXT: orl %ecx, %edx
2554; AVX-NEXT: movl %eax, %ecx
2555; AVX-NEXT: andl $4, %ecx
2556; AVX-NEXT: shll $11, %ecx
2557; AVX-NEXT: orl %edx, %ecx
2558; AVX-NEXT: movl %eax, %edx
2559; AVX-NEXT: andl $8, %edx
2560; AVX-NEXT: shll $9, %edx
2561; AVX-NEXT: orl %ecx, %edx
2562; AVX-NEXT: movl %eax, %esi
2563; AVX-NEXT: andl $16, %esi
2564; AVX-NEXT: shll $7, %esi
2565; AVX-NEXT: orl %edx, %esi
2566; AVX-NEXT: movl %eax, %ecx
2567; AVX-NEXT: andl $32, %ecx
2568; AVX-NEXT: shll $5, %ecx
2569; AVX-NEXT: orl %esi, %ecx
2570; AVX-NEXT: movl %eax, %edx
2571; AVX-NEXT: andl $64, %edx
2572; AVX-NEXT: shll $3, %edx
2573; AVX-NEXT: leal (%rax,%rax), %esi
2574; AVX-NEXT: andl $256, %esi # imm = 0x100
2575; AVX-NEXT: orl %edx, %esi
2576; AVX-NEXT: movl %eax, %edx
2577; AVX-NEXT: shrl %edx
2578; AVX-NEXT: andl $128, %edx
2579; AVX-NEXT: orl %esi, %edx
2580; AVX-NEXT: movl %eax, %esi
2581; AVX-NEXT: shrl $3, %esi
2582; AVX-NEXT: andl $64, %esi
2583; AVX-NEXT: orl %edx, %esi
2584; AVX-NEXT: movl %eax, %edx
2585; AVX-NEXT: shrl $5, %edx
2586; AVX-NEXT: andl $32, %edx
2587; AVX-NEXT: orl %esi, %edx
2588; AVX-NEXT: movl %eax, %esi
2589; AVX-NEXT: shrl $7, %esi
2590; AVX-NEXT: andl $16, %esi
2591; AVX-NEXT: orl %edx, %esi
2592; AVX-NEXT: movl %eax, %edx
2593; AVX-NEXT: shrl $9, %edx
2594; AVX-NEXT: andl $8, %edx
2595; AVX-NEXT: orl %esi, %edx
2596; AVX-NEXT: movl %eax, %esi
2597; AVX-NEXT: shrl $11, %esi
2598; AVX-NEXT: andl $4, %esi
2599; AVX-NEXT: orl %edx, %esi
2600; AVX-NEXT: movl %eax, %edx
2601; AVX-NEXT: shrl $13, %edx
2602; AVX-NEXT: andl $2, %edx
2603; AVX-NEXT: orl %esi, %edx
2604; AVX-NEXT: shrl $15, %eax
2605; AVX-NEXT: orl %edx, %eax
2606; AVX-NEXT: orl %ecx, %eax
2607; AVX-NEXT: vmovd %xmm0, %ecx
2608; AVX-NEXT: movl %ecx, %edx
2609; AVX-NEXT: shll $15, %edx
2610; AVX-NEXT: movl %ecx, %esi
2611; AVX-NEXT: andl $2, %esi
2612; AVX-NEXT: shll $13, %esi
2613; AVX-NEXT: orl %edx, %esi
2614; AVX-NEXT: movl %ecx, %edx
2615; AVX-NEXT: andl $4, %edx
2616; AVX-NEXT: shll $11, %edx
2617; AVX-NEXT: orl %esi, %edx
2618; AVX-NEXT: movl %ecx, %esi
2619; AVX-NEXT: andl $8, %esi
2620; AVX-NEXT: shll $9, %esi
2621; AVX-NEXT: orl %edx, %esi
2622; AVX-NEXT: movl %ecx, %edi
2623; AVX-NEXT: andl $16, %edi
2624; AVX-NEXT: shll $7, %edi
2625; AVX-NEXT: orl %esi, %edi
2626; AVX-NEXT: movl %ecx, %edx
2627; AVX-NEXT: andl $32, %edx
2628; AVX-NEXT: shll $5, %edx
2629; AVX-NEXT: orl %edi, %edx
2630; AVX-NEXT: movl %ecx, %esi
2631; AVX-NEXT: andl $64, %esi
2632; AVX-NEXT: shll $3, %esi
2633; AVX-NEXT: leal (%rcx,%rcx), %edi
2634; AVX-NEXT: andl $256, %edi # imm = 0x100
2635; AVX-NEXT: orl %esi, %edi
2636; AVX-NEXT: movl %ecx, %esi
2637; AVX-NEXT: shrl %esi
2638; AVX-NEXT: andl $128, %esi
2639; AVX-NEXT: orl %edi, %esi
2640; AVX-NEXT: movl %ecx, %edi
2641; AVX-NEXT: shrl $3, %edi
2642; AVX-NEXT: andl $64, %edi
2643; AVX-NEXT: orl %esi, %edi
2644; AVX-NEXT: movl %ecx, %esi
2645; AVX-NEXT: shrl $5, %esi
2646; AVX-NEXT: andl $32, %esi
2647; AVX-NEXT: orl %edi, %esi
2648; AVX-NEXT: movl %ecx, %edi
2649; AVX-NEXT: shrl $7, %edi
2650; AVX-NEXT: andl $16, %edi
2651; AVX-NEXT: orl %esi, %edi
2652; AVX-NEXT: movl %ecx, %esi
2653; AVX-NEXT: shrl $9, %esi
2654; AVX-NEXT: andl $8, %esi
2655; AVX-NEXT: orl %edi, %esi
2656; AVX-NEXT: movl %ecx, %edi
2657; AVX-NEXT: shrl $11, %edi
2658; AVX-NEXT: andl $4, %edi
2659; AVX-NEXT: orl %esi, %edi
2660; AVX-NEXT: movl %ecx, %esi
2661; AVX-NEXT: shrl $13, %esi
2662; AVX-NEXT: andl $2, %esi
2663; AVX-NEXT: orl %edi, %esi
2664; AVX-NEXT: andl $32768, %ecx # imm = 0x8000
2665; AVX-NEXT: shrl $15, %ecx
2666; AVX-NEXT: orl %esi, %ecx
2667; AVX-NEXT: orl %edx, %ecx
2668; AVX-NEXT: vmovd %ecx, %xmm1
2669; AVX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1
2670; AVX-NEXT: vpextrw $2, %xmm0, %eax
2671; AVX-NEXT: movl %eax, %ecx
2672; AVX-NEXT: shll $15, %ecx
2673; AVX-NEXT: movl %eax, %edx
2674; AVX-NEXT: andl $2, %edx
2675; AVX-NEXT: shll $13, %edx
2676; AVX-NEXT: orl %ecx, %edx
2677; AVX-NEXT: movl %eax, %ecx
2678; AVX-NEXT: andl $4, %ecx
2679; AVX-NEXT: shll $11, %ecx
2680; AVX-NEXT: orl %edx, %ecx
2681; AVX-NEXT: movl %eax, %edx
2682; AVX-NEXT: andl $8, %edx
2683; AVX-NEXT: shll $9, %edx
2684; AVX-NEXT: orl %ecx, %edx
2685; AVX-NEXT: movl %eax, %esi
2686; AVX-NEXT: andl $16, %esi
2687; AVX-NEXT: shll $7, %esi
2688; AVX-NEXT: orl %edx, %esi
2689; AVX-NEXT: movl %eax, %ecx
2690; AVX-NEXT: andl $32, %ecx
2691; AVX-NEXT: shll $5, %ecx
2692; AVX-NEXT: orl %esi, %ecx
2693; AVX-NEXT: movl %eax, %edx
2694; AVX-NEXT: andl $64, %edx
2695; AVX-NEXT: shll $3, %edx
2696; AVX-NEXT: leal (%rax,%rax), %esi
2697; AVX-NEXT: andl $256, %esi # imm = 0x100
2698; AVX-NEXT: orl %edx, %esi
2699; AVX-NEXT: movl %eax, %edx
2700; AVX-NEXT: shrl %edx
2701; AVX-NEXT: andl $128, %edx
2702; AVX-NEXT: orl %esi, %edx
2703; AVX-NEXT: movl %eax, %esi
2704; AVX-NEXT: shrl $3, %esi
2705; AVX-NEXT: andl $64, %esi
2706; AVX-NEXT: orl %edx, %esi
2707; AVX-NEXT: movl %eax, %edx
2708; AVX-NEXT: shrl $5, %edx
2709; AVX-NEXT: andl $32, %edx
2710; AVX-NEXT: orl %esi, %edx
2711; AVX-NEXT: movl %eax, %esi
2712; AVX-NEXT: shrl $7, %esi
2713; AVX-NEXT: andl $16, %esi
2714; AVX-NEXT: orl %edx, %esi
2715; AVX-NEXT: movl %eax, %edx
2716; AVX-NEXT: shrl $9, %edx
2717; AVX-NEXT: andl $8, %edx
2718; AVX-NEXT: orl %esi, %edx
2719; AVX-NEXT: movl %eax, %esi
2720; AVX-NEXT: shrl $11, %esi
2721; AVX-NEXT: andl $4, %esi
2722; AVX-NEXT: orl %edx, %esi
2723; AVX-NEXT: movl %eax, %edx
2724; AVX-NEXT: shrl $13, %edx
2725; AVX-NEXT: andl $2, %edx
2726; AVX-NEXT: orl %esi, %edx
2727; AVX-NEXT: shrl $15, %eax
2728; AVX-NEXT: orl %edx, %eax
2729; AVX-NEXT: orl %ecx, %eax
2730; AVX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1
2731; AVX-NEXT: vpextrw $3, %xmm0, %eax
2732; AVX-NEXT: movl %eax, %ecx
2733; AVX-NEXT: shll $15, %ecx
2734; AVX-NEXT: movl %eax, %edx
2735; AVX-NEXT: andl $2, %edx
2736; AVX-NEXT: shll $13, %edx
2737; AVX-NEXT: orl %ecx, %edx
2738; AVX-NEXT: movl %eax, %ecx
2739; AVX-NEXT: andl $4, %ecx
2740; AVX-NEXT: shll $11, %ecx
2741; AVX-NEXT: orl %edx, %ecx
2742; AVX-NEXT: movl %eax, %edx
2743; AVX-NEXT: andl $8, %edx
2744; AVX-NEXT: shll $9, %edx
2745; AVX-NEXT: orl %ecx, %edx
2746; AVX-NEXT: movl %eax, %esi
2747; AVX-NEXT: andl $16, %esi
2748; AVX-NEXT: shll $7, %esi
2749; AVX-NEXT: orl %edx, %esi
2750; AVX-NEXT: movl %eax, %ecx
2751; AVX-NEXT: andl $32, %ecx
2752; AVX-NEXT: shll $5, %ecx
2753; AVX-NEXT: orl %esi, %ecx
2754; AVX-NEXT: movl %eax, %edx
2755; AVX-NEXT: andl $64, %edx
2756; AVX-NEXT: shll $3, %edx
2757; AVX-NEXT: leal (%rax,%rax), %esi
2758; AVX-NEXT: andl $256, %esi # imm = 0x100
2759; AVX-NEXT: orl %edx, %esi
2760; AVX-NEXT: movl %eax, %edx
2761; AVX-NEXT: shrl %edx
2762; AVX-NEXT: andl $128, %edx
2763; AVX-NEXT: orl %esi, %edx
2764; AVX-NEXT: movl %eax, %esi
2765; AVX-NEXT: shrl $3, %esi
2766; AVX-NEXT: andl $64, %esi
2767; AVX-NEXT: orl %edx, %esi
2768; AVX-NEXT: movl %eax, %edx
2769; AVX-NEXT: shrl $5, %edx
2770; AVX-NEXT: andl $32, %edx
2771; AVX-NEXT: orl %esi, %edx
2772; AVX-NEXT: movl %eax, %esi
2773; AVX-NEXT: shrl $7, %esi
2774; AVX-NEXT: andl $16, %esi
2775; AVX-NEXT: orl %edx, %esi
2776; AVX-NEXT: movl %eax, %edx
2777; AVX-NEXT: shrl $9, %edx
2778; AVX-NEXT: andl $8, %edx
2779; AVX-NEXT: orl %esi, %edx
2780; AVX-NEXT: movl %eax, %esi
2781; AVX-NEXT: shrl $11, %esi
2782; AVX-NEXT: andl $4, %esi
2783; AVX-NEXT: orl %edx, %esi
2784; AVX-NEXT: movl %eax, %edx
2785; AVX-NEXT: shrl $13, %edx
2786; AVX-NEXT: andl $2, %edx
2787; AVX-NEXT: orl %esi, %edx
2788; AVX-NEXT: shrl $15, %eax
2789; AVX-NEXT: orl %edx, %eax
2790; AVX-NEXT: orl %ecx, %eax
2791; AVX-NEXT: vpinsrw $3, %eax, %xmm1, %xmm1
2792; AVX-NEXT: vpextrw $4, %xmm0, %eax
2793; AVX-NEXT: movl %eax, %ecx
2794; AVX-NEXT: shll $15, %ecx
2795; AVX-NEXT: movl %eax, %edx
2796; AVX-NEXT: andl $2, %edx
2797; AVX-NEXT: shll $13, %edx
2798; AVX-NEXT: orl %ecx, %edx
2799; AVX-NEXT: movl %eax, %ecx
2800; AVX-NEXT: andl $4, %ecx
2801; AVX-NEXT: shll $11, %ecx
2802; AVX-NEXT: orl %edx, %ecx
2803; AVX-NEXT: movl %eax, %edx
2804; AVX-NEXT: andl $8, %edx
2805; AVX-NEXT: shll $9, %edx
2806; AVX-NEXT: orl %ecx, %edx
2807; AVX-NEXT: movl %eax, %esi
2808; AVX-NEXT: andl $16, %esi
2809; AVX-NEXT: shll $7, %esi
2810; AVX-NEXT: orl %edx, %esi
2811; AVX-NEXT: movl %eax, %ecx
2812; AVX-NEXT: andl $32, %ecx
2813; AVX-NEXT: shll $5, %ecx
2814; AVX-NEXT: orl %esi, %ecx
2815; AVX-NEXT: movl %eax, %edx
2816; AVX-NEXT: andl $64, %edx
2817; AVX-NEXT: shll $3, %edx
2818; AVX-NEXT: leal (%rax,%rax), %esi
2819; AVX-NEXT: andl $256, %esi # imm = 0x100
2820; AVX-NEXT: orl %edx, %esi
2821; AVX-NEXT: movl %eax, %edx
2822; AVX-NEXT: shrl %edx
2823; AVX-NEXT: andl $128, %edx
2824; AVX-NEXT: orl %esi, %edx
2825; AVX-NEXT: movl %eax, %esi
2826; AVX-NEXT: shrl $3, %esi
2827; AVX-NEXT: andl $64, %esi
2828; AVX-NEXT: orl %edx, %esi
2829; AVX-NEXT: movl %eax, %edx
2830; AVX-NEXT: shrl $5, %edx
2831; AVX-NEXT: andl $32, %edx
2832; AVX-NEXT: orl %esi, %edx
2833; AVX-NEXT: movl %eax, %esi
2834; AVX-NEXT: shrl $7, %esi
2835; AVX-NEXT: andl $16, %esi
2836; AVX-NEXT: orl %edx, %esi
2837; AVX-NEXT: movl %eax, %edx
2838; AVX-NEXT: shrl $9, %edx
2839; AVX-NEXT: andl $8, %edx
2840; AVX-NEXT: orl %esi, %edx
2841; AVX-NEXT: movl %eax, %esi
2842; AVX-NEXT: shrl $11, %esi
2843; AVX-NEXT: andl $4, %esi
2844; AVX-NEXT: orl %edx, %esi
2845; AVX-NEXT: movl %eax, %edx
2846; AVX-NEXT: shrl $13, %edx
2847; AVX-NEXT: andl $2, %edx
2848; AVX-NEXT: orl %esi, %edx
2849; AVX-NEXT: shrl $15, %eax
2850; AVX-NEXT: orl %edx, %eax
2851; AVX-NEXT: orl %ecx, %eax
2852; AVX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
2853; AVX-NEXT: vpextrw $5, %xmm0, %eax
2854; AVX-NEXT: movl %eax, %ecx
2855; AVX-NEXT: shll $15, %ecx
2856; AVX-NEXT: movl %eax, %edx
2857; AVX-NEXT: andl $2, %edx
2858; AVX-NEXT: shll $13, %edx
2859; AVX-NEXT: orl %ecx, %edx
2860; AVX-NEXT: movl %eax, %ecx
2861; AVX-NEXT: andl $4, %ecx
2862; AVX-NEXT: shll $11, %ecx
2863; AVX-NEXT: orl %edx, %ecx
2864; AVX-NEXT: movl %eax, %edx
2865; AVX-NEXT: andl $8, %edx
2866; AVX-NEXT: shll $9, %edx
2867; AVX-NEXT: orl %ecx, %edx
2868; AVX-NEXT: movl %eax, %esi
2869; AVX-NEXT: andl $16, %esi
2870; AVX-NEXT: shll $7, %esi
2871; AVX-NEXT: orl %edx, %esi
2872; AVX-NEXT: movl %eax, %ecx
2873; AVX-NEXT: andl $32, %ecx
2874; AVX-NEXT: shll $5, %ecx
2875; AVX-NEXT: orl %esi, %ecx
2876; AVX-NEXT: movl %eax, %edx
2877; AVX-NEXT: andl $64, %edx
2878; AVX-NEXT: shll $3, %edx
2879; AVX-NEXT: leal (%rax,%rax), %esi
2880; AVX-NEXT: andl $256, %esi # imm = 0x100
2881; AVX-NEXT: orl %edx, %esi
2882; AVX-NEXT: movl %eax, %edx
2883; AVX-NEXT: shrl %edx
2884; AVX-NEXT: andl $128, %edx
2885; AVX-NEXT: orl %esi, %edx
2886; AVX-NEXT: movl %eax, %esi
2887; AVX-NEXT: shrl $3, %esi
2888; AVX-NEXT: andl $64, %esi
2889; AVX-NEXT: orl %edx, %esi
2890; AVX-NEXT: movl %eax, %edx
2891; AVX-NEXT: shrl $5, %edx
2892; AVX-NEXT: andl $32, %edx
2893; AVX-NEXT: orl %esi, %edx
2894; AVX-NEXT: movl %eax, %esi
2895; AVX-NEXT: shrl $7, %esi
2896; AVX-NEXT: andl $16, %esi
2897; AVX-NEXT: orl %edx, %esi
2898; AVX-NEXT: movl %eax, %edx
2899; AVX-NEXT: shrl $9, %edx
2900; AVX-NEXT: andl $8, %edx
2901; AVX-NEXT: orl %esi, %edx
2902; AVX-NEXT: movl %eax, %esi
2903; AVX-NEXT: shrl $11, %esi
2904; AVX-NEXT: andl $4, %esi
2905; AVX-NEXT: orl %edx, %esi
2906; AVX-NEXT: movl %eax, %edx
2907; AVX-NEXT: shrl $13, %edx
2908; AVX-NEXT: andl $2, %edx
2909; AVX-NEXT: orl %esi, %edx
2910; AVX-NEXT: shrl $15, %eax
2911; AVX-NEXT: orl %edx, %eax
2912; AVX-NEXT: orl %ecx, %eax
2913; AVX-NEXT: vpinsrw $5, %eax, %xmm1, %xmm1
2914; AVX-NEXT: vpextrw $6, %xmm0, %eax
2915; AVX-NEXT: movl %eax, %ecx
2916; AVX-NEXT: shll $15, %ecx
2917; AVX-NEXT: movl %eax, %edx
2918; AVX-NEXT: andl $2, %edx
2919; AVX-NEXT: shll $13, %edx
2920; AVX-NEXT: orl %ecx, %edx
2921; AVX-NEXT: movl %eax, %ecx
2922; AVX-NEXT: andl $4, %ecx
2923; AVX-NEXT: shll $11, %ecx
2924; AVX-NEXT: orl %edx, %ecx
2925; AVX-NEXT: movl %eax, %edx
2926; AVX-NEXT: andl $8, %edx
2927; AVX-NEXT: shll $9, %edx
2928; AVX-NEXT: orl %ecx, %edx
2929; AVX-NEXT: movl %eax, %esi
2930; AVX-NEXT: andl $16, %esi
2931; AVX-NEXT: shll $7, %esi
2932; AVX-NEXT: orl %edx, %esi
2933; AVX-NEXT: movl %eax, %ecx
2934; AVX-NEXT: andl $32, %ecx
2935; AVX-NEXT: shll $5, %ecx
2936; AVX-NEXT: orl %esi, %ecx
2937; AVX-NEXT: movl %eax, %edx
2938; AVX-NEXT: andl $64, %edx
2939; AVX-NEXT: shll $3, %edx
2940; AVX-NEXT: leal (%rax,%rax), %esi
2941; AVX-NEXT: andl $256, %esi # imm = 0x100
2942; AVX-NEXT: orl %edx, %esi
2943; AVX-NEXT: movl %eax, %edx
2944; AVX-NEXT: shrl %edx
2945; AVX-NEXT: andl $128, %edx
2946; AVX-NEXT: orl %esi, %edx
2947; AVX-NEXT: movl %eax, %esi
2948; AVX-NEXT: shrl $3, %esi
2949; AVX-NEXT: andl $64, %esi
2950; AVX-NEXT: orl %edx, %esi
2951; AVX-NEXT: movl %eax, %edx
2952; AVX-NEXT: shrl $5, %edx
2953; AVX-NEXT: andl $32, %edx
2954; AVX-NEXT: orl %esi, %edx
2955; AVX-NEXT: movl %eax, %esi
2956; AVX-NEXT: shrl $7, %esi
2957; AVX-NEXT: andl $16, %esi
2958; AVX-NEXT: orl %edx, %esi
2959; AVX-NEXT: movl %eax, %edx
2960; AVX-NEXT: shrl $9, %edx
2961; AVX-NEXT: andl $8, %edx
2962; AVX-NEXT: orl %esi, %edx
2963; AVX-NEXT: movl %eax, %esi
2964; AVX-NEXT: shrl $11, %esi
2965; AVX-NEXT: andl $4, %esi
2966; AVX-NEXT: orl %edx, %esi
2967; AVX-NEXT: movl %eax, %edx
2968; AVX-NEXT: shrl $13, %edx
2969; AVX-NEXT: andl $2, %edx
2970; AVX-NEXT: orl %esi, %edx
2971; AVX-NEXT: shrl $15, %eax
2972; AVX-NEXT: orl %edx, %eax
2973; AVX-NEXT: orl %ecx, %eax
2974; AVX-NEXT: vpinsrw $6, %eax, %xmm1, %xmm1
2975; AVX-NEXT: vpextrw $7, %xmm0, %eax
2976; AVX-NEXT: movl %eax, %ecx
2977; AVX-NEXT: shll $15, %ecx
2978; AVX-NEXT: movl %eax, %edx
2979; AVX-NEXT: andl $2, %edx
2980; AVX-NEXT: shll $13, %edx
2981; AVX-NEXT: orl %ecx, %edx
2982; AVX-NEXT: movl %eax, %ecx
2983; AVX-NEXT: andl $4, %ecx
2984; AVX-NEXT: shll $11, %ecx
2985; AVX-NEXT: orl %edx, %ecx
2986; AVX-NEXT: movl %eax, %edx
2987; AVX-NEXT: andl $8, %edx
2988; AVX-NEXT: shll $9, %edx
2989; AVX-NEXT: orl %ecx, %edx
2990; AVX-NEXT: movl %eax, %esi
2991; AVX-NEXT: andl $16, %esi
2992; AVX-NEXT: shll $7, %esi
2993; AVX-NEXT: orl %edx, %esi
2994; AVX-NEXT: movl %eax, %ecx
2995; AVX-NEXT: andl $32, %ecx
2996; AVX-NEXT: shll $5, %ecx
2997; AVX-NEXT: orl %esi, %ecx
2998; AVX-NEXT: movl %eax, %edx
2999; AVX-NEXT: andl $64, %edx
3000; AVX-NEXT: shll $3, %edx
3001; AVX-NEXT: leal (%rax,%rax), %esi
3002; AVX-NEXT: andl $256, %esi # imm = 0x100
3003; AVX-NEXT: orl %edx, %esi
3004; AVX-NEXT: movl %eax, %edx
3005; AVX-NEXT: shrl %edx
3006; AVX-NEXT: andl $128, %edx
3007; AVX-NEXT: orl %esi, %edx
3008; AVX-NEXT: movl %eax, %esi
3009; AVX-NEXT: shrl $3, %esi
3010; AVX-NEXT: andl $64, %esi
3011; AVX-NEXT: orl %edx, %esi
3012; AVX-NEXT: movl %eax, %edx
3013; AVX-NEXT: shrl $5, %edx
3014; AVX-NEXT: andl $32, %edx
3015; AVX-NEXT: orl %esi, %edx
3016; AVX-NEXT: movl %eax, %esi
3017; AVX-NEXT: shrl $7, %esi
3018; AVX-NEXT: andl $16, %esi
3019; AVX-NEXT: orl %edx, %esi
3020; AVX-NEXT: movl %eax, %edx
3021; AVX-NEXT: shrl $9, %edx
3022; AVX-NEXT: andl $8, %edx
3023; AVX-NEXT: orl %esi, %edx
3024; AVX-NEXT: movl %eax, %esi
3025; AVX-NEXT: shrl $11, %esi
3026; AVX-NEXT: andl $4, %esi
3027; AVX-NEXT: orl %edx, %esi
3028; AVX-NEXT: movl %eax, %edx
3029; AVX-NEXT: shrl $13, %edx
3030; AVX-NEXT: andl $2, %edx
3031; AVX-NEXT: orl %esi, %edx
3032; AVX-NEXT: shrl $15, %eax
3033; AVX-NEXT: orl %edx, %eax
3034; AVX-NEXT: orl %ecx, %eax
3035; AVX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm0
3036; AVX-NEXT: retq
3037;
3038; XOP-LABEL: test_bitreverse_v8i16:
3039; XOP: # BB#0:
3040; XOP-NEXT: vpperm {{.*}}(%rip), %xmm0, %xmm0, %xmm0
3041; XOP-NEXT: retq
3042 %b = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %a)
3043 ret <8 x i16> %b
3044}
3045
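; The 32-bit variant follows the same lane-by-lane pattern (bit i of each lane
; moves to bit 31 - i); the single-bit masks now appear as 32-bit immediates
; such as 0x1000000 and 0x8000 rather than byte constants.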
3046define <4 x i32> @test_bitreverse_v4i32(<4 x i32> %a) nounwind {
3047; SSE-LABEL: test_bitreverse_v4i32:
3048; SSE: # BB#0:
3049; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
3050; SSE-NEXT: movd %xmm1, %eax
3051; SSE-NEXT: movl %eax, %ecx
3052; SSE-NEXT: shll $31, %ecx
3053; SSE-NEXT: movl %eax, %edx
3054; SSE-NEXT: andl $2, %edx
3055; SSE-NEXT: shll $29, %edx
3056; SSE-NEXT: orl %ecx, %edx
3057; SSE-NEXT: movl %eax, %ecx
3058; SSE-NEXT: andl $4, %ecx
3059; SSE-NEXT: shll $27, %ecx
3060; SSE-NEXT: orl %edx, %ecx
3061; SSE-NEXT: movl %eax, %edx
3062; SSE-NEXT: andl $8, %edx
3063; SSE-NEXT: shll $25, %edx
3064; SSE-NEXT: orl %ecx, %edx
3065; SSE-NEXT: movl %eax, %esi
3066; SSE-NEXT: andl $16, %esi
3067; SSE-NEXT: shll $23, %esi
3068; SSE-NEXT: orl %edx, %esi
3069; SSE-NEXT: movl %eax, %ecx
3070; SSE-NEXT: andl $32, %ecx
3071; SSE-NEXT: shll $21, %ecx
3072; SSE-NEXT: orl %esi, %ecx
3073; SSE-NEXT: movl %eax, %edx
3074; SSE-NEXT: andl $64, %edx
3075; SSE-NEXT: shll $19, %edx
3076; SSE-NEXT: movl %eax, %esi
3077; SSE-NEXT: shll $17, %esi
3078; SSE-NEXT: andl $16777216, %esi # imm = 0x1000000
3079; SSE-NEXT: orl %edx, %esi
3080; SSE-NEXT: movl %eax, %edx
3081; SSE-NEXT: shll $15, %edx
3082; SSE-NEXT: andl $8388608, %edx # imm = 0x800000
3083; SSE-NEXT: orl %esi, %edx
3084; SSE-NEXT: movl %eax, %esi
3085; SSE-NEXT: shll $13, %esi
3086; SSE-NEXT: andl $4194304, %esi # imm = 0x400000
3087; SSE-NEXT: orl %edx, %esi
3088; SSE-NEXT: movl %eax, %edx
3089; SSE-NEXT: shll $11, %edx
3090; SSE-NEXT: andl $2097152, %edx # imm = 0x200000
3091; SSE-NEXT: orl %esi, %edx
3092; SSE-NEXT: movl %eax, %esi
3093; SSE-NEXT: shll $9, %esi
3094; SSE-NEXT: andl $1048576, %esi # imm = 0x100000
3095; SSE-NEXT: orl %edx, %esi
3096; SSE-NEXT: movl %eax, %edx
3097; SSE-NEXT: shll $7, %edx
3098; SSE-NEXT: andl $524288, %edx # imm = 0x80000
3099; SSE-NEXT: orl %esi, %edx
3100; SSE-NEXT: movl %eax, %esi
3101; SSE-NEXT: shll $5, %esi
3102; SSE-NEXT: andl $262144, %esi # imm = 0x40000
3103; SSE-NEXT: orl %edx, %esi
3104; SSE-NEXT: leal (,%rax,8), %edx
3105; SSE-NEXT: andl $131072, %edx # imm = 0x20000
3106; SSE-NEXT: orl %esi, %edx
3107; SSE-NEXT: leal (%rax,%rax), %esi
3108; SSE-NEXT: andl $65536, %esi # imm = 0x10000
3109; SSE-NEXT: orl %edx, %esi
3110; SSE-NEXT: movl %eax, %edx
3111; SSE-NEXT: shrl %edx
3112; SSE-NEXT: andl $32768, %edx # imm = 0x8000
3113; SSE-NEXT: orl %esi, %edx
3114; SSE-NEXT: movl %eax, %esi
3115; SSE-NEXT: shrl $3, %esi
3116; SSE-NEXT: andl $16384, %esi # imm = 0x4000
3117; SSE-NEXT: orl %edx, %esi
3118; SSE-NEXT: movl %eax, %edx
3119; SSE-NEXT: shrl $5, %edx
3120; SSE-NEXT: andl $8192, %edx # imm = 0x2000
3121; SSE-NEXT: orl %esi, %edx
3122; SSE-NEXT: movl %eax, %esi
3123; SSE-NEXT: shrl $7, %esi
3124; SSE-NEXT: andl $4096, %esi # imm = 0x1000
3125; SSE-NEXT: orl %edx, %esi
3126; SSE-NEXT: movl %eax, %edx
3127; SSE-NEXT: shrl $9, %edx
3128; SSE-NEXT: andl $2048, %edx # imm = 0x800
3129; SSE-NEXT: orl %esi, %edx
3130; SSE-NEXT: movl %eax, %esi
3131; SSE-NEXT: shrl $11, %esi
3132; SSE-NEXT: andl $1024, %esi # imm = 0x400
3133; SSE-NEXT: orl %edx, %esi
3134; SSE-NEXT: movl %eax, %edx
3135; SSE-NEXT: shrl $13, %edx
3136; SSE-NEXT: andl $512, %edx # imm = 0x200
3137; SSE-NEXT: orl %esi, %edx
3138; SSE-NEXT: movl %eax, %esi
3139; SSE-NEXT: shrl $15, %esi
3140; SSE-NEXT: andl $256, %esi # imm = 0x100
3141; SSE-NEXT: orl %edx, %esi
3142; SSE-NEXT: movl %eax, %edx
3143; SSE-NEXT: shrl $17, %edx
3144; SSE-NEXT: andl $128, %edx
3145; SSE-NEXT: orl %esi, %edx
3146; SSE-NEXT: movl %eax, %esi
3147; SSE-NEXT: shrl $19, %esi
3148; SSE-NEXT: andl $64, %esi
3149; SSE-NEXT: orl %edx, %esi
3150; SSE-NEXT: movl %eax, %edx
3151; SSE-NEXT: shrl $21, %edx
3152; SSE-NEXT: andl $32, %edx
3153; SSE-NEXT: orl %esi, %edx
3154; SSE-NEXT: movl %eax, %esi
3155; SSE-NEXT: shrl $23, %esi
3156; SSE-NEXT: andl $16, %esi
3157; SSE-NEXT: orl %edx, %esi
3158; SSE-NEXT: movl %eax, %edx
3159; SSE-NEXT: shrl $25, %edx
3160; SSE-NEXT: andl $8, %edx
3161; SSE-NEXT: orl %esi, %edx
3162; SSE-NEXT: movl %eax, %esi
3163; SSE-NEXT: shrl $27, %esi
3164; SSE-NEXT: andl $4, %esi
3165; SSE-NEXT: orl %edx, %esi
3166; SSE-NEXT: movl %eax, %edx
3167; SSE-NEXT: shrl $29, %edx
3168; SSE-NEXT: andl $2, %edx
3169; SSE-NEXT: orl %esi, %edx
3170; SSE-NEXT: shrl $31, %eax
3171; SSE-NEXT: orl %edx, %eax
3172; SSE-NEXT: orl %ecx, %eax
3173; SSE-NEXT: movd %eax, %xmm1
3174; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
3175; SSE-NEXT: movd %xmm2, %eax
3176; SSE-NEXT: movl %eax, %ecx
3177; SSE-NEXT: shll $31, %ecx
3178; SSE-NEXT: movl %eax, %edx
3179; SSE-NEXT: andl $2, %edx
3180; SSE-NEXT: shll $29, %edx
3181; SSE-NEXT: orl %ecx, %edx
3182; SSE-NEXT: movl %eax, %ecx
3183; SSE-NEXT: andl $4, %ecx
3184; SSE-NEXT: shll $27, %ecx
3185; SSE-NEXT: orl %edx, %ecx
3186; SSE-NEXT: movl %eax, %edx
3187; SSE-NEXT: andl $8, %edx
3188; SSE-NEXT: shll $25, %edx
3189; SSE-NEXT: orl %ecx, %edx
3190; SSE-NEXT: movl %eax, %esi
3191; SSE-NEXT: andl $16, %esi
3192; SSE-NEXT: shll $23, %esi
3193; SSE-NEXT: orl %edx, %esi
3194; SSE-NEXT: movl %eax, %ecx
3195; SSE-NEXT: andl $32, %ecx
3196; SSE-NEXT: shll $21, %ecx
3197; SSE-NEXT: orl %esi, %ecx
3198; SSE-NEXT: movl %eax, %edx
3199; SSE-NEXT: andl $64, %edx
3200; SSE-NEXT: shll $19, %edx
3201; SSE-NEXT: movl %eax, %esi
3202; SSE-NEXT: shll $17, %esi
3203; SSE-NEXT: andl $16777216, %esi # imm = 0x1000000
3204; SSE-NEXT: orl %edx, %esi
3205; SSE-NEXT: movl %eax, %edx
3206; SSE-NEXT: shll $15, %edx
3207; SSE-NEXT: andl $8388608, %edx # imm = 0x800000
3208; SSE-NEXT: orl %esi, %edx
3209; SSE-NEXT: movl %eax, %esi
3210; SSE-NEXT: shll $13, %esi
3211; SSE-NEXT: andl $4194304, %esi # imm = 0x400000
3212; SSE-NEXT: orl %edx, %esi
3213; SSE-NEXT: movl %eax, %edx
3214; SSE-NEXT: shll $11, %edx
3215; SSE-NEXT: andl $2097152, %edx # imm = 0x200000
3216; SSE-NEXT: orl %esi, %edx
3217; SSE-NEXT: movl %eax, %esi
3218; SSE-NEXT: shll $9, %esi
3219; SSE-NEXT: andl $1048576, %esi # imm = 0x100000
3220; SSE-NEXT: orl %edx, %esi
3221; SSE-NEXT: movl %eax, %edx
3222; SSE-NEXT: shll $7, %edx
3223; SSE-NEXT: andl $524288, %edx # imm = 0x80000
3224; SSE-NEXT: orl %esi, %edx
3225; SSE-NEXT: movl %eax, %esi
3226; SSE-NEXT: shll $5, %esi
3227; SSE-NEXT: andl $262144, %esi # imm = 0x40000
3228; SSE-NEXT: orl %edx, %esi
3229; SSE-NEXT: leal (,%rax,8), %edx
3230; SSE-NEXT: andl $131072, %edx # imm = 0x20000
3231; SSE-NEXT: orl %esi, %edx
3232; SSE-NEXT: leal (%rax,%rax), %esi
3233; SSE-NEXT: andl $65536, %esi # imm = 0x10000
3234; SSE-NEXT: orl %edx, %esi
3235; SSE-NEXT: movl %eax, %edx
3236; SSE-NEXT: shrl %edx
3237; SSE-NEXT: andl $32768, %edx # imm = 0x8000
3238; SSE-NEXT: orl %esi, %edx
3239; SSE-NEXT: movl %eax, %esi
3240; SSE-NEXT: shrl $3, %esi
3241; SSE-NEXT: andl $16384, %esi # imm = 0x4000
3242; SSE-NEXT: orl %edx, %esi
3243; SSE-NEXT: movl %eax, %edx
3244; SSE-NEXT: shrl $5, %edx
3245; SSE-NEXT: andl $8192, %edx # imm = 0x2000
3246; SSE-NEXT: orl %esi, %edx
3247; SSE-NEXT: movl %eax, %esi
3248; SSE-NEXT: shrl $7, %esi
3249; SSE-NEXT: andl $4096, %esi # imm = 0x1000
3250; SSE-NEXT: orl %edx, %esi
3251; SSE-NEXT: movl %eax, %edx
3252; SSE-NEXT: shrl $9, %edx
3253; SSE-NEXT: andl $2048, %edx # imm = 0x800
3254; SSE-NEXT: orl %esi, %edx
3255; SSE-NEXT: movl %eax, %esi
3256; SSE-NEXT: shrl $11, %esi
3257; SSE-NEXT: andl $1024, %esi # imm = 0x400
3258; SSE-NEXT: orl %edx, %esi
3259; SSE-NEXT: movl %eax, %edx
3260; SSE-NEXT: shrl $13, %edx
3261; SSE-NEXT: andl $512, %edx # imm = 0x200
3262; SSE-NEXT: orl %esi, %edx
3263; SSE-NEXT: movl %eax, %esi
3264; SSE-NEXT: shrl $15, %esi
3265; SSE-NEXT: andl $256, %esi # imm = 0x100
3266; SSE-NEXT: orl %edx, %esi
3267; SSE-NEXT: movl %eax, %edx
3268; SSE-NEXT: shrl $17, %edx
3269; SSE-NEXT: andl $128, %edx
3270; SSE-NEXT: orl %esi, %edx
3271; SSE-NEXT: movl %eax, %esi
3272; SSE-NEXT: shrl $19, %esi
3273; SSE-NEXT: andl $64, %esi
3274; SSE-NEXT: orl %edx, %esi
3275; SSE-NEXT: movl %eax, %edx
3276; SSE-NEXT: shrl $21, %edx
3277; SSE-NEXT: andl $32, %edx
3278; SSE-NEXT: orl %esi, %edx
3279; SSE-NEXT: movl %eax, %esi
3280; SSE-NEXT: shrl $23, %esi
3281; SSE-NEXT: andl $16, %esi
3282; SSE-NEXT: orl %edx, %esi
3283; SSE-NEXT: movl %eax, %edx
3284; SSE-NEXT: shrl $25, %edx
3285; SSE-NEXT: andl $8, %edx
3286; SSE-NEXT: orl %esi, %edx
3287; SSE-NEXT: movl %eax, %esi
3288; SSE-NEXT: shrl $27, %esi
3289; SSE-NEXT: andl $4, %esi
3290; SSE-NEXT: orl %edx, %esi
3291; SSE-NEXT: movl %eax, %edx
3292; SSE-NEXT: shrl $29, %edx
3293; SSE-NEXT: andl $2, %edx
3294; SSE-NEXT: orl %esi, %edx
3295; SSE-NEXT: shrl $31, %eax
3296; SSE-NEXT: orl %edx, %eax
3297; SSE-NEXT: orl %ecx, %eax
3298; SSE-NEXT: movd %eax, %xmm2
3299; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
3300; SSE-NEXT: movd %xmm0, %eax
3301; SSE-NEXT: movl %eax, %ecx
3302; SSE-NEXT: shll $31, %ecx
3303; SSE-NEXT: movl %eax, %edx
3304; SSE-NEXT: andl $2, %edx
3305; SSE-NEXT: shll $29, %edx
3306; SSE-NEXT: orl %ecx, %edx
3307; SSE-NEXT: movl %eax, %ecx
3308; SSE-NEXT: andl $4, %ecx
3309; SSE-NEXT: shll $27, %ecx
3310; SSE-NEXT: orl %edx, %ecx
3311; SSE-NEXT: movl %eax, %edx
3312; SSE-NEXT: andl $8, %edx
3313; SSE-NEXT: shll $25, %edx
3314; SSE-NEXT: orl %ecx, %edx
3315; SSE-NEXT: movl %eax, %esi
3316; SSE-NEXT: andl $16, %esi
3317; SSE-NEXT: shll $23, %esi
3318; SSE-NEXT: orl %edx, %esi
3319; SSE-NEXT: movl %eax, %ecx
3320; SSE-NEXT: andl $32, %ecx
3321; SSE-NEXT: shll $21, %ecx
3322; SSE-NEXT: orl %esi, %ecx
3323; SSE-NEXT: movl %eax, %edx
3324; SSE-NEXT: andl $64, %edx
3325; SSE-NEXT: shll $19, %edx
3326; SSE-NEXT: movl %eax, %esi
3327; SSE-NEXT: shll $17, %esi
3328; SSE-NEXT: andl $16777216, %esi # imm = 0x1000000
3329; SSE-NEXT: orl %edx, %esi
3330; SSE-NEXT: movl %eax, %edx
3331; SSE-NEXT: shll $15, %edx
3332; SSE-NEXT: andl $8388608, %edx # imm = 0x800000
3333; SSE-NEXT: orl %esi, %edx
3334; SSE-NEXT: movl %eax, %esi
3335; SSE-NEXT: shll $13, %esi
3336; SSE-NEXT: andl $4194304, %esi # imm = 0x400000
3337; SSE-NEXT: orl %edx, %esi
3338; SSE-NEXT: movl %eax, %edx
3339; SSE-NEXT: shll $11, %edx
3340; SSE-NEXT: andl $2097152, %edx # imm = 0x200000
3341; SSE-NEXT: orl %esi, %edx
3342; SSE-NEXT: movl %eax, %esi
3343; SSE-NEXT: shll $9, %esi
3344; SSE-NEXT: andl $1048576, %esi # imm = 0x100000
3345; SSE-NEXT: orl %edx, %esi
3346; SSE-NEXT: movl %eax, %edx
3347; SSE-NEXT: shll $7, %edx
3348; SSE-NEXT: andl $524288, %edx # imm = 0x80000
3349; SSE-NEXT: orl %esi, %edx
3350; SSE-NEXT: movl %eax, %esi
3351; SSE-NEXT: shll $5, %esi
3352; SSE-NEXT: andl $262144, %esi # imm = 0x40000
3353; SSE-NEXT: orl %edx, %esi
3354; SSE-NEXT: leal (,%rax,8), %edx
3355; SSE-NEXT: andl $131072, %edx # imm = 0x20000
3356; SSE-NEXT: orl %esi, %edx
3357; SSE-NEXT: leal (%rax,%rax), %esi
3358; SSE-NEXT: andl $65536, %esi # imm = 0x10000
3359; SSE-NEXT: orl %edx, %esi
3360; SSE-NEXT: movl %eax, %edx
3361; SSE-NEXT: shrl %edx
3362; SSE-NEXT: andl $32768, %edx # imm = 0x8000
3363; SSE-NEXT: orl %esi, %edx
3364; SSE-NEXT: movl %eax, %esi
3365; SSE-NEXT: shrl $3, %esi
3366; SSE-NEXT: andl $16384, %esi # imm = 0x4000
3367; SSE-NEXT: orl %edx, %esi
3368; SSE-NEXT: movl %eax, %edx
3369; SSE-NEXT: shrl $5, %edx
3370; SSE-NEXT: andl $8192, %edx # imm = 0x2000
3371; SSE-NEXT: orl %esi, %edx
3372; SSE-NEXT: movl %eax, %esi
3373; SSE-NEXT: shrl $7, %esi
3374; SSE-NEXT: andl $4096, %esi # imm = 0x1000
3375; SSE-NEXT: orl %edx, %esi
3376; SSE-NEXT: movl %eax, %edx
3377; SSE-NEXT: shrl $9, %edx
3378; SSE-NEXT: andl $2048, %edx # imm = 0x800
3379; SSE-NEXT: orl %esi, %edx
3380; SSE-NEXT: movl %eax, %esi
3381; SSE-NEXT: shrl $11, %esi
3382; SSE-NEXT: andl $1024, %esi # imm = 0x400
3383; SSE-NEXT: orl %edx, %esi
3384; SSE-NEXT: movl %eax, %edx
3385; SSE-NEXT: shrl $13, %edx
3386; SSE-NEXT: andl $512, %edx # imm = 0x200
3387; SSE-NEXT: orl %esi, %edx
3388; SSE-NEXT: movl %eax, %esi
3389; SSE-NEXT: shrl $15, %esi
3390; SSE-NEXT: andl $256, %esi # imm = 0x100
3391; SSE-NEXT: orl %edx, %esi
3392; SSE-NEXT: movl %eax, %edx
3393; SSE-NEXT: shrl $17, %edx
3394; SSE-NEXT: andl $128, %edx
3395; SSE-NEXT: orl %esi, %edx
3396; SSE-NEXT: movl %eax, %esi
3397; SSE-NEXT: shrl $19, %esi
3398; SSE-NEXT: andl $64, %esi
3399; SSE-NEXT: orl %edx, %esi
3400; SSE-NEXT: movl %eax, %edx
3401; SSE-NEXT: shrl $21, %edx
3402; SSE-NEXT: andl $32, %edx
3403; SSE-NEXT: orl %esi, %edx
3404; SSE-NEXT: movl %eax, %esi
3405; SSE-NEXT: shrl $23, %esi
3406; SSE-NEXT: andl $16, %esi
3407; SSE-NEXT: orl %edx, %esi
3408; SSE-NEXT: movl %eax, %edx
3409; SSE-NEXT: shrl $25, %edx
3410; SSE-NEXT: andl $8, %edx
3411; SSE-NEXT: orl %esi, %edx
3412; SSE-NEXT: movl %eax, %esi
3413; SSE-NEXT: shrl $27, %esi
3414; SSE-NEXT: andl $4, %esi
3415; SSE-NEXT: orl %edx, %esi
3416; SSE-NEXT: movl %eax, %edx
3417; SSE-NEXT: shrl $29, %edx
3418; SSE-NEXT: andl $2, %edx
3419; SSE-NEXT: orl %esi, %edx
3420; SSE-NEXT: shrl $31, %eax
3421; SSE-NEXT: orl %edx, %eax
3422; SSE-NEXT: orl %ecx, %eax
3423; SSE-NEXT: movd %eax, %xmm1
3424; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
3425; SSE-NEXT: movd %xmm0, %eax
3426; SSE-NEXT: movl %eax, %ecx
3427; SSE-NEXT: shll $31, %ecx
3428; SSE-NEXT: movl %eax, %edx
3429; SSE-NEXT: andl $2, %edx
3430; SSE-NEXT: shll $29, %edx
3431; SSE-NEXT: orl %ecx, %edx
3432; SSE-NEXT: movl %eax, %ecx
3433; SSE-NEXT: andl $4, %ecx
3434; SSE-NEXT: shll $27, %ecx
3435; SSE-NEXT: orl %edx, %ecx
3436; SSE-NEXT: movl %eax, %edx
3437; SSE-NEXT: andl $8, %edx
3438; SSE-NEXT: shll $25, %edx
3439; SSE-NEXT: orl %ecx, %edx
3440; SSE-NEXT: movl %eax, %esi
3441; SSE-NEXT: andl $16, %esi
3442; SSE-NEXT: shll $23, %esi
3443; SSE-NEXT: orl %edx, %esi
3444; SSE-NEXT: movl %eax, %ecx
3445; SSE-NEXT: andl $32, %ecx
3446; SSE-NEXT: shll $21, %ecx
3447; SSE-NEXT: orl %esi, %ecx
3448; SSE-NEXT: movl %eax, %edx
3449; SSE-NEXT: andl $64, %edx
3450; SSE-NEXT: shll $19, %edx
3451; SSE-NEXT: movl %eax, %esi
3452; SSE-NEXT: shll $17, %esi
3453; SSE-NEXT: andl $16777216, %esi # imm = 0x1000000
3454; SSE-NEXT: orl %edx, %esi
3455; SSE-NEXT: movl %eax, %edx
3456; SSE-NEXT: shll $15, %edx
3457; SSE-NEXT: andl $8388608, %edx # imm = 0x800000
3458; SSE-NEXT: orl %esi, %edx
3459; SSE-NEXT: movl %eax, %esi
3460; SSE-NEXT: shll $13, %esi
3461; SSE-NEXT: andl $4194304, %esi # imm = 0x400000
3462; SSE-NEXT: orl %edx, %esi
3463; SSE-NEXT: movl %eax, %edx
3464; SSE-NEXT: shll $11, %edx
3465; SSE-NEXT: andl $2097152, %edx # imm = 0x200000
3466; SSE-NEXT: orl %esi, %edx
3467; SSE-NEXT: movl %eax, %esi
3468; SSE-NEXT: shll $9, %esi
3469; SSE-NEXT: andl $1048576, %esi # imm = 0x100000
3470; SSE-NEXT: orl %edx, %esi
3471; SSE-NEXT: movl %eax, %edx
3472; SSE-NEXT: shll $7, %edx
3473; SSE-NEXT: andl $524288, %edx # imm = 0x80000
3474; SSE-NEXT: orl %esi, %edx
3475; SSE-NEXT: movl %eax, %esi
3476; SSE-NEXT: shll $5, %esi
3477; SSE-NEXT: andl $262144, %esi # imm = 0x40000
3478; SSE-NEXT: orl %edx, %esi
3479; SSE-NEXT: leal (,%rax,8), %edx
3480; SSE-NEXT: andl $131072, %edx # imm = 0x20000
3481; SSE-NEXT: orl %esi, %edx
3482; SSE-NEXT: leal (%rax,%rax), %esi
3483; SSE-NEXT: andl $65536, %esi # imm = 0x10000
3484; SSE-NEXT: orl %edx, %esi
3485; SSE-NEXT: movl %eax, %edx
3486; SSE-NEXT: shrl %edx
3487; SSE-NEXT: andl $32768, %edx # imm = 0x8000
3488; SSE-NEXT: orl %esi, %edx
3489; SSE-NEXT: movl %eax, %esi
3490; SSE-NEXT: shrl $3, %esi
3491; SSE-NEXT: andl $16384, %esi # imm = 0x4000
3492; SSE-NEXT: orl %edx, %esi
3493; SSE-NEXT: movl %eax, %edx
3494; SSE-NEXT: shrl $5, %edx
3495; SSE-NEXT: andl $8192, %edx # imm = 0x2000
3496; SSE-NEXT: orl %esi, %edx
3497; SSE-NEXT: movl %eax, %esi
3498; SSE-NEXT: shrl $7, %esi
3499; SSE-NEXT: andl $4096, %esi # imm = 0x1000
3500; SSE-NEXT: orl %edx, %esi
3501; SSE-NEXT: movl %eax, %edx
3502; SSE-NEXT: shrl $9, %edx
3503; SSE-NEXT: andl $2048, %edx # imm = 0x800
3504; SSE-NEXT: orl %esi, %edx
3505; SSE-NEXT: movl %eax, %esi
3506; SSE-NEXT: shrl $11, %esi
3507; SSE-NEXT: andl $1024, %esi # imm = 0x400
3508; SSE-NEXT: orl %edx, %esi
3509; SSE-NEXT: movl %eax, %edx
3510; SSE-NEXT: shrl $13, %edx
3511; SSE-NEXT: andl $512, %edx # imm = 0x200
3512; SSE-NEXT: orl %esi, %edx
3513; SSE-NEXT: movl %eax, %esi
3514; SSE-NEXT: shrl $15, %esi
3515; SSE-NEXT: andl $256, %esi # imm = 0x100
3516; SSE-NEXT: orl %edx, %esi
3517; SSE-NEXT: movl %eax, %edx
3518; SSE-NEXT: shrl $17, %edx
3519; SSE-NEXT: andl $128, %edx
3520; SSE-NEXT: orl %esi, %edx
3521; SSE-NEXT: movl %eax, %esi
3522; SSE-NEXT: shrl $19, %esi
3523; SSE-NEXT: andl $64, %esi
3524; SSE-NEXT: orl %edx, %esi
3525; SSE-NEXT: movl %eax, %edx
3526; SSE-NEXT: shrl $21, %edx
3527; SSE-NEXT: andl $32, %edx
3528; SSE-NEXT: orl %esi, %edx
3529; SSE-NEXT: movl %eax, %esi
3530; SSE-NEXT: shrl $23, %esi
3531; SSE-NEXT: andl $16, %esi
3532; SSE-NEXT: orl %edx, %esi
3533; SSE-NEXT: movl %eax, %edx
3534; SSE-NEXT: shrl $25, %edx
3535; SSE-NEXT: andl $8, %edx
3536; SSE-NEXT: orl %esi, %edx
3537; SSE-NEXT: movl %eax, %esi
3538; SSE-NEXT: shrl $27, %esi
3539; SSE-NEXT: andl $4, %esi
3540; SSE-NEXT: orl %edx, %esi
3541; SSE-NEXT: movl %eax, %edx
3542; SSE-NEXT: shrl $29, %edx
3543; SSE-NEXT: andl $2, %edx
3544; SSE-NEXT: orl %esi, %edx
3545; SSE-NEXT: shrl $31, %eax
3546; SSE-NEXT: orl %edx, %eax
3547; SSE-NEXT: orl %ecx, %eax
3548; SSE-NEXT: movd %eax, %xmm0
3549; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
3550; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
3551; SSE-NEXT: movdqa %xmm1, %xmm0
3552; SSE-NEXT: retq
3553;
3554; AVX-LABEL: test_bitreverse_v4i32:
3555; AVX: # BB#0:
3556; AVX-NEXT: vpextrd $1, %xmm0, %eax
3557; AVX-NEXT: movl %eax, %ecx
3558; AVX-NEXT: shll $31, %ecx
3559; AVX-NEXT: movl %eax, %edx
3560; AVX-NEXT: andl $2, %edx
3561; AVX-NEXT: shll $29, %edx
3562; AVX-NEXT: orl %ecx, %edx
3563; AVX-NEXT: movl %eax, %ecx
3564; AVX-NEXT: andl $4, %ecx
3565; AVX-NEXT: shll $27, %ecx
3566; AVX-NEXT: orl %edx, %ecx
3567; AVX-NEXT: movl %eax, %edx
3568; AVX-NEXT: andl $8, %edx
3569; AVX-NEXT: shll $25, %edx
3570; AVX-NEXT: orl %ecx, %edx
3571; AVX-NEXT: movl %eax, %esi
3572; AVX-NEXT: andl $16, %esi
3573; AVX-NEXT: shll $23, %esi
3574; AVX-NEXT: orl %edx, %esi
3575; AVX-NEXT: movl %eax, %ecx
3576; AVX-NEXT: andl $32, %ecx
3577; AVX-NEXT: shll $21, %ecx
3578; AVX-NEXT: orl %esi, %ecx
3579; AVX-NEXT: movl %eax, %edx
3580; AVX-NEXT: andl $64, %edx
3581; AVX-NEXT: shll $19, %edx
3582; AVX-NEXT: movl %eax, %esi
3583; AVX-NEXT: shll $17, %esi
3584; AVX-NEXT: andl $16777216, %esi # imm = 0x1000000
3585; AVX-NEXT: orl %edx, %esi
3586; AVX-NEXT: movl %eax, %edx
3587; AVX-NEXT: shll $15, %edx
3588; AVX-NEXT: andl $8388608, %edx # imm = 0x800000
3589; AVX-NEXT: orl %esi, %edx
3590; AVX-NEXT: movl %eax, %esi
3591; AVX-NEXT: shll $13, %esi
3592; AVX-NEXT: andl $4194304, %esi # imm = 0x400000
3593; AVX-NEXT: orl %edx, %esi
3594; AVX-NEXT: movl %eax, %edx
3595; AVX-NEXT: shll $11, %edx
3596; AVX-NEXT: andl $2097152, %edx # imm = 0x200000
3597; AVX-NEXT: orl %esi, %edx
3598; AVX-NEXT: movl %eax, %esi
3599; AVX-NEXT: shll $9, %esi
3600; AVX-NEXT: andl $1048576, %esi # imm = 0x100000
3601; AVX-NEXT: orl %edx, %esi
3602; AVX-NEXT: movl %eax, %edx
3603; AVX-NEXT: shll $7, %edx
3604; AVX-NEXT: andl $524288, %edx # imm = 0x80000
3605; AVX-NEXT: orl %esi, %edx
3606; AVX-NEXT: movl %eax, %esi
3607; AVX-NEXT: shll $5, %esi
3608; AVX-NEXT: andl $262144, %esi # imm = 0x40000
3609; AVX-NEXT: orl %edx, %esi
3610; AVX-NEXT: leal (,%rax,8), %edx
3611; AVX-NEXT: andl $131072, %edx # imm = 0x20000
3612; AVX-NEXT: orl %esi, %edx
3613; AVX-NEXT: leal (%rax,%rax), %esi
3614; AVX-NEXT: andl $65536, %esi # imm = 0x10000
3615; AVX-NEXT: orl %edx, %esi
3616; AVX-NEXT: movl %eax, %edx
3617; AVX-NEXT: shrl %edx
3618; AVX-NEXT: andl $32768, %edx # imm = 0x8000
3619; AVX-NEXT: orl %esi, %edx
3620; AVX-NEXT: movl %eax, %esi
3621; AVX-NEXT: shrl $3, %esi
3622; AVX-NEXT: andl $16384, %esi # imm = 0x4000
3623; AVX-NEXT: orl %edx, %esi
3624; AVX-NEXT: movl %eax, %edx
3625; AVX-NEXT: shrl $5, %edx
3626; AVX-NEXT: andl $8192, %edx # imm = 0x2000
3627; AVX-NEXT: orl %esi, %edx
3628; AVX-NEXT: movl %eax, %esi
3629; AVX-NEXT: shrl $7, %esi
3630; AVX-NEXT: andl $4096, %esi # imm = 0x1000
3631; AVX-NEXT: orl %edx, %esi
3632; AVX-NEXT: movl %eax, %edx
3633; AVX-NEXT: shrl $9, %edx
3634; AVX-NEXT: andl $2048, %edx # imm = 0x800
3635; AVX-NEXT: orl %esi, %edx
3636; AVX-NEXT: movl %eax, %esi
3637; AVX-NEXT: shrl $11, %esi
3638; AVX-NEXT: andl $1024, %esi # imm = 0x400
3639; AVX-NEXT: orl %edx, %esi
3640; AVX-NEXT: movl %eax, %edx
3641; AVX-NEXT: shrl $13, %edx
3642; AVX-NEXT: andl $512, %edx # imm = 0x200
3643; AVX-NEXT: orl %esi, %edx
3644; AVX-NEXT: movl %eax, %esi
3645; AVX-NEXT: shrl $15, %esi
3646; AVX-NEXT: andl $256, %esi # imm = 0x100
3647; AVX-NEXT: orl %edx, %esi
3648; AVX-NEXT: movl %eax, %edx
3649; AVX-NEXT: shrl $17, %edx
3650; AVX-NEXT: andl $128, %edx
3651; AVX-NEXT: orl %esi, %edx
3652; AVX-NEXT: movl %eax, %esi
3653; AVX-NEXT: shrl $19, %esi
3654; AVX-NEXT: andl $64, %esi
3655; AVX-NEXT: orl %edx, %esi
3656; AVX-NEXT: movl %eax, %edx
3657; AVX-NEXT: shrl $21, %edx
3658; AVX-NEXT: andl $32, %edx
3659; AVX-NEXT: orl %esi, %edx
3660; AVX-NEXT: movl %eax, %esi
3661; AVX-NEXT: shrl $23, %esi
3662; AVX-NEXT: andl $16, %esi
3663; AVX-NEXT: orl %edx, %esi
3664; AVX-NEXT: movl %eax, %edx
3665; AVX-NEXT: shrl $25, %edx
3666; AVX-NEXT: andl $8, %edx
3667; AVX-NEXT: orl %esi, %edx
3668; AVX-NEXT: movl %eax, %esi
3669; AVX-NEXT: shrl $27, %esi
3670; AVX-NEXT: andl $4, %esi
3671; AVX-NEXT: orl %edx, %esi
3672; AVX-NEXT: movl %eax, %edx
3673; AVX-NEXT: shrl $29, %edx
3674; AVX-NEXT: andl $2, %edx
3675; AVX-NEXT: orl %esi, %edx
3676; AVX-NEXT: shrl $31, %eax
3677; AVX-NEXT: orl %edx, %eax
3678; AVX-NEXT: orl %ecx, %eax
3679; AVX-NEXT: vmovd %xmm0, %ecx
3680; AVX-NEXT: movl %ecx, %edx
3681; AVX-NEXT: shll $31, %edx
3682; AVX-NEXT: movl %ecx, %esi
3683; AVX-NEXT: andl $2, %esi
3684; AVX-NEXT: shll $29, %esi
3685; AVX-NEXT: orl %edx, %esi
3686; AVX-NEXT: movl %ecx, %edx
3687; AVX-NEXT: andl $4, %edx
3688; AVX-NEXT: shll $27, %edx
3689; AVX-NEXT: orl %esi, %edx
3690; AVX-NEXT: movl %ecx, %esi
3691; AVX-NEXT: andl $8, %esi
3692; AVX-NEXT: shll $25, %esi
3693; AVX-NEXT: orl %edx, %esi
3694; AVX-NEXT: movl %ecx, %edi
3695; AVX-NEXT: andl $16, %edi
3696; AVX-NEXT: shll $23, %edi
3697; AVX-NEXT: orl %esi, %edi
3698; AVX-NEXT: movl %ecx, %edx
3699; AVX-NEXT: andl $32, %edx
3700; AVX-NEXT: shll $21, %edx
3701; AVX-NEXT: orl %edi, %edx
3702; AVX-NEXT: movl %ecx, %esi
3703; AVX-NEXT: andl $64, %esi
3704; AVX-NEXT: shll $19, %esi
3705; AVX-NEXT: movl %ecx, %edi
3706; AVX-NEXT: shll $17, %edi
3707; AVX-NEXT: andl $16777216, %edi # imm = 0x1000000
3708; AVX-NEXT: orl %esi, %edi
3709; AVX-NEXT: movl %ecx, %esi
3710; AVX-NEXT: shll $15, %esi
3711; AVX-NEXT: andl $8388608, %esi # imm = 0x800000
3712; AVX-NEXT: orl %edi, %esi
3713; AVX-NEXT: movl %ecx, %edi
3714; AVX-NEXT: shll $13, %edi
3715; AVX-NEXT: andl $4194304, %edi # imm = 0x400000
3716; AVX-NEXT: orl %esi, %edi
3717; AVX-NEXT: movl %ecx, %esi
3718; AVX-NEXT: shll $11, %esi
3719; AVX-NEXT: andl $2097152, %esi # imm = 0x200000
3720; AVX-NEXT: orl %edi, %esi
3721; AVX-NEXT: movl %ecx, %edi
3722; AVX-NEXT: shll $9, %edi
3723; AVX-NEXT: andl $1048576, %edi # imm = 0x100000
3724; AVX-NEXT: orl %esi, %edi
3725; AVX-NEXT: movl %ecx, %esi
3726; AVX-NEXT: shll $7, %esi
3727; AVX-NEXT: andl $524288, %esi # imm = 0x80000
3728; AVX-NEXT: orl %edi, %esi
3729; AVX-NEXT: movl %ecx, %edi
3730; AVX-NEXT: shll $5, %edi
3731; AVX-NEXT: andl $262144, %edi # imm = 0x40000
3732; AVX-NEXT: orl %esi, %edi
3733; AVX-NEXT: leal (,%rcx,8), %esi
3734; AVX-NEXT: andl $131072, %esi # imm = 0x20000
3735; AVX-NEXT: orl %edi, %esi
3736; AVX-NEXT: leal (%rcx,%rcx), %edi
3737; AVX-NEXT: andl $65536, %edi # imm = 0x10000
3738; AVX-NEXT: orl %esi, %edi
3739; AVX-NEXT: movl %ecx, %esi
3740; AVX-NEXT: shrl %esi
3741; AVX-NEXT: andl $32768, %esi # imm = 0x8000
3742; AVX-NEXT: orl %edi, %esi
3743; AVX-NEXT: movl %ecx, %edi
3744; AVX-NEXT: shrl $3, %edi
3745; AVX-NEXT: andl $16384, %edi # imm = 0x4000
3746; AVX-NEXT: orl %esi, %edi
3747; AVX-NEXT: movl %ecx, %esi
3748; AVX-NEXT: shrl $5, %esi
3749; AVX-NEXT: andl $8192, %esi # imm = 0x2000
3750; AVX-NEXT: orl %edi, %esi
3751; AVX-NEXT: movl %ecx, %edi
3752; AVX-NEXT: shrl $7, %edi
3753; AVX-NEXT: andl $4096, %edi # imm = 0x1000
3754; AVX-NEXT: orl %esi, %edi
3755; AVX-NEXT: movl %ecx, %esi
3756; AVX-NEXT: shrl $9, %esi
3757; AVX-NEXT: andl $2048, %esi # imm = 0x800
3758; AVX-NEXT: orl %edi, %esi
3759; AVX-NEXT: movl %ecx, %edi
3760; AVX-NEXT: shrl $11, %edi
3761; AVX-NEXT: andl $1024, %edi # imm = 0x400
3762; AVX-NEXT: orl %esi, %edi
3763; AVX-NEXT: movl %ecx, %esi
3764; AVX-NEXT: shrl $13, %esi
3765; AVX-NEXT: andl $512, %esi # imm = 0x200
3766; AVX-NEXT: orl %edi, %esi
3767; AVX-NEXT: movl %ecx, %edi
3768; AVX-NEXT: shrl $15, %edi
3769; AVX-NEXT: andl $256, %edi # imm = 0x100
3770; AVX-NEXT: orl %esi, %edi
3771; AVX-NEXT: movl %ecx, %esi
3772; AVX-NEXT: shrl $17, %esi
3773; AVX-NEXT: andl $128, %esi
3774; AVX-NEXT: orl %edi, %esi
3775; AVX-NEXT: movl %ecx, %edi
3776; AVX-NEXT: shrl $19, %edi
3777; AVX-NEXT: andl $64, %edi
3778; AVX-NEXT: orl %esi, %edi
3779; AVX-NEXT: movl %ecx, %esi
3780; AVX-NEXT: shrl $21, %esi
3781; AVX-NEXT: andl $32, %esi
3782; AVX-NEXT: orl %edi, %esi
3783; AVX-NEXT: movl %ecx, %edi
3784; AVX-NEXT: shrl $23, %edi
3785; AVX-NEXT: andl $16, %edi
3786; AVX-NEXT: orl %esi, %edi
3787; AVX-NEXT: movl %ecx, %esi
3788; AVX-NEXT: shrl $25, %esi
3789; AVX-NEXT: andl $8, %esi
3790; AVX-NEXT: orl %edi, %esi
3791; AVX-NEXT: movl %ecx, %edi
3792; AVX-NEXT: shrl $27, %edi
3793; AVX-NEXT: andl $4, %edi
3794; AVX-NEXT: orl %esi, %edi
3795; AVX-NEXT: movl %ecx, %esi
3796; AVX-NEXT: shrl $29, %esi
3797; AVX-NEXT: andl $2, %esi
3798; AVX-NEXT: orl %edi, %esi
3799; AVX-NEXT: shrl $31, %ecx
3800; AVX-NEXT: orl %esi, %ecx
3801; AVX-NEXT: orl %edx, %ecx
3802; AVX-NEXT: vmovd %ecx, %xmm1
3803; AVX-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
3804; AVX-NEXT: vpextrd $2, %xmm0, %eax
3805; AVX-NEXT: movl %eax, %ecx
3806; AVX-NEXT: shll $31, %ecx
3807; AVX-NEXT: movl %eax, %edx
3808; AVX-NEXT: andl $2, %edx
3809; AVX-NEXT: shll $29, %edx
3810; AVX-NEXT: orl %ecx, %edx
3811; AVX-NEXT: movl %eax, %ecx
3812; AVX-NEXT: andl $4, %ecx
3813; AVX-NEXT: shll $27, %ecx
3814; AVX-NEXT: orl %edx, %ecx
3815; AVX-NEXT: movl %eax, %edx
3816; AVX-NEXT: andl $8, %edx
3817; AVX-NEXT: shll $25, %edx
3818; AVX-NEXT: orl %ecx, %edx
3819; AVX-NEXT: movl %eax, %esi
3820; AVX-NEXT: andl $16, %esi
3821; AVX-NEXT: shll $23, %esi
3822; AVX-NEXT: orl %edx, %esi
3823; AVX-NEXT: movl %eax, %ecx
3824; AVX-NEXT: andl $32, %ecx
3825; AVX-NEXT: shll $21, %ecx
3826; AVX-NEXT: orl %esi, %ecx
3827; AVX-NEXT: movl %eax, %edx
3828; AVX-NEXT: andl $64, %edx
3829; AVX-NEXT: shll $19, %edx
3830; AVX-NEXT: movl %eax, %esi
3831; AVX-NEXT: shll $17, %esi
3832; AVX-NEXT: andl $16777216, %esi # imm = 0x1000000
3833; AVX-NEXT: orl %edx, %esi
3834; AVX-NEXT: movl %eax, %edx
3835; AVX-NEXT: shll $15, %edx
3836; AVX-NEXT: andl $8388608, %edx # imm = 0x800000
3837; AVX-NEXT: orl %esi, %edx
3838; AVX-NEXT: movl %eax, %esi
3839; AVX-NEXT: shll $13, %esi
3840; AVX-NEXT: andl $4194304, %esi # imm = 0x400000
3841; AVX-NEXT: orl %edx, %esi
3842; AVX-NEXT: movl %eax, %edx
3843; AVX-NEXT: shll $11, %edx
3844; AVX-NEXT: andl $2097152, %edx # imm = 0x200000
3845; AVX-NEXT: orl %esi, %edx
3846; AVX-NEXT: movl %eax, %esi
3847; AVX-NEXT: shll $9, %esi
3848; AVX-NEXT: andl $1048576, %esi # imm = 0x100000
3849; AVX-NEXT: orl %edx, %esi
3850; AVX-NEXT: movl %eax, %edx
3851; AVX-NEXT: shll $7, %edx
3852; AVX-NEXT: andl $524288, %edx # imm = 0x80000
3853; AVX-NEXT: orl %esi, %edx
3854; AVX-NEXT: movl %eax, %esi
3855; AVX-NEXT: shll $5, %esi
3856; AVX-NEXT: andl $262144, %esi # imm = 0x40000
3857; AVX-NEXT: orl %edx, %esi
3858; AVX-NEXT: leal (,%rax,8), %edx
3859; AVX-NEXT: andl $131072, %edx # imm = 0x20000
3860; AVX-NEXT: orl %esi, %edx
3861; AVX-NEXT: leal (%rax,%rax), %esi
3862; AVX-NEXT: andl $65536, %esi # imm = 0x10000
3863; AVX-NEXT: orl %edx, %esi
3864; AVX-NEXT: movl %eax, %edx
3865; AVX-NEXT: shrl %edx
3866; AVX-NEXT: andl $32768, %edx # imm = 0x8000
3867; AVX-NEXT: orl %esi, %edx
3868; AVX-NEXT: movl %eax, %esi
3869; AVX-NEXT: shrl $3, %esi
3870; AVX-NEXT: andl $16384, %esi # imm = 0x4000
3871; AVX-NEXT: orl %edx, %esi
3872; AVX-NEXT: movl %eax, %edx
3873; AVX-NEXT: shrl $5, %edx
3874; AVX-NEXT: andl $8192, %edx # imm = 0x2000
3875; AVX-NEXT: orl %esi, %edx
3876; AVX-NEXT: movl %eax, %esi
3877; AVX-NEXT: shrl $7, %esi
3878; AVX-NEXT: andl $4096, %esi # imm = 0x1000
3879; AVX-NEXT: orl %edx, %esi
3880; AVX-NEXT: movl %eax, %edx
3881; AVX-NEXT: shrl $9, %edx
3882; AVX-NEXT: andl $2048, %edx # imm = 0x800
3883; AVX-NEXT: orl %esi, %edx
3884; AVX-NEXT: movl %eax, %esi
3885; AVX-NEXT: shrl $11, %esi
3886; AVX-NEXT: andl $1024, %esi # imm = 0x400
3887; AVX-NEXT: orl %edx, %esi
3888; AVX-NEXT: movl %eax, %edx
3889; AVX-NEXT: shrl $13, %edx
3890; AVX-NEXT: andl $512, %edx # imm = 0x200
3891; AVX-NEXT: orl %esi, %edx
3892; AVX-NEXT: movl %eax, %esi
3893; AVX-NEXT: shrl $15, %esi
3894; AVX-NEXT: andl $256, %esi # imm = 0x100
3895; AVX-NEXT: orl %edx, %esi
3896; AVX-NEXT: movl %eax, %edx
3897; AVX-NEXT: shrl $17, %edx
3898; AVX-NEXT: andl $128, %edx
3899; AVX-NEXT: orl %esi, %edx
3900; AVX-NEXT: movl %eax, %esi
3901; AVX-NEXT: shrl $19, %esi
3902; AVX-NEXT: andl $64, %esi
3903; AVX-NEXT: orl %edx, %esi
3904; AVX-NEXT: movl %eax, %edx
3905; AVX-NEXT: shrl $21, %edx
3906; AVX-NEXT: andl $32, %edx
3907; AVX-NEXT: orl %esi, %edx
3908; AVX-NEXT: movl %eax, %esi
3909; AVX-NEXT: shrl $23, %esi
3910; AVX-NEXT: andl $16, %esi
3911; AVX-NEXT: orl %edx, %esi
3912; AVX-NEXT: movl %eax, %edx
3913; AVX-NEXT: shrl $25, %edx
3914; AVX-NEXT: andl $8, %edx
3915; AVX-NEXT: orl %esi, %edx
3916; AVX-NEXT: movl %eax, %esi
3917; AVX-NEXT: shrl $27, %esi
3918; AVX-NEXT: andl $4, %esi
3919; AVX-NEXT: orl %edx, %esi
3920; AVX-NEXT: movl %eax, %edx
3921; AVX-NEXT: shrl $29, %edx
3922; AVX-NEXT: andl $2, %edx
3923; AVX-NEXT: orl %esi, %edx
3924; AVX-NEXT: shrl $31, %eax
3925; AVX-NEXT: orl %edx, %eax
3926; AVX-NEXT: orl %ecx, %eax
3927; AVX-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
3928; AVX-NEXT: vpextrd $3, %xmm0, %eax
3929; AVX-NEXT: movl %eax, %ecx
3930; AVX-NEXT: shll $31, %ecx
3931; AVX-NEXT: movl %eax, %edx
3932; AVX-NEXT: andl $2, %edx
3933; AVX-NEXT: shll $29, %edx
3934; AVX-NEXT: orl %ecx, %edx
3935; AVX-NEXT: movl %eax, %ecx
3936; AVX-NEXT: andl $4, %ecx
3937; AVX-NEXT: shll $27, %ecx
3938; AVX-NEXT: orl %edx, %ecx
3939; AVX-NEXT: movl %eax, %edx
3940; AVX-NEXT: andl $8, %edx
3941; AVX-NEXT: shll $25, %edx
3942; AVX-NEXT: orl %ecx, %edx
3943; AVX-NEXT: movl %eax, %esi
3944; AVX-NEXT: andl $16, %esi
3945; AVX-NEXT: shll $23, %esi
3946; AVX-NEXT: orl %edx, %esi
3947; AVX-NEXT: movl %eax, %ecx
3948; AVX-NEXT: andl $32, %ecx
3949; AVX-NEXT: shll $21, %ecx
3950; AVX-NEXT: orl %esi, %ecx
3951; AVX-NEXT: movl %eax, %edx
3952; AVX-NEXT: andl $64, %edx
3953; AVX-NEXT: shll $19, %edx
3954; AVX-NEXT: movl %eax, %esi
3955; AVX-NEXT: shll $17, %esi
3956; AVX-NEXT: andl $16777216, %esi # imm = 0x1000000
3957; AVX-NEXT: orl %edx, %esi
3958; AVX-NEXT: movl %eax, %edx
3959; AVX-NEXT: shll $15, %edx
3960; AVX-NEXT: andl $8388608, %edx # imm = 0x800000
3961; AVX-NEXT: orl %esi, %edx
3962; AVX-NEXT: movl %eax, %esi
3963; AVX-NEXT: shll $13, %esi
3964; AVX-NEXT: andl $4194304, %esi # imm = 0x400000
3965; AVX-NEXT: orl %edx, %esi
3966; AVX-NEXT: movl %eax, %edx
3967; AVX-NEXT: shll $11, %edx
3968; AVX-NEXT: andl $2097152, %edx # imm = 0x200000
3969; AVX-NEXT: orl %esi, %edx
3970; AVX-NEXT: movl %eax, %esi
3971; AVX-NEXT: shll $9, %esi
3972; AVX-NEXT: andl $1048576, %esi # imm = 0x100000
3973; AVX-NEXT: orl %edx, %esi
3974; AVX-NEXT: movl %eax, %edx
3975; AVX-NEXT: shll $7, %edx
3976; AVX-NEXT: andl $524288, %edx # imm = 0x80000
3977; AVX-NEXT: orl %esi, %edx
3978; AVX-NEXT: movl %eax, %esi
3979; AVX-NEXT: shll $5, %esi
3980; AVX-NEXT: andl $262144, %esi # imm = 0x40000
3981; AVX-NEXT: orl %edx, %esi
3982; AVX-NEXT: leal (,%rax,8), %edx
3983; AVX-NEXT: andl $131072, %edx # imm = 0x20000
3984; AVX-NEXT: orl %esi, %edx
3985; AVX-NEXT: leal (%rax,%rax), %esi
3986; AVX-NEXT: andl $65536, %esi # imm = 0x10000
3987; AVX-NEXT: orl %edx, %esi
3988; AVX-NEXT: movl %eax, %edx
3989; AVX-NEXT: shrl %edx
3990; AVX-NEXT: andl $32768, %edx # imm = 0x8000
3991; AVX-NEXT: orl %esi, %edx
3992; AVX-NEXT: movl %eax, %esi
3993; AVX-NEXT: shrl $3, %esi
3994; AVX-NEXT: andl $16384, %esi # imm = 0x4000
3995; AVX-NEXT: orl %edx, %esi
3996; AVX-NEXT: movl %eax, %edx
3997; AVX-NEXT: shrl $5, %edx
3998; AVX-NEXT: andl $8192, %edx # imm = 0x2000
3999; AVX-NEXT: orl %esi, %edx
4000; AVX-NEXT: movl %eax, %esi
4001; AVX-NEXT: shrl $7, %esi
4002; AVX-NEXT: andl $4096, %esi # imm = 0x1000
4003; AVX-NEXT: orl %edx, %esi
4004; AVX-NEXT: movl %eax, %edx
4005; AVX-NEXT: shrl $9, %edx
4006; AVX-NEXT: andl $2048, %edx # imm = 0x800
4007; AVX-NEXT: orl %esi, %edx
4008; AVX-NEXT: movl %eax, %esi
4009; AVX-NEXT: shrl $11, %esi
4010; AVX-NEXT: andl $1024, %esi # imm = 0x400
4011; AVX-NEXT: orl %edx, %esi
4012; AVX-NEXT: movl %eax, %edx
4013; AVX-NEXT: shrl $13, %edx
4014; AVX-NEXT: andl $512, %edx # imm = 0x200
4015; AVX-NEXT: orl %esi, %edx
4016; AVX-NEXT: movl %eax, %esi
4017; AVX-NEXT: shrl $15, %esi
4018; AVX-NEXT: andl $256, %esi # imm = 0x100
4019; AVX-NEXT: orl %edx, %esi
4020; AVX-NEXT: movl %eax, %edx
4021; AVX-NEXT: shrl $17, %edx
4022; AVX-NEXT: andl $128, %edx
4023; AVX-NEXT: orl %esi, %edx
4024; AVX-NEXT: movl %eax, %esi
4025; AVX-NEXT: shrl $19, %esi
4026; AVX-NEXT: andl $64, %esi
4027; AVX-NEXT: orl %edx, %esi
4028; AVX-NEXT: movl %eax, %edx
4029; AVX-NEXT: shrl $21, %edx
4030; AVX-NEXT: andl $32, %edx
4031; AVX-NEXT: orl %esi, %edx
4032; AVX-NEXT: movl %eax, %esi
4033; AVX-NEXT: shrl $23, %esi
4034; AVX-NEXT: andl $16, %esi
4035; AVX-NEXT: orl %edx, %esi
4036; AVX-NEXT: movl %eax, %edx
4037; AVX-NEXT: shrl $25, %edx
4038; AVX-NEXT: andl $8, %edx
4039; AVX-NEXT: orl %esi, %edx
4040; AVX-NEXT: movl %eax, %esi
4041; AVX-NEXT: shrl $27, %esi
4042; AVX-NEXT: andl $4, %esi
4043; AVX-NEXT: orl %edx, %esi
4044; AVX-NEXT: movl %eax, %edx
4045; AVX-NEXT: shrl $29, %edx
4046; AVX-NEXT: andl $2, %edx
4047; AVX-NEXT: orl %esi, %edx
4048; AVX-NEXT: shrl $31, %eax
4049; AVX-NEXT: orl %edx, %eax
4050; AVX-NEXT: orl %ecx, %eax
4051; AVX-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0
4052; AVX-NEXT: retq
4053;
4054; XOP-LABEL: test_bitreverse_v4i32:
4055; XOP: # BB#0:
4056; XOP-NEXT: vpperm {{.*}}(%rip), %xmm0, %xmm0, %xmm0
4057; XOP-NEXT: retq
4058 %b = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %a)
4059 ret <4 x i32> %b
4060}
4061
4062define <2 x i64> @test_bitreverse_v2i64(<2 x i64> %a) nounwind {
4063; SSE-LABEL: test_bitreverse_v2i64:
4064; SSE: # BB#0:
4065; SSE-NEXT: movdqa %xmm0, %xmm2
4066; SSE-NEXT: psllq $61, %xmm2
4067; SSE-NEXT: pand {{.*}}(%rip), %xmm2
4068; SSE-NEXT: movdqa %xmm0, %xmm1
4069; SSE-NEXT: psllq $63, %xmm1
4070; SSE-NEXT: pand {{.*}}(%rip), %xmm1
4071; SSE-NEXT: movdqa %xmm0, %xmm3
4072; SSE-NEXT: psllq $59, %xmm3
4073; SSE-NEXT: pand {{.*}}(%rip), %xmm3
4074; SSE-NEXT: por %xmm2, %xmm3
4075; SSE-NEXT: movdqa %xmm0, %xmm2
4076; SSE-NEXT: psllq $57, %xmm2
4077; SSE-NEXT: pand {{.*}}(%rip), %xmm2
4078; SSE-NEXT: por %xmm3, %xmm2
4079; SSE-NEXT: movdqa %xmm0, %xmm3
4080; SSE-NEXT: psllq $55, %xmm3
4081; SSE-NEXT: pand {{.*}}(%rip), %xmm3
4082; SSE-NEXT: por %xmm2, %xmm3
4083; SSE-NEXT: movdqa %xmm0, %xmm2
4084; SSE-NEXT: psllq $53, %xmm2
4085; SSE-NEXT: pand {{.*}}(%rip), %xmm2
4086; SSE-NEXT: por %xmm3, %xmm2
4087; SSE-NEXT: movdqa %xmm0, %xmm3
4088; SSE-NEXT: psllq $51, %xmm3
4089; SSE-NEXT: pand {{.*}}(%rip), %xmm3
4090; SSE-NEXT: por %xmm2, %xmm3
4091; SSE-NEXT: movdqa %xmm0, %xmm2
4092; SSE-NEXT: psllq $49, %xmm2
4093; SSE-NEXT: pand {{.*}}(%rip), %xmm2
4094; SSE-NEXT: por %xmm3, %xmm2
4095; SSE-NEXT: movdqa %xmm0, %xmm3
4096; SSE-NEXT: psllq $47, %xmm3
4097; SSE-NEXT: pand {{.*}}(%rip), %xmm3
4098; SSE-NEXT: por %xmm2, %xmm3
4099; SSE-NEXT: movdqa %xmm0, %xmm2
4100; SSE-NEXT: psllq $45, %xmm2
4101; SSE-NEXT: pand {{.*}}(%rip), %xmm2
4102; SSE-NEXT: por %xmm3, %xmm2
4103; SSE-NEXT: movdqa %xmm0, %xmm3
4104; SSE-NEXT: psllq $43, %xmm3
4105; SSE-NEXT: pand {{.*}}(%rip), %xmm3
4106; SSE-NEXT: por %xmm2, %xmm3
4107; SSE-NEXT: movdqa %xmm0, %xmm2
4108; SSE-NEXT: psllq $41, %xmm2
4109; SSE-NEXT: pand {{.*}}(%rip), %xmm2
4110; SSE-NEXT: por %xmm3, %xmm2
4111; SSE-NEXT: movdqa %xmm0, %xmm3
4112; SSE-NEXT: psllq $39, %xmm3
4113; SSE-NEXT: pand {{.*}}(%rip), %xmm3
4114; SSE-NEXT: por %xmm2, %xmm3
4115; SSE-NEXT: movdqa %xmm0, %xmm2
4116; SSE-NEXT: psllq $37, %xmm2
4117; SSE-NEXT: pand {{.*}}(%rip), %xmm2
4118; SSE-NEXT: por %xmm3, %xmm2
4119; SSE-NEXT: movdqa %xmm0, %xmm3
4120; SSE-NEXT: psllq $35, %xmm3
4121; SSE-NEXT: pand {{.*}}(%rip), %xmm3
4122; SSE-NEXT: por %xmm2, %xmm3
4123; SSE-NEXT: movdqa %xmm0, %xmm2
4124; SSE-NEXT: psllq $33, %xmm2
4125; SSE-NEXT: pand {{.*}}(%rip), %xmm2
4126; SSE-NEXT: por %xmm3, %xmm2
4127; SSE-NEXT: movdqa %xmm0, %xmm3
4128; SSE-NEXT: psllq $31, %xmm3
4129; SSE-NEXT: pand {{.*}}(%rip), %xmm3
4130; SSE-NEXT: por %xmm2, %xmm3
4131; SSE-NEXT: movdqa %xmm0, %xmm2
4132; SSE-NEXT: psllq $29, %xmm2
4133; SSE-NEXT: pand {{.*}}(%rip), %xmm2
4134; SSE-NEXT: por %xmm3, %xmm2
4135; SSE-NEXT: movdqa %xmm0, %xmm3
4136; SSE-NEXT: psllq $27, %xmm3
4137; SSE-NEXT: pand {{.*}}(%rip), %xmm3
4138; SSE-NEXT: por %xmm2, %xmm3
4139; SSE-NEXT: movdqa %xmm0, %xmm2
4140; SSE-NEXT: psllq $25, %xmm2
4141; SSE-NEXT: pand {{.*}}(%rip), %xmm2
4142; SSE-NEXT: por %xmm3, %xmm2
4143; SSE-NEXT: movdqa %xmm0, %xmm3
4144; SSE-NEXT: psllq $23, %xmm3
4145; SSE-NEXT: pand {{.*}}(%rip), %xmm3
4146; SSE-NEXT: por %xmm2, %xmm3
4147; SSE-NEXT: movdqa %xmm0, %xmm2
4148; SSE-NEXT: psllq $21, %xmm2
4149; SSE-NEXT: pand {{.*}}(%rip), %xmm2
4150; SSE-NEXT: por %xmm3, %xmm2
4151; SSE-NEXT: movdqa %xmm0, %xmm3
4152; SSE-NEXT: psllq $19, %xmm3
4153; SSE-NEXT: pand {{.*}}(%rip), %xmm3
4154; SSE-NEXT: por %xmm2, %xmm3
4155; SSE-NEXT: movdqa %xmm0, %xmm2
4156; SSE-NEXT: psllq $17, %xmm2
4157; SSE-NEXT: pand {{.*}}(%rip), %xmm2
4158; SSE-NEXT: por %xmm3, %xmm2
4159; SSE-NEXT: movdqa %xmm0, %xmm3
4160; SSE-NEXT: psllq $15, %xmm3
4161; SSE-NEXT: pand {{.*}}(%rip), %xmm3
4162; SSE-NEXT: por %xmm2, %xmm3
4163; SSE-NEXT: movdqa %xmm0, %xmm2
4164; SSE-NEXT: psllq $13, %xmm2
4165; SSE-NEXT: pand {{.*}}(%rip), %xmm2
4166; SSE-NEXT: por %xmm3, %xmm2
4167; SSE-NEXT: movdqa %xmm0, %xmm3
4168; SSE-NEXT: psllq $11, %xmm3
4169; SSE-NEXT: pand {{.*}}(%rip), %xmm3
4170; SSE-NEXT: por %xmm2, %xmm3
4171; SSE-NEXT: movdqa %xmm0, %xmm2
4172; SSE-NEXT: psllq $9, %xmm2
4173; SSE-NEXT: pand {{.*}}(%rip), %xmm2
4174; SSE-NEXT: por %xmm3, %xmm2
4175; SSE-NEXT: movdqa %xmm0, %xmm3
4176; SSE-NEXT: psllq $7, %xmm3
4177; SSE-NEXT: pand {{.*}}(%rip), %xmm3
4178; SSE-NEXT: por %xmm2, %xmm3
4179; SSE-NEXT: movdqa %xmm0, %xmm2
4180; SSE-NEXT: psllq $5, %xmm2
4181; SSE-NEXT: pand {{.*}}(%rip), %xmm2
4182; SSE-NEXT: por %xmm3, %xmm2
4183; SSE-NEXT: movdqa %xmm0, %xmm3
4184; SSE-NEXT: psllq $3, %xmm3
4185; SSE-NEXT: pand {{.*}}(%rip), %xmm3
4186; SSE-NEXT: por %xmm2, %xmm3
4187; SSE-NEXT: movdqa %xmm0, %xmm2
4188; SSE-NEXT: psllq $1, %xmm2
4189; SSE-NEXT: pand {{.*}}(%rip), %xmm2
4190; SSE-NEXT: por %xmm3, %xmm2
4191; SSE-NEXT: movdqa %xmm0, %xmm3
4192; SSE-NEXT: psrlq $1, %xmm3
4193; SSE-NEXT: pand {{.*}}(%rip), %xmm3
4194; SSE-NEXT: por %xmm2, %xmm3
4195; SSE-NEXT: movdqa %xmm0, %xmm2
4196; SSE-NEXT: psrlq $3, %xmm2
4197; SSE-NEXT: pand {{.*}}(%rip), %xmm2
4198; SSE-NEXT: por %xmm3, %xmm2
4199; SSE-NEXT: movdqa %xmm0, %xmm3
4200; SSE-NEXT: psrlq $5, %xmm3
4201; SSE-NEXT: pand {{.*}}(%rip), %xmm3
4202; SSE-NEXT: por %xmm2, %xmm3
4203; SSE-NEXT: movdqa %xmm0, %xmm2
4204; SSE-NEXT: psrlq $7, %xmm2
4205; SSE-NEXT: pand {{.*}}(%rip), %xmm2
4206; SSE-NEXT: por %xmm3, %xmm2
4207; SSE-NEXT: movdqa %xmm0, %xmm3
4208; SSE-NEXT: psrlq $9, %xmm3
4209; SSE-NEXT: pand {{.*}}(%rip), %xmm3
4210; SSE-NEXT: por %xmm2, %xmm3
4211; SSE-NEXT: movdqa %xmm0, %xmm2
4212; SSE-NEXT: psrlq $11, %xmm2
4213; SSE-NEXT: pand {{.*}}(%rip), %xmm2
4214; SSE-NEXT: por %xmm3, %xmm2
4215; SSE-NEXT: movdqa %xmm0, %xmm3
4216; SSE-NEXT: psrlq $13, %xmm3
4217; SSE-NEXT: pand {{.*}}(%rip), %xmm3
4218; SSE-NEXT: por %xmm2, %xmm3
4219; SSE-NEXT: movdqa %xmm0, %xmm2
4220; SSE-NEXT: psrlq $15, %xmm2
4221; SSE-NEXT: pand {{.*}}(%rip), %xmm2
4222; SSE-NEXT: por %xmm3, %xmm2
4223; SSE-NEXT: movdqa %xmm0, %xmm3
4224; SSE-NEXT: psrlq $17, %xmm3
4225; SSE-NEXT: pand {{.*}}(%rip), %xmm3
4226; SSE-NEXT: por %xmm2, %xmm3
4227; SSE-NEXT: movdqa %xmm0, %xmm2
4228; SSE-NEXT: psrlq $19, %xmm2
4229; SSE-NEXT: pand {{.*}}(%rip), %xmm2
4230; SSE-NEXT: por %xmm3, %xmm2
4231; SSE-NEXT: movdqa %xmm0, %xmm3
4232; SSE-NEXT: psrlq $21, %xmm3
4233; SSE-NEXT: pand {{.*}}(%rip), %xmm3
4234; SSE-NEXT: por %xmm2, %xmm3
4235; SSE-NEXT: movdqa %xmm0, %xmm2
4236; SSE-NEXT: psrlq $23, %xmm2
4237; SSE-NEXT: pand {{.*}}(%rip), %xmm2
4238; SSE-NEXT: por %xmm3, %xmm2
4239; SSE-NEXT: movdqa %xmm0, %xmm3
4240; SSE-NEXT: psrlq $25, %xmm3
4241; SSE-NEXT: pand {{.*}}(%rip), %xmm3
4242; SSE-NEXT: por %xmm2, %xmm3
4243; SSE-NEXT: movdqa %xmm0, %xmm2
4244; SSE-NEXT: psrlq $27, %xmm2
4245; SSE-NEXT: pand {{.*}}(%rip), %xmm2
4246; SSE-NEXT: por %xmm3, %xmm2
4247; SSE-NEXT: movdqa %xmm0, %xmm3
4248; SSE-NEXT: psrlq $29, %xmm3
4249; SSE-NEXT: pand {{.*}}(%rip), %xmm3
4250; SSE-NEXT: por %xmm2, %xmm3
4251; SSE-NEXT: movdqa %xmm0, %xmm2
4252; SSE-NEXT: psrlq $31, %xmm2
4253; SSE-NEXT: pand {{.*}}(%rip), %xmm2
4254; SSE-NEXT: por %xmm3, %xmm2
4255; SSE-NEXT: movdqa %xmm0, %xmm3
4256; SSE-NEXT: psrlq $33, %xmm3
4257; SSE-NEXT: pand {{.*}}(%rip), %xmm3
4258; SSE-NEXT: por %xmm2, %xmm3
4259; SSE-NEXT: movdqa %xmm0, %xmm2
4260; SSE-NEXT: psrlq $35, %xmm2
4261; SSE-NEXT: pand {{.*}}(%rip), %xmm2
4262; SSE-NEXT: por %xmm3, %xmm2
4263; SSE-NEXT: movdqa %xmm0, %xmm3
4264; SSE-NEXT: psrlq $37, %xmm3
4265; SSE-NEXT: pand {{.*}}(%rip), %xmm3
4266; SSE-NEXT: por %xmm2, %xmm3
4267; SSE-NEXT: movdqa %xmm0, %xmm2
4268; SSE-NEXT: psrlq $39, %xmm2
4269; SSE-NEXT: pand {{.*}}(%rip), %xmm2
4270; SSE-NEXT: por %xmm3, %xmm2
4271; SSE-NEXT: movdqa %xmm0, %xmm3
4272; SSE-NEXT: psrlq $41, %xmm3
4273; SSE-NEXT: pand {{.*}}(%rip), %xmm3
4274; SSE-NEXT: por %xmm2, %xmm3
4275; SSE-NEXT: movdqa %xmm0, %xmm2
4276; SSE-NEXT: psrlq $43, %xmm2
4277; SSE-NEXT: pand {{.*}}(%rip), %xmm2
4278; SSE-NEXT: por %xmm3, %xmm2
4279; SSE-NEXT: movdqa %xmm0, %xmm3
4280; SSE-NEXT: psrlq $45, %xmm3
4281; SSE-NEXT: pand {{.*}}(%rip), %xmm3
4282; SSE-NEXT: por %xmm2, %xmm3
4283; SSE-NEXT: movdqa %xmm0, %xmm2
4284; SSE-NEXT: psrlq $47, %xmm2
4285; SSE-NEXT: pand {{.*}}(%rip), %xmm2
4286; SSE-NEXT: por %xmm3, %xmm2
4287; SSE-NEXT: movdqa %xmm0, %xmm3
4288; SSE-NEXT: psrlq $49, %xmm3
4289; SSE-NEXT: pand {{.*}}(%rip), %xmm3
4290; SSE-NEXT: por %xmm2, %xmm3
4291; SSE-NEXT: movdqa %xmm0, %xmm2
4292; SSE-NEXT: psrlq $51, %xmm2
4293; SSE-NEXT: pand {{.*}}(%rip), %xmm2
4294; SSE-NEXT: por %xmm3, %xmm2
4295; SSE-NEXT: movdqa %xmm0, %xmm3
4296; SSE-NEXT: psrlq $53, %xmm3
4297; SSE-NEXT: pand {{.*}}(%rip), %xmm3
4298; SSE-NEXT: por %xmm2, %xmm3
4299; SSE-NEXT: movdqa %xmm0, %xmm2
4300; SSE-NEXT: psrlq $55, %xmm2
4301; SSE-NEXT: pand {{.*}}(%rip), %xmm2
4302; SSE-NEXT: por %xmm3, %xmm2
4303; SSE-NEXT: movdqa %xmm0, %xmm3
4304; SSE-NEXT: psrlq $57, %xmm3
4305; SSE-NEXT: pand {{.*}}(%rip), %xmm3
4306; SSE-NEXT: por %xmm2, %xmm3
4307; SSE-NEXT: movdqa %xmm0, %xmm2
4308; SSE-NEXT: psrlq $59, %xmm2
4309; SSE-NEXT: pand {{.*}}(%rip), %xmm2
4310; SSE-NEXT: por %xmm3, %xmm2
4311; SSE-NEXT: movdqa %xmm0, %xmm3
4312; SSE-NEXT: psrlq $61, %xmm3
4313; SSE-NEXT: pand {{.*}}(%rip), %xmm3
4314; SSE-NEXT: por %xmm2, %xmm3
4315; SSE-NEXT: psrlq $63, %xmm0
4316; SSE-NEXT: pand {{.*}}(%rip), %xmm0
4317; SSE-NEXT: por %xmm3, %xmm0
4318; SSE-NEXT: por %xmm1, %xmm0
4319; SSE-NEXT: retq
4320;
4321; AVX-LABEL: test_bitreverse_v2i64:
4322; AVX: # BB#0:
4323; AVX-NEXT: vpsllq $61, %xmm0, %xmm1
4324; AVX-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm2
4325; AVX-NEXT: vpsllq $63, %xmm0, %xmm1
4326; AVX-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
4327; AVX-NEXT: vpsllq $59, %xmm0, %xmm3
4328; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4329; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4330; AVX-NEXT: vpsllq $57, %xmm0, %xmm3
4331; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4332; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4333; AVX-NEXT: vpsllq $55, %xmm0, %xmm3
4334; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4335; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4336; AVX-NEXT: vpsllq $53, %xmm0, %xmm3
4337; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4338; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4339; AVX-NEXT: vpsllq $51, %xmm0, %xmm3
4340; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4341; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4342; AVX-NEXT: vpsllq $49, %xmm0, %xmm3
4343; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4344; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4345; AVX-NEXT: vpsllq $47, %xmm0, %xmm3
4346; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4347; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4348; AVX-NEXT: vpsllq $45, %xmm0, %xmm3
4349; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4350; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4351; AVX-NEXT: vpsllq $43, %xmm0, %xmm3
4352; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4353; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4354; AVX-NEXT: vpsllq $41, %xmm0, %xmm3
4355; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4356; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4357; AVX-NEXT: vpsllq $39, %xmm0, %xmm3
4358; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4359; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4360; AVX-NEXT: vpsllq $37, %xmm0, %xmm3
4361; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4362; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4363; AVX-NEXT: vpsllq $35, %xmm0, %xmm3
4364; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4365; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4366; AVX-NEXT: vpsllq $33, %xmm0, %xmm3
4367; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4368; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4369; AVX-NEXT: vpsllq $31, %xmm0, %xmm3
4370; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4371; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4372; AVX-NEXT: vpsllq $29, %xmm0, %xmm3
4373; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4374; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4375; AVX-NEXT: vpsllq $27, %xmm0, %xmm3
4376; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4377; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4378; AVX-NEXT: vpsllq $25, %xmm0, %xmm3
4379; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4380; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4381; AVX-NEXT: vpsllq $23, %xmm0, %xmm3
4382; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4383; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4384; AVX-NEXT: vpsllq $21, %xmm0, %xmm3
4385; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4386; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4387; AVX-NEXT: vpsllq $19, %xmm0, %xmm3
4388; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4389; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4390; AVX-NEXT: vpsllq $17, %xmm0, %xmm3
4391; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4392; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4393; AVX-NEXT: vpsllq $15, %xmm0, %xmm3
4394; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4395; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4396; AVX-NEXT: vpsllq $13, %xmm0, %xmm3
4397; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4398; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4399; AVX-NEXT: vpsllq $11, %xmm0, %xmm3
4400; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4401; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4402; AVX-NEXT: vpsllq $9, %xmm0, %xmm3
4403; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4404; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4405; AVX-NEXT: vpsllq $7, %xmm0, %xmm3
4406; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4407; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4408; AVX-NEXT: vpsllq $5, %xmm0, %xmm3
4409; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4410; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4411; AVX-NEXT: vpsllq $3, %xmm0, %xmm3
4412; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4413; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4414; AVX-NEXT: vpsllq $1, %xmm0, %xmm3
4415; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4416; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4417; AVX-NEXT: vpsrlq $1, %xmm0, %xmm3
4418; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4419; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4420; AVX-NEXT: vpsrlq $3, %xmm0, %xmm3
4421; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4422; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4423; AVX-NEXT: vpsrlq $5, %xmm0, %xmm3
4424; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4425; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4426; AVX-NEXT: vpsrlq $7, %xmm0, %xmm3
4427; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4428; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4429; AVX-NEXT: vpsrlq $9, %xmm0, %xmm3
4430; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4431; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4432; AVX-NEXT: vpsrlq $11, %xmm0, %xmm3
4433; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4434; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4435; AVX-NEXT: vpsrlq $13, %xmm0, %xmm3
4436; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4437; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4438; AVX-NEXT: vpsrlq $15, %xmm0, %xmm3
4439; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4440; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4441; AVX-NEXT: vpsrlq $17, %xmm0, %xmm3
4442; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4443; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4444; AVX-NEXT: vpsrlq $19, %xmm0, %xmm3
4445; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4446; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4447; AVX-NEXT: vpsrlq $21, %xmm0, %xmm3
4448; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4449; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4450; AVX-NEXT: vpsrlq $23, %xmm0, %xmm3
4451; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4452; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4453; AVX-NEXT: vpsrlq $25, %xmm0, %xmm3
4454; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4455; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4456; AVX-NEXT: vpsrlq $27, %xmm0, %xmm3
4457; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4458; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4459; AVX-NEXT: vpsrlq $29, %xmm0, %xmm3
4460; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4461; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4462; AVX-NEXT: vpsrlq $31, %xmm0, %xmm3
4463; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4464; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4465; AVX-NEXT: vpsrlq $33, %xmm0, %xmm3
4466; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4467; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4468; AVX-NEXT: vpsrlq $35, %xmm0, %xmm3
4469; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4470; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4471; AVX-NEXT: vpsrlq $37, %xmm0, %xmm3
4472; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4473; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4474; AVX-NEXT: vpsrlq $39, %xmm0, %xmm3
4475; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4476; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4477; AVX-NEXT: vpsrlq $41, %xmm0, %xmm3
4478; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4479; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4480; AVX-NEXT: vpsrlq $43, %xmm0, %xmm3
4481; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4482; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4483; AVX-NEXT: vpsrlq $45, %xmm0, %xmm3
4484; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4485; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4486; AVX-NEXT: vpsrlq $47, %xmm0, %xmm3
4487; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4488; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4489; AVX-NEXT: vpsrlq $49, %xmm0, %xmm3
4490; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4491; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4492; AVX-NEXT: vpsrlq $51, %xmm0, %xmm3
4493; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4494; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4495; AVX-NEXT: vpsrlq $53, %xmm0, %xmm3
4496; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4497; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4498; AVX-NEXT: vpsrlq $55, %xmm0, %xmm3
4499; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4500; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4501; AVX-NEXT: vpsrlq $57, %xmm0, %xmm3
4502; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4503; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4504; AVX-NEXT: vpsrlq $59, %xmm0, %xmm3
4505; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4506; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4507; AVX-NEXT: vpsrlq $61, %xmm0, %xmm3
4508; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
4509; AVX-NEXT: vpor %xmm3, %xmm2, %xmm2
4510; AVX-NEXT: vpsrlq $63, %xmm0, %xmm0
4511; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
4512; AVX-NEXT: vpor %xmm0, %xmm2, %xmm0
4513; AVX-NEXT: vpor %xmm0, %xmm1, %xmm0
4514; AVX-NEXT: retq
4515;
4516; XOP-LABEL: test_bitreverse_v2i64:
4517; XOP: # BB#0:
4518; XOP-NEXT: vpperm {{.*}}(%rip), %xmm0, %xmm0, %xmm0
4519; XOP-NEXT: retq
4520 %b = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %a)
4521 ret <2 x i64> %b
4522}
4523
4524define <32 x i8> @test_bitreverse_v32i8(<32 x i8> %a) nounwind {
4525; SSE-LABEL: test_bitreverse_v32i8:
4526; SSE: # BB#0:
4527; SSE-NEXT: pushq %rbp
4528; SSE-NEXT: pushq %r15
4529; SSE-NEXT: pushq %r14
4530; SSE-NEXT: pushq %rbx
4531; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
4532; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
4533; SSE-NEXT: movb -{{[0-9]+}}(%rsp), %cl
4534; SSE-NEXT: movb %cl, %bl
4535; SSE-NEXT: shlb $7, %bl
4536; SSE-NEXT: movb %cl, %dl
4537; SSE-NEXT: shlb $5, %dl
4538; SSE-NEXT: andb $64, %dl
4539; SSE-NEXT: movb %cl, %al
4540; SSE-NEXT: shlb $3, %al
4541; SSE-NEXT: andb $32, %al
4542; SSE-NEXT: orb %dl, %al
4543; SSE-NEXT: movb %cl, %dl
4544; SSE-NEXT: addb %dl, %dl
4545; SSE-NEXT: andb $16, %dl
4546; SSE-NEXT: orb %al, %dl
4547; SSE-NEXT: movb %cl, %al
4548; SSE-NEXT: shrb %al
4549; SSE-NEXT: andb $8, %al
4550; SSE-NEXT: orb %dl, %al
4551; SSE-NEXT: movb %cl, %dl
4552; SSE-NEXT: shrb $3, %dl
4553; SSE-NEXT: andb $4, %dl
4554; SSE-NEXT: orb %al, %dl
4555; SSE-NEXT: movb %cl, %al
4556; SSE-NEXT: shrb $5, %al
4557; SSE-NEXT: andb $2, %al
4558; SSE-NEXT: orb %dl, %al
4559; SSE-NEXT: shrb $7, %cl
4560; SSE-NEXT: orb %al, %cl
4561; SSE-NEXT: orb %bl, %cl
4562; SSE-NEXT: movzbl %cl, %eax
4563; SSE-NEXT: movd %eax, %xmm0
4564; SSE-NEXT: movb -{{[0-9]+}}(%rsp), %r10b
4565; SSE-NEXT: movb -{{[0-9]+}}(%rsp), %r14b
4566; SSE-NEXT: movb -{{[0-9]+}}(%rsp), %dil
4567; SSE-NEXT: movb -{{[0-9]+}}(%rsp), %r11b
4568; SSE-NEXT: movb -{{[0-9]+}}(%rsp), %r9b
4569; SSE-NEXT: movb -{{[0-9]+}}(%rsp), %bpl
4570; SSE-NEXT: movb -{{[0-9]+}}(%rsp), %sil
4571; SSE-NEXT: movb -{{[0-9]+}}(%rsp), %dl
4572; SSE-NEXT: movb %dl, %r8b
4573; SSE-NEXT: shlb $7, %r8b
4574; SSE-NEXT: movb %dl, %bl
4575; SSE-NEXT: shlb $5, %bl
4576; SSE-NEXT: andb $64, %bl
4577; SSE-NEXT: movb %dl, %al
4578; SSE-NEXT: shlb $3, %al
4579; SSE-NEXT: andb $32, %al
4580; SSE-NEXT: orb %bl, %al
4581; SSE-NEXT: movb %dl, %cl
4582; SSE-NEXT: addb %cl, %cl
4583; SSE-NEXT: andb $16, %cl
4584; SSE-NEXT: orb %al, %cl
4585; SSE-NEXT: movb %dl, %al
4586; SSE-NEXT: shrb %al
4587; SSE-NEXT: andb $8, %al
4588; SSE-NEXT: orb %cl, %al
4589; SSE-NEXT: movb %dl, %cl
4590; SSE-NEXT: shrb $3, %cl
4591; SSE-NEXT: andb $4, %cl
4592; SSE-NEXT: orb %al, %cl
4593; SSE-NEXT: movb %dl, %al
4594; SSE-NEXT: shrb $5, %al
4595; SSE-NEXT: andb $2, %al
4596; SSE-NEXT: orb %cl, %al
4597; SSE-NEXT: shrb $7, %dl
4598; SSE-NEXT: orb %al, %dl
4599; SSE-NEXT: orb %r8b, %dl
4600; SSE-NEXT: movzbl %dl, %eax
4601; SSE-NEXT: movd %eax, %xmm1
4602; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
4603; SSE-NEXT: movb %r11b, %al
4604; SSE-NEXT: shlb $7, %al
4605; SSE-NEXT: movb %r11b, %cl
4606; SSE-NEXT: shlb $5, %cl
4607; SSE-NEXT: andb $64, %cl
4608; SSE-NEXT: movb %r11b, %dl
4609; SSE-NEXT: shlb $3, %dl
4610; SSE-NEXT: andb $32, %dl
4611; SSE-NEXT: orb %cl, %dl
4612; SSE-NEXT: movb %r11b, %cl
4613; SSE-NEXT: addb %cl, %cl
4614; SSE-NEXT: andb $16, %cl
4615; SSE-NEXT: orb %dl, %cl
4616; SSE-NEXT: movb %r11b, %dl
4617; SSE-NEXT: shrb %dl
4618; SSE-NEXT: andb $8, %dl
4619; SSE-NEXT: orb %cl, %dl
4620; SSE-NEXT: movb %r11b, %cl
4621; SSE-NEXT: shrb $3, %cl
4622; SSE-NEXT: andb $4, %cl
4623; SSE-NEXT: orb %dl, %cl
4624; SSE-NEXT: movb %r11b, %dl
4625; SSE-NEXT: shrb $5, %dl
4626; SSE-NEXT: andb $2, %dl
4627; SSE-NEXT: orb %cl, %dl
4628; SSE-NEXT: shrb $7, %r11b
4629; SSE-NEXT: orb %dl, %r11b
4630; SSE-NEXT: orb %al, %r11b
4631; SSE-NEXT: movzbl %r11b, %eax
4632; SSE-NEXT: movd %eax, %xmm2
4633; SSE-NEXT: movb -{{[0-9]+}}(%rsp), %r11b
4634; SSE-NEXT: movb -{{[0-9]+}}(%rsp), %dl
4635; SSE-NEXT: movb -{{[0-9]+}}(%rsp), %r8b
4636; SSE-NEXT: movb -{{[0-9]+}}(%rsp), %bl
4637; SSE-NEXT: movb %bl, %r15b
4638; SSE-NEXT: shlb $7, %r15b
4639; SSE-NEXT: movb %bl, %cl
4640; SSE-NEXT: shlb $5, %cl
4641; SSE-NEXT: andb $64, %cl
4642; SSE-NEXT: movb %bl, %al
4643; SSE-NEXT: shlb $3, %al
4644; SSE-NEXT: andb $32, %al
4645; SSE-NEXT: orb %cl, %al
4646; SSE-NEXT: movb %bl, %cl
4647; SSE-NEXT: addb %cl, %cl
4648; SSE-NEXT: andb $16, %cl
4649; SSE-NEXT: orb %al, %cl
4650; SSE-NEXT: movb %bl, %al
4651; SSE-NEXT: shrb %al
4652; SSE-NEXT: andb $8, %al
4653; SSE-NEXT: orb %cl, %al
4654; SSE-NEXT: movb %bl, %cl
4655; SSE-NEXT: shrb $3, %cl
4656; SSE-NEXT: andb $4, %cl
4657; SSE-NEXT: orb %al, %cl
4658; SSE-NEXT: movb %bl, %al
4659; SSE-NEXT: shrb $5, %al
4660; SSE-NEXT: andb $2, %al
4661; SSE-NEXT: orb %cl, %al
4662; SSE-NEXT: shrb $7, %bl
4663; SSE-NEXT: orb %al, %bl
4664; SSE-NEXT: orb %r15b, %bl
4665; SSE-NEXT: movzbl %bl, %eax
4666; SSE-NEXT: movd %eax, %xmm0
4667; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
4668; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
4669; SSE-NEXT: movb %r14b, %al
4670; SSE-NEXT: shlb $7, %al
4671; SSE-NEXT: movb %r14b, %cl
4672; SSE-NEXT: shlb $5, %cl
4673; SSE-NEXT: andb $64, %cl
4674; SSE-NEXT: movb %r14b, %bl
4675; SSE-NEXT: shlb $3, %bl
4676; SSE-NEXT: andb $32, %bl
4677; SSE-NEXT: orb %cl, %bl
4678; SSE-NEXT: movb %r14b, %cl
4679; SSE-NEXT: addb %cl, %cl
4680; SSE-NEXT: andb $16, %cl
4681; SSE-NEXT: orb %bl, %cl
4682; SSE-NEXT: movb %r14b, %bl
4683; SSE-NEXT: shrb %bl
4684; SSE-NEXT: andb $8, %bl
4685; SSE-NEXT: orb %cl, %bl
4686; SSE-NEXT: movb %r14b, %cl
4687; SSE-NEXT: shrb $3, %cl
4688; SSE-NEXT: andb $4, %cl
4689; SSE-NEXT: orb %bl, %cl
4690; SSE-NEXT: movb %r14b, %bl
4691; SSE-NEXT: shrb $5, %bl
4692; SSE-NEXT: andb $2, %bl
4693; SSE-NEXT: orb %cl, %bl
4694; SSE-NEXT: shrb $7, %r14b
4695; SSE-NEXT: orb %bl, %r14b
4696; SSE-NEXT: orb %al, %r14b
4697; SSE-NEXT: movzbl %r14b, %eax
4698; SSE-NEXT: movd %eax, %xmm1
4699; SSE-NEXT: movb %dl, %al
4700; SSE-NEXT: shlb $7, %al
4701; SSE-NEXT: movb %dl, %cl
4702; SSE-NEXT: shlb $5, %cl
4703; SSE-NEXT: andb $64, %cl
4704; SSE-NEXT: movb %dl, %bl
4705; SSE-NEXT: shlb $3, %bl
4706; SSE-NEXT: andb $32, %bl
4707; SSE-NEXT: orb %cl, %bl
4708; SSE-NEXT: movb %dl, %cl
4709; SSE-NEXT: addb %cl, %cl
4710; SSE-NEXT: andb $16, %cl
4711; SSE-NEXT: orb %bl, %cl
4712; SSE-NEXT: movb %dl, %bl
4713; SSE-NEXT: shrb %bl
4714; SSE-NEXT: andb $8, %bl
4715; SSE-NEXT: orb %cl, %bl
4716; SSE-NEXT: movb %dl, %cl
4717; SSE-NEXT: shrb $3, %cl
4718; SSE-NEXT: andb $4, %cl
4719; SSE-NEXT: orb %bl, %cl
4720; SSE-NEXT: movb %dl, %bl
4721; SSE-NEXT: shrb $5, %bl
4722; SSE-NEXT: andb $2, %bl
4723; SSE-NEXT: orb %cl, %bl
4724; SSE-NEXT: shrb $7, %dl
4725; SSE-NEXT: orb %bl, %dl
4726; SSE-NEXT: orb %al, %dl
4727; SSE-NEXT: movzbl %dl, %eax
4728; SSE-NEXT: movd %eax, %xmm2
4729; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
4730; SSE-NEXT: movb %bpl, %al
4731; SSE-NEXT: shlb $7, %al
4732; SSE-NEXT: movb %bpl, %cl
4733; SSE-NEXT: shlb $5, %cl
4734; SSE-NEXT: andb $64, %cl
4735; SSE-NEXT: movb %bpl, %dl
4736; SSE-NEXT: shlb $3, %dl
4737; SSE-NEXT: andb $32, %dl
4738; SSE-NEXT: orb %cl, %dl
4739; SSE-NEXT: movb %bpl, %cl
4740; SSE-NEXT: addb %cl, %cl
4741; SSE-NEXT: andb $16, %cl
4742; SSE-NEXT: orb %dl, %cl
4743; SSE-NEXT: movb %bpl, %dl
4744; SSE-NEXT: shrb %dl
4745; SSE-NEXT: andb $8, %dl
4746; SSE-NEXT: orb %cl, %dl
4747; SSE-NEXT: movb %bpl, %cl
4748; SSE-NEXT: shrb $3, %cl
4749; SSE-NEXT: andb $4, %cl
4750; SSE-NEXT: orb %dl, %cl
4751; SSE-NEXT: movb %bpl, %dl
4752; SSE-NEXT: shrb $5, %dl
4753; SSE-NEXT: andb $2, %dl
4754; SSE-NEXT: orb %cl, %dl
4755; SSE-NEXT: shrb $7, %bpl
4756; SSE-NEXT: orb %dl, %bpl
4757; SSE-NEXT: orb %al, %bpl
4758; SSE-NEXT: movzbl %bpl, %eax
4759; SSE-NEXT: movd %eax, %xmm3
4760; SSE-NEXT: movb -{{[0-9]+}}(%rsp), %bpl
4761; SSE-NEXT: movb -{{[0-9]+}}(%rsp), %dl
4762; SSE-NEXT: movb %dl, %al
4763; SSE-NEXT: shlb $7, %al
4764; SSE-NEXT: movb %dl, %bl
4765; SSE-NEXT: shlb $5, %bl
4766; SSE-NEXT: andb $64, %bl
4767; SSE-NEXT: movb %dl, %cl
4768; SSE-NEXT: shlb $3, %cl
4769; SSE-NEXT: andb $32, %cl
4770; SSE-NEXT: orb %bl, %cl
4771; SSE-NEXT: movb %dl, %bl
4772; SSE-NEXT: addb %bl, %bl
4773; SSE-NEXT: andb $16, %bl
4774; SSE-NEXT: orb %cl, %bl
4775; SSE-NEXT: movb %dl, %cl
4776; SSE-NEXT: shrb %cl
4777; SSE-NEXT: andb $8, %cl
4778; SSE-NEXT: orb %bl, %cl
4779; SSE-NEXT: movb %dl, %bl
4780; SSE-NEXT: shrb $3, %bl
4781; SSE-NEXT: andb $4, %bl
4782; SSE-NEXT: orb %cl, %bl
4783; SSE-NEXT: movb %dl, %cl
4784; SSE-NEXT: shrb $5, %cl
4785; SSE-NEXT: andb $2, %cl
4786; SSE-NEXT: orb %bl, %cl
4787; SSE-NEXT: shrb $7, %dl
4788; SSE-NEXT: orb %cl, %dl
4789; SSE-NEXT: orb %al, %dl
4790; SSE-NEXT: movzbl %dl, %eax
4791; SSE-NEXT: movd %eax, %xmm1
4792; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
4793; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
4794; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
4795; SSE-NEXT: movb %r10b, %cl
4796; SSE-NEXT: shlb $7, %cl
4797; SSE-NEXT: movb %r10b, %al
4798; SSE-NEXT: shlb $5, %al
4799; SSE-NEXT: andb $64, %al
4800; SSE-NEXT: movb %r10b, %dl
4801; SSE-NEXT: shlb $3, %dl
4802; SSE-NEXT: andb $32, %dl
4803; SSE-NEXT: orb %al, %dl
4804; SSE-NEXT: movb %r10b, %al
4805; SSE-NEXT: addb %al, %al
4806; SSE-NEXT: andb $16, %al
4807; SSE-NEXT: orb %dl, %al
4808; SSE-NEXT: movb %r10b, %dl
4809; SSE-NEXT: shrb %dl
4810; SSE-NEXT: andb $8, %dl
4811; SSE-NEXT: orb %al, %dl
4812; SSE-NEXT: movb %r10b, %al
4813; SSE-NEXT: shrb $3, %al
4814; SSE-NEXT: andb $4, %al
4815; SSE-NEXT: orb %dl, %al
4816; SSE-NEXT: movb %r10b, %dl
4817; SSE-NEXT: shrb $5, %dl
4818; SSE-NEXT: andb $2, %dl
4819; SSE-NEXT: orb %al, %dl
4820; SSE-NEXT: shrb $7, %r10b
4821; SSE-NEXT: orb %dl, %r10b
4822; SSE-NEXT: orb %cl, %r10b
4823; SSE-NEXT: movzbl %r10b, %eax
4824; SSE-NEXT: movd %eax, %xmm2
4825; SSE-NEXT: movb %r11b, %cl
4826; SSE-NEXT: shlb $7, %cl
4827; SSE-NEXT: movb %r11b, %al
4828; SSE-NEXT: shlb $5, %al
4829; SSE-NEXT: andb $64, %al
4830; SSE-NEXT: movb %r11b, %dl
4831; SSE-NEXT: shlb $3, %dl
4832; SSE-NEXT: andb $32, %dl
4833; SSE-NEXT: orb %al, %dl
4834; SSE-NEXT: movb %r11b, %al
4835; SSE-NEXT: addb %al, %al
4836; SSE-NEXT: andb $16, %al
4837; SSE-NEXT: orb %dl, %al
4838; SSE-NEXT: movb %r11b, %dl
4839; SSE-NEXT: shrb %dl
4840; SSE-NEXT: andb $8, %dl
4841; SSE-NEXT: orb %al, %dl
4842; SSE-NEXT: movb %r11b, %al
4843; SSE-NEXT: shrb $3, %al
4844; SSE-NEXT: andb $4, %al
4845; SSE-NEXT: orb %dl, %al
4846; SSE-NEXT: movb %r11b, %dl
4847; SSE-NEXT: shrb $5, %dl
4848; SSE-NEXT: andb $2, %dl
4849; SSE-NEXT: orb %al, %dl
4850; SSE-NEXT: shrb $7, %r11b
4851; SSE-NEXT: orb %dl, %r11b
4852; SSE-NEXT: orb %cl, %r11b
4853; SSE-NEXT: movzbl %r11b, %eax
4854; SSE-NEXT: movd %eax, %xmm0
4855; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
4856; SSE-NEXT: movb %r9b, %cl
4857; SSE-NEXT: shlb $7, %cl
4858; SSE-NEXT: movb %r9b, %al
4859; SSE-NEXT: shlb $5, %al
4860; SSE-NEXT: andb $64, %al
4861; SSE-NEXT: movb %r9b, %dl
4862; SSE-NEXT: shlb $3, %dl
4863; SSE-NEXT: andb $32, %dl
4864; SSE-NEXT: orb %al, %dl
4865; SSE-NEXT: movb %r9b, %al
4866; SSE-NEXT: addb %al, %al
4867; SSE-NEXT: andb $16, %al
4868; SSE-NEXT: orb %dl, %al
4869; SSE-NEXT: movb %r9b, %dl
4870; SSE-NEXT: shrb %dl
4871; SSE-NEXT: andb $8, %dl
4872; SSE-NEXT: orb %al, %dl
4873; SSE-NEXT: movb %r9b, %al
4874; SSE-NEXT: shrb $3, %al
4875; SSE-NEXT: andb $4, %al
4876; SSE-NEXT: orb %dl, %al
4877; SSE-NEXT: movb %r9b, %dl
4878; SSE-NEXT: shrb $5, %dl
4879; SSE-NEXT: andb $2, %dl
4880; SSE-NEXT: orb %al, %dl
4881; SSE-NEXT: shrb $7, %r9b
4882; SSE-NEXT: orb %dl, %r9b
4883; SSE-NEXT: orb %cl, %r9b
4884; SSE-NEXT: movzbl %r9b, %eax
4885; SSE-NEXT: movd %eax, %xmm3
4886; SSE-NEXT: movb %bpl, %cl
4887; SSE-NEXT: shlb $7, %cl
4888; SSE-NEXT: movb %bpl, %al
4889; SSE-NEXT: shlb $5, %al
4890; SSE-NEXT: andb $64, %al
4891; SSE-NEXT: movb %bpl, %dl
4892; SSE-NEXT: shlb $3, %dl
4893; SSE-NEXT: andb $32, %dl
4894; SSE-NEXT: orb %al, %dl
4895; SSE-NEXT: movb %bpl, %al
4896; SSE-NEXT: addb %al, %al
4897; SSE-NEXT: andb $16, %al
4898; SSE-NEXT: orb %dl, %al
4899; SSE-NEXT: movb %bpl, %dl
4900; SSE-NEXT: shrb %dl
4901; SSE-NEXT: andb $8, %dl
4902; SSE-NEXT: orb %al, %dl
4903; SSE-NEXT: movb %bpl, %al
4904; SSE-NEXT: shrb $3, %al
4905; SSE-NEXT: andb $4, %al
4906; SSE-NEXT: orb %dl, %al
4907; SSE-NEXT: movb %bpl, %dl
4908; SSE-NEXT: shrb $5, %dl
4909; SSE-NEXT: andb $2, %dl
4910; SSE-NEXT: orb %al, %dl
4911; SSE-NEXT: shrb $7, %bpl
4912; SSE-NEXT: orb %dl, %bpl
4913; SSE-NEXT: orb %cl, %bpl
4914; SSE-NEXT: movzbl %bpl, %eax
4915; SSE-NEXT: movd %eax, %xmm2
4916; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
4917; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
4918; SSE-NEXT: movb %dil, %al
4919; SSE-NEXT: shlb $7, %al
4920; SSE-NEXT: movb %dil, %cl
4921; SSE-NEXT: shlb $5, %cl
4922; SSE-NEXT: andb $64, %cl
4923; SSE-NEXT: movb %dil, %dl
4924; SSE-NEXT: shlb $3, %dl
4925; SSE-NEXT: andb $32, %dl
4926; SSE-NEXT: orb %cl, %dl
4927; SSE-NEXT: movb %dil, %cl
4928; SSE-NEXT: addb %cl, %cl
4929; SSE-NEXT: andb $16, %cl
4930; SSE-NEXT: orb %dl, %cl
4931; SSE-NEXT: movb %dil, %dl
4932; SSE-NEXT: shrb %dl
4933; SSE-NEXT: andb $8, %dl
4934; SSE-NEXT: orb %cl, %dl
4935; SSE-NEXT: movb %dil, %cl
4936; SSE-NEXT: shrb $3, %cl
4937; SSE-NEXT: andb $4, %cl
4938; SSE-NEXT: orb %dl, %cl
4939; SSE-NEXT: movb %dil, %dl
4940; SSE-NEXT: shrb $5, %dl
4941; SSE-NEXT: andb $2, %dl
4942; SSE-NEXT: orb %cl, %dl
4943; SSE-NEXT: shrb $7, %dil
4944; SSE-NEXT: orb %dl, %dil
4945; SSE-NEXT: orb %al, %dil
4946; SSE-NEXT: movzbl %dil, %eax
4947; SSE-NEXT: movd %eax, %xmm0
4948; SSE-NEXT: movb %r8b, %al
4949; SSE-NEXT: shlb $7, %al
4950; SSE-NEXT: movb %r8b, %cl
4951; SSE-NEXT: shlb $5, %cl
4952; SSE-NEXT: andb $64, %cl
4953; SSE-NEXT: movb %r8b, %dl
4954; SSE-NEXT: shlb $3, %dl
4955; SSE-NEXT: andb $32, %dl
4956; SSE-NEXT: orb %cl, %dl
4957; SSE-NEXT: movb %r8b, %cl
4958; SSE-NEXT: addb %cl, %cl
4959; SSE-NEXT: andb $16, %cl
4960; SSE-NEXT: orb %dl, %cl
4961; SSE-NEXT: movb %r8b, %dl
4962; SSE-NEXT: shrb %dl
4963; SSE-NEXT: andb $8, %dl
4964; SSE-NEXT: orb %cl, %dl
4965; SSE-NEXT: movb %r8b, %cl
4966; SSE-NEXT: shrb $3, %cl
4967; SSE-NEXT: andb $4, %cl
4968; SSE-NEXT: orb %dl, %cl
4969; SSE-NEXT: movb %r8b, %dl
4970; SSE-NEXT: shrb $5, %dl
4971; SSE-NEXT: andb $2, %dl
4972; SSE-NEXT: orb %cl, %dl
4973; SSE-NEXT: shrb $7, %r8b
4974; SSE-NEXT: orb %dl, %r8b
4975; SSE-NEXT: orb %al, %r8b
4976; SSE-NEXT: movzbl %r8b, %eax
4977; SSE-NEXT: movd %eax, %xmm3
4978; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
4979; SSE-NEXT: movb %sil, %al
4980; SSE-NEXT: shlb $7, %al
4981; SSE-NEXT: movb %sil, %cl
4982; SSE-NEXT: shlb $5, %cl
4983; SSE-NEXT: andb $64, %cl
4984; SSE-NEXT: movb %sil, %dl
4985; SSE-NEXT: shlb $3, %dl
4986; SSE-NEXT: andb $32, %dl
4987; SSE-NEXT: orb %cl, %dl
4988; SSE-NEXT: movb %sil, %cl
4989; SSE-NEXT: addb %cl, %cl
4990; SSE-NEXT: andb $16, %cl
4991; SSE-NEXT: orb %dl, %cl
4992; SSE-NEXT: movb %sil, %dl
4993; SSE-NEXT: shrb %dl
4994; SSE-NEXT: andb $8, %dl
4995; SSE-NEXT: orb %cl, %dl
4996; SSE-NEXT: movb %sil, %cl
4997; SSE-NEXT: shrb $3, %cl
4998; SSE-NEXT: andb $4, %cl
4999; SSE-NEXT: orb %dl, %cl
5000; SSE-NEXT: movb %sil, %dl
5001; SSE-NEXT: shrb $5, %dl
5002; SSE-NEXT: andb $2, %dl
5003; SSE-NEXT: orb %cl, %dl
5004; SSE-NEXT: shrb $7, %sil
5005; SSE-NEXT: orb %dl, %sil
5006; SSE-NEXT: orb %al, %sil
5007; SSE-NEXT: movzbl %sil, %eax
5008; SSE-NEXT: movd %eax, %xmm4
5009; SSE-NEXT: movb -{{[0-9]+}}(%rsp), %cl
5010; SSE-NEXT: movb %cl, %bl
5011; SSE-NEXT: shlb $7, %bl
5012; SSE-NEXT: movb %cl, %dl
5013; SSE-NEXT: shlb $5, %dl
5014; SSE-NEXT: andb $64, %dl
5015; SSE-NEXT: movb %cl, %al
5016; SSE-NEXT: shlb $3, %al
5017; SSE-NEXT: andb $32, %al
5018; SSE-NEXT: orb %dl, %al
5019; SSE-NEXT: movb %cl, %dl
5020; SSE-NEXT: addb %dl, %dl
5021; SSE-NEXT: andb $16, %dl
5022; SSE-NEXT: orb %al, %dl
5023; SSE-NEXT: movb %cl, %al
5024; SSE-NEXT: shrb %al
5025; SSE-NEXT: andb $8, %al
5026; SSE-NEXT: orb %dl, %al
5027; SSE-NEXT: movb %cl, %dl
5028; SSE-NEXT: shrb $3, %dl
5029; SSE-NEXT: andb $4, %dl
5030; SSE-NEXT: orb %al, %dl
5031; SSE-NEXT: movb %cl, %al
5032; SSE-NEXT: shrb $5, %al
5033; SSE-NEXT: andb $2, %al
5034; SSE-NEXT: orb %dl, %al
5035; SSE-NEXT: shrb $7, %cl
5036; SSE-NEXT: orb %al, %cl
5037; SSE-NEXT: orb %bl, %cl
5038; SSE-NEXT: movzbl %cl, %eax
5039; SSE-NEXT: movd %eax, %xmm0
5040; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
5041; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
5042; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
5043; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
5044; SSE-NEXT: movb -{{[0-9]+}}(%rsp), %cl
5045; SSE-NEXT: movb %cl, %bl
5046; SSE-NEXT: shlb $7, %bl
5047; SSE-NEXT: movb %cl, %dl
5048; SSE-NEXT: shlb $5, %dl
5049; SSE-NEXT: andb $64, %dl
5050; SSE-NEXT: movb %cl, %al
5051; SSE-NEXT: shlb $3, %al
5052; SSE-NEXT: andb $32, %al
5053; SSE-NEXT: orb %dl, %al
5054; SSE-NEXT: movb %cl, %dl
5055; SSE-NEXT: addb %dl, %dl
5056; SSE-NEXT: andb $16, %dl
5057; SSE-NEXT: orb %al, %dl
5058; SSE-NEXT: movb %cl, %al
5059; SSE-NEXT: shrb %al
5060; SSE-NEXT: andb $8, %al
5061; SSE-NEXT: orb %dl, %al
5062; SSE-NEXT: movb %cl, %dl
5063; SSE-NEXT: shrb $3, %dl
5064; SSE-NEXT: andb $4, %dl
5065; SSE-NEXT: orb %al, %dl
5066; SSE-NEXT: movb %cl, %al
5067; SSE-NEXT: shrb $5, %al
5068; SSE-NEXT: andb $2, %al
5069; SSE-NEXT: orb %dl, %al
5070; SSE-NEXT: shrb $7, %cl
5071; SSE-NEXT: orb %al, %cl
5072; SSE-NEXT: orb %bl, %cl
5073; SSE-NEXT: movzbl %cl, %eax
5074; SSE-NEXT: movd %eax, %xmm1
5075; SSE-NEXT: movb -{{[0-9]+}}(%rsp), %r10b
5076; SSE-NEXT: movb -{{[0-9]+}}(%rsp), %r14b
5077; SSE-NEXT: movb -{{[0-9]+}}(%rsp), %dil
5078; SSE-NEXT: movb -{{[0-9]+}}(%rsp), %r11b
5079; SSE-NEXT: movb -{{[0-9]+}}(%rsp), %r9b
5080; SSE-NEXT: movb -{{[0-9]+}}(%rsp), %bpl
5081; SSE-NEXT: movb -{{[0-9]+}}(%rsp), %sil
5082; SSE-NEXT: movb -{{[0-9]+}}(%rsp), %dl
5083; SSE-NEXT: movb %dl, %r8b
5084; SSE-NEXT: shlb $7, %r8b
5085; SSE-NEXT: movb %dl, %al
5086; SSE-NEXT: shlb $5, %al
5087; SSE-NEXT: andb $64, %al
5088; SSE-NEXT: movb %dl, %bl
5089; SSE-NEXT: shlb $3, %bl
5090; SSE-NEXT: andb $32, %bl
5091; SSE-NEXT: orb %al, %bl
5092; SSE-NEXT: movb %dl, %al
5093; SSE-NEXT: addb %al, %al
5094; SSE-NEXT: andb $16, %al
5095; SSE-NEXT: orb %bl, %al
5096; SSE-NEXT: movb %dl, %cl
5097; SSE-NEXT: shrb %cl
5098; SSE-NEXT: andb $8, %cl
5099; SSE-NEXT: orb %al, %cl
5100; SSE-NEXT: movb %dl, %al
5101; SSE-NEXT: shrb $3, %al
5102; SSE-NEXT: andb $4, %al
5103; SSE-NEXT: orb %cl, %al
5104; SSE-NEXT: movb %dl, %cl
5105; SSE-NEXT: shrb $5, %cl
5106; SSE-NEXT: andb $2, %cl
5107; SSE-NEXT: orb %al, %cl
5108; SSE-NEXT: shrb $7, %dl
5109; SSE-NEXT: orb %cl, %dl
5110; SSE-NEXT: orb %r8b, %dl
5111; SSE-NEXT: movzbl %dl, %eax
5112; SSE-NEXT: movd %eax, %xmm2
5113; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
5114; SSE-NEXT: movb %r11b, %al
5115; SSE-NEXT: shlb $7, %al
5116; SSE-NEXT: movb %r11b, %cl
5117; SSE-NEXT: shlb $5, %cl
5118; SSE-NEXT: andb $64, %cl
5119; SSE-NEXT: movb %r11b, %dl
5120; SSE-NEXT: shlb $3, %dl
5121; SSE-NEXT: andb $32, %dl
5122; SSE-NEXT: orb %cl, %dl
5123; SSE-NEXT: movb %r11b, %cl
5124; SSE-NEXT: addb %cl, %cl
5125; SSE-NEXT: andb $16, %cl
5126; SSE-NEXT: orb %dl, %cl
5127; SSE-NEXT: movb %r11b, %dl
5128; SSE-NEXT: shrb %dl
5129; SSE-NEXT: andb $8, %dl
5130; SSE-NEXT: orb %cl, %dl
5131; SSE-NEXT: movb %r11b, %cl
5132; SSE-NEXT: shrb $3, %cl
5133; SSE-NEXT: andb $4, %cl
5134; SSE-NEXT: orb %dl, %cl
5135; SSE-NEXT: movb %r11b, %dl
5136; SSE-NEXT: shrb $5, %dl
5137; SSE-NEXT: andb $2, %dl
5138; SSE-NEXT: orb %cl, %dl
5139; SSE-NEXT: shrb $7, %r11b
5140; SSE-NEXT: orb %dl, %r11b
5141; SSE-NEXT: orb %al, %r11b
5142; SSE-NEXT: movzbl %r11b, %eax
5143; SSE-NEXT: movd %eax, %xmm3
5144; SSE-NEXT: movb -{{[0-9]+}}(%rsp), %r11b
5145; SSE-NEXT: movb -{{[0-9]+}}(%rsp), %dl
5146; SSE-NEXT: movb -{{[0-9]+}}(%rsp), %r8b
5147; SSE-NEXT: movb -{{[0-9]+}}(%rsp), %al
5148; SSE-NEXT: movb %al, %r15b
5149; SSE-NEXT: shlb $7, %r15b
5150; SSE-NEXT: movb %al, %cl
5151; SSE-NEXT: shlb $5, %cl
5152; SSE-NEXT: andb $64, %cl
5153; SSE-NEXT: movb %al, %bl
5154; SSE-NEXT: shlb $3, %bl
5155; SSE-NEXT: andb $32, %bl
5156; SSE-NEXT: orb %cl, %bl
5157; SSE-NEXT: movb %al, %cl
5158; SSE-NEXT: addb %cl, %cl
5159; SSE-NEXT: andb $16, %cl
5160; SSE-NEXT: orb %bl, %cl
5161; SSE-NEXT: movb %al, %bl
5162; SSE-NEXT: shrb %bl
5163; SSE-NEXT: andb $8, %bl
5164; SSE-NEXT: orb %cl, %bl
5165; SSE-NEXT: movb %al, %cl
5166; SSE-NEXT: shrb $3, %cl
5167; SSE-NEXT: andb $4, %cl
5168; SSE-NEXT: orb %bl, %cl
5169; SSE-NEXT: movb %al, %bl
5170; SSE-NEXT: shrb $5, %bl
5171; SSE-NEXT: andb $2, %bl
5172; SSE-NEXT: orb %cl, %bl
5173; SSE-NEXT: shrb $7, %al
5174; SSE-NEXT: orb %bl, %al
5175; SSE-NEXT: orb %r15b, %al
5176; SSE-NEXT: movzbl %al, %eax
5177; SSE-NEXT: movd %eax, %xmm1
5178; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
5179; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
5180; SSE-NEXT: movb %r14b, %al
5181; SSE-NEXT: shlb $7, %al
5182; SSE-NEXT: movb %r14b, %cl
5183; SSE-NEXT: shlb $5, %cl
5184; SSE-NEXT: andb $64, %cl
5185; SSE-NEXT: movb %r14b, %bl
5186; SSE-NEXT: shlb $3, %bl
5187; SSE-NEXT: andb $32, %bl
5188; SSE-NEXT: orb %cl, %bl
5189; SSE-NEXT: movb %r14b, %cl
5190; SSE-NEXT: addb %cl, %cl
5191; SSE-NEXT: andb $16, %cl
5192; SSE-NEXT: orb %bl, %cl
5193; SSE-NEXT: movb %r14b, %bl
5194; SSE-NEXT: shrb %bl
5195; SSE-NEXT: andb $8, %bl
5196; SSE-NEXT: orb %cl, %bl
5197; SSE-NEXT: movb %r14b, %cl
5198; SSE-NEXT: shrb $3, %cl
5199; SSE-NEXT: andb $4, %cl
5200; SSE-NEXT: orb %bl, %cl
5201; SSE-NEXT: movb %r14b, %bl
5202; SSE-NEXT: shrb $5, %bl
5203; SSE-NEXT: andb $2, %bl
5204; SSE-NEXT: orb %cl, %bl
5205; SSE-NEXT: shrb $7, %r14b
5206; SSE-NEXT: orb %bl, %r14b
5207; SSE-NEXT: orb %al, %r14b
5208; SSE-NEXT: movzbl %r14b, %eax
5209; SSE-NEXT: movd %eax, %xmm2
5210; SSE-NEXT: movb %dl, %al
5211; SSE-NEXT: shlb $7, %al
5212; SSE-NEXT: movb %dl, %cl
5213; SSE-NEXT: shlb $5, %cl
5214; SSE-NEXT: andb $64, %cl
5215; SSE-NEXT: movb %dl, %bl
5216; SSE-NEXT: shlb $3, %bl
5217; SSE-NEXT: andb $32, %bl
5218; SSE-NEXT: orb %cl, %bl
5219; SSE-NEXT: movb %dl, %cl
5220; SSE-NEXT: addb %cl, %cl
5221; SSE-NEXT: andb $16, %cl
5222; SSE-NEXT: orb %bl, %cl
5223; SSE-NEXT: movb %dl, %bl
5224; SSE-NEXT: shrb %bl
5225; SSE-NEXT: andb $8, %bl
5226; SSE-NEXT: orb %cl, %bl
5227; SSE-NEXT: movb %dl, %cl
5228; SSE-NEXT: shrb $3, %cl
5229; SSE-NEXT: andb $4, %cl
5230; SSE-NEXT: orb %bl, %cl
5231; SSE-NEXT: movb %dl, %bl
5232; SSE-NEXT: shrb $5, %bl
5233; SSE-NEXT: andb $2, %bl
5234; SSE-NEXT: orb %cl, %bl
5235; SSE-NEXT: shrb $7, %dl
5236; SSE-NEXT: orb %bl, %dl
5237; SSE-NEXT: orb %al, %dl
5238; SSE-NEXT: movzbl %dl, %eax
5239; SSE-NEXT: movd %eax, %xmm3
5240; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
5241; SSE-NEXT: movb %bpl, %al
5242; SSE-NEXT: shlb $7, %al
5243; SSE-NEXT: movb %bpl, %cl
5244; SSE-NEXT: shlb $5, %cl
5245; SSE-NEXT: andb $64, %cl
5246; SSE-NEXT: movb %bpl, %dl
5247; SSE-NEXT: shlb $3, %dl
5248; SSE-NEXT: andb $32, %dl
5249; SSE-NEXT: orb %cl, %dl
5250; SSE-NEXT: movb %bpl, %cl
5251; SSE-NEXT: addb %cl, %cl
5252; SSE-NEXT: andb $16, %cl
5253; SSE-NEXT: orb %dl, %cl
5254; SSE-NEXT: movb %bpl, %dl
5255; SSE-NEXT: shrb %dl
5256; SSE-NEXT: andb $8, %dl
5257; SSE-NEXT: orb %cl, %dl
5258; SSE-NEXT: movb %bpl, %cl
5259; SSE-NEXT: shrb $3, %cl
5260; SSE-NEXT: andb $4, %cl
5261; SSE-NEXT: orb %dl, %cl
5262; SSE-NEXT: movb %bpl, %dl
5263; SSE-NEXT: shrb $5, %dl
5264; SSE-NEXT: andb $2, %dl
5265; SSE-NEXT: orb %cl, %dl
5266; SSE-NEXT: shrb $7, %bpl
5267; SSE-NEXT: orb %dl, %bpl
5268; SSE-NEXT: orb %al, %bpl
5269; SSE-NEXT: movzbl %bpl, %eax
5270; SSE-NEXT: movd %eax, %xmm4
5271; SSE-NEXT: movb -{{[0-9]+}}(%rsp), %bpl
5272; SSE-NEXT: movb -{{[0-9]+}}(%rsp), %dl
5273; SSE-NEXT: movb %dl, %al
5274; SSE-NEXT: shlb $7, %al
5275; SSE-NEXT: movb %dl, %bl
5276; SSE-NEXT: shlb $5, %bl
5277; SSE-NEXT: andb $64, %bl
5278; SSE-NEXT: movb %dl, %cl
5279; SSE-NEXT: shlb $3, %cl
5280; SSE-NEXT: andb $32, %cl
5281; SSE-NEXT: orb %bl, %cl
5282; SSE-NEXT: movb %dl, %bl
5283; SSE-NEXT: addb %bl, %bl
5284; SSE-NEXT: andb $16, %bl
5285; SSE-NEXT: orb %cl, %bl
5286; SSE-NEXT: movb %dl, %cl
5287; SSE-NEXT: shrb %cl
5288; SSE-NEXT: andb $8, %cl
5289; SSE-NEXT: orb %bl, %cl
5290; SSE-NEXT: movb %dl, %bl
5291; SSE-NEXT: shrb $3, %bl
5292; SSE-NEXT: andb $4, %bl
5293; SSE-NEXT: orb %cl, %bl
5294; SSE-NEXT: movb %dl, %cl
5295; SSE-NEXT: shrb $5, %cl
5296; SSE-NEXT: andb $2, %cl
5297; SSE-NEXT: orb %bl, %cl
5298; SSE-NEXT: shrb $7, %dl
5299; SSE-NEXT: orb %cl, %dl
5300; SSE-NEXT: orb %al, %dl
5301; SSE-NEXT: movzbl %dl, %eax
5302; SSE-NEXT: movd %eax, %xmm2
5303; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
5304; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
5305; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
5306; SSE-NEXT: movb %r10b, %cl
5307; SSE-NEXT: shlb $7, %cl
5308; SSE-NEXT: movb %r10b, %al
5309; SSE-NEXT: shlb $5, %al
5310; SSE-NEXT: andb $64, %al
5311; SSE-NEXT: movb %r10b, %dl
5312; SSE-NEXT: shlb $3, %dl
5313; SSE-NEXT: andb $32, %dl
5314; SSE-NEXT: orb %al, %dl
5315; SSE-NEXT: movb %r10b, %al
5316; SSE-NEXT: addb %al, %al
5317; SSE-NEXT: andb $16, %al
5318; SSE-NEXT: orb %dl, %al
5319; SSE-NEXT: movb %r10b, %dl
5320; SSE-NEXT: shrb %dl
5321; SSE-NEXT: andb $8, %dl
5322; SSE-NEXT: orb %al, %dl
5323; SSE-NEXT: movb %r10b, %al
5324; SSE-NEXT: shrb $3, %al
5325; SSE-NEXT: andb $4, %al
5326; SSE-NEXT: orb %dl, %al
5327; SSE-NEXT: movb %r10b, %dl
5328; SSE-NEXT: shrb $5, %dl
5329; SSE-NEXT: andb $2, %dl
5330; SSE-NEXT: orb %al, %dl
5331; SSE-NEXT: shrb $7, %r10b
5332; SSE-NEXT: orb %dl, %r10b
5333; SSE-NEXT: orb %cl, %r10b
5334; SSE-NEXT: movzbl %r10b, %eax
5335; SSE-NEXT: movd %eax, %xmm3
5336; SSE-NEXT: movb %r11b, %cl
5337; SSE-NEXT: shlb $7, %cl
5338; SSE-NEXT: movb %r11b, %al
5339; SSE-NEXT: shlb $5, %al
5340; SSE-NEXT: andb $64, %al
5341; SSE-NEXT: movb %r11b, %dl
5342; SSE-NEXT: shlb $3, %dl
5343; SSE-NEXT: andb $32, %dl
5344; SSE-NEXT: orb %al, %dl
5345; SSE-NEXT: movb %r11b, %al
5346; SSE-NEXT: addb %al, %al
5347; SSE-NEXT: andb $16, %al
5348; SSE-NEXT: orb %dl, %al
5349; SSE-NEXT: movb %r11b, %dl
5350; SSE-NEXT: shrb %dl
5351; SSE-NEXT: andb $8, %dl
5352; SSE-NEXT: orb %al, %dl
5353; SSE-NEXT: movb %r11b, %al
5354; SSE-NEXT: shrb $3, %al
5355; SSE-NEXT: andb $4, %al
5356; SSE-NEXT: orb %dl, %al
5357; SSE-NEXT: movb %r11b, %dl
5358; SSE-NEXT: shrb $5, %dl
5359; SSE-NEXT: andb $2, %dl
5360; SSE-NEXT: orb %al, %dl
5361; SSE-NEXT: shrb $7, %r11b
5362; SSE-NEXT: orb %dl, %r11b
5363; SSE-NEXT: orb %cl, %r11b
5364; SSE-NEXT: movzbl %r11b, %eax
5365; SSE-NEXT: movd %eax, %xmm1
5366; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
5367; SSE-NEXT: movb %r9b, %cl
5368; SSE-NEXT: shlb $7, %cl
5369; SSE-NEXT: movb %r9b, %al
5370; SSE-NEXT: shlb $5, %al
5371; SSE-NEXT: andb $64, %al
5372; SSE-NEXT: movb %r9b, %dl
5373; SSE-NEXT: shlb $3, %dl
5374; SSE-NEXT: andb $32, %dl
5375; SSE-NEXT: orb %al, %dl
5376; SSE-NEXT: movb %r9b, %al
5377; SSE-NEXT: addb %al, %al
5378; SSE-NEXT: andb $16, %al
5379; SSE-NEXT: orb %dl, %al
5380; SSE-NEXT: movb %r9b, %dl
5381; SSE-NEXT: shrb %dl
5382; SSE-NEXT: andb $8, %dl
5383; SSE-NEXT: orb %al, %dl
5384; SSE-NEXT: movb %r9b, %al
5385; SSE-NEXT: shrb $3, %al
5386; SSE-NEXT: andb $4, %al
5387; SSE-NEXT: orb %dl, %al
5388; SSE-NEXT: movb %r9b, %dl
5389; SSE-NEXT: shrb $5, %dl
5390; SSE-NEXT: andb $2, %dl
5391; SSE-NEXT: orb %al, %dl
5392; SSE-NEXT: shrb $7, %r9b
5393; SSE-NEXT: orb %dl, %r9b
5394; SSE-NEXT: orb %cl, %r9b
5395; SSE-NEXT: movzbl %r9b, %eax
5396; SSE-NEXT: movd %eax, %xmm4
5397; SSE-NEXT: movb %bpl, %cl
5398; SSE-NEXT: shlb $7, %cl
5399; SSE-NEXT: movb %bpl, %al
5400; SSE-NEXT: shlb $5, %al
5401; SSE-NEXT: andb $64, %al
5402; SSE-NEXT: movb %bpl, %dl
5403; SSE-NEXT: shlb $3, %dl
5404; SSE-NEXT: andb $32, %dl
5405; SSE-NEXT: orb %al, %dl
5406; SSE-NEXT: movb %bpl, %al
5407; SSE-NEXT: addb %al, %al
5408; SSE-NEXT: andb $16, %al
5409; SSE-NEXT: orb %dl, %al
5410; SSE-NEXT: movb %bpl, %dl
5411; SSE-NEXT: shrb %dl
5412; SSE-NEXT: andb $8, %dl
5413; SSE-NEXT: orb %al, %dl
5414; SSE-NEXT: movb %bpl, %al
5415; SSE-NEXT: shrb $3, %al
5416; SSE-NEXT: andb $4, %al
5417; SSE-NEXT: orb %dl, %al
5418; SSE-NEXT: movb %bpl, %dl
5419; SSE-NEXT: shrb $5, %dl
5420; SSE-NEXT: andb $2, %dl
5421; SSE-NEXT: orb %al, %dl
5422; SSE-NEXT: shrb $7, %bpl
5423; SSE-NEXT: orb %dl, %bpl
5424; SSE-NEXT: orb %cl, %bpl
5425; SSE-NEXT: movzbl %bpl, %eax
5426; SSE-NEXT: movd %eax, %xmm3
5427; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
5428; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
5429; SSE-NEXT: movb %dil, %al
5430; SSE-NEXT: shlb $7, %al
5431; SSE-NEXT: movb %dil, %cl
5432; SSE-NEXT: shlb $5, %cl
5433; SSE-NEXT: andb $64, %cl
5434; SSE-NEXT: movb %dil, %dl
5435; SSE-NEXT: shlb $3, %dl
5436; SSE-NEXT: andb $32, %dl
5437; SSE-NEXT: orb %cl, %dl
5438; SSE-NEXT: movb %dil, %cl
5439; SSE-NEXT: addb %cl, %cl
5440; SSE-NEXT: andb $16, %cl
5441; SSE-NEXT: orb %dl, %cl
5442; SSE-NEXT: movb %dil, %dl
5443; SSE-NEXT: shrb %dl
5444; SSE-NEXT: andb $8, %dl
5445; SSE-NEXT: orb %cl, %dl
5446; SSE-NEXT: movb %dil, %cl
5447; SSE-NEXT: shrb $3, %cl
5448; SSE-NEXT: andb $4, %cl
5449; SSE-NEXT: orb %dl, %cl
5450; SSE-NEXT: movb %dil, %dl
5451; SSE-NEXT: shrb $5, %dl
5452; SSE-NEXT: andb $2, %dl
5453; SSE-NEXT: orb %cl, %dl
5454; SSE-NEXT: shrb $7, %dil
5455; SSE-NEXT: orb %dl, %dil
5456; SSE-NEXT: orb %al, %dil
5457; SSE-NEXT: movzbl %dil, %eax
5458; SSE-NEXT: movd %eax, %xmm1
5459; SSE-NEXT: movb %r8b, %al
5460; SSE-NEXT: shlb $7, %al
5461; SSE-NEXT: movb %r8b, %cl
5462; SSE-NEXT: shlb $5, %cl
5463; SSE-NEXT: andb $64, %cl
5464; SSE-NEXT: movb %r8b, %dl
5465; SSE-NEXT: shlb $3, %dl
5466; SSE-NEXT: andb $32, %dl
5467; SSE-NEXT: orb %cl, %dl
5468; SSE-NEXT: movb %r8b, %cl
5469; SSE-NEXT: addb %cl, %cl
5470; SSE-NEXT: andb $16, %cl
5471; SSE-NEXT: orb %dl, %cl
5472; SSE-NEXT: movb %r8b, %dl
5473; SSE-NEXT: shrb %dl
5474; SSE-NEXT: andb $8, %dl
5475; SSE-NEXT: orb %cl, %dl
5476; SSE-NEXT: movb %r8b, %cl
5477; SSE-NEXT: shrb $3, %cl
5478; SSE-NEXT: andb $4, %cl
5479; SSE-NEXT: orb %dl, %cl
5480; SSE-NEXT: movb %r8b, %dl
5481; SSE-NEXT: shrb $5, %dl
5482; SSE-NEXT: andb $2, %dl
5483; SSE-NEXT: orb %cl, %dl
5484; SSE-NEXT: shrb $7, %r8b
5485; SSE-NEXT: orb %dl, %r8b
5486; SSE-NEXT: orb %al, %r8b
5487; SSE-NEXT: movzbl %r8b, %eax
5488; SSE-NEXT: movd %eax, %xmm4
5489; SSE-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7]
5490; SSE-NEXT: movb %sil, %al
5491; SSE-NEXT: shlb $7, %al
5492; SSE-NEXT: movb %sil, %cl
5493; SSE-NEXT: shlb $5, %cl
5494; SSE-NEXT: andb $64, %cl
5495; SSE-NEXT: movb %sil, %dl
5496; SSE-NEXT: shlb $3, %dl
5497; SSE-NEXT: andb $32, %dl
5498; SSE-NEXT: orb %cl, %dl
5499; SSE-NEXT: movb %sil, %cl
5500; SSE-NEXT: addb %cl, %cl
5501; SSE-NEXT: andb $16, %cl
5502; SSE-NEXT: orb %dl, %cl
5503; SSE-NEXT: movb %sil, %dl
5504; SSE-NEXT: shrb %dl
5505; SSE-NEXT: andb $8, %dl
5506; SSE-NEXT: orb %cl, %dl
5507; SSE-NEXT: movb %sil, %cl
5508; SSE-NEXT: shrb $3, %cl
5509; SSE-NEXT: andb $4, %cl
5510; SSE-NEXT: orb %dl, %cl
5511; SSE-NEXT: movb %sil, %dl
5512; SSE-NEXT: shrb $5, %dl
5513; SSE-NEXT: andb $2, %dl
5514; SSE-NEXT: orb %cl, %dl
5515; SSE-NEXT: shrb $7, %sil
5516; SSE-NEXT: orb %dl, %sil
5517; SSE-NEXT: orb %al, %sil
5518; SSE-NEXT: movzbl %sil, %eax
5519; SSE-NEXT: movd %eax, %xmm5
5520; SSE-NEXT: movb -{{[0-9]+}}(%rsp), %cl
5521; SSE-NEXT: movb %cl, %bl
5522; SSE-NEXT: shlb $7, %bl
5523; SSE-NEXT: movb %cl, %dl
5524; SSE-NEXT: shlb $5, %dl
5525; SSE-NEXT: andb $64, %dl
5526; SSE-NEXT: movb %cl, %al
5527; SSE-NEXT: shlb $3, %al
5528; SSE-NEXT: andb $32, %al
5529; SSE-NEXT: orb %dl, %al
5530; SSE-NEXT: movb %cl, %dl
5531; SSE-NEXT: addb %dl, %dl
5532; SSE-NEXT: andb $16, %dl
5533; SSE-NEXT: orb %al, %dl
5534; SSE-NEXT: movb %cl, %al
5535; SSE-NEXT: shrb %al
5536; SSE-NEXT: andb $8, %al
5537; SSE-NEXT: orb %dl, %al
5538; SSE-NEXT: movb %cl, %dl
5539; SSE-NEXT: shrb $3, %dl
5540; SSE-NEXT: andb $4, %dl
5541; SSE-NEXT: orb %al, %dl
5542; SSE-NEXT: movb %cl, %al
5543; SSE-NEXT: shrb $5, %al
5544; SSE-NEXT: andb $2, %al
5545; SSE-NEXT: orb %dl, %al
5546; SSE-NEXT: shrb $7, %cl
5547; SSE-NEXT: orb %al, %cl
5548; SSE-NEXT: orb %bl, %cl
5549; SSE-NEXT: movzbl %cl, %eax
5550; SSE-NEXT: movd %eax, %xmm1
5551; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1],xmm1[2],xmm5[2],xmm1[3],xmm5[3],xmm1[4],xmm5[4],xmm1[5],xmm5[5],xmm1[6],xmm5[6],xmm1[7],xmm5[7]
5552; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
5553; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
5554; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
5555; SSE-NEXT: popq %rbx
5556; SSE-NEXT: popq %r14
5557; SSE-NEXT: popq %r15
5558; SSE-NEXT: popq %rbp
5559; SSE-NEXT: retq
5560;
5561; AVX1-LABEL: test_bitreverse_v32i8:
5562; AVX1: # BB#0:
5563; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
5564; AVX1-NEXT: vpextrb $1, %xmm1, %eax
5565; AVX1-NEXT: movb %al, %sil
5566; AVX1-NEXT: shlb $7, %sil
5567; AVX1-NEXT: movb %al, %dl
5568; AVX1-NEXT: shlb $5, %dl
5569; AVX1-NEXT: andb $64, %dl
5570; AVX1-NEXT: movb %al, %cl
5571; AVX1-NEXT: shlb $3, %cl
5572; AVX1-NEXT: andb $32, %cl
5573; AVX1-NEXT: orb %dl, %cl
5574; AVX1-NEXT: movb %al, %dl
5575; AVX1-NEXT: addb %dl, %dl
5576; AVX1-NEXT: andb $16, %dl
5577; AVX1-NEXT: orb %cl, %dl
5578; AVX1-NEXT: movb %al, %cl
5579; AVX1-NEXT: shrb %cl
5580; AVX1-NEXT: andb $8, %cl
5581; AVX1-NEXT: orb %dl, %cl
5582; AVX1-NEXT: movb %al, %dl
5583; AVX1-NEXT: shrb $3, %dl
5584; AVX1-NEXT: andb $4, %dl
5585; AVX1-NEXT: orb %cl, %dl
5586; AVX1-NEXT: movb %al, %cl
5587; AVX1-NEXT: shrb $5, %cl
5588; AVX1-NEXT: andb $2, %cl
5589; AVX1-NEXT: orb %dl, %cl
5590; AVX1-NEXT: shrb $7, %al
5591; AVX1-NEXT: orb %cl, %al
5592; AVX1-NEXT: orb %sil, %al
5593; AVX1-NEXT: movzbl %al, %esi
5594; AVX1-NEXT: vpextrb $0, %xmm1, %ecx
5595; AVX1-NEXT: movb %cl, %dil
5596; AVX1-NEXT: shlb $7, %dil
5597; AVX1-NEXT: movb %cl, %al
5598; AVX1-NEXT: shlb $5, %al
5599; AVX1-NEXT: andb $64, %al
5600; AVX1-NEXT: movb %cl, %dl
5601; AVX1-NEXT: shlb $3, %dl
5602; AVX1-NEXT: andb $32, %dl
5603; AVX1-NEXT: orb %al, %dl
5604; AVX1-NEXT: movb %cl, %al
5605; AVX1-NEXT: addb %al, %al
5606; AVX1-NEXT: andb $16, %al
5607; AVX1-NEXT: orb %dl, %al
5608; AVX1-NEXT: movb %cl, %dl
5609; AVX1-NEXT: shrb %dl
5610; AVX1-NEXT: andb $8, %dl
5611; AVX1-NEXT: orb %al, %dl
5612; AVX1-NEXT: movb %cl, %al
5613; AVX1-NEXT: shrb $3, %al
5614; AVX1-NEXT: andb $4, %al
5615; AVX1-NEXT: orb %dl, %al
5616; AVX1-NEXT: movb %cl, %dl
5617; AVX1-NEXT: shrb $5, %dl
5618; AVX1-NEXT: andb $2, %dl
5619; AVX1-NEXT: orb %al, %dl
5620; AVX1-NEXT: shrb $7, %cl
5621; AVX1-NEXT: orb %dl, %cl
5622; AVX1-NEXT: orb %dil, %cl
5623; AVX1-NEXT: movzbl %cl, %eax
5624; AVX1-NEXT: vmovd %eax, %xmm2
5625; AVX1-NEXT: vpinsrb $1, %esi, %xmm2, %xmm2
5626; AVX1-NEXT: vpextrb $2, %xmm1, %eax
5627; AVX1-NEXT: movb %al, %sil
5628; AVX1-NEXT: shlb $7, %sil
5629; AVX1-NEXT: movb %al, %dl
5630; AVX1-NEXT: shlb $5, %dl
5631; AVX1-NEXT: andb $64, %dl
5632; AVX1-NEXT: movb %al, %cl
5633; AVX1-NEXT: shlb $3, %cl
5634; AVX1-NEXT: andb $32, %cl
5635; AVX1-NEXT: orb %dl, %cl
5636; AVX1-NEXT: movb %al, %dl
5637; AVX1-NEXT: addb %dl, %dl
5638; AVX1-NEXT: andb $16, %dl
5639; AVX1-NEXT: orb %cl, %dl
5640; AVX1-NEXT: movb %al, %cl
5641; AVX1-NEXT: shrb %cl
5642; AVX1-NEXT: andb $8, %cl
5643; AVX1-NEXT: orb %dl, %cl
5644; AVX1-NEXT: movb %al, %dl
5645; AVX1-NEXT: shrb $3, %dl
5646; AVX1-NEXT: andb $4, %dl
5647; AVX1-NEXT: orb %cl, %dl
5648; AVX1-NEXT: movb %al, %cl
5649; AVX1-NEXT: shrb $5, %cl
5650; AVX1-NEXT: andb $2, %cl
5651; AVX1-NEXT: orb %dl, %cl
5652; AVX1-NEXT: shrb $7, %al
5653; AVX1-NEXT: orb %cl, %al
5654; AVX1-NEXT: orb %sil, %al
5655; AVX1-NEXT: movzbl %al, %eax
5656; AVX1-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
5657; AVX1-NEXT: vpextrb $3, %xmm1, %eax
5658; AVX1-NEXT: movb %al, %sil
5659; AVX1-NEXT: shlb $7, %sil
5660; AVX1-NEXT: movb %al, %dl
5661; AVX1-NEXT: shlb $5, %dl
5662; AVX1-NEXT: andb $64, %dl
5663; AVX1-NEXT: movb %al, %cl
5664; AVX1-NEXT: shlb $3, %cl
5665; AVX1-NEXT: andb $32, %cl
5666; AVX1-NEXT: orb %dl, %cl
5667; AVX1-NEXT: movb %al, %dl
5668; AVX1-NEXT: addb %dl, %dl
5669; AVX1-NEXT: andb $16, %dl
5670; AVX1-NEXT: orb %cl, %dl
5671; AVX1-NEXT: movb %al, %cl
5672; AVX1-NEXT: shrb %cl
5673; AVX1-NEXT: andb $8, %cl
5674; AVX1-NEXT: orb %dl, %cl
5675; AVX1-NEXT: movb %al, %dl
5676; AVX1-NEXT: shrb $3, %dl
5677; AVX1-NEXT: andb $4, %dl
5678; AVX1-NEXT: orb %cl, %dl
5679; AVX1-NEXT: movb %al, %cl
5680; AVX1-NEXT: shrb $5, %cl
5681; AVX1-NEXT: andb $2, %cl
5682; AVX1-NEXT: orb %dl, %cl
5683; AVX1-NEXT: shrb $7, %al
5684; AVX1-NEXT: orb %cl, %al
5685; AVX1-NEXT: orb %sil, %al
5686; AVX1-NEXT: movzbl %al, %eax
5687; AVX1-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
5688; AVX1-NEXT: vpextrb $4, %xmm1, %eax
5689; AVX1-NEXT: movb %al, %sil
5690; AVX1-NEXT: shlb $7, %sil
5691; AVX1-NEXT: movb %al, %dl
5692; AVX1-NEXT: shlb $5, %dl
5693; AVX1-NEXT: andb $64, %dl
5694; AVX1-NEXT: movb %al, %cl
5695; AVX1-NEXT: shlb $3, %cl
5696; AVX1-NEXT: andb $32, %cl
5697; AVX1-NEXT: orb %dl, %cl
5698; AVX1-NEXT: movb %al, %dl
5699; AVX1-NEXT: addb %dl, %dl
5700; AVX1-NEXT: andb $16, %dl
5701; AVX1-NEXT: orb %cl, %dl
5702; AVX1-NEXT: movb %al, %cl
5703; AVX1-NEXT: shrb %cl
5704; AVX1-NEXT: andb $8, %cl
5705; AVX1-NEXT: orb %dl, %cl
5706; AVX1-NEXT: movb %al, %dl
5707; AVX1-NEXT: shrb $3, %dl
5708; AVX1-NEXT: andb $4, %dl
5709; AVX1-NEXT: orb %cl, %dl
5710; AVX1-NEXT: movb %al, %cl
5711; AVX1-NEXT: shrb $5, %cl
5712; AVX1-NEXT: andb $2, %cl
5713; AVX1-NEXT: orb %dl, %cl
5714; AVX1-NEXT: shrb $7, %al
5715; AVX1-NEXT: orb %cl, %al
5716; AVX1-NEXT: orb %sil, %al
5717; AVX1-NEXT: movzbl %al, %eax
5718; AVX1-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
5719; AVX1-NEXT: vpextrb $5, %xmm1, %eax
5720; AVX1-NEXT: movb %al, %sil
5721; AVX1-NEXT: shlb $7, %sil
5722; AVX1-NEXT: movb %al, %dl
5723; AVX1-NEXT: shlb $5, %dl
5724; AVX1-NEXT: andb $64, %dl
5725; AVX1-NEXT: movb %al, %cl
5726; AVX1-NEXT: shlb $3, %cl
5727; AVX1-NEXT: andb $32, %cl
5728; AVX1-NEXT: orb %dl, %cl
5729; AVX1-NEXT: movb %al, %dl
5730; AVX1-NEXT: addb %dl, %dl
5731; AVX1-NEXT: andb $16, %dl
5732; AVX1-NEXT: orb %cl, %dl
5733; AVX1-NEXT: movb %al, %cl
5734; AVX1-NEXT: shrb %cl
5735; AVX1-NEXT: andb $8, %cl
5736; AVX1-NEXT: orb %dl, %cl
5737; AVX1-NEXT: movb %al, %dl
5738; AVX1-NEXT: shrb $3, %dl
5739; AVX1-NEXT: andb $4, %dl
5740; AVX1-NEXT: orb %cl, %dl
5741; AVX1-NEXT: movb %al, %cl
5742; AVX1-NEXT: shrb $5, %cl
5743; AVX1-NEXT: andb $2, %cl
5744; AVX1-NEXT: orb %dl, %cl
5745; AVX1-NEXT: shrb $7, %al
5746; AVX1-NEXT: orb %cl, %al
5747; AVX1-NEXT: orb %sil, %al
5748; AVX1-NEXT: movzbl %al, %eax
5749; AVX1-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
5750; AVX1-NEXT: vpextrb $6, %xmm1, %eax
5751; AVX1-NEXT: movb %al, %sil
5752; AVX1-NEXT: shlb $7, %sil
5753; AVX1-NEXT: movb %al, %dl
5754; AVX1-NEXT: shlb $5, %dl
5755; AVX1-NEXT: andb $64, %dl
5756; AVX1-NEXT: movb %al, %cl
5757; AVX1-NEXT: shlb $3, %cl
5758; AVX1-NEXT: andb $32, %cl
5759; AVX1-NEXT: orb %dl, %cl
5760; AVX1-NEXT: movb %al, %dl
5761; AVX1-NEXT: addb %dl, %dl
5762; AVX1-NEXT: andb $16, %dl
5763; AVX1-NEXT: orb %cl, %dl
5764; AVX1-NEXT: movb %al, %cl
5765; AVX1-NEXT: shrb %cl
5766; AVX1-NEXT: andb $8, %cl
5767; AVX1-NEXT: orb %dl, %cl
5768; AVX1-NEXT: movb %al, %dl
5769; AVX1-NEXT: shrb $3, %dl
5770; AVX1-NEXT: andb $4, %dl
5771; AVX1-NEXT: orb %cl, %dl
5772; AVX1-NEXT: movb %al, %cl
5773; AVX1-NEXT: shrb $5, %cl
5774; AVX1-NEXT: andb $2, %cl
5775; AVX1-NEXT: orb %dl, %cl
5776; AVX1-NEXT: shrb $7, %al
5777; AVX1-NEXT: orb %cl, %al
5778; AVX1-NEXT: orb %sil, %al
5779; AVX1-NEXT: movzbl %al, %eax
5780; AVX1-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
5781; AVX1-NEXT: vpextrb $7, %xmm1, %eax
5782; AVX1-NEXT: movb %al, %sil
5783; AVX1-NEXT: shlb $7, %sil
5784; AVX1-NEXT: movb %al, %dl
5785; AVX1-NEXT: shlb $5, %dl
5786; AVX1-NEXT: andb $64, %dl
5787; AVX1-NEXT: movb %al, %cl
5788; AVX1-NEXT: shlb $3, %cl
5789; AVX1-NEXT: andb $32, %cl
5790; AVX1-NEXT: orb %dl, %cl
5791; AVX1-NEXT: movb %al, %dl
5792; AVX1-NEXT: addb %dl, %dl
5793; AVX1-NEXT: andb $16, %dl
5794; AVX1-NEXT: orb %cl, %dl
5795; AVX1-NEXT: movb %al, %cl
5796; AVX1-NEXT: shrb %cl
5797; AVX1-NEXT: andb $8, %cl
5798; AVX1-NEXT: orb %dl, %cl
5799; AVX1-NEXT: movb %al, %dl
5800; AVX1-NEXT: shrb $3, %dl
5801; AVX1-NEXT: andb $4, %dl
5802; AVX1-NEXT: orb %cl, %dl
5803; AVX1-NEXT: movb %al, %cl
5804; AVX1-NEXT: shrb $5, %cl
5805; AVX1-NEXT: andb $2, %cl
5806; AVX1-NEXT: orb %dl, %cl
5807; AVX1-NEXT: shrb $7, %al
5808; AVX1-NEXT: orb %cl, %al
5809; AVX1-NEXT: orb %sil, %al
5810; AVX1-NEXT: movzbl %al, %eax
5811; AVX1-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
5812; AVX1-NEXT: vpextrb $8, %xmm1, %eax
5813; AVX1-NEXT: movb %al, %sil
5814; AVX1-NEXT: shlb $7, %sil
5815; AVX1-NEXT: movb %al, %dl
5816; AVX1-NEXT: shlb $5, %dl
5817; AVX1-NEXT: andb $64, %dl
5818; AVX1-NEXT: movb %al, %cl
5819; AVX1-NEXT: shlb $3, %cl
5820; AVX1-NEXT: andb $32, %cl
5821; AVX1-NEXT: orb %dl, %cl
5822; AVX1-NEXT: movb %al, %dl
5823; AVX1-NEXT: addb %dl, %dl
5824; AVX1-NEXT: andb $16, %dl
5825; AVX1-NEXT: orb %cl, %dl
5826; AVX1-NEXT: movb %al, %cl
5827; AVX1-NEXT: shrb %cl
5828; AVX1-NEXT: andb $8, %cl
5829; AVX1-NEXT: orb %dl, %cl
5830; AVX1-NEXT: movb %al, %dl
5831; AVX1-NEXT: shrb $3, %dl
5832; AVX1-NEXT: andb $4, %dl
5833; AVX1-NEXT: orb %cl, %dl
5834; AVX1-NEXT: movb %al, %cl
5835; AVX1-NEXT: shrb $5, %cl
5836; AVX1-NEXT: andb $2, %cl
5837; AVX1-NEXT: orb %dl, %cl
5838; AVX1-NEXT: shrb $7, %al
5839; AVX1-NEXT: orb %cl, %al
5840; AVX1-NEXT: orb %sil, %al
5841; AVX1-NEXT: movzbl %al, %eax
5842; AVX1-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
5843; AVX1-NEXT: vpextrb $9, %xmm1, %eax
5844; AVX1-NEXT: movb %al, %sil
5845; AVX1-NEXT: shlb $7, %sil
5846; AVX1-NEXT: movb %al, %dl
5847; AVX1-NEXT: shlb $5, %dl
5848; AVX1-NEXT: andb $64, %dl
5849; AVX1-NEXT: movb %al, %cl
5850; AVX1-NEXT: shlb $3, %cl
5851; AVX1-NEXT: andb $32, %cl
5852; AVX1-NEXT: orb %dl, %cl
5853; AVX1-NEXT: movb %al, %dl
5854; AVX1-NEXT: addb %dl, %dl
5855; AVX1-NEXT: andb $16, %dl
5856; AVX1-NEXT: orb %cl, %dl
5857; AVX1-NEXT: movb %al, %cl
5858; AVX1-NEXT: shrb %cl
5859; AVX1-NEXT: andb $8, %cl
5860; AVX1-NEXT: orb %dl, %cl
5861; AVX1-NEXT: movb %al, %dl
5862; AVX1-NEXT: shrb $3, %dl
5863; AVX1-NEXT: andb $4, %dl
5864; AVX1-NEXT: orb %cl, %dl
5865; AVX1-NEXT: movb %al, %cl
5866; AVX1-NEXT: shrb $5, %cl
5867; AVX1-NEXT: andb $2, %cl
5868; AVX1-NEXT: orb %dl, %cl
5869; AVX1-NEXT: shrb $7, %al
5870; AVX1-NEXT: orb %cl, %al
5871; AVX1-NEXT: orb %sil, %al
5872; AVX1-NEXT: movzbl %al, %eax
5873; AVX1-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
5874; AVX1-NEXT: vpextrb $10, %xmm1, %eax
5875; AVX1-NEXT: movb %al, %sil
5876; AVX1-NEXT: shlb $7, %sil
5877; AVX1-NEXT: movb %al, %dl
5878; AVX1-NEXT: shlb $5, %dl
5879; AVX1-NEXT: andb $64, %dl
5880; AVX1-NEXT: movb %al, %cl
5881; AVX1-NEXT: shlb $3, %cl
5882; AVX1-NEXT: andb $32, %cl
5883; AVX1-NEXT: orb %dl, %cl
5884; AVX1-NEXT: movb %al, %dl
5885; AVX1-NEXT: addb %dl, %dl
5886; AVX1-NEXT: andb $16, %dl
5887; AVX1-NEXT: orb %cl, %dl
5888; AVX1-NEXT: movb %al, %cl
5889; AVX1-NEXT: shrb %cl
5890; AVX1-NEXT: andb $8, %cl
5891; AVX1-NEXT: orb %dl, %cl
5892; AVX1-NEXT: movb %al, %dl
5893; AVX1-NEXT: shrb $3, %dl
5894; AVX1-NEXT: andb $4, %dl
5895; AVX1-NEXT: orb %cl, %dl
5896; AVX1-NEXT: movb %al, %cl
5897; AVX1-NEXT: shrb $5, %cl
5898; AVX1-NEXT: andb $2, %cl
5899; AVX1-NEXT: orb %dl, %cl
5900; AVX1-NEXT: shrb $7, %al
5901; AVX1-NEXT: orb %cl, %al
5902; AVX1-NEXT: orb %sil, %al
5903; AVX1-NEXT: movzbl %al, %eax
5904; AVX1-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
5905; AVX1-NEXT: vpextrb $11, %xmm1, %eax
5906; AVX1-NEXT: movb %al, %sil
5907; AVX1-NEXT: shlb $7, %sil
5908; AVX1-NEXT: movb %al, %dl
5909; AVX1-NEXT: shlb $5, %dl
5910; AVX1-NEXT: andb $64, %dl
5911; AVX1-NEXT: movb %al, %cl
5912; AVX1-NEXT: shlb $3, %cl
5913; AVX1-NEXT: andb $32, %cl
5914; AVX1-NEXT: orb %dl, %cl
5915; AVX1-NEXT: movb %al, %dl
5916; AVX1-NEXT: addb %dl, %dl
5917; AVX1-NEXT: andb $16, %dl
5918; AVX1-NEXT: orb %cl, %dl
5919; AVX1-NEXT: movb %al, %cl
5920; AVX1-NEXT: shrb %cl
5921; AVX1-NEXT: andb $8, %cl
5922; AVX1-NEXT: orb %dl, %cl
5923; AVX1-NEXT: movb %al, %dl
5924; AVX1-NEXT: shrb $3, %dl
5925; AVX1-NEXT: andb $4, %dl
5926; AVX1-NEXT: orb %cl, %dl
5927; AVX1-NEXT: movb %al, %cl
5928; AVX1-NEXT: shrb $5, %cl
5929; AVX1-NEXT: andb $2, %cl
5930; AVX1-NEXT: orb %dl, %cl
5931; AVX1-NEXT: shrb $7, %al
5932; AVX1-NEXT: orb %cl, %al
5933; AVX1-NEXT: orb %sil, %al
5934; AVX1-NEXT: movzbl %al, %eax
5935; AVX1-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
5936; AVX1-NEXT: vpextrb $12, %xmm1, %eax
5937; AVX1-NEXT: movb %al, %sil
5938; AVX1-NEXT: shlb $7, %sil
5939; AVX1-NEXT: movb %al, %dl
5940; AVX1-NEXT: shlb $5, %dl
5941; AVX1-NEXT: andb $64, %dl
5942; AVX1-NEXT: movb %al, %cl
5943; AVX1-NEXT: shlb $3, %cl
5944; AVX1-NEXT: andb $32, %cl
5945; AVX1-NEXT: orb %dl, %cl
5946; AVX1-NEXT: movb %al, %dl
5947; AVX1-NEXT: addb %dl, %dl
5948; AVX1-NEXT: andb $16, %dl
5949; AVX1-NEXT: orb %cl, %dl
5950; AVX1-NEXT: movb %al, %cl
5951; AVX1-NEXT: shrb %cl
5952; AVX1-NEXT: andb $8, %cl
5953; AVX1-NEXT: orb %dl, %cl
5954; AVX1-NEXT: movb %al, %dl
5955; AVX1-NEXT: shrb $3, %dl
5956; AVX1-NEXT: andb $4, %dl
5957; AVX1-NEXT: orb %cl, %dl
5958; AVX1-NEXT: movb %al, %cl
5959; AVX1-NEXT: shrb $5, %cl
5960; AVX1-NEXT: andb $2, %cl
5961; AVX1-NEXT: orb %dl, %cl
5962; AVX1-NEXT: shrb $7, %al
5963; AVX1-NEXT: orb %cl, %al
5964; AVX1-NEXT: orb %sil, %al
5965; AVX1-NEXT: movzbl %al, %eax
5966; AVX1-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
5967; AVX1-NEXT: vpextrb $13, %xmm1, %eax
5968; AVX1-NEXT: movb %al, %sil
5969; AVX1-NEXT: shlb $7, %sil
5970; AVX1-NEXT: movb %al, %dl
5971; AVX1-NEXT: shlb $5, %dl
5972; AVX1-NEXT: andb $64, %dl
5973; AVX1-NEXT: movb %al, %cl
5974; AVX1-NEXT: shlb $3, %cl
5975; AVX1-NEXT: andb $32, %cl
5976; AVX1-NEXT: orb %dl, %cl
5977; AVX1-NEXT: movb %al, %dl
5978; AVX1-NEXT: addb %dl, %dl
5979; AVX1-NEXT: andb $16, %dl
5980; AVX1-NEXT: orb %cl, %dl
5981; AVX1-NEXT: movb %al, %cl
5982; AVX1-NEXT: shrb %cl
5983; AVX1-NEXT: andb $8, %cl
5984; AVX1-NEXT: orb %dl, %cl
5985; AVX1-NEXT: movb %al, %dl
5986; AVX1-NEXT: shrb $3, %dl
5987; AVX1-NEXT: andb $4, %dl
5988; AVX1-NEXT: orb %cl, %dl
5989; AVX1-NEXT: movb %al, %cl
5990; AVX1-NEXT: shrb $5, %cl
5991; AVX1-NEXT: andb $2, %cl
5992; AVX1-NEXT: orb %dl, %cl
5993; AVX1-NEXT: shrb $7, %al
5994; AVX1-NEXT: orb %cl, %al
5995; AVX1-NEXT: orb %sil, %al
5996; AVX1-NEXT: movzbl %al, %eax
5997; AVX1-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
5998; AVX1-NEXT: vpextrb $14, %xmm1, %eax
5999; AVX1-NEXT: movb %al, %sil
6000; AVX1-NEXT: shlb $7, %sil
6001; AVX1-NEXT: movb %al, %dl
6002; AVX1-NEXT: shlb $5, %dl
6003; AVX1-NEXT: andb $64, %dl
6004; AVX1-NEXT: movb %al, %cl
6005; AVX1-NEXT: shlb $3, %cl
6006; AVX1-NEXT: andb $32, %cl
6007; AVX1-NEXT: orb %dl, %cl
6008; AVX1-NEXT: movb %al, %dl
6009; AVX1-NEXT: addb %dl, %dl
6010; AVX1-NEXT: andb $16, %dl
6011; AVX1-NEXT: orb %cl, %dl
6012; AVX1-NEXT: movb %al, %cl
6013; AVX1-NEXT: shrb %cl
6014; AVX1-NEXT: andb $8, %cl
6015; AVX1-NEXT: orb %dl, %cl
6016; AVX1-NEXT: movb %al, %dl
6017; AVX1-NEXT: shrb $3, %dl
6018; AVX1-NEXT: andb $4, %dl
6019; AVX1-NEXT: orb %cl, %dl
6020; AVX1-NEXT: movb %al, %cl
6021; AVX1-NEXT: shrb $5, %cl
6022; AVX1-NEXT: andb $2, %cl
6023; AVX1-NEXT: orb %dl, %cl
6024; AVX1-NEXT: shrb $7, %al
6025; AVX1-NEXT: orb %cl, %al
6026; AVX1-NEXT: orb %sil, %al
6027; AVX1-NEXT: movzbl %al, %eax
6028; AVX1-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
6029; AVX1-NEXT: vpextrb $15, %xmm1, %eax
6030; AVX1-NEXT: movb %al, %sil
6031; AVX1-NEXT: shlb $7, %sil
6032; AVX1-NEXT: movb %al, %dl
6033; AVX1-NEXT: shlb $5, %dl
6034; AVX1-NEXT: andb $64, %dl
6035; AVX1-NEXT: movb %al, %cl
6036; AVX1-NEXT: shlb $3, %cl
6037; AVX1-NEXT: andb $32, %cl
6038; AVX1-NEXT: orb %dl, %cl
6039; AVX1-NEXT: movb %al, %dl
6040; AVX1-NEXT: addb %dl, %dl
6041; AVX1-NEXT: andb $16, %dl
6042; AVX1-NEXT: orb %cl, %dl
6043; AVX1-NEXT: movb %al, %cl
6044; AVX1-NEXT: shrb %cl
6045; AVX1-NEXT: andb $8, %cl
6046; AVX1-NEXT: orb %dl, %cl
6047; AVX1-NEXT: movb %al, %dl
6048; AVX1-NEXT: shrb $3, %dl
6049; AVX1-NEXT: andb $4, %dl
6050; AVX1-NEXT: orb %cl, %dl
6051; AVX1-NEXT: movb %al, %cl
6052; AVX1-NEXT: shrb $5, %cl
6053; AVX1-NEXT: andb $2, %cl
6054; AVX1-NEXT: orb %dl, %cl
6055; AVX1-NEXT: shrb $7, %al
6056; AVX1-NEXT: orb %cl, %al
6057; AVX1-NEXT: orb %sil, %al
6058; AVX1-NEXT: movzbl %al, %eax
6059; AVX1-NEXT: vpinsrb $15, %eax, %xmm2, %xmm1
6060; AVX1-NEXT: vpextrb $1, %xmm0, %eax
6061; AVX1-NEXT: movb %al, %sil
6062; AVX1-NEXT: shlb $7, %sil
6063; AVX1-NEXT: movb %al, %dl
6064; AVX1-NEXT: shlb $5, %dl
6065; AVX1-NEXT: andb $64, %dl
6066; AVX1-NEXT: movb %al, %cl
6067; AVX1-NEXT: shlb $3, %cl
6068; AVX1-NEXT: andb $32, %cl
6069; AVX1-NEXT: orb %dl, %cl
6070; AVX1-NEXT: movb %al, %dl
6071; AVX1-NEXT: addb %dl, %dl
6072; AVX1-NEXT: andb $16, %dl
6073; AVX1-NEXT: orb %cl, %dl
6074; AVX1-NEXT: movb %al, %cl
6075; AVX1-NEXT: shrb %cl
6076; AVX1-NEXT: andb $8, %cl
6077; AVX1-NEXT: orb %dl, %cl
6078; AVX1-NEXT: movb %al, %dl
6079; AVX1-NEXT: shrb $3, %dl
6080; AVX1-NEXT: andb $4, %dl
6081; AVX1-NEXT: orb %cl, %dl
6082; AVX1-NEXT: movb %al, %cl
6083; AVX1-NEXT: shrb $5, %cl
6084; AVX1-NEXT: andb $2, %cl
6085; AVX1-NEXT: orb %dl, %cl
6086; AVX1-NEXT: shrb $7, %al
6087; AVX1-NEXT: orb %cl, %al
6088; AVX1-NEXT: orb %sil, %al
6089; AVX1-NEXT: movzbl %al, %esi
6090; AVX1-NEXT: vpextrb $0, %xmm0, %ecx
6091; AVX1-NEXT: movb %cl, %dil
6092; AVX1-NEXT: shlb $7, %dil
6093; AVX1-NEXT: movb %cl, %al
6094; AVX1-NEXT: shlb $5, %al
6095; AVX1-NEXT: andb $64, %al
6096; AVX1-NEXT: movb %cl, %dl
6097; AVX1-NEXT: shlb $3, %dl
6098; AVX1-NEXT: andb $32, %dl
6099; AVX1-NEXT: orb %al, %dl
6100; AVX1-NEXT: movb %cl, %al
6101; AVX1-NEXT: addb %al, %al
6102; AVX1-NEXT: andb $16, %al
6103; AVX1-NEXT: orb %dl, %al
6104; AVX1-NEXT: movb %cl, %dl
6105; AVX1-NEXT: shrb %dl
6106; AVX1-NEXT: andb $8, %dl
6107; AVX1-NEXT: orb %al, %dl
6108; AVX1-NEXT: movb %cl, %al
6109; AVX1-NEXT: shrb $3, %al
6110; AVX1-NEXT: andb $4, %al
6111; AVX1-NEXT: orb %dl, %al
6112; AVX1-NEXT: movb %cl, %dl
6113; AVX1-NEXT: shrb $5, %dl
6114; AVX1-NEXT: andb $2, %dl
6115; AVX1-NEXT: orb %al, %dl
6116; AVX1-NEXT: shrb $7, %cl
6117; AVX1-NEXT: orb %dl, %cl
6118; AVX1-NEXT: orb %dil, %cl
6119; AVX1-NEXT: movzbl %cl, %eax
6120; AVX1-NEXT: vmovd %eax, %xmm2
6121; AVX1-NEXT: vpinsrb $1, %esi, %xmm2, %xmm2
6122; AVX1-NEXT: vpextrb $2, %xmm0, %eax
6123; AVX1-NEXT: movb %al, %sil
6124; AVX1-NEXT: shlb $7, %sil
6125; AVX1-NEXT: movb %al, %dl
6126; AVX1-NEXT: shlb $5, %dl
6127; AVX1-NEXT: andb $64, %dl
6128; AVX1-NEXT: movb %al, %cl
6129; AVX1-NEXT: shlb $3, %cl
6130; AVX1-NEXT: andb $32, %cl
6131; AVX1-NEXT: orb %dl, %cl
6132; AVX1-NEXT: movb %al, %dl
6133; AVX1-NEXT: addb %dl, %dl
6134; AVX1-NEXT: andb $16, %dl
6135; AVX1-NEXT: orb %cl, %dl
6136; AVX1-NEXT: movb %al, %cl
6137; AVX1-NEXT: shrb %cl
6138; AVX1-NEXT: andb $8, %cl
6139; AVX1-NEXT: orb %dl, %cl
6140; AVX1-NEXT: movb %al, %dl
6141; AVX1-NEXT: shrb $3, %dl
6142; AVX1-NEXT: andb $4, %dl
6143; AVX1-NEXT: orb %cl, %dl
6144; AVX1-NEXT: movb %al, %cl
6145; AVX1-NEXT: shrb $5, %cl
6146; AVX1-NEXT: andb $2, %cl
6147; AVX1-NEXT: orb %dl, %cl
6148; AVX1-NEXT: shrb $7, %al
6149; AVX1-NEXT: orb %cl, %al
6150; AVX1-NEXT: orb %sil, %al
6151; AVX1-NEXT: movzbl %al, %eax
6152; AVX1-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
6153; AVX1-NEXT: vpextrb $3, %xmm0, %eax
6154; AVX1-NEXT: movb %al, %sil
6155; AVX1-NEXT: shlb $7, %sil
6156; AVX1-NEXT: movb %al, %dl
6157; AVX1-NEXT: shlb $5, %dl
6158; AVX1-NEXT: andb $64, %dl
6159; AVX1-NEXT: movb %al, %cl
6160; AVX1-NEXT: shlb $3, %cl
6161; AVX1-NEXT: andb $32, %cl
6162; AVX1-NEXT: orb %dl, %cl
6163; AVX1-NEXT: movb %al, %dl
6164; AVX1-NEXT: addb %dl, %dl
6165; AVX1-NEXT: andb $16, %dl
6166; AVX1-NEXT: orb %cl, %dl
6167; AVX1-NEXT: movb %al, %cl
6168; AVX1-NEXT: shrb %cl
6169; AVX1-NEXT: andb $8, %cl
6170; AVX1-NEXT: orb %dl, %cl
6171; AVX1-NEXT: movb %al, %dl
6172; AVX1-NEXT: shrb $3, %dl
6173; AVX1-NEXT: andb $4, %dl
6174; AVX1-NEXT: orb %cl, %dl
6175; AVX1-NEXT: movb %al, %cl
6176; AVX1-NEXT: shrb $5, %cl
6177; AVX1-NEXT: andb $2, %cl
6178; AVX1-NEXT: orb %dl, %cl
6179; AVX1-NEXT: shrb $7, %al
6180; AVX1-NEXT: orb %cl, %al
6181; AVX1-NEXT: orb %sil, %al
6182; AVX1-NEXT: movzbl %al, %eax
6183; AVX1-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
6184; AVX1-NEXT: vpextrb $4, %xmm0, %eax
6185; AVX1-NEXT: movb %al, %sil
6186; AVX1-NEXT: shlb $7, %sil
6187; AVX1-NEXT: movb %al, %dl
6188; AVX1-NEXT: shlb $5, %dl
6189; AVX1-NEXT: andb $64, %dl
6190; AVX1-NEXT: movb %al, %cl
6191; AVX1-NEXT: shlb $3, %cl
6192; AVX1-NEXT: andb $32, %cl
6193; AVX1-NEXT: orb %dl, %cl
6194; AVX1-NEXT: movb %al, %dl
6195; AVX1-NEXT: addb %dl, %dl
6196; AVX1-NEXT: andb $16, %dl
6197; AVX1-NEXT: orb %cl, %dl
6198; AVX1-NEXT: movb %al, %cl
6199; AVX1-NEXT: shrb %cl
6200; AVX1-NEXT: andb $8, %cl
6201; AVX1-NEXT: orb %dl, %cl
6202; AVX1-NEXT: movb %al, %dl
6203; AVX1-NEXT: shrb $3, %dl
6204; AVX1-NEXT: andb $4, %dl
6205; AVX1-NEXT: orb %cl, %dl
6206; AVX1-NEXT: movb %al, %cl
6207; AVX1-NEXT: shrb $5, %cl
6208; AVX1-NEXT: andb $2, %cl
6209; AVX1-NEXT: orb %dl, %cl
6210; AVX1-NEXT: shrb $7, %al
6211; AVX1-NEXT: orb %cl, %al
6212; AVX1-NEXT: orb %sil, %al
6213; AVX1-NEXT: movzbl %al, %eax
6214; AVX1-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
6215; AVX1-NEXT: vpextrb $5, %xmm0, %eax
6216; AVX1-NEXT: movb %al, %sil
6217; AVX1-NEXT: shlb $7, %sil
6218; AVX1-NEXT: movb %al, %dl
6219; AVX1-NEXT: shlb $5, %dl
6220; AVX1-NEXT: andb $64, %dl
6221; AVX1-NEXT: movb %al, %cl
6222; AVX1-NEXT: shlb $3, %cl
6223; AVX1-NEXT: andb $32, %cl
6224; AVX1-NEXT: orb %dl, %cl
6225; AVX1-NEXT: movb %al, %dl
6226; AVX1-NEXT: addb %dl, %dl
6227; AVX1-NEXT: andb $16, %dl
6228; AVX1-NEXT: orb %cl, %dl
6229; AVX1-NEXT: movb %al, %cl
6230; AVX1-NEXT: shrb %cl
6231; AVX1-NEXT: andb $8, %cl
6232; AVX1-NEXT: orb %dl, %cl
6233; AVX1-NEXT: movb %al, %dl
6234; AVX1-NEXT: shrb $3, %dl
6235; AVX1-NEXT: andb $4, %dl
6236; AVX1-NEXT: orb %cl, %dl
6237; AVX1-NEXT: movb %al, %cl
6238; AVX1-NEXT: shrb $5, %cl
6239; AVX1-NEXT: andb $2, %cl
6240; AVX1-NEXT: orb %dl, %cl
6241; AVX1-NEXT: shrb $7, %al
6242; AVX1-NEXT: orb %cl, %al
6243; AVX1-NEXT: orb %sil, %al
6244; AVX1-NEXT: movzbl %al, %eax
6245; AVX1-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
6246; AVX1-NEXT: vpextrb $6, %xmm0, %eax
6247; AVX1-NEXT: movb %al, %sil
6248; AVX1-NEXT: shlb $7, %sil
6249; AVX1-NEXT: movb %al, %dl
6250; AVX1-NEXT: shlb $5, %dl
6251; AVX1-NEXT: andb $64, %dl
6252; AVX1-NEXT: movb %al, %cl
6253; AVX1-NEXT: shlb $3, %cl
6254; AVX1-NEXT: andb $32, %cl
6255; AVX1-NEXT: orb %dl, %cl
6256; AVX1-NEXT: movb %al, %dl
6257; AVX1-NEXT: addb %dl, %dl
6258; AVX1-NEXT: andb $16, %dl
6259; AVX1-NEXT: orb %cl, %dl
6260; AVX1-NEXT: movb %al, %cl
6261; AVX1-NEXT: shrb %cl
6262; AVX1-NEXT: andb $8, %cl
6263; AVX1-NEXT: orb %dl, %cl
6264; AVX1-NEXT: movb %al, %dl
6265; AVX1-NEXT: shrb $3, %dl
6266; AVX1-NEXT: andb $4, %dl
6267; AVX1-NEXT: orb %cl, %dl
6268; AVX1-NEXT: movb %al, %cl
6269; AVX1-NEXT: shrb $5, %cl
6270; AVX1-NEXT: andb $2, %cl
6271; AVX1-NEXT: orb %dl, %cl
6272; AVX1-NEXT: shrb $7, %al
6273; AVX1-NEXT: orb %cl, %al
6274; AVX1-NEXT: orb %sil, %al
6275; AVX1-NEXT: movzbl %al, %eax
6276; AVX1-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
6277; AVX1-NEXT: vpextrb $7, %xmm0, %eax
6278; AVX1-NEXT: movb %al, %sil
6279; AVX1-NEXT: shlb $7, %sil
6280; AVX1-NEXT: movb %al, %dl
6281; AVX1-NEXT: shlb $5, %dl
6282; AVX1-NEXT: andb $64, %dl
6283; AVX1-NEXT: movb %al, %cl
6284; AVX1-NEXT: shlb $3, %cl
6285; AVX1-NEXT: andb $32, %cl
6286; AVX1-NEXT: orb %dl, %cl
6287; AVX1-NEXT: movb %al, %dl
6288; AVX1-NEXT: addb %dl, %dl
6289; AVX1-NEXT: andb $16, %dl
6290; AVX1-NEXT: orb %cl, %dl
6291; AVX1-NEXT: movb %al, %cl
6292; AVX1-NEXT: shrb %cl
6293; AVX1-NEXT: andb $8, %cl
6294; AVX1-NEXT: orb %dl, %cl
6295; AVX1-NEXT: movb %al, %dl
6296; AVX1-NEXT: shrb $3, %dl
6297; AVX1-NEXT: andb $4, %dl
6298; AVX1-NEXT: orb %cl, %dl
6299; AVX1-NEXT: movb %al, %cl
6300; AVX1-NEXT: shrb $5, %cl
6301; AVX1-NEXT: andb $2, %cl
6302; AVX1-NEXT: orb %dl, %cl
6303; AVX1-NEXT: shrb $7, %al
6304; AVX1-NEXT: orb %cl, %al
6305; AVX1-NEXT: orb %sil, %al
6306; AVX1-NEXT: movzbl %al, %eax
6307; AVX1-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
6308; AVX1-NEXT: vpextrb $8, %xmm0, %eax
6309; AVX1-NEXT: movb %al, %sil
6310; AVX1-NEXT: shlb $7, %sil
6311; AVX1-NEXT: movb %al, %dl
6312; AVX1-NEXT: shlb $5, %dl
6313; AVX1-NEXT: andb $64, %dl
6314; AVX1-NEXT: movb %al, %cl
6315; AVX1-NEXT: shlb $3, %cl
6316; AVX1-NEXT: andb $32, %cl
6317; AVX1-NEXT: orb %dl, %cl
6318; AVX1-NEXT: movb %al, %dl
6319; AVX1-NEXT: addb %dl, %dl
6320; AVX1-NEXT: andb $16, %dl
6321; AVX1-NEXT: orb %cl, %dl
6322; AVX1-NEXT: movb %al, %cl
6323; AVX1-NEXT: shrb %cl
6324; AVX1-NEXT: andb $8, %cl
6325; AVX1-NEXT: orb %dl, %cl
6326; AVX1-NEXT: movb %al, %dl
6327; AVX1-NEXT: shrb $3, %dl
6328; AVX1-NEXT: andb $4, %dl
6329; AVX1-NEXT: orb %cl, %dl
6330; AVX1-NEXT: movb %al, %cl
6331; AVX1-NEXT: shrb $5, %cl
6332; AVX1-NEXT: andb $2, %cl
6333; AVX1-NEXT: orb %dl, %cl
6334; AVX1-NEXT: shrb $7, %al
6335; AVX1-NEXT: orb %cl, %al
6336; AVX1-NEXT: orb %sil, %al
6337; AVX1-NEXT: movzbl %al, %eax
6338; AVX1-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
6339; AVX1-NEXT: vpextrb $9, %xmm0, %eax
6340; AVX1-NEXT: movb %al, %sil
6341; AVX1-NEXT: shlb $7, %sil
6342; AVX1-NEXT: movb %al, %dl
6343; AVX1-NEXT: shlb $5, %dl
6344; AVX1-NEXT: andb $64, %dl
6345; AVX1-NEXT: movb %al, %cl
6346; AVX1-NEXT: shlb $3, %cl
6347; AVX1-NEXT: andb $32, %cl
6348; AVX1-NEXT: orb %dl, %cl
6349; AVX1-NEXT: movb %al, %dl
6350; AVX1-NEXT: addb %dl, %dl
6351; AVX1-NEXT: andb $16, %dl
6352; AVX1-NEXT: orb %cl, %dl
6353; AVX1-NEXT: movb %al, %cl
6354; AVX1-NEXT: shrb %cl
6355; AVX1-NEXT: andb $8, %cl
6356; AVX1-NEXT: orb %dl, %cl
6357; AVX1-NEXT: movb %al, %dl
6358; AVX1-NEXT: shrb $3, %dl
6359; AVX1-NEXT: andb $4, %dl
6360; AVX1-NEXT: orb %cl, %dl
6361; AVX1-NEXT: movb %al, %cl
6362; AVX1-NEXT: shrb $5, %cl
6363; AVX1-NEXT: andb $2, %cl
6364; AVX1-NEXT: orb %dl, %cl
6365; AVX1-NEXT: shrb $7, %al
6366; AVX1-NEXT: orb %cl, %al
6367; AVX1-NEXT: orb %sil, %al
6368; AVX1-NEXT: movzbl %al, %eax
6369; AVX1-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
6370; AVX1-NEXT: vpextrb $10, %xmm0, %eax
6371; AVX1-NEXT: movb %al, %sil
6372; AVX1-NEXT: shlb $7, %sil
6373; AVX1-NEXT: movb %al, %dl
6374; AVX1-NEXT: shlb $5, %dl
6375; AVX1-NEXT: andb $64, %dl
6376; AVX1-NEXT: movb %al, %cl
6377; AVX1-NEXT: shlb $3, %cl
6378; AVX1-NEXT: andb $32, %cl
6379; AVX1-NEXT: orb %dl, %cl
6380; AVX1-NEXT: movb %al, %dl
6381; AVX1-NEXT: addb %dl, %dl
6382; AVX1-NEXT: andb $16, %dl
6383; AVX1-NEXT: orb %cl, %dl
6384; AVX1-NEXT: movb %al, %cl
6385; AVX1-NEXT: shrb %cl
6386; AVX1-NEXT: andb $8, %cl
6387; AVX1-NEXT: orb %dl, %cl
6388; AVX1-NEXT: movb %al, %dl
6389; AVX1-NEXT: shrb $3, %dl
6390; AVX1-NEXT: andb $4, %dl
6391; AVX1-NEXT: orb %cl, %dl
6392; AVX1-NEXT: movb %al, %cl
6393; AVX1-NEXT: shrb $5, %cl
6394; AVX1-NEXT: andb $2, %cl
6395; AVX1-NEXT: orb %dl, %cl
6396; AVX1-NEXT: shrb $7, %al
6397; AVX1-NEXT: orb %cl, %al
6398; AVX1-NEXT: orb %sil, %al
6399; AVX1-NEXT: movzbl %al, %eax
6400; AVX1-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
6401; AVX1-NEXT: vpextrb $11, %xmm0, %eax
6402; AVX1-NEXT: movb %al, %sil
6403; AVX1-NEXT: shlb $7, %sil
6404; AVX1-NEXT: movb %al, %dl
6405; AVX1-NEXT: shlb $5, %dl
6406; AVX1-NEXT: andb $64, %dl
6407; AVX1-NEXT: movb %al, %cl
6408; AVX1-NEXT: shlb $3, %cl
6409; AVX1-NEXT: andb $32, %cl
6410; AVX1-NEXT: orb %dl, %cl
6411; AVX1-NEXT: movb %al, %dl
6412; AVX1-NEXT: addb %dl, %dl
6413; AVX1-NEXT: andb $16, %dl
6414; AVX1-NEXT: orb %cl, %dl
6415; AVX1-NEXT: movb %al, %cl
6416; AVX1-NEXT: shrb %cl
6417; AVX1-NEXT: andb $8, %cl
6418; AVX1-NEXT: orb %dl, %cl
6419; AVX1-NEXT: movb %al, %dl
6420; AVX1-NEXT: shrb $3, %dl
6421; AVX1-NEXT: andb $4, %dl
6422; AVX1-NEXT: orb %cl, %dl
6423; AVX1-NEXT: movb %al, %cl
6424; AVX1-NEXT: shrb $5, %cl
6425; AVX1-NEXT: andb $2, %cl
6426; AVX1-NEXT: orb %dl, %cl
6427; AVX1-NEXT: shrb $7, %al
6428; AVX1-NEXT: orb %cl, %al
6429; AVX1-NEXT: orb %sil, %al
6430; AVX1-NEXT: movzbl %al, %eax
6431; AVX1-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
6432; AVX1-NEXT: vpextrb $12, %xmm0, %eax
6433; AVX1-NEXT: movb %al, %sil
6434; AVX1-NEXT: shlb $7, %sil
6435; AVX1-NEXT: movb %al, %dl
6436; AVX1-NEXT: shlb $5, %dl
6437; AVX1-NEXT: andb $64, %dl
6438; AVX1-NEXT: movb %al, %cl
6439; AVX1-NEXT: shlb $3, %cl
6440; AVX1-NEXT: andb $32, %cl
6441; AVX1-NEXT: orb %dl, %cl
6442; AVX1-NEXT: movb %al, %dl
6443; AVX1-NEXT: addb %dl, %dl
6444; AVX1-NEXT: andb $16, %dl
6445; AVX1-NEXT: orb %cl, %dl
6446; AVX1-NEXT: movb %al, %cl
6447; AVX1-NEXT: shrb %cl
6448; AVX1-NEXT: andb $8, %cl
6449; AVX1-NEXT: orb %dl, %cl
6450; AVX1-NEXT: movb %al, %dl
6451; AVX1-NEXT: shrb $3, %dl
6452; AVX1-NEXT: andb $4, %dl
6453; AVX1-NEXT: orb %cl, %dl
6454; AVX1-NEXT: movb %al, %cl
6455; AVX1-NEXT: shrb $5, %cl
6456; AVX1-NEXT: andb $2, %cl
6457; AVX1-NEXT: orb %dl, %cl
6458; AVX1-NEXT: shrb $7, %al
6459; AVX1-NEXT: orb %cl, %al
6460; AVX1-NEXT: orb %sil, %al
6461; AVX1-NEXT: movzbl %al, %eax
6462; AVX1-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
6463; AVX1-NEXT: vpextrb $13, %xmm0, %eax
6464; AVX1-NEXT: movb %al, %sil
6465; AVX1-NEXT: shlb $7, %sil
6466; AVX1-NEXT: movb %al, %dl
6467; AVX1-NEXT: shlb $5, %dl
6468; AVX1-NEXT: andb $64, %dl
6469; AVX1-NEXT: movb %al, %cl
6470; AVX1-NEXT: shlb $3, %cl
6471; AVX1-NEXT: andb $32, %cl
6472; AVX1-NEXT: orb %dl, %cl
6473; AVX1-NEXT: movb %al, %dl
6474; AVX1-NEXT: addb %dl, %dl
6475; AVX1-NEXT: andb $16, %dl
6476; AVX1-NEXT: orb %cl, %dl
6477; AVX1-NEXT: movb %al, %cl
6478; AVX1-NEXT: shrb %cl
6479; AVX1-NEXT: andb $8, %cl
6480; AVX1-NEXT: orb %dl, %cl
6481; AVX1-NEXT: movb %al, %dl
6482; AVX1-NEXT: shrb $3, %dl
6483; AVX1-NEXT: andb $4, %dl
6484; AVX1-NEXT: orb %cl, %dl
6485; AVX1-NEXT: movb %al, %cl
6486; AVX1-NEXT: shrb $5, %cl
6487; AVX1-NEXT: andb $2, %cl
6488; AVX1-NEXT: orb %dl, %cl
6489; AVX1-NEXT: shrb $7, %al
6490; AVX1-NEXT: orb %cl, %al
6491; AVX1-NEXT: orb %sil, %al
6492; AVX1-NEXT: movzbl %al, %eax
6493; AVX1-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
6494; AVX1-NEXT: vpextrb $14, %xmm0, %eax
6495; AVX1-NEXT: movb %al, %sil
6496; AVX1-NEXT: shlb $7, %sil
6497; AVX1-NEXT: movb %al, %dl
6498; AVX1-NEXT: shlb $5, %dl
6499; AVX1-NEXT: andb $64, %dl
6500; AVX1-NEXT: movb %al, %cl
6501; AVX1-NEXT: shlb $3, %cl
6502; AVX1-NEXT: andb $32, %cl
6503; AVX1-NEXT: orb %dl, %cl
6504; AVX1-NEXT: movb %al, %dl
6505; AVX1-NEXT: addb %dl, %dl
6506; AVX1-NEXT: andb $16, %dl
6507; AVX1-NEXT: orb %cl, %dl
6508; AVX1-NEXT: movb %al, %cl
6509; AVX1-NEXT: shrb %cl
6510; AVX1-NEXT: andb $8, %cl
6511; AVX1-NEXT: orb %dl, %cl
6512; AVX1-NEXT: movb %al, %dl
6513; AVX1-NEXT: shrb $3, %dl
6514; AVX1-NEXT: andb $4, %dl
6515; AVX1-NEXT: orb %cl, %dl
6516; AVX1-NEXT: movb %al, %cl
6517; AVX1-NEXT: shrb $5, %cl
6518; AVX1-NEXT: andb $2, %cl
6519; AVX1-NEXT: orb %dl, %cl
6520; AVX1-NEXT: shrb $7, %al
6521; AVX1-NEXT: orb %cl, %al
6522; AVX1-NEXT: orb %sil, %al
6523; AVX1-NEXT: movzbl %al, %eax
6524; AVX1-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
6525; AVX1-NEXT: vpextrb $15, %xmm0, %eax
6526; AVX1-NEXT: movb %al, %sil
6527; AVX1-NEXT: shlb $7, %sil
6528; AVX1-NEXT: movb %al, %dl
6529; AVX1-NEXT: shlb $5, %dl
6530; AVX1-NEXT: andb $64, %dl
6531; AVX1-NEXT: movb %al, %cl
6532; AVX1-NEXT: shlb $3, %cl
6533; AVX1-NEXT: andb $32, %cl
6534; AVX1-NEXT: orb %dl, %cl
6535; AVX1-NEXT: movb %al, %dl
6536; AVX1-NEXT: addb %dl, %dl
6537; AVX1-NEXT: andb $16, %dl
6538; AVX1-NEXT: orb %cl, %dl
6539; AVX1-NEXT: movb %al, %cl
6540; AVX1-NEXT: shrb %cl
6541; AVX1-NEXT: andb $8, %cl
6542; AVX1-NEXT: orb %dl, %cl
6543; AVX1-NEXT: movb %al, %dl
6544; AVX1-NEXT: shrb $3, %dl
6545; AVX1-NEXT: andb $4, %dl
6546; AVX1-NEXT: orb %cl, %dl
6547; AVX1-NEXT: movb %al, %cl
6548; AVX1-NEXT: shrb $5, %cl
6549; AVX1-NEXT: andb $2, %cl
6550; AVX1-NEXT: orb %dl, %cl
6551; AVX1-NEXT: shrb $7, %al
6552; AVX1-NEXT: orb %cl, %al
6553; AVX1-NEXT: orb %sil, %al
6554; AVX1-NEXT: movzbl %al, %eax
6555; AVX1-NEXT: vpinsrb $15, %eax, %xmm2, %xmm0
6556; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
6557; AVX1-NEXT: retq
6558;
6559; AVX2-LABEL: test_bitreverse_v32i8:
6560; AVX2: # BB#0:
6561; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
6562; AVX2-NEXT: vpextrb $1, %xmm1, %eax
6563; AVX2-NEXT: movb %al, %sil
6564; AVX2-NEXT: shlb $7, %sil
6565; AVX2-NEXT: movb %al, %dl
6566; AVX2-NEXT: shlb $5, %dl
6567; AVX2-NEXT: andb $64, %dl
6568; AVX2-NEXT: movb %al, %cl
6569; AVX2-NEXT: shlb $3, %cl
6570; AVX2-NEXT: andb $32, %cl
6571; AVX2-NEXT: orb %dl, %cl
6572; AVX2-NEXT: movb %al, %dl
6573; AVX2-NEXT: addb %dl, %dl
6574; AVX2-NEXT: andb $16, %dl
6575; AVX2-NEXT: orb %cl, %dl
6576; AVX2-NEXT: movb %al, %cl
6577; AVX2-NEXT: shrb %cl
6578; AVX2-NEXT: andb $8, %cl
6579; AVX2-NEXT: orb %dl, %cl
6580; AVX2-NEXT: movb %al, %dl
6581; AVX2-NEXT: shrb $3, %dl
6582; AVX2-NEXT: andb $4, %dl
6583; AVX2-NEXT: orb %cl, %dl
6584; AVX2-NEXT: movb %al, %cl
6585; AVX2-NEXT: shrb $5, %cl
6586; AVX2-NEXT: andb $2, %cl
6587; AVX2-NEXT: orb %dl, %cl
6588; AVX2-NEXT: shrb $7, %al
6589; AVX2-NEXT: orb %cl, %al
6590; AVX2-NEXT: orb %sil, %al
6591; AVX2-NEXT: movzbl %al, %esi
6592; AVX2-NEXT: vpextrb $0, %xmm1, %ecx
6593; AVX2-NEXT: movb %cl, %dil
6594; AVX2-NEXT: shlb $7, %dil
6595; AVX2-NEXT: movb %cl, %al
6596; AVX2-NEXT: shlb $5, %al
6597; AVX2-NEXT: andb $64, %al
6598; AVX2-NEXT: movb %cl, %dl
6599; AVX2-NEXT: shlb $3, %dl
6600; AVX2-NEXT: andb $32, %dl
6601; AVX2-NEXT: orb %al, %dl
6602; AVX2-NEXT: movb %cl, %al
6603; AVX2-NEXT: addb %al, %al
6604; AVX2-NEXT: andb $16, %al
6605; AVX2-NEXT: orb %dl, %al
6606; AVX2-NEXT: movb %cl, %dl
6607; AVX2-NEXT: shrb %dl
6608; AVX2-NEXT: andb $8, %dl
6609; AVX2-NEXT: orb %al, %dl
6610; AVX2-NEXT: movb %cl, %al
6611; AVX2-NEXT: shrb $3, %al
6612; AVX2-NEXT: andb $4, %al
6613; AVX2-NEXT: orb %dl, %al
6614; AVX2-NEXT: movb %cl, %dl
6615; AVX2-NEXT: shrb $5, %dl
6616; AVX2-NEXT: andb $2, %dl
6617; AVX2-NEXT: orb %al, %dl
6618; AVX2-NEXT: shrb $7, %cl
6619; AVX2-NEXT: orb %dl, %cl
6620; AVX2-NEXT: orb %dil, %cl
6621; AVX2-NEXT: movzbl %cl, %eax
6622; AVX2-NEXT: vmovd %eax, %xmm2
6623; AVX2-NEXT: vpinsrb $1, %esi, %xmm2, %xmm2
6624; AVX2-NEXT: vpextrb $2, %xmm1, %eax
6625; AVX2-NEXT: movb %al, %sil
6626; AVX2-NEXT: shlb $7, %sil
6627; AVX2-NEXT: movb %al, %dl
6628; AVX2-NEXT: shlb $5, %dl
6629; AVX2-NEXT: andb $64, %dl
6630; AVX2-NEXT: movb %al, %cl
6631; AVX2-NEXT: shlb $3, %cl
6632; AVX2-NEXT: andb $32, %cl
6633; AVX2-NEXT: orb %dl, %cl
6634; AVX2-NEXT: movb %al, %dl
6635; AVX2-NEXT: addb %dl, %dl
6636; AVX2-NEXT: andb $16, %dl
6637; AVX2-NEXT: orb %cl, %dl
6638; AVX2-NEXT: movb %al, %cl
6639; AVX2-NEXT: shrb %cl
6640; AVX2-NEXT: andb $8, %cl
6641; AVX2-NEXT: orb %dl, %cl
6642; AVX2-NEXT: movb %al, %dl
6643; AVX2-NEXT: shrb $3, %dl
6644; AVX2-NEXT: andb $4, %dl
6645; AVX2-NEXT: orb %cl, %dl
6646; AVX2-NEXT: movb %al, %cl
6647; AVX2-NEXT: shrb $5, %cl
6648; AVX2-NEXT: andb $2, %cl
6649; AVX2-NEXT: orb %dl, %cl
6650; AVX2-NEXT: shrb $7, %al
6651; AVX2-NEXT: orb %cl, %al
6652; AVX2-NEXT: orb %sil, %al
6653; AVX2-NEXT: movzbl %al, %eax
6654; AVX2-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
6655; AVX2-NEXT: vpextrb $3, %xmm1, %eax
6656; AVX2-NEXT: movb %al, %sil
6657; AVX2-NEXT: shlb $7, %sil
6658; AVX2-NEXT: movb %al, %dl
6659; AVX2-NEXT: shlb $5, %dl
6660; AVX2-NEXT: andb $64, %dl
6661; AVX2-NEXT: movb %al, %cl
6662; AVX2-NEXT: shlb $3, %cl
6663; AVX2-NEXT: andb $32, %cl
6664; AVX2-NEXT: orb %dl, %cl
6665; AVX2-NEXT: movb %al, %dl
6666; AVX2-NEXT: addb %dl, %dl
6667; AVX2-NEXT: andb $16, %dl
6668; AVX2-NEXT: orb %cl, %dl
6669; AVX2-NEXT: movb %al, %cl
6670; AVX2-NEXT: shrb %cl
6671; AVX2-NEXT: andb $8, %cl
6672; AVX2-NEXT: orb %dl, %cl
6673; AVX2-NEXT: movb %al, %dl
6674; AVX2-NEXT: shrb $3, %dl
6675; AVX2-NEXT: andb $4, %dl
6676; AVX2-NEXT: orb %cl, %dl
6677; AVX2-NEXT: movb %al, %cl
6678; AVX2-NEXT: shrb $5, %cl
6679; AVX2-NEXT: andb $2, %cl
6680; AVX2-NEXT: orb %dl, %cl
6681; AVX2-NEXT: shrb $7, %al
6682; AVX2-NEXT: orb %cl, %al
6683; AVX2-NEXT: orb %sil, %al
6684; AVX2-NEXT: movzbl %al, %eax
6685; AVX2-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
6686; AVX2-NEXT: vpextrb $4, %xmm1, %eax
6687; AVX2-NEXT: movb %al, %sil
6688; AVX2-NEXT: shlb $7, %sil
6689; AVX2-NEXT: movb %al, %dl
6690; AVX2-NEXT: shlb $5, %dl
6691; AVX2-NEXT: andb $64, %dl
6692; AVX2-NEXT: movb %al, %cl
6693; AVX2-NEXT: shlb $3, %cl
6694; AVX2-NEXT: andb $32, %cl
6695; AVX2-NEXT: orb %dl, %cl
6696; AVX2-NEXT: movb %al, %dl
6697; AVX2-NEXT: addb %dl, %dl
6698; AVX2-NEXT: andb $16, %dl
6699; AVX2-NEXT: orb %cl, %dl
6700; AVX2-NEXT: movb %al, %cl
6701; AVX2-NEXT: shrb %cl
6702; AVX2-NEXT: andb $8, %cl
6703; AVX2-NEXT: orb %dl, %cl
6704; AVX2-NEXT: movb %al, %dl
6705; AVX2-NEXT: shrb $3, %dl
6706; AVX2-NEXT: andb $4, %dl
6707; AVX2-NEXT: orb %cl, %dl
6708; AVX2-NEXT: movb %al, %cl
6709; AVX2-NEXT: shrb $5, %cl
6710; AVX2-NEXT: andb $2, %cl
6711; AVX2-NEXT: orb %dl, %cl
6712; AVX2-NEXT: shrb $7, %al
6713; AVX2-NEXT: orb %cl, %al
6714; AVX2-NEXT: orb %sil, %al
6715; AVX2-NEXT: movzbl %al, %eax
6716; AVX2-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
6717; AVX2-NEXT: vpextrb $5, %xmm1, %eax
6718; AVX2-NEXT: movb %al, %sil
6719; AVX2-NEXT: shlb $7, %sil
6720; AVX2-NEXT: movb %al, %dl
6721; AVX2-NEXT: shlb $5, %dl
6722; AVX2-NEXT: andb $64, %dl
6723; AVX2-NEXT: movb %al, %cl
6724; AVX2-NEXT: shlb $3, %cl
6725; AVX2-NEXT: andb $32, %cl
6726; AVX2-NEXT: orb %dl, %cl
6727; AVX2-NEXT: movb %al, %dl
6728; AVX2-NEXT: addb %dl, %dl
6729; AVX2-NEXT: andb $16, %dl
6730; AVX2-NEXT: orb %cl, %dl
6731; AVX2-NEXT: movb %al, %cl
6732; AVX2-NEXT: shrb %cl
6733; AVX2-NEXT: andb $8, %cl
6734; AVX2-NEXT: orb %dl, %cl
6735; AVX2-NEXT: movb %al, %dl
6736; AVX2-NEXT: shrb $3, %dl
6737; AVX2-NEXT: andb $4, %dl
6738; AVX2-NEXT: orb %cl, %dl
6739; AVX2-NEXT: movb %al, %cl
6740; AVX2-NEXT: shrb $5, %cl
6741; AVX2-NEXT: andb $2, %cl
6742; AVX2-NEXT: orb %dl, %cl
6743; AVX2-NEXT: shrb $7, %al
6744; AVX2-NEXT: orb %cl, %al
6745; AVX2-NEXT: orb %sil, %al
6746; AVX2-NEXT: movzbl %al, %eax
6747; AVX2-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
6748; AVX2-NEXT: vpextrb $6, %xmm1, %eax
6749; AVX2-NEXT: movb %al, %sil
6750; AVX2-NEXT: shlb $7, %sil
6751; AVX2-NEXT: movb %al, %dl
6752; AVX2-NEXT: shlb $5, %dl
6753; AVX2-NEXT: andb $64, %dl
6754; AVX2-NEXT: movb %al, %cl
6755; AVX2-NEXT: shlb $3, %cl
6756; AVX2-NEXT: andb $32, %cl
6757; AVX2-NEXT: orb %dl, %cl
6758; AVX2-NEXT: movb %al, %dl
6759; AVX2-NEXT: addb %dl, %dl
6760; AVX2-NEXT: andb $16, %dl
6761; AVX2-NEXT: orb %cl, %dl
6762; AVX2-NEXT: movb %al, %cl
6763; AVX2-NEXT: shrb %cl
6764; AVX2-NEXT: andb $8, %cl
6765; AVX2-NEXT: orb %dl, %cl
6766; AVX2-NEXT: movb %al, %dl
6767; AVX2-NEXT: shrb $3, %dl
6768; AVX2-NEXT: andb $4, %dl
6769; AVX2-NEXT: orb %cl, %dl
6770; AVX2-NEXT: movb %al, %cl
6771; AVX2-NEXT: shrb $5, %cl
6772; AVX2-NEXT: andb $2, %cl
6773; AVX2-NEXT: orb %dl, %cl
6774; AVX2-NEXT: shrb $7, %al
6775; AVX2-NEXT: orb %cl, %al
6776; AVX2-NEXT: orb %sil, %al
6777; AVX2-NEXT: movzbl %al, %eax
6778; AVX2-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
6779; AVX2-NEXT: vpextrb $7, %xmm1, %eax
6780; AVX2-NEXT: movb %al, %sil
6781; AVX2-NEXT: shlb $7, %sil
6782; AVX2-NEXT: movb %al, %dl
6783; AVX2-NEXT: shlb $5, %dl
6784; AVX2-NEXT: andb $64, %dl
6785; AVX2-NEXT: movb %al, %cl
6786; AVX2-NEXT: shlb $3, %cl
6787; AVX2-NEXT: andb $32, %cl
6788; AVX2-NEXT: orb %dl, %cl
6789; AVX2-NEXT: movb %al, %dl
6790; AVX2-NEXT: addb %dl, %dl
6791; AVX2-NEXT: andb $16, %dl
6792; AVX2-NEXT: orb %cl, %dl
6793; AVX2-NEXT: movb %al, %cl
6794; AVX2-NEXT: shrb %cl
6795; AVX2-NEXT: andb $8, %cl
6796; AVX2-NEXT: orb %dl, %cl
6797; AVX2-NEXT: movb %al, %dl
6798; AVX2-NEXT: shrb $3, %dl
6799; AVX2-NEXT: andb $4, %dl
6800; AVX2-NEXT: orb %cl, %dl
6801; AVX2-NEXT: movb %al, %cl
6802; AVX2-NEXT: shrb $5, %cl
6803; AVX2-NEXT: andb $2, %cl
6804; AVX2-NEXT: orb %dl, %cl
6805; AVX2-NEXT: shrb $7, %al
6806; AVX2-NEXT: orb %cl, %al
6807; AVX2-NEXT: orb %sil, %al
6808; AVX2-NEXT: movzbl %al, %eax
6809; AVX2-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
6810; AVX2-NEXT: vpextrb $8, %xmm1, %eax
6811; AVX2-NEXT: movb %al, %sil
6812; AVX2-NEXT: shlb $7, %sil
6813; AVX2-NEXT: movb %al, %dl
6814; AVX2-NEXT: shlb $5, %dl
6815; AVX2-NEXT: andb $64, %dl
6816; AVX2-NEXT: movb %al, %cl
6817; AVX2-NEXT: shlb $3, %cl
6818; AVX2-NEXT: andb $32, %cl
6819; AVX2-NEXT: orb %dl, %cl
6820; AVX2-NEXT: movb %al, %dl
6821; AVX2-NEXT: addb %dl, %dl
6822; AVX2-NEXT: andb $16, %dl
6823; AVX2-NEXT: orb %cl, %dl
6824; AVX2-NEXT: movb %al, %cl
6825; AVX2-NEXT: shrb %cl
6826; AVX2-NEXT: andb $8, %cl
6827; AVX2-NEXT: orb %dl, %cl
6828; AVX2-NEXT: movb %al, %dl
6829; AVX2-NEXT: shrb $3, %dl
6830; AVX2-NEXT: andb $4, %dl
6831; AVX2-NEXT: orb %cl, %dl
6832; AVX2-NEXT: movb %al, %cl
6833; AVX2-NEXT: shrb $5, %cl
6834; AVX2-NEXT: andb $2, %cl
6835; AVX2-NEXT: orb %dl, %cl
6836; AVX2-NEXT: shrb $7, %al
6837; AVX2-NEXT: orb %cl, %al
6838; AVX2-NEXT: orb %sil, %al
6839; AVX2-NEXT: movzbl %al, %eax
6840; AVX2-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
6841; AVX2-NEXT: vpextrb $9, %xmm1, %eax
6842; AVX2-NEXT: movb %al, %sil
6843; AVX2-NEXT: shlb $7, %sil
6844; AVX2-NEXT: movb %al, %dl
6845; AVX2-NEXT: shlb $5, %dl
6846; AVX2-NEXT: andb $64, %dl
6847; AVX2-NEXT: movb %al, %cl
6848; AVX2-NEXT: shlb $3, %cl
6849; AVX2-NEXT: andb $32, %cl
6850; AVX2-NEXT: orb %dl, %cl
6851; AVX2-NEXT: movb %al, %dl
6852; AVX2-NEXT: addb %dl, %dl
6853; AVX2-NEXT: andb $16, %dl
6854; AVX2-NEXT: orb %cl, %dl
6855; AVX2-NEXT: movb %al, %cl
6856; AVX2-NEXT: shrb %cl
6857; AVX2-NEXT: andb $8, %cl
6858; AVX2-NEXT: orb %dl, %cl
6859; AVX2-NEXT: movb %al, %dl
6860; AVX2-NEXT: shrb $3, %dl
6861; AVX2-NEXT: andb $4, %dl
6862; AVX2-NEXT: orb %cl, %dl
6863; AVX2-NEXT: movb %al, %cl
6864; AVX2-NEXT: shrb $5, %cl
6865; AVX2-NEXT: andb $2, %cl
6866; AVX2-NEXT: orb %dl, %cl
6867; AVX2-NEXT: shrb $7, %al
6868; AVX2-NEXT: orb %cl, %al
6869; AVX2-NEXT: orb %sil, %al
6870; AVX2-NEXT: movzbl %al, %eax
6871; AVX2-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
6872; AVX2-NEXT: vpextrb $10, %xmm1, %eax
6873; AVX2-NEXT: movb %al, %sil
6874; AVX2-NEXT: shlb $7, %sil
6875; AVX2-NEXT: movb %al, %dl
6876; AVX2-NEXT: shlb $5, %dl
6877; AVX2-NEXT: andb $64, %dl
6878; AVX2-NEXT: movb %al, %cl
6879; AVX2-NEXT: shlb $3, %cl
6880; AVX2-NEXT: andb $32, %cl
6881; AVX2-NEXT: orb %dl, %cl
6882; AVX2-NEXT: movb %al, %dl
6883; AVX2-NEXT: addb %dl, %dl
6884; AVX2-NEXT: andb $16, %dl
6885; AVX2-NEXT: orb %cl, %dl
6886; AVX2-NEXT: movb %al, %cl
6887; AVX2-NEXT: shrb %cl
6888; AVX2-NEXT: andb $8, %cl
6889; AVX2-NEXT: orb %dl, %cl
6890; AVX2-NEXT: movb %al, %dl
6891; AVX2-NEXT: shrb $3, %dl
6892; AVX2-NEXT: andb $4, %dl
6893; AVX2-NEXT: orb %cl, %dl
6894; AVX2-NEXT: movb %al, %cl
6895; AVX2-NEXT: shrb $5, %cl
6896; AVX2-NEXT: andb $2, %cl
6897; AVX2-NEXT: orb %dl, %cl
6898; AVX2-NEXT: shrb $7, %al
6899; AVX2-NEXT: orb %cl, %al
6900; AVX2-NEXT: orb %sil, %al
6901; AVX2-NEXT: movzbl %al, %eax
6902; AVX2-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
6903; AVX2-NEXT: vpextrb $11, %xmm1, %eax
6904; AVX2-NEXT: movb %al, %sil
6905; AVX2-NEXT: shlb $7, %sil
6906; AVX2-NEXT: movb %al, %dl
6907; AVX2-NEXT: shlb $5, %dl
6908; AVX2-NEXT: andb $64, %dl
6909; AVX2-NEXT: movb %al, %cl
6910; AVX2-NEXT: shlb $3, %cl
6911; AVX2-NEXT: andb $32, %cl
6912; AVX2-NEXT: orb %dl, %cl
6913; AVX2-NEXT: movb %al, %dl
6914; AVX2-NEXT: addb %dl, %dl
6915; AVX2-NEXT: andb $16, %dl
6916; AVX2-NEXT: orb %cl, %dl
6917; AVX2-NEXT: movb %al, %cl
6918; AVX2-NEXT: shrb %cl
6919; AVX2-NEXT: andb $8, %cl
6920; AVX2-NEXT: orb %dl, %cl
6921; AVX2-NEXT: movb %al, %dl
6922; AVX2-NEXT: shrb $3, %dl
6923; AVX2-NEXT: andb $4, %dl
6924; AVX2-NEXT: orb %cl, %dl
6925; AVX2-NEXT: movb %al, %cl
6926; AVX2-NEXT: shrb $5, %cl
6927; AVX2-NEXT: andb $2, %cl
6928; AVX2-NEXT: orb %dl, %cl
6929; AVX2-NEXT: shrb $7, %al
6930; AVX2-NEXT: orb %cl, %al
6931; AVX2-NEXT: orb %sil, %al
6932; AVX2-NEXT: movzbl %al, %eax
6933; AVX2-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
6934; AVX2-NEXT: vpextrb $12, %xmm1, %eax
6935; AVX2-NEXT: movb %al, %sil
6936; AVX2-NEXT: shlb $7, %sil
6937; AVX2-NEXT: movb %al, %dl
6938; AVX2-NEXT: shlb $5, %dl
6939; AVX2-NEXT: andb $64, %dl
6940; AVX2-NEXT: movb %al, %cl
6941; AVX2-NEXT: shlb $3, %cl
6942; AVX2-NEXT: andb $32, %cl
6943; AVX2-NEXT: orb %dl, %cl
6944; AVX2-NEXT: movb %al, %dl
6945; AVX2-NEXT: addb %dl, %dl
6946; AVX2-NEXT: andb $16, %dl
6947; AVX2-NEXT: orb %cl, %dl
6948; AVX2-NEXT: movb %al, %cl
6949; AVX2-NEXT: shrb %cl
6950; AVX2-NEXT: andb $8, %cl
6951; AVX2-NEXT: orb %dl, %cl
6952; AVX2-NEXT: movb %al, %dl
6953; AVX2-NEXT: shrb $3, %dl
6954; AVX2-NEXT: andb $4, %dl
6955; AVX2-NEXT: orb %cl, %dl
6956; AVX2-NEXT: movb %al, %cl
6957; AVX2-NEXT: shrb $5, %cl
6958; AVX2-NEXT: andb $2, %cl
6959; AVX2-NEXT: orb %dl, %cl
6960; AVX2-NEXT: shrb $7, %al
6961; AVX2-NEXT: orb %cl, %al
6962; AVX2-NEXT: orb %sil, %al
6963; AVX2-NEXT: movzbl %al, %eax
6964; AVX2-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
6965; AVX2-NEXT: vpextrb $13, %xmm1, %eax
6966; AVX2-NEXT: movb %al, %sil
6967; AVX2-NEXT: shlb $7, %sil
6968; AVX2-NEXT: movb %al, %dl
6969; AVX2-NEXT: shlb $5, %dl
6970; AVX2-NEXT: andb $64, %dl
6971; AVX2-NEXT: movb %al, %cl
6972; AVX2-NEXT: shlb $3, %cl
6973; AVX2-NEXT: andb $32, %cl
6974; AVX2-NEXT: orb %dl, %cl
6975; AVX2-NEXT: movb %al, %dl
6976; AVX2-NEXT: addb %dl, %dl
6977; AVX2-NEXT: andb $16, %dl
6978; AVX2-NEXT: orb %cl, %dl
6979; AVX2-NEXT: movb %al, %cl
6980; AVX2-NEXT: shrb %cl
6981; AVX2-NEXT: andb $8, %cl
6982; AVX2-NEXT: orb %dl, %cl
6983; AVX2-NEXT: movb %al, %dl
6984; AVX2-NEXT: shrb $3, %dl
6985; AVX2-NEXT: andb $4, %dl
6986; AVX2-NEXT: orb %cl, %dl
6987; AVX2-NEXT: movb %al, %cl
6988; AVX2-NEXT: shrb $5, %cl
6989; AVX2-NEXT: andb $2, %cl
6990; AVX2-NEXT: orb %dl, %cl
6991; AVX2-NEXT: shrb $7, %al
6992; AVX2-NEXT: orb %cl, %al
6993; AVX2-NEXT: orb %sil, %al
6994; AVX2-NEXT: movzbl %al, %eax
6995; AVX2-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
6996; AVX2-NEXT: vpextrb $14, %xmm1, %eax
6997; AVX2-NEXT: movb %al, %sil
6998; AVX2-NEXT: shlb $7, %sil
6999; AVX2-NEXT: movb %al, %dl
7000; AVX2-NEXT: shlb $5, %dl
7001; AVX2-NEXT: andb $64, %dl
7002; AVX2-NEXT: movb %al, %cl
7003; AVX2-NEXT: shlb $3, %cl
7004; AVX2-NEXT: andb $32, %cl
7005; AVX2-NEXT: orb %dl, %cl
7006; AVX2-NEXT: movb %al, %dl
7007; AVX2-NEXT: addb %dl, %dl
7008; AVX2-NEXT: andb $16, %dl
7009; AVX2-NEXT: orb %cl, %dl
7010; AVX2-NEXT: movb %al, %cl
7011; AVX2-NEXT: shrb %cl
7012; AVX2-NEXT: andb $8, %cl
7013; AVX2-NEXT: orb %dl, %cl
7014; AVX2-NEXT: movb %al, %dl
7015; AVX2-NEXT: shrb $3, %dl
7016; AVX2-NEXT: andb $4, %dl
7017; AVX2-NEXT: orb %cl, %dl
7018; AVX2-NEXT: movb %al, %cl
7019; AVX2-NEXT: shrb $5, %cl
7020; AVX2-NEXT: andb $2, %cl
7021; AVX2-NEXT: orb %dl, %cl
7022; AVX2-NEXT: shrb $7, %al
7023; AVX2-NEXT: orb %cl, %al
7024; AVX2-NEXT: orb %sil, %al
7025; AVX2-NEXT: movzbl %al, %eax
7026; AVX2-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
7027; AVX2-NEXT: vpextrb $15, %xmm1, %eax
7028; AVX2-NEXT: movb %al, %sil
7029; AVX2-NEXT: shlb $7, %sil
7030; AVX2-NEXT: movb %al, %dl
7031; AVX2-NEXT: shlb $5, %dl
7032; AVX2-NEXT: andb $64, %dl
7033; AVX2-NEXT: movb %al, %cl
7034; AVX2-NEXT: shlb $3, %cl
7035; AVX2-NEXT: andb $32, %cl
7036; AVX2-NEXT: orb %dl, %cl
7037; AVX2-NEXT: movb %al, %dl
7038; AVX2-NEXT: addb %dl, %dl
7039; AVX2-NEXT: andb $16, %dl
7040; AVX2-NEXT: orb %cl, %dl
7041; AVX2-NEXT: movb %al, %cl
7042; AVX2-NEXT: shrb %cl
7043; AVX2-NEXT: andb $8, %cl
7044; AVX2-NEXT: orb %dl, %cl
7045; AVX2-NEXT: movb %al, %dl
7046; AVX2-NEXT: shrb $3, %dl
7047; AVX2-NEXT: andb $4, %dl
7048; AVX2-NEXT: orb %cl, %dl
7049; AVX2-NEXT: movb %al, %cl
7050; AVX2-NEXT: shrb $5, %cl
7051; AVX2-NEXT: andb $2, %cl
7052; AVX2-NEXT: orb %dl, %cl
7053; AVX2-NEXT: shrb $7, %al
7054; AVX2-NEXT: orb %cl, %al
7055; AVX2-NEXT: orb %sil, %al
7056; AVX2-NEXT: movzbl %al, %eax
7057; AVX2-NEXT: vpinsrb $15, %eax, %xmm2, %xmm1
7058; AVX2-NEXT: vpextrb $1, %xmm0, %eax
7059; AVX2-NEXT: movb %al, %sil
7060; AVX2-NEXT: shlb $7, %sil
7061; AVX2-NEXT: movb %al, %dl
7062; AVX2-NEXT: shlb $5, %dl
7063; AVX2-NEXT: andb $64, %dl
7064; AVX2-NEXT: movb %al, %cl
7065; AVX2-NEXT: shlb $3, %cl
7066; AVX2-NEXT: andb $32, %cl
7067; AVX2-NEXT: orb %dl, %cl
7068; AVX2-NEXT: movb %al, %dl
7069; AVX2-NEXT: addb %dl, %dl
7070; AVX2-NEXT: andb $16, %dl
7071; AVX2-NEXT: orb %cl, %dl
7072; AVX2-NEXT: movb %al, %cl
7073; AVX2-NEXT: shrb %cl
7074; AVX2-NEXT: andb $8, %cl
7075; AVX2-NEXT: orb %dl, %cl
7076; AVX2-NEXT: movb %al, %dl
7077; AVX2-NEXT: shrb $3, %dl
7078; AVX2-NEXT: andb $4, %dl
7079; AVX2-NEXT: orb %cl, %dl
7080; AVX2-NEXT: movb %al, %cl
7081; AVX2-NEXT: shrb $5, %cl
7082; AVX2-NEXT: andb $2, %cl
7083; AVX2-NEXT: orb %dl, %cl
7084; AVX2-NEXT: shrb $7, %al
7085; AVX2-NEXT: orb %cl, %al
7086; AVX2-NEXT: orb %sil, %al
7087; AVX2-NEXT: movzbl %al, %esi
7088; AVX2-NEXT: vpextrb $0, %xmm0, %ecx
7089; AVX2-NEXT: movb %cl, %dil
7090; AVX2-NEXT: shlb $7, %dil
7091; AVX2-NEXT: movb %cl, %al
7092; AVX2-NEXT: shlb $5, %al
7093; AVX2-NEXT: andb $64, %al
7094; AVX2-NEXT: movb %cl, %dl
7095; AVX2-NEXT: shlb $3, %dl
7096; AVX2-NEXT: andb $32, %dl
7097; AVX2-NEXT: orb %al, %dl
7098; AVX2-NEXT: movb %cl, %al
7099; AVX2-NEXT: addb %al, %al
7100; AVX2-NEXT: andb $16, %al
7101; AVX2-NEXT: orb %dl, %al
7102; AVX2-NEXT: movb %cl, %dl
7103; AVX2-NEXT: shrb %dl
7104; AVX2-NEXT: andb $8, %dl
7105; AVX2-NEXT: orb %al, %dl
7106; AVX2-NEXT: movb %cl, %al
7107; AVX2-NEXT: shrb $3, %al
7108; AVX2-NEXT: andb $4, %al
7109; AVX2-NEXT: orb %dl, %al
7110; AVX2-NEXT: movb %cl, %dl
7111; AVX2-NEXT: shrb $5, %dl
7112; AVX2-NEXT: andb $2, %dl
7113; AVX2-NEXT: orb %al, %dl
7114; AVX2-NEXT: shrb $7, %cl
7115; AVX2-NEXT: orb %dl, %cl
7116; AVX2-NEXT: orb %dil, %cl
7117; AVX2-NEXT: movzbl %cl, %eax
7118; AVX2-NEXT: vmovd %eax, %xmm2
7119; AVX2-NEXT: vpinsrb $1, %esi, %xmm2, %xmm2
7120; AVX2-NEXT: vpextrb $2, %xmm0, %eax
7121; AVX2-NEXT: movb %al, %sil
7122; AVX2-NEXT: shlb $7, %sil
7123; AVX2-NEXT: movb %al, %dl
7124; AVX2-NEXT: shlb $5, %dl
7125; AVX2-NEXT: andb $64, %dl
7126; AVX2-NEXT: movb %al, %cl
7127; AVX2-NEXT: shlb $3, %cl
7128; AVX2-NEXT: andb $32, %cl
7129; AVX2-NEXT: orb %dl, %cl
7130; AVX2-NEXT: movb %al, %dl
7131; AVX2-NEXT: addb %dl, %dl
7132; AVX2-NEXT: andb $16, %dl
7133; AVX2-NEXT: orb %cl, %dl
7134; AVX2-NEXT: movb %al, %cl
7135; AVX2-NEXT: shrb %cl
7136; AVX2-NEXT: andb $8, %cl
7137; AVX2-NEXT: orb %dl, %cl
7138; AVX2-NEXT: movb %al, %dl
7139; AVX2-NEXT: shrb $3, %dl
7140; AVX2-NEXT: andb $4, %dl
7141; AVX2-NEXT: orb %cl, %dl
7142; AVX2-NEXT: movb %al, %cl
7143; AVX2-NEXT: shrb $5, %cl
7144; AVX2-NEXT: andb $2, %cl
7145; AVX2-NEXT: orb %dl, %cl
7146; AVX2-NEXT: shrb $7, %al
7147; AVX2-NEXT: orb %cl, %al
7148; AVX2-NEXT: orb %sil, %al
7149; AVX2-NEXT: movzbl %al, %eax
7150; AVX2-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
7151; AVX2-NEXT: vpextrb $3, %xmm0, %eax
7152; AVX2-NEXT: movb %al, %sil
7153; AVX2-NEXT: shlb $7, %sil
7154; AVX2-NEXT: movb %al, %dl
7155; AVX2-NEXT: shlb $5, %dl
7156; AVX2-NEXT: andb $64, %dl
7157; AVX2-NEXT: movb %al, %cl
7158; AVX2-NEXT: shlb $3, %cl
7159; AVX2-NEXT: andb $32, %cl
7160; AVX2-NEXT: orb %dl, %cl
7161; AVX2-NEXT: movb %al, %dl
7162; AVX2-NEXT: addb %dl, %dl
7163; AVX2-NEXT: andb $16, %dl
7164; AVX2-NEXT: orb %cl, %dl
7165; AVX2-NEXT: movb %al, %cl
7166; AVX2-NEXT: shrb %cl
7167; AVX2-NEXT: andb $8, %cl
7168; AVX2-NEXT: orb %dl, %cl
7169; AVX2-NEXT: movb %al, %dl
7170; AVX2-NEXT: shrb $3, %dl
7171; AVX2-NEXT: andb $4, %dl
7172; AVX2-NEXT: orb %cl, %dl
7173; AVX2-NEXT: movb %al, %cl
7174; AVX2-NEXT: shrb $5, %cl
7175; AVX2-NEXT: andb $2, %cl
7176; AVX2-NEXT: orb %dl, %cl
7177; AVX2-NEXT: shrb $7, %al
7178; AVX2-NEXT: orb %cl, %al
7179; AVX2-NEXT: orb %sil, %al
7180; AVX2-NEXT: movzbl %al, %eax
7181; AVX2-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
7182; AVX2-NEXT: vpextrb $4, %xmm0, %eax
7183; AVX2-NEXT: movb %al, %sil
7184; AVX2-NEXT: shlb $7, %sil
7185; AVX2-NEXT: movb %al, %dl
7186; AVX2-NEXT: shlb $5, %dl
7187; AVX2-NEXT: andb $64, %dl
7188; AVX2-NEXT: movb %al, %cl
7189; AVX2-NEXT: shlb $3, %cl
7190; AVX2-NEXT: andb $32, %cl
7191; AVX2-NEXT: orb %dl, %cl
7192; AVX2-NEXT: movb %al, %dl
7193; AVX2-NEXT: addb %dl, %dl
7194; AVX2-NEXT: andb $16, %dl
7195; AVX2-NEXT: orb %cl, %dl
7196; AVX2-NEXT: movb %al, %cl
7197; AVX2-NEXT: shrb %cl
7198; AVX2-NEXT: andb $8, %cl
7199; AVX2-NEXT: orb %dl, %cl
7200; AVX2-NEXT: movb %al, %dl
7201; AVX2-NEXT: shrb $3, %dl
7202; AVX2-NEXT: andb $4, %dl
7203; AVX2-NEXT: orb %cl, %dl
7204; AVX2-NEXT: movb %al, %cl
7205; AVX2-NEXT: shrb $5, %cl
7206; AVX2-NEXT: andb $2, %cl
7207; AVX2-NEXT: orb %dl, %cl
7208; AVX2-NEXT: shrb $7, %al
7209; AVX2-NEXT: orb %cl, %al
7210; AVX2-NEXT: orb %sil, %al
7211; AVX2-NEXT: movzbl %al, %eax
7212; AVX2-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
7213; AVX2-NEXT: vpextrb $5, %xmm0, %eax
7214; AVX2-NEXT: movb %al, %sil
7215; AVX2-NEXT: shlb $7, %sil
7216; AVX2-NEXT: movb %al, %dl
7217; AVX2-NEXT: shlb $5, %dl
7218; AVX2-NEXT: andb $64, %dl
7219; AVX2-NEXT: movb %al, %cl
7220; AVX2-NEXT: shlb $3, %cl
7221; AVX2-NEXT: andb $32, %cl
7222; AVX2-NEXT: orb %dl, %cl
7223; AVX2-NEXT: movb %al, %dl
7224; AVX2-NEXT: addb %dl, %dl
7225; AVX2-NEXT: andb $16, %dl
7226; AVX2-NEXT: orb %cl, %dl
7227; AVX2-NEXT: movb %al, %cl
7228; AVX2-NEXT: shrb %cl
7229; AVX2-NEXT: andb $8, %cl
7230; AVX2-NEXT: orb %dl, %cl
7231; AVX2-NEXT: movb %al, %dl
7232; AVX2-NEXT: shrb $3, %dl
7233; AVX2-NEXT: andb $4, %dl
7234; AVX2-NEXT: orb %cl, %dl
7235; AVX2-NEXT: movb %al, %cl
7236; AVX2-NEXT: shrb $5, %cl
7237; AVX2-NEXT: andb $2, %cl
7238; AVX2-NEXT: orb %dl, %cl
7239; AVX2-NEXT: shrb $7, %al
7240; AVX2-NEXT: orb %cl, %al
7241; AVX2-NEXT: orb %sil, %al
7242; AVX2-NEXT: movzbl %al, %eax
7243; AVX2-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
7244; AVX2-NEXT: vpextrb $6, %xmm0, %eax
7245; AVX2-NEXT: movb %al, %sil
7246; AVX2-NEXT: shlb $7, %sil
7247; AVX2-NEXT: movb %al, %dl
7248; AVX2-NEXT: shlb $5, %dl
7249; AVX2-NEXT: andb $64, %dl
7250; AVX2-NEXT: movb %al, %cl
7251; AVX2-NEXT: shlb $3, %cl
7252; AVX2-NEXT: andb $32, %cl
7253; AVX2-NEXT: orb %dl, %cl
7254; AVX2-NEXT: movb %al, %dl
7255; AVX2-NEXT: addb %dl, %dl
7256; AVX2-NEXT: andb $16, %dl
7257; AVX2-NEXT: orb %cl, %dl
7258; AVX2-NEXT: movb %al, %cl
7259; AVX2-NEXT: shrb %cl
7260; AVX2-NEXT: andb $8, %cl
7261; AVX2-NEXT: orb %dl, %cl
7262; AVX2-NEXT: movb %al, %dl
7263; AVX2-NEXT: shrb $3, %dl
7264; AVX2-NEXT: andb $4, %dl
7265; AVX2-NEXT: orb %cl, %dl
7266; AVX2-NEXT: movb %al, %cl
7267; AVX2-NEXT: shrb $5, %cl
7268; AVX2-NEXT: andb $2, %cl
7269; AVX2-NEXT: orb %dl, %cl
7270; AVX2-NEXT: shrb $7, %al
7271; AVX2-NEXT: orb %cl, %al
7272; AVX2-NEXT: orb %sil, %al
7273; AVX2-NEXT: movzbl %al, %eax
7274; AVX2-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
7275; AVX2-NEXT: vpextrb $7, %xmm0, %eax
7276; AVX2-NEXT: movb %al, %sil
7277; AVX2-NEXT: shlb $7, %sil
7278; AVX2-NEXT: movb %al, %dl
7279; AVX2-NEXT: shlb $5, %dl
7280; AVX2-NEXT: andb $64, %dl
7281; AVX2-NEXT: movb %al, %cl
7282; AVX2-NEXT: shlb $3, %cl
7283; AVX2-NEXT: andb $32, %cl
7284; AVX2-NEXT: orb %dl, %cl
7285; AVX2-NEXT: movb %al, %dl
7286; AVX2-NEXT: addb %dl, %dl
7287; AVX2-NEXT: andb $16, %dl
7288; AVX2-NEXT: orb %cl, %dl
7289; AVX2-NEXT: movb %al, %cl
7290; AVX2-NEXT: shrb %cl
7291; AVX2-NEXT: andb $8, %cl
7292; AVX2-NEXT: orb %dl, %cl
7293; AVX2-NEXT: movb %al, %dl
7294; AVX2-NEXT: shrb $3, %dl
7295; AVX2-NEXT: andb $4, %dl
7296; AVX2-NEXT: orb %cl, %dl
7297; AVX2-NEXT: movb %al, %cl
7298; AVX2-NEXT: shrb $5, %cl
7299; AVX2-NEXT: andb $2, %cl
7300; AVX2-NEXT: orb %dl, %cl
7301; AVX2-NEXT: shrb $7, %al
7302; AVX2-NEXT: orb %cl, %al
7303; AVX2-NEXT: orb %sil, %al
7304; AVX2-NEXT: movzbl %al, %eax
7305; AVX2-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
7306; AVX2-NEXT: vpextrb $8, %xmm0, %eax
7307; AVX2-NEXT: movb %al, %sil
7308; AVX2-NEXT: shlb $7, %sil
7309; AVX2-NEXT: movb %al, %dl
7310; AVX2-NEXT: shlb $5, %dl
7311; AVX2-NEXT: andb $64, %dl
7312; AVX2-NEXT: movb %al, %cl
7313; AVX2-NEXT: shlb $3, %cl
7314; AVX2-NEXT: andb $32, %cl
7315; AVX2-NEXT: orb %dl, %cl
7316; AVX2-NEXT: movb %al, %dl
7317; AVX2-NEXT: addb %dl, %dl
7318; AVX2-NEXT: andb $16, %dl
7319; AVX2-NEXT: orb %cl, %dl
7320; AVX2-NEXT: movb %al, %cl
7321; AVX2-NEXT: shrb %cl
7322; AVX2-NEXT: andb $8, %cl
7323; AVX2-NEXT: orb %dl, %cl
7324; AVX2-NEXT: movb %al, %dl
7325; AVX2-NEXT: shrb $3, %dl
7326; AVX2-NEXT: andb $4, %dl
7327; AVX2-NEXT: orb %cl, %dl
7328; AVX2-NEXT: movb %al, %cl
7329; AVX2-NEXT: shrb $5, %cl
7330; AVX2-NEXT: andb $2, %cl
7331; AVX2-NEXT: orb %dl, %cl
7332; AVX2-NEXT: shrb $7, %al
7333; AVX2-NEXT: orb %cl, %al
7334; AVX2-NEXT: orb %sil, %al
7335; AVX2-NEXT: movzbl %al, %eax
7336; AVX2-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
7337; AVX2-NEXT: vpextrb $9, %xmm0, %eax
7338; AVX2-NEXT: movb %al, %sil
7339; AVX2-NEXT: shlb $7, %sil
7340; AVX2-NEXT: movb %al, %dl
7341; AVX2-NEXT: shlb $5, %dl
7342; AVX2-NEXT: andb $64, %dl
7343; AVX2-NEXT: movb %al, %cl
7344; AVX2-NEXT: shlb $3, %cl
7345; AVX2-NEXT: andb $32, %cl
7346; AVX2-NEXT: orb %dl, %cl
7347; AVX2-NEXT: movb %al, %dl
7348; AVX2-NEXT: addb %dl, %dl
7349; AVX2-NEXT: andb $16, %dl
7350; AVX2-NEXT: orb %cl, %dl
7351; AVX2-NEXT: movb %al, %cl
7352; AVX2-NEXT: shrb %cl
7353; AVX2-NEXT: andb $8, %cl
7354; AVX2-NEXT: orb %dl, %cl
7355; AVX2-NEXT: movb %al, %dl
7356; AVX2-NEXT: shrb $3, %dl
7357; AVX2-NEXT: andb $4, %dl
7358; AVX2-NEXT: orb %cl, %dl
7359; AVX2-NEXT: movb %al, %cl
7360; AVX2-NEXT: shrb $5, %cl
7361; AVX2-NEXT: andb $2, %cl
7362; AVX2-NEXT: orb %dl, %cl
7363; AVX2-NEXT: shrb $7, %al
7364; AVX2-NEXT: orb %cl, %al
7365; AVX2-NEXT: orb %sil, %al
7366; AVX2-NEXT: movzbl %al, %eax
7367; AVX2-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
7368; AVX2-NEXT: vpextrb $10, %xmm0, %eax
7369; AVX2-NEXT: movb %al, %sil
7370; AVX2-NEXT: shlb $7, %sil
7371; AVX2-NEXT: movb %al, %dl
7372; AVX2-NEXT: shlb $5, %dl
7373; AVX2-NEXT: andb $64, %dl
7374; AVX2-NEXT: movb %al, %cl
7375; AVX2-NEXT: shlb $3, %cl
7376; AVX2-NEXT: andb $32, %cl
7377; AVX2-NEXT: orb %dl, %cl
7378; AVX2-NEXT: movb %al, %dl
7379; AVX2-NEXT: addb %dl, %dl
7380; AVX2-NEXT: andb $16, %dl
7381; AVX2-NEXT: orb %cl, %dl
7382; AVX2-NEXT: movb %al, %cl
7383; AVX2-NEXT: shrb %cl
7384; AVX2-NEXT: andb $8, %cl
7385; AVX2-NEXT: orb %dl, %cl
7386; AVX2-NEXT: movb %al, %dl
7387; AVX2-NEXT: shrb $3, %dl
7388; AVX2-NEXT: andb $4, %dl
7389; AVX2-NEXT: orb %cl, %dl
7390; AVX2-NEXT: movb %al, %cl
7391; AVX2-NEXT: shrb $5, %cl
7392; AVX2-NEXT: andb $2, %cl
7393; AVX2-NEXT: orb %dl, %cl
7394; AVX2-NEXT: shrb $7, %al
7395; AVX2-NEXT: orb %cl, %al
7396; AVX2-NEXT: orb %sil, %al
7397; AVX2-NEXT: movzbl %al, %eax
7398; AVX2-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
7399; AVX2-NEXT: vpextrb $11, %xmm0, %eax
7400; AVX2-NEXT: movb %al, %sil
7401; AVX2-NEXT: shlb $7, %sil
7402; AVX2-NEXT: movb %al, %dl
7403; AVX2-NEXT: shlb $5, %dl
7404; AVX2-NEXT: andb $64, %dl
7405; AVX2-NEXT: movb %al, %cl
7406; AVX2-NEXT: shlb $3, %cl
7407; AVX2-NEXT: andb $32, %cl
7408; AVX2-NEXT: orb %dl, %cl
7409; AVX2-NEXT: movb %al, %dl
7410; AVX2-NEXT: addb %dl, %dl
7411; AVX2-NEXT: andb $16, %dl
7412; AVX2-NEXT: orb %cl, %dl
7413; AVX2-NEXT: movb %al, %cl
7414; AVX2-NEXT: shrb %cl
7415; AVX2-NEXT: andb $8, %cl
7416; AVX2-NEXT: orb %dl, %cl
7417; AVX2-NEXT: movb %al, %dl
7418; AVX2-NEXT: shrb $3, %dl
7419; AVX2-NEXT: andb $4, %dl
7420; AVX2-NEXT: orb %cl, %dl
7421; AVX2-NEXT: movb %al, %cl
7422; AVX2-NEXT: shrb $5, %cl
7423; AVX2-NEXT: andb $2, %cl
7424; AVX2-NEXT: orb %dl, %cl
7425; AVX2-NEXT: shrb $7, %al
7426; AVX2-NEXT: orb %cl, %al
7427; AVX2-NEXT: orb %sil, %al
7428; AVX2-NEXT: movzbl %al, %eax
7429; AVX2-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
7430; AVX2-NEXT: vpextrb $12, %xmm0, %eax
7431; AVX2-NEXT: movb %al, %sil
7432; AVX2-NEXT: shlb $7, %sil
7433; AVX2-NEXT: movb %al, %dl
7434; AVX2-NEXT: shlb $5, %dl
7435; AVX2-NEXT: andb $64, %dl
7436; AVX2-NEXT: movb %al, %cl
7437; AVX2-NEXT: shlb $3, %cl
7438; AVX2-NEXT: andb $32, %cl
7439; AVX2-NEXT: orb %dl, %cl
7440; AVX2-NEXT: movb %al, %dl
7441; AVX2-NEXT: addb %dl, %dl
7442; AVX2-NEXT: andb $16, %dl
7443; AVX2-NEXT: orb %cl, %dl
7444; AVX2-NEXT: movb %al, %cl
7445; AVX2-NEXT: shrb %cl
7446; AVX2-NEXT: andb $8, %cl
7447; AVX2-NEXT: orb %dl, %cl
7448; AVX2-NEXT: movb %al, %dl
7449; AVX2-NEXT: shrb $3, %dl
7450; AVX2-NEXT: andb $4, %dl
7451; AVX2-NEXT: orb %cl, %dl
7452; AVX2-NEXT: movb %al, %cl
7453; AVX2-NEXT: shrb $5, %cl
7454; AVX2-NEXT: andb $2, %cl
7455; AVX2-NEXT: orb %dl, %cl
7456; AVX2-NEXT: shrb $7, %al
7457; AVX2-NEXT: orb %cl, %al
7458; AVX2-NEXT: orb %sil, %al
7459; AVX2-NEXT: movzbl %al, %eax
7460; AVX2-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
7461; AVX2-NEXT: vpextrb $13, %xmm0, %eax
7462; AVX2-NEXT: movb %al, %sil
7463; AVX2-NEXT: shlb $7, %sil
7464; AVX2-NEXT: movb %al, %dl
7465; AVX2-NEXT: shlb $5, %dl
7466; AVX2-NEXT: andb $64, %dl
7467; AVX2-NEXT: movb %al, %cl
7468; AVX2-NEXT: shlb $3, %cl
7469; AVX2-NEXT: andb $32, %cl
7470; AVX2-NEXT: orb %dl, %cl
7471; AVX2-NEXT: movb %al, %dl
7472; AVX2-NEXT: addb %dl, %dl
7473; AVX2-NEXT: andb $16, %dl
7474; AVX2-NEXT: orb %cl, %dl
7475; AVX2-NEXT: movb %al, %cl
7476; AVX2-NEXT: shrb %cl
7477; AVX2-NEXT: andb $8, %cl
7478; AVX2-NEXT: orb %dl, %cl
7479; AVX2-NEXT: movb %al, %dl
7480; AVX2-NEXT: shrb $3, %dl
7481; AVX2-NEXT: andb $4, %dl
7482; AVX2-NEXT: orb %cl, %dl
7483; AVX2-NEXT: movb %al, %cl
7484; AVX2-NEXT: shrb $5, %cl
7485; AVX2-NEXT: andb $2, %cl
7486; AVX2-NEXT: orb %dl, %cl
7487; AVX2-NEXT: shrb $7, %al
7488; AVX2-NEXT: orb %cl, %al
7489; AVX2-NEXT: orb %sil, %al
7490; AVX2-NEXT: movzbl %al, %eax
7491; AVX2-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
7492; AVX2-NEXT: vpextrb $14, %xmm0, %eax
7493; AVX2-NEXT: movb %al, %sil
7494; AVX2-NEXT: shlb $7, %sil
7495; AVX2-NEXT: movb %al, %dl
7496; AVX2-NEXT: shlb $5, %dl
7497; AVX2-NEXT: andb $64, %dl
7498; AVX2-NEXT: movb %al, %cl
7499; AVX2-NEXT: shlb $3, %cl
7500; AVX2-NEXT: andb $32, %cl
7501; AVX2-NEXT: orb %dl, %cl
7502; AVX2-NEXT: movb %al, %dl
7503; AVX2-NEXT: addb %dl, %dl
7504; AVX2-NEXT: andb $16, %dl
7505; AVX2-NEXT: orb %cl, %dl
7506; AVX2-NEXT: movb %al, %cl
7507; AVX2-NEXT: shrb %cl
7508; AVX2-NEXT: andb $8, %cl
7509; AVX2-NEXT: orb %dl, %cl
7510; AVX2-NEXT: movb %al, %dl
7511; AVX2-NEXT: shrb $3, %dl
7512; AVX2-NEXT: andb $4, %dl
7513; AVX2-NEXT: orb %cl, %dl
7514; AVX2-NEXT: movb %al, %cl
7515; AVX2-NEXT: shrb $5, %cl
7516; AVX2-NEXT: andb $2, %cl
7517; AVX2-NEXT: orb %dl, %cl
7518; AVX2-NEXT: shrb $7, %al
7519; AVX2-NEXT: orb %cl, %al
7520; AVX2-NEXT: orb %sil, %al
7521; AVX2-NEXT: movzbl %al, %eax
7522; AVX2-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
7523; AVX2-NEXT: vpextrb $15, %xmm0, %eax
7524; AVX2-NEXT: movb %al, %sil
7525; AVX2-NEXT: shlb $7, %sil
7526; AVX2-NEXT: movb %al, %dl
7527; AVX2-NEXT: shlb $5, %dl
7528; AVX2-NEXT: andb $64, %dl
7529; AVX2-NEXT: movb %al, %cl
7530; AVX2-NEXT: shlb $3, %cl
7531; AVX2-NEXT: andb $32, %cl
7532; AVX2-NEXT: orb %dl, %cl
7533; AVX2-NEXT: movb %al, %dl
7534; AVX2-NEXT: addb %dl, %dl
7535; AVX2-NEXT: andb $16, %dl
7536; AVX2-NEXT: orb %cl, %dl
7537; AVX2-NEXT: movb %al, %cl
7538; AVX2-NEXT: shrb %cl
7539; AVX2-NEXT: andb $8, %cl
7540; AVX2-NEXT: orb %dl, %cl
7541; AVX2-NEXT: movb %al, %dl
7542; AVX2-NEXT: shrb $3, %dl
7543; AVX2-NEXT: andb $4, %dl
7544; AVX2-NEXT: orb %cl, %dl
7545; AVX2-NEXT: movb %al, %cl
7546; AVX2-NEXT: shrb $5, %cl
7547; AVX2-NEXT: andb $2, %cl
7548; AVX2-NEXT: orb %dl, %cl
7549; AVX2-NEXT: shrb $7, %al
7550; AVX2-NEXT: orb %cl, %al
7551; AVX2-NEXT: orb %sil, %al
7552; AVX2-NEXT: movzbl %al, %eax
7553; AVX2-NEXT: vpinsrb $15, %eax, %xmm2, %xmm0
7554; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
7555; AVX2-NEXT: retq
7556;
7557; XOPAVX1-LABEL: test_bitreverse_v32i8:
7558; XOPAVX1: # BB#0:
7559; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
7560; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95]
7561; XOPAVX1-NEXT: vpperm %xmm2, %xmm1, %xmm0, %xmm1
7562; XOPAVX1-NEXT: vpperm %xmm2, %xmm0, %xmm0, %xmm0
7563; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
7564; XOPAVX1-NEXT: retq
7565;
7566; XOPAVX2-LABEL: test_bitreverse_v32i8:
7567; XOPAVX2: # BB#0:
7568; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
7569; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95]
7570; XOPAVX2-NEXT: vpperm %xmm2, %xmm1, %xmm0, %xmm1
7571; XOPAVX2-NEXT: vpperm %xmm2, %xmm0, %xmm0, %xmm0
7572; XOPAVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
7573; XOPAVX2-NEXT: retq
7574 %b = call <32 x i8> @llvm.bitreverse.v32i8(<32 x i8> %a)
7575 ret <32 x i8> %b
7576}
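; Note on the XOP lowering above (editorial comment, not an autogenerated
; check): the entire 16-byte reversal is folded into one vpperm per 128-bit
; lane. Assuming the usual VPPERM selector encoding, in which bits [4:0] of
; each selector byte pick one byte out of the 32-byte source concatenation
; and bits [7:5] = 010 request that byte with its bits reversed, the
; [80..95] (0x50..0x5F) selector constant reads bytes 16..31 of the
; concatenation and bit-reverses each of them, performing the whole v16i8
; bitreverse in a single instruction instead of the per-bit shift/and/or
; sequences emitted for SSE and AVX.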
7577
7578define <16 x i16> @test_bitreverse_v16i16(<16 x i16> %a) nounwind {
7579; SSE-LABEL: test_bitreverse_v16i16:
7580; SSE: # BB#0:
7581; SSE-NEXT: pextrw $7, %xmm0, %eax
7582; SSE-NEXT: movl %eax, %ecx
7583; SSE-NEXT: shll $15, %ecx
7584; SSE-NEXT: movl %eax, %edx
7585; SSE-NEXT: andl $2, %edx
7586; SSE-NEXT: shll $13, %edx
7587; SSE-NEXT: orl %ecx, %edx
7588; SSE-NEXT: movl %eax, %ecx
7589; SSE-NEXT: andl $4, %ecx
7590; SSE-NEXT: shll $11, %ecx
7591; SSE-NEXT: orl %edx, %ecx
7592; SSE-NEXT: movl %eax, %edx
7593; SSE-NEXT: andl $8, %edx
7594; SSE-NEXT: shll $9, %edx
7595; SSE-NEXT: orl %ecx, %edx
7596; SSE-NEXT: movl %eax, %esi
7597; SSE-NEXT: andl $16, %esi
7598; SSE-NEXT: shll $7, %esi
7599; SSE-NEXT: orl %edx, %esi
7600; SSE-NEXT: movl %eax, %ecx
7601; SSE-NEXT: andl $32, %ecx
7602; SSE-NEXT: shll $5, %ecx
7603; SSE-NEXT: orl %esi, %ecx
7604; SSE-NEXT: movl %eax, %edx
7605; SSE-NEXT: andl $64, %edx
7606; SSE-NEXT: shll $3, %edx
7607; SSE-NEXT: leal (%rax,%rax), %esi
7608; SSE-NEXT: andl $256, %esi # imm = 0x100
7609; SSE-NEXT: orl %edx, %esi
7610; SSE-NEXT: movl %eax, %edx
7611; SSE-NEXT: shrl %edx
7612; SSE-NEXT: andl $128, %edx
7613; SSE-NEXT: orl %esi, %edx
7614; SSE-NEXT: movl %eax, %esi
7615; SSE-NEXT: shrl $3, %esi
7616; SSE-NEXT: andl $64, %esi
7617; SSE-NEXT: orl %edx, %esi
7618; SSE-NEXT: movl %eax, %edx
7619; SSE-NEXT: shrl $5, %edx
7620; SSE-NEXT: andl $32, %edx
7621; SSE-NEXT: orl %esi, %edx
7622; SSE-NEXT: movl %eax, %esi
7623; SSE-NEXT: shrl $7, %esi
7624; SSE-NEXT: andl $16, %esi
7625; SSE-NEXT: orl %edx, %esi
7626; SSE-NEXT: movl %eax, %edx
7627; SSE-NEXT: shrl $9, %edx
7628; SSE-NEXT: andl $8, %edx
7629; SSE-NEXT: orl %esi, %edx
7630; SSE-NEXT: movl %eax, %esi
7631; SSE-NEXT: shrl $11, %esi
7632; SSE-NEXT: andl $4, %esi
7633; SSE-NEXT: orl %edx, %esi
7634; SSE-NEXT: movl %eax, %edx
7635; SSE-NEXT: shrl $13, %edx
7636; SSE-NEXT: andl $2, %edx
7637; SSE-NEXT: orl %esi, %edx
7638; SSE-NEXT: shrl $15, %eax
7639; SSE-NEXT: orl %edx, %eax
7640; SSE-NEXT: orl %ecx, %eax
7641; SSE-NEXT: movd %eax, %xmm2
7642; SSE-NEXT: pextrw $3, %xmm0, %eax
7643; SSE-NEXT: movl %eax, %ecx
7644; SSE-NEXT: shll $15, %ecx
7645; SSE-NEXT: movl %eax, %edx
7646; SSE-NEXT: andl $2, %edx
7647; SSE-NEXT: shll $13, %edx
7648; SSE-NEXT: orl %ecx, %edx
7649; SSE-NEXT: movl %eax, %ecx
7650; SSE-NEXT: andl $4, %ecx
7651; SSE-NEXT: shll $11, %ecx
7652; SSE-NEXT: orl %edx, %ecx
7653; SSE-NEXT: movl %eax, %edx
7654; SSE-NEXT: andl $8, %edx
7655; SSE-NEXT: shll $9, %edx
7656; SSE-NEXT: orl %ecx, %edx
7657; SSE-NEXT: movl %eax, %esi
7658; SSE-NEXT: andl $16, %esi
7659; SSE-NEXT: shll $7, %esi
7660; SSE-NEXT: orl %edx, %esi
7661; SSE-NEXT: movl %eax, %ecx
7662; SSE-NEXT: andl $32, %ecx
7663; SSE-NEXT: shll $5, %ecx
7664; SSE-NEXT: orl %esi, %ecx
7665; SSE-NEXT: movl %eax, %edx
7666; SSE-NEXT: andl $64, %edx
7667; SSE-NEXT: shll $3, %edx
7668; SSE-NEXT: leal (%rax,%rax), %esi
7669; SSE-NEXT: andl $256, %esi # imm = 0x100
7670; SSE-NEXT: orl %edx, %esi
7671; SSE-NEXT: movl %eax, %edx
7672; SSE-NEXT: shrl %edx
7673; SSE-NEXT: andl $128, %edx
7674; SSE-NEXT: orl %esi, %edx
7675; SSE-NEXT: movl %eax, %esi
7676; SSE-NEXT: shrl $3, %esi
7677; SSE-NEXT: andl $64, %esi
7678; SSE-NEXT: orl %edx, %esi
7679; SSE-NEXT: movl %eax, %edx
7680; SSE-NEXT: shrl $5, %edx
7681; SSE-NEXT: andl $32, %edx
7682; SSE-NEXT: orl %esi, %edx
7683; SSE-NEXT: movl %eax, %esi
7684; SSE-NEXT: shrl $7, %esi
7685; SSE-NEXT: andl $16, %esi
7686; SSE-NEXT: orl %edx, %esi
7687; SSE-NEXT: movl %eax, %edx
7688; SSE-NEXT: shrl $9, %edx
7689; SSE-NEXT: andl $8, %edx
7690; SSE-NEXT: orl %esi, %edx
7691; SSE-NEXT: movl %eax, %esi
7692; SSE-NEXT: shrl $11, %esi
7693; SSE-NEXT: andl $4, %esi
7694; SSE-NEXT: orl %edx, %esi
7695; SSE-NEXT: movl %eax, %edx
7696; SSE-NEXT: shrl $13, %edx
7697; SSE-NEXT: andl $2, %edx
7698; SSE-NEXT: orl %esi, %edx
7699; SSE-NEXT: shrl $15, %eax
7700; SSE-NEXT: orl %edx, %eax
7701; SSE-NEXT: orl %ecx, %eax
7702; SSE-NEXT: movd %eax, %xmm3
7703; SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
7704; SSE-NEXT: pextrw $5, %xmm0, %eax
7705; SSE-NEXT: movl %eax, %ecx
7706; SSE-NEXT: shll $15, %ecx
7707; SSE-NEXT: movl %eax, %edx
7708; SSE-NEXT: andl $2, %edx
7709; SSE-NEXT: shll $13, %edx
7710; SSE-NEXT: orl %ecx, %edx
7711; SSE-NEXT: movl %eax, %ecx
7712; SSE-NEXT: andl $4, %ecx
7713; SSE-NEXT: shll $11, %ecx
7714; SSE-NEXT: orl %edx, %ecx
7715; SSE-NEXT: movl %eax, %edx
7716; SSE-NEXT: andl $8, %edx
7717; SSE-NEXT: shll $9, %edx
7718; SSE-NEXT: orl %ecx, %edx
7719; SSE-NEXT: movl %eax, %esi
7720; SSE-NEXT: andl $16, %esi
7721; SSE-NEXT: shll $7, %esi
7722; SSE-NEXT: orl %edx, %esi
7723; SSE-NEXT: movl %eax, %ecx
7724; SSE-NEXT: andl $32, %ecx
7725; SSE-NEXT: shll $5, %ecx
7726; SSE-NEXT: orl %esi, %ecx
7727; SSE-NEXT: movl %eax, %edx
7728; SSE-NEXT: andl $64, %edx
7729; SSE-NEXT: shll $3, %edx
7730; SSE-NEXT: leal (%rax,%rax), %esi
7731; SSE-NEXT: andl $256, %esi # imm = 0x100
7732; SSE-NEXT: orl %edx, %esi
7733; SSE-NEXT: movl %eax, %edx
7734; SSE-NEXT: shrl %edx
7735; SSE-NEXT: andl $128, %edx
7736; SSE-NEXT: orl %esi, %edx
7737; SSE-NEXT: movl %eax, %esi
7738; SSE-NEXT: shrl $3, %esi
7739; SSE-NEXT: andl $64, %esi
7740; SSE-NEXT: orl %edx, %esi
7741; SSE-NEXT: movl %eax, %edx
7742; SSE-NEXT: shrl $5, %edx
7743; SSE-NEXT: andl $32, %edx
7744; SSE-NEXT: orl %esi, %edx
7745; SSE-NEXT: movl %eax, %esi
7746; SSE-NEXT: shrl $7, %esi
7747; SSE-NEXT: andl $16, %esi
7748; SSE-NEXT: orl %edx, %esi
7749; SSE-NEXT: movl %eax, %edx
7750; SSE-NEXT: shrl $9, %edx
7751; SSE-NEXT: andl $8, %edx
7752; SSE-NEXT: orl %esi, %edx
7753; SSE-NEXT: movl %eax, %esi
7754; SSE-NEXT: shrl $11, %esi
7755; SSE-NEXT: andl $4, %esi
7756; SSE-NEXT: orl %edx, %esi
7757; SSE-NEXT: movl %eax, %edx
7758; SSE-NEXT: shrl $13, %edx
7759; SSE-NEXT: andl $2, %edx
7760; SSE-NEXT: orl %esi, %edx
7761; SSE-NEXT: shrl $15, %eax
7762; SSE-NEXT: orl %edx, %eax
7763; SSE-NEXT: orl %ecx, %eax
7764; SSE-NEXT: movd %eax, %xmm4
7765; SSE-NEXT: pextrw $1, %xmm0, %eax
7766; SSE-NEXT: movl %eax, %ecx
7767; SSE-NEXT: shll $15, %ecx
7768; SSE-NEXT: movl %eax, %edx
7769; SSE-NEXT: andl $2, %edx
7770; SSE-NEXT: shll $13, %edx
7771; SSE-NEXT: orl %ecx, %edx
7772; SSE-NEXT: movl %eax, %ecx
7773; SSE-NEXT: andl $4, %ecx
7774; SSE-NEXT: shll $11, %ecx
7775; SSE-NEXT: orl %edx, %ecx
7776; SSE-NEXT: movl %eax, %edx
7777; SSE-NEXT: andl $8, %edx
7778; SSE-NEXT: shll $9, %edx
7779; SSE-NEXT: orl %ecx, %edx
7780; SSE-NEXT: movl %eax, %esi
7781; SSE-NEXT: andl $16, %esi
7782; SSE-NEXT: shll $7, %esi
7783; SSE-NEXT: orl %edx, %esi
7784; SSE-NEXT: movl %eax, %ecx
7785; SSE-NEXT: andl $32, %ecx
7786; SSE-NEXT: shll $5, %ecx
7787; SSE-NEXT: orl %esi, %ecx
7788; SSE-NEXT: movl %eax, %edx
7789; SSE-NEXT: andl $64, %edx
7790; SSE-NEXT: shll $3, %edx
7791; SSE-NEXT: leal (%rax,%rax), %esi
7792; SSE-NEXT: andl $256, %esi # imm = 0x100
7793; SSE-NEXT: orl %edx, %esi
7794; SSE-NEXT: movl %eax, %edx
7795; SSE-NEXT: shrl %edx
7796; SSE-NEXT: andl $128, %edx
7797; SSE-NEXT: orl %esi, %edx
7798; SSE-NEXT: movl %eax, %esi
7799; SSE-NEXT: shrl $3, %esi
7800; SSE-NEXT: andl $64, %esi
7801; SSE-NEXT: orl %edx, %esi
7802; SSE-NEXT: movl %eax, %edx
7803; SSE-NEXT: shrl $5, %edx
7804; SSE-NEXT: andl $32, %edx
7805; SSE-NEXT: orl %esi, %edx
7806; SSE-NEXT: movl %eax, %esi
7807; SSE-NEXT: shrl $7, %esi
7808; SSE-NEXT: andl $16, %esi
7809; SSE-NEXT: orl %edx, %esi
7810; SSE-NEXT: movl %eax, %edx
7811; SSE-NEXT: shrl $9, %edx
7812; SSE-NEXT: andl $8, %edx
7813; SSE-NEXT: orl %esi, %edx
7814; SSE-NEXT: movl %eax, %esi
7815; SSE-NEXT: shrl $11, %esi
7816; SSE-NEXT: andl $4, %esi
7817; SSE-NEXT: orl %edx, %esi
7818; SSE-NEXT: movl %eax, %edx
7819; SSE-NEXT: shrl $13, %edx
7820; SSE-NEXT: andl $2, %edx
7821; SSE-NEXT: orl %esi, %edx
7822; SSE-NEXT: shrl $15, %eax
7823; SSE-NEXT: orl %edx, %eax
7824; SSE-NEXT: orl %ecx, %eax
7825; SSE-NEXT: movd %eax, %xmm2
7826; SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
7827; SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
7828; SSE-NEXT: pextrw $6, %xmm0, %eax
7829; SSE-NEXT: movl %eax, %ecx
7830; SSE-NEXT: shll $15, %ecx
7831; SSE-NEXT: movl %eax, %edx
7832; SSE-NEXT: andl $2, %edx
7833; SSE-NEXT: shll $13, %edx
7834; SSE-NEXT: orl %ecx, %edx
7835; SSE-NEXT: movl %eax, %ecx
7836; SSE-NEXT: andl $4, %ecx
7837; SSE-NEXT: shll $11, %ecx
7838; SSE-NEXT: orl %edx, %ecx
7839; SSE-NEXT: movl %eax, %edx
7840; SSE-NEXT: andl $8, %edx
7841; SSE-NEXT: shll $9, %edx
7842; SSE-NEXT: orl %ecx, %edx
7843; SSE-NEXT: movl %eax, %esi
7844; SSE-NEXT: andl $16, %esi
7845; SSE-NEXT: shll $7, %esi
7846; SSE-NEXT: orl %edx, %esi
7847; SSE-NEXT: movl %eax, %ecx
7848; SSE-NEXT: andl $32, %ecx
7849; SSE-NEXT: shll $5, %ecx
7850; SSE-NEXT: orl %esi, %ecx
7851; SSE-NEXT: movl %eax, %edx
7852; SSE-NEXT: andl $64, %edx
7853; SSE-NEXT: shll $3, %edx
7854; SSE-NEXT: leal (%rax,%rax), %esi
7855; SSE-NEXT: andl $256, %esi # imm = 0x100
7856; SSE-NEXT: orl %edx, %esi
7857; SSE-NEXT: movl %eax, %edx
7858; SSE-NEXT: shrl %edx
7859; SSE-NEXT: andl $128, %edx
7860; SSE-NEXT: orl %esi, %edx
7861; SSE-NEXT: movl %eax, %esi
7862; SSE-NEXT: shrl $3, %esi
7863; SSE-NEXT: andl $64, %esi
7864; SSE-NEXT: orl %edx, %esi
7865; SSE-NEXT: movl %eax, %edx
7866; SSE-NEXT: shrl $5, %edx
7867; SSE-NEXT: andl $32, %edx
7868; SSE-NEXT: orl %esi, %edx
7869; SSE-NEXT: movl %eax, %esi
7870; SSE-NEXT: shrl $7, %esi
7871; SSE-NEXT: andl $16, %esi
7872; SSE-NEXT: orl %edx, %esi
7873; SSE-NEXT: movl %eax, %edx
7874; SSE-NEXT: shrl $9, %edx
7875; SSE-NEXT: andl $8, %edx
7876; SSE-NEXT: orl %esi, %edx
7877; SSE-NEXT: movl %eax, %esi
7878; SSE-NEXT: shrl $11, %esi
7879; SSE-NEXT: andl $4, %esi
7880; SSE-NEXT: orl %edx, %esi
7881; SSE-NEXT: movl %eax, %edx
7882; SSE-NEXT: shrl $13, %edx
7883; SSE-NEXT: andl $2, %edx
7884; SSE-NEXT: orl %esi, %edx
7885; SSE-NEXT: shrl $15, %eax
7886; SSE-NEXT: orl %edx, %eax
7887; SSE-NEXT: orl %ecx, %eax
7888; SSE-NEXT: movd %eax, %xmm4
7889; SSE-NEXT: pextrw $2, %xmm0, %eax
7890; SSE-NEXT: movl %eax, %ecx
7891; SSE-NEXT: shll $15, %ecx
7892; SSE-NEXT: movl %eax, %edx
7893; SSE-NEXT: andl $2, %edx
7894; SSE-NEXT: shll $13, %edx
7895; SSE-NEXT: orl %ecx, %edx
7896; SSE-NEXT: movl %eax, %ecx
7897; SSE-NEXT: andl $4, %ecx
7898; SSE-NEXT: shll $11, %ecx
7899; SSE-NEXT: orl %edx, %ecx
7900; SSE-NEXT: movl %eax, %edx
7901; SSE-NEXT: andl $8, %edx
7902; SSE-NEXT: shll $9, %edx
7903; SSE-NEXT: orl %ecx, %edx
7904; SSE-NEXT: movl %eax, %esi
7905; SSE-NEXT: andl $16, %esi
7906; SSE-NEXT: shll $7, %esi
7907; SSE-NEXT: orl %edx, %esi
7908; SSE-NEXT: movl %eax, %ecx
7909; SSE-NEXT: andl $32, %ecx
7910; SSE-NEXT: shll $5, %ecx
7911; SSE-NEXT: orl %esi, %ecx
7912; SSE-NEXT: movl %eax, %edx
7913; SSE-NEXT: andl $64, %edx
7914; SSE-NEXT: shll $3, %edx
7915; SSE-NEXT: leal (%rax,%rax), %esi
7916; SSE-NEXT: andl $256, %esi # imm = 0x100
7917; SSE-NEXT: orl %edx, %esi
7918; SSE-NEXT: movl %eax, %edx
7919; SSE-NEXT: shrl %edx
7920; SSE-NEXT: andl $128, %edx
7921; SSE-NEXT: orl %esi, %edx
7922; SSE-NEXT: movl %eax, %esi
7923; SSE-NEXT: shrl $3, %esi
7924; SSE-NEXT: andl $64, %esi
7925; SSE-NEXT: orl %edx, %esi
7926; SSE-NEXT: movl %eax, %edx
7927; SSE-NEXT: shrl $5, %edx
7928; SSE-NEXT: andl $32, %edx
7929; SSE-NEXT: orl %esi, %edx
7930; SSE-NEXT: movl %eax, %esi
7931; SSE-NEXT: shrl $7, %esi
7932; SSE-NEXT: andl $16, %esi
7933; SSE-NEXT: orl %edx, %esi
7934; SSE-NEXT: movl %eax, %edx
7935; SSE-NEXT: shrl $9, %edx
7936; SSE-NEXT: andl $8, %edx
7937; SSE-NEXT: orl %esi, %edx
7938; SSE-NEXT: movl %eax, %esi
7939; SSE-NEXT: shrl $11, %esi
7940; SSE-NEXT: andl $4, %esi
7941; SSE-NEXT: orl %edx, %esi
7942; SSE-NEXT: movl %eax, %edx
7943; SSE-NEXT: shrl $13, %edx
7944; SSE-NEXT: andl $2, %edx
7945; SSE-NEXT: orl %esi, %edx
7946; SSE-NEXT: shrl $15, %eax
7947; SSE-NEXT: orl %edx, %eax
7948; SSE-NEXT: orl %ecx, %eax
7949; SSE-NEXT: movd %eax, %xmm3
7950; SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
7951; SSE-NEXT: pextrw $4, %xmm0, %eax
7952; SSE-NEXT: movl %eax, %ecx
7953; SSE-NEXT: shll $15, %ecx
7954; SSE-NEXT: movl %eax, %edx
7955; SSE-NEXT: andl $2, %edx
7956; SSE-NEXT: shll $13, %edx
7957; SSE-NEXT: orl %ecx, %edx
7958; SSE-NEXT: movl %eax, %ecx
7959; SSE-NEXT: andl $4, %ecx
7960; SSE-NEXT: shll $11, %ecx
7961; SSE-NEXT: orl %edx, %ecx
7962; SSE-NEXT: movl %eax, %edx
7963; SSE-NEXT: andl $8, %edx
7964; SSE-NEXT: shll $9, %edx
7965; SSE-NEXT: orl %ecx, %edx
7966; SSE-NEXT: movl %eax, %esi
7967; SSE-NEXT: andl $16, %esi
7968; SSE-NEXT: shll $7, %esi
7969; SSE-NEXT: orl %edx, %esi
7970; SSE-NEXT: movl %eax, %ecx
7971; SSE-NEXT: andl $32, %ecx
7972; SSE-NEXT: shll $5, %ecx
7973; SSE-NEXT: orl %esi, %ecx
7974; SSE-NEXT: movl %eax, %edx
7975; SSE-NEXT: andl $64, %edx
7976; SSE-NEXT: shll $3, %edx
7977; SSE-NEXT: leal (%rax,%rax), %esi
7978; SSE-NEXT: andl $256, %esi # imm = 0x100
7979; SSE-NEXT: orl %edx, %esi
7980; SSE-NEXT: movl %eax, %edx
7981; SSE-NEXT: shrl %edx
7982; SSE-NEXT: andl $128, %edx
7983; SSE-NEXT: orl %esi, %edx
7984; SSE-NEXT: movl %eax, %esi
7985; SSE-NEXT: shrl $3, %esi
7986; SSE-NEXT: andl $64, %esi
7987; SSE-NEXT: orl %edx, %esi
7988; SSE-NEXT: movl %eax, %edx
7989; SSE-NEXT: shrl $5, %edx
7990; SSE-NEXT: andl $32, %edx
7991; SSE-NEXT: orl %esi, %edx
7992; SSE-NEXT: movl %eax, %esi
7993; SSE-NEXT: shrl $7, %esi
7994; SSE-NEXT: andl $16, %esi
7995; SSE-NEXT: orl %edx, %esi
7996; SSE-NEXT: movl %eax, %edx
7997; SSE-NEXT: shrl $9, %edx
7998; SSE-NEXT: andl $8, %edx
7999; SSE-NEXT: orl %esi, %edx
8000; SSE-NEXT: movl %eax, %esi
8001; SSE-NEXT: shrl $11, %esi
8002; SSE-NEXT: andl $4, %esi
8003; SSE-NEXT: orl %edx, %esi
8004; SSE-NEXT: movl %eax, %edx
8005; SSE-NEXT: shrl $13, %edx
8006; SSE-NEXT: andl $2, %edx
8007; SSE-NEXT: orl %esi, %edx
8008; SSE-NEXT: shrl $15, %eax
8009; SSE-NEXT: orl %edx, %eax
8010; SSE-NEXT: orl %ecx, %eax
8011; SSE-NEXT: movd %eax, %xmm4
8012; SSE-NEXT: movd %xmm0, %eax
8013; SSE-NEXT: movl %eax, %ecx
8014; SSE-NEXT: shll $15, %ecx
8015; SSE-NEXT: movl %eax, %edx
8016; SSE-NEXT: andl $2, %edx
8017; SSE-NEXT: shll $13, %edx
8018; SSE-NEXT: orl %ecx, %edx
8019; SSE-NEXT: movl %eax, %ecx
8020; SSE-NEXT: andl $4, %ecx
8021; SSE-NEXT: shll $11, %ecx
8022; SSE-NEXT: orl %edx, %ecx
8023; SSE-NEXT: movl %eax, %edx
8024; SSE-NEXT: andl $8, %edx
8025; SSE-NEXT: shll $9, %edx
8026; SSE-NEXT: orl %ecx, %edx
8027; SSE-NEXT: movl %eax, %esi
8028; SSE-NEXT: andl $16, %esi
8029; SSE-NEXT: shll $7, %esi
8030; SSE-NEXT: orl %edx, %esi
8031; SSE-NEXT: movl %eax, %ecx
8032; SSE-NEXT: andl $32, %ecx
8033; SSE-NEXT: shll $5, %ecx
8034; SSE-NEXT: orl %esi, %ecx
8035; SSE-NEXT: movl %eax, %edx
8036; SSE-NEXT: andl $64, %edx
8037; SSE-NEXT: shll $3, %edx
8038; SSE-NEXT: leal (%rax,%rax), %esi
8039; SSE-NEXT: andl $256, %esi # imm = 0x100
8040; SSE-NEXT: orl %edx, %esi
8041; SSE-NEXT: movl %eax, %edx
8042; SSE-NEXT: shrl %edx
8043; SSE-NEXT: andl $128, %edx
8044; SSE-NEXT: orl %esi, %edx
8045; SSE-NEXT: movl %eax, %esi
8046; SSE-NEXT: shrl $3, %esi
8047; SSE-NEXT: andl $64, %esi
8048; SSE-NEXT: orl %edx, %esi
8049; SSE-NEXT: movl %eax, %edx
8050; SSE-NEXT: shrl $5, %edx
8051; SSE-NEXT: andl $32, %edx
8052; SSE-NEXT: orl %esi, %edx
8053; SSE-NEXT: movl %eax, %esi
8054; SSE-NEXT: shrl $7, %esi
8055; SSE-NEXT: andl $16, %esi
8056; SSE-NEXT: orl %edx, %esi
8057; SSE-NEXT: movl %eax, %edx
8058; SSE-NEXT: shrl $9, %edx
8059; SSE-NEXT: andl $8, %edx
8060; SSE-NEXT: orl %esi, %edx
8061; SSE-NEXT: movl %eax, %esi
8062; SSE-NEXT: shrl $11, %esi
8063; SSE-NEXT: andl $4, %esi
8064; SSE-NEXT: orl %edx, %esi
8065; SSE-NEXT: movl %eax, %edx
8066; SSE-NEXT: shrl $13, %edx
8067; SSE-NEXT: andl $2, %edx
8068; SSE-NEXT: orl %esi, %edx
8069; SSE-NEXT: andl $32768, %eax # imm = 0x8000
8070; SSE-NEXT: shrl $15, %eax
8071; SSE-NEXT: orl %edx, %eax
8072; SSE-NEXT: orl %ecx, %eax
8073; SSE-NEXT: movd %eax, %xmm0
8074; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
8075; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
8076; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
8077; SSE-NEXT: pextrw $7, %xmm1, %eax
8078; SSE-NEXT: movl %eax, %ecx
8079; SSE-NEXT: shll $15, %ecx
8080; SSE-NEXT: movl %eax, %edx
8081; SSE-NEXT: andl $2, %edx
8082; SSE-NEXT: shll $13, %edx
8083; SSE-NEXT: orl %ecx, %edx
8084; SSE-NEXT: movl %eax, %ecx
8085; SSE-NEXT: andl $4, %ecx
8086; SSE-NEXT: shll $11, %ecx
8087; SSE-NEXT: orl %edx, %ecx
8088; SSE-NEXT: movl %eax, %edx
8089; SSE-NEXT: andl $8, %edx
8090; SSE-NEXT: shll $9, %edx
8091; SSE-NEXT: orl %ecx, %edx
8092; SSE-NEXT: movl %eax, %esi
8093; SSE-NEXT: andl $16, %esi
8094; SSE-NEXT: shll $7, %esi
8095; SSE-NEXT: orl %edx, %esi
8096; SSE-NEXT: movl %eax, %ecx
8097; SSE-NEXT: andl $32, %ecx
8098; SSE-NEXT: shll $5, %ecx
8099; SSE-NEXT: orl %esi, %ecx
8100; SSE-NEXT: movl %eax, %edx
8101; SSE-NEXT: andl $64, %edx
8102; SSE-NEXT: shll $3, %edx
8103; SSE-NEXT: leal (%rax,%rax), %esi
8104; SSE-NEXT: andl $256, %esi # imm = 0x100
8105; SSE-NEXT: orl %edx, %esi
8106; SSE-NEXT: movl %eax, %edx
8107; SSE-NEXT: shrl %edx
8108; SSE-NEXT: andl $128, %edx
8109; SSE-NEXT: orl %esi, %edx
8110; SSE-NEXT: movl %eax, %esi
8111; SSE-NEXT: shrl $3, %esi
8112; SSE-NEXT: andl $64, %esi
8113; SSE-NEXT: orl %edx, %esi
8114; SSE-NEXT: movl %eax, %edx
8115; SSE-NEXT: shrl $5, %edx
8116; SSE-NEXT: andl $32, %edx
8117; SSE-NEXT: orl %esi, %edx
8118; SSE-NEXT: movl %eax, %esi
8119; SSE-NEXT: shrl $7, %esi
8120; SSE-NEXT: andl $16, %esi
8121; SSE-NEXT: orl %edx, %esi
8122; SSE-NEXT: movl %eax, %edx
8123; SSE-NEXT: shrl $9, %edx
8124; SSE-NEXT: andl $8, %edx
8125; SSE-NEXT: orl %esi, %edx
8126; SSE-NEXT: movl %eax, %esi
8127; SSE-NEXT: shrl $11, %esi
8128; SSE-NEXT: andl $4, %esi
8129; SSE-NEXT: orl %edx, %esi
8130; SSE-NEXT: movl %eax, %edx
8131; SSE-NEXT: shrl $13, %edx
8132; SSE-NEXT: andl $2, %edx
8133; SSE-NEXT: orl %esi, %edx
8134; SSE-NEXT: shrl $15, %eax
8135; SSE-NEXT: orl %edx, %eax
8136; SSE-NEXT: orl %ecx, %eax
8137; SSE-NEXT: movd %eax, %xmm2
8138; SSE-NEXT: pextrw $3, %xmm1, %eax
8139; SSE-NEXT: movl %eax, %ecx
8140; SSE-NEXT: shll $15, %ecx
8141; SSE-NEXT: movl %eax, %edx
8142; SSE-NEXT: andl $2, %edx
8143; SSE-NEXT: shll $13, %edx
8144; SSE-NEXT: orl %ecx, %edx
8145; SSE-NEXT: movl %eax, %ecx
8146; SSE-NEXT: andl $4, %ecx
8147; SSE-NEXT: shll $11, %ecx
8148; SSE-NEXT: orl %edx, %ecx
8149; SSE-NEXT: movl %eax, %edx
8150; SSE-NEXT: andl $8, %edx
8151; SSE-NEXT: shll $9, %edx
8152; SSE-NEXT: orl %ecx, %edx
8153; SSE-NEXT: movl %eax, %esi
8154; SSE-NEXT: andl $16, %esi
8155; SSE-NEXT: shll $7, %esi
8156; SSE-NEXT: orl %edx, %esi
8157; SSE-NEXT: movl %eax, %ecx
8158; SSE-NEXT: andl $32, %ecx
8159; SSE-NEXT: shll $5, %ecx
8160; SSE-NEXT: orl %esi, %ecx
8161; SSE-NEXT: movl %eax, %edx
8162; SSE-NEXT: andl $64, %edx
8163; SSE-NEXT: shll $3, %edx
8164; SSE-NEXT: leal (%rax,%rax), %esi
8165; SSE-NEXT: andl $256, %esi # imm = 0x100
8166; SSE-NEXT: orl %edx, %esi
8167; SSE-NEXT: movl %eax, %edx
8168; SSE-NEXT: shrl %edx
8169; SSE-NEXT: andl $128, %edx
8170; SSE-NEXT: orl %esi, %edx
8171; SSE-NEXT: movl %eax, %esi
8172; SSE-NEXT: shrl $3, %esi
8173; SSE-NEXT: andl $64, %esi
8174; SSE-NEXT: orl %edx, %esi
8175; SSE-NEXT: movl %eax, %edx
8176; SSE-NEXT: shrl $5, %edx
8177; SSE-NEXT: andl $32, %edx
8178; SSE-NEXT: orl %esi, %edx
8179; SSE-NEXT: movl %eax, %esi
8180; SSE-NEXT: shrl $7, %esi
8181; SSE-NEXT: andl $16, %esi
8182; SSE-NEXT: orl %edx, %esi
8183; SSE-NEXT: movl %eax, %edx
8184; SSE-NEXT: shrl $9, %edx
8185; SSE-NEXT: andl $8, %edx
8186; SSE-NEXT: orl %esi, %edx
8187; SSE-NEXT: movl %eax, %esi
8188; SSE-NEXT: shrl $11, %esi
8189; SSE-NEXT: andl $4, %esi
8190; SSE-NEXT: orl %edx, %esi
8191; SSE-NEXT: movl %eax, %edx
8192; SSE-NEXT: shrl $13, %edx
8193; SSE-NEXT: andl $2, %edx
8194; SSE-NEXT: orl %esi, %edx
8195; SSE-NEXT: shrl $15, %eax
8196; SSE-NEXT: orl %edx, %eax
8197; SSE-NEXT: orl %ecx, %eax
8198; SSE-NEXT: movd %eax, %xmm3
8199; SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
8200; SSE-NEXT: pextrw $5, %xmm1, %eax
8201; SSE-NEXT: movl %eax, %ecx
8202; SSE-NEXT: shll $15, %ecx
8203; SSE-NEXT: movl %eax, %edx
8204; SSE-NEXT: andl $2, %edx
8205; SSE-NEXT: shll $13, %edx
8206; SSE-NEXT: orl %ecx, %edx
8207; SSE-NEXT: movl %eax, %ecx
8208; SSE-NEXT: andl $4, %ecx
8209; SSE-NEXT: shll $11, %ecx
8210; SSE-NEXT: orl %edx, %ecx
8211; SSE-NEXT: movl %eax, %edx
8212; SSE-NEXT: andl $8, %edx
8213; SSE-NEXT: shll $9, %edx
8214; SSE-NEXT: orl %ecx, %edx
8215; SSE-NEXT: movl %eax, %esi
8216; SSE-NEXT: andl $16, %esi
8217; SSE-NEXT: shll $7, %esi
8218; SSE-NEXT: orl %edx, %esi
8219; SSE-NEXT: movl %eax, %ecx
8220; SSE-NEXT: andl $32, %ecx
8221; SSE-NEXT: shll $5, %ecx
8222; SSE-NEXT: orl %esi, %ecx
8223; SSE-NEXT: movl %eax, %edx
8224; SSE-NEXT: andl $64, %edx
8225; SSE-NEXT: shll $3, %edx
8226; SSE-NEXT: leal (%rax,%rax), %esi
8227; SSE-NEXT: andl $256, %esi # imm = 0x100
8228; SSE-NEXT: orl %edx, %esi
8229; SSE-NEXT: movl %eax, %edx
8230; SSE-NEXT: shrl %edx
8231; SSE-NEXT: andl $128, %edx
8232; SSE-NEXT: orl %esi, %edx
8233; SSE-NEXT: movl %eax, %esi
8234; SSE-NEXT: shrl $3, %esi
8235; SSE-NEXT: andl $64, %esi
8236; SSE-NEXT: orl %edx, %esi
8237; SSE-NEXT: movl %eax, %edx
8238; SSE-NEXT: shrl $5, %edx
8239; SSE-NEXT: andl $32, %edx
8240; SSE-NEXT: orl %esi, %edx
8241; SSE-NEXT: movl %eax, %esi
8242; SSE-NEXT: shrl $7, %esi
8243; SSE-NEXT: andl $16, %esi
8244; SSE-NEXT: orl %edx, %esi
8245; SSE-NEXT: movl %eax, %edx
8246; SSE-NEXT: shrl $9, %edx
8247; SSE-NEXT: andl $8, %edx
8248; SSE-NEXT: orl %esi, %edx
8249; SSE-NEXT: movl %eax, %esi
8250; SSE-NEXT: shrl $11, %esi
8251; SSE-NEXT: andl $4, %esi
8252; SSE-NEXT: orl %edx, %esi
8253; SSE-NEXT: movl %eax, %edx
8254; SSE-NEXT: shrl $13, %edx
8255; SSE-NEXT: andl $2, %edx
8256; SSE-NEXT: orl %esi, %edx
8257; SSE-NEXT: shrl $15, %eax
8258; SSE-NEXT: orl %edx, %eax
8259; SSE-NEXT: orl %ecx, %eax
8260; SSE-NEXT: movd %eax, %xmm4
8261; SSE-NEXT: pextrw $1, %xmm1, %eax
8262; SSE-NEXT: movl %eax, %ecx
8263; SSE-NEXT: shll $15, %ecx
8264; SSE-NEXT: movl %eax, %edx
8265; SSE-NEXT: andl $2, %edx
8266; SSE-NEXT: shll $13, %edx
8267; SSE-NEXT: orl %ecx, %edx
8268; SSE-NEXT: movl %eax, %ecx
8269; SSE-NEXT: andl $4, %ecx
8270; SSE-NEXT: shll $11, %ecx
8271; SSE-NEXT: orl %edx, %ecx
8272; SSE-NEXT: movl %eax, %edx
8273; SSE-NEXT: andl $8, %edx
8274; SSE-NEXT: shll $9, %edx
8275; SSE-NEXT: orl %ecx, %edx
8276; SSE-NEXT: movl %eax, %esi
8277; SSE-NEXT: andl $16, %esi
8278; SSE-NEXT: shll $7, %esi
8279; SSE-NEXT: orl %edx, %esi
8280; SSE-NEXT: movl %eax, %ecx
8281; SSE-NEXT: andl $32, %ecx
8282; SSE-NEXT: shll $5, %ecx
8283; SSE-NEXT: orl %esi, %ecx
8284; SSE-NEXT: movl %eax, %edx
8285; SSE-NEXT: andl $64, %edx
8286; SSE-NEXT: shll $3, %edx
8287; SSE-NEXT: leal (%rax,%rax), %esi
8288; SSE-NEXT: andl $256, %esi # imm = 0x100
8289; SSE-NEXT: orl %edx, %esi
8290; SSE-NEXT: movl %eax, %edx
8291; SSE-NEXT: shrl %edx
8292; SSE-NEXT: andl $128, %edx
8293; SSE-NEXT: orl %esi, %edx
8294; SSE-NEXT: movl %eax, %esi
8295; SSE-NEXT: shrl $3, %esi
8296; SSE-NEXT: andl $64, %esi
8297; SSE-NEXT: orl %edx, %esi
8298; SSE-NEXT: movl %eax, %edx
8299; SSE-NEXT: shrl $5, %edx
8300; SSE-NEXT: andl $32, %edx
8301; SSE-NEXT: orl %esi, %edx
8302; SSE-NEXT: movl %eax, %esi
8303; SSE-NEXT: shrl $7, %esi
8304; SSE-NEXT: andl $16, %esi
8305; SSE-NEXT: orl %edx, %esi
8306; SSE-NEXT: movl %eax, %edx
8307; SSE-NEXT: shrl $9, %edx
8308; SSE-NEXT: andl $8, %edx
8309; SSE-NEXT: orl %esi, %edx
8310; SSE-NEXT: movl %eax, %esi
8311; SSE-NEXT: shrl $11, %esi
8312; SSE-NEXT: andl $4, %esi
8313; SSE-NEXT: orl %edx, %esi
8314; SSE-NEXT: movl %eax, %edx
8315; SSE-NEXT: shrl $13, %edx
8316; SSE-NEXT: andl $2, %edx
8317; SSE-NEXT: orl %esi, %edx
8318; SSE-NEXT: shrl $15, %eax
8319; SSE-NEXT: orl %edx, %eax
8320; SSE-NEXT: orl %ecx, %eax
8321; SSE-NEXT: movd %eax, %xmm2
8322; SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
8323; SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
8324; SSE-NEXT: pextrw $6, %xmm1, %eax
8325; SSE-NEXT: movl %eax, %ecx
8326; SSE-NEXT: shll $15, %ecx
8327; SSE-NEXT: movl %eax, %edx
8328; SSE-NEXT: andl $2, %edx
8329; SSE-NEXT: shll $13, %edx
8330; SSE-NEXT: orl %ecx, %edx
8331; SSE-NEXT: movl %eax, %ecx
8332; SSE-NEXT: andl $4, %ecx
8333; SSE-NEXT: shll $11, %ecx
8334; SSE-NEXT: orl %edx, %ecx
8335; SSE-NEXT: movl %eax, %edx
8336; SSE-NEXT: andl $8, %edx
8337; SSE-NEXT: shll $9, %edx
8338; SSE-NEXT: orl %ecx, %edx
8339; SSE-NEXT: movl %eax, %esi
8340; SSE-NEXT: andl $16, %esi
8341; SSE-NEXT: shll $7, %esi
8342; SSE-NEXT: orl %edx, %esi
8343; SSE-NEXT: movl %eax, %ecx
8344; SSE-NEXT: andl $32, %ecx
8345; SSE-NEXT: shll $5, %ecx
8346; SSE-NEXT: orl %esi, %ecx
8347; SSE-NEXT: movl %eax, %edx
8348; SSE-NEXT: andl $64, %edx
8349; SSE-NEXT: shll $3, %edx
8350; SSE-NEXT: leal (%rax,%rax), %esi
8351; SSE-NEXT: andl $256, %esi # imm = 0x100
8352; SSE-NEXT: orl %edx, %esi
8353; SSE-NEXT: movl %eax, %edx
8354; SSE-NEXT: shrl %edx
8355; SSE-NEXT: andl $128, %edx
8356; SSE-NEXT: orl %esi, %edx
8357; SSE-NEXT: movl %eax, %esi
8358; SSE-NEXT: shrl $3, %esi
8359; SSE-NEXT: andl $64, %esi
8360; SSE-NEXT: orl %edx, %esi
8361; SSE-NEXT: movl %eax, %edx
8362; SSE-NEXT: shrl $5, %edx
8363; SSE-NEXT: andl $32, %edx
8364; SSE-NEXT: orl %esi, %edx
8365; SSE-NEXT: movl %eax, %esi
8366; SSE-NEXT: shrl $7, %esi
8367; SSE-NEXT: andl $16, %esi
8368; SSE-NEXT: orl %edx, %esi
8369; SSE-NEXT: movl %eax, %edx
8370; SSE-NEXT: shrl $9, %edx
8371; SSE-NEXT: andl $8, %edx
8372; SSE-NEXT: orl %esi, %edx
8373; SSE-NEXT: movl %eax, %esi
8374; SSE-NEXT: shrl $11, %esi
8375; SSE-NEXT: andl $4, %esi
8376; SSE-NEXT: orl %edx, %esi
8377; SSE-NEXT: movl %eax, %edx
8378; SSE-NEXT: shrl $13, %edx
8379; SSE-NEXT: andl $2, %edx
8380; SSE-NEXT: orl %esi, %edx
8381; SSE-NEXT: shrl $15, %eax
8382; SSE-NEXT: orl %edx, %eax
8383; SSE-NEXT: orl %ecx, %eax
8384; SSE-NEXT: movd %eax, %xmm4
8385; SSE-NEXT: pextrw $2, %xmm1, %eax
8386; SSE-NEXT: movl %eax, %ecx
8387; SSE-NEXT: shll $15, %ecx
8388; SSE-NEXT: movl %eax, %edx
8389; SSE-NEXT: andl $2, %edx
8390; SSE-NEXT: shll $13, %edx
8391; SSE-NEXT: orl %ecx, %edx
8392; SSE-NEXT: movl %eax, %ecx
8393; SSE-NEXT: andl $4, %ecx
8394; SSE-NEXT: shll $11, %ecx
8395; SSE-NEXT: orl %edx, %ecx
8396; SSE-NEXT: movl %eax, %edx
8397; SSE-NEXT: andl $8, %edx
8398; SSE-NEXT: shll $9, %edx
8399; SSE-NEXT: orl %ecx, %edx
8400; SSE-NEXT: movl %eax, %esi
8401; SSE-NEXT: andl $16, %esi
8402; SSE-NEXT: shll $7, %esi
8403; SSE-NEXT: orl %edx, %esi
8404; SSE-NEXT: movl %eax, %ecx
8405; SSE-NEXT: andl $32, %ecx
8406; SSE-NEXT: shll $5, %ecx
8407; SSE-NEXT: orl %esi, %ecx
8408; SSE-NEXT: movl %eax, %edx
8409; SSE-NEXT: andl $64, %edx
8410; SSE-NEXT: shll $3, %edx
8411; SSE-NEXT: leal (%rax,%rax), %esi
8412; SSE-NEXT: andl $256, %esi # imm = 0x100
8413; SSE-NEXT: orl %edx, %esi
8414; SSE-NEXT: movl %eax, %edx
8415; SSE-NEXT: shrl %edx
8416; SSE-NEXT: andl $128, %edx
8417; SSE-NEXT: orl %esi, %edx
8418; SSE-NEXT: movl %eax, %esi
8419; SSE-NEXT: shrl $3, %esi
8420; SSE-NEXT: andl $64, %esi
8421; SSE-NEXT: orl %edx, %esi
8422; SSE-NEXT: movl %eax, %edx
8423; SSE-NEXT: shrl $5, %edx
8424; SSE-NEXT: andl $32, %edx
8425; SSE-NEXT: orl %esi, %edx
8426; SSE-NEXT: movl %eax, %esi
8427; SSE-NEXT: shrl $7, %esi
8428; SSE-NEXT: andl $16, %esi
8429; SSE-NEXT: orl %edx, %esi
8430; SSE-NEXT: movl %eax, %edx
8431; SSE-NEXT: shrl $9, %edx
8432; SSE-NEXT: andl $8, %edx
8433; SSE-NEXT: orl %esi, %edx
8434; SSE-NEXT: movl %eax, %esi
8435; SSE-NEXT: shrl $11, %esi
8436; SSE-NEXT: andl $4, %esi
8437; SSE-NEXT: orl %edx, %esi
8438; SSE-NEXT: movl %eax, %edx
8439; SSE-NEXT: shrl $13, %edx
8440; SSE-NEXT: andl $2, %edx
8441; SSE-NEXT: orl %esi, %edx
8442; SSE-NEXT: shrl $15, %eax
8443; SSE-NEXT: orl %edx, %eax
8444; SSE-NEXT: orl %ecx, %eax
8445; SSE-NEXT: movd %eax, %xmm3
8446; SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
8447; SSE-NEXT: pextrw $4, %xmm1, %eax
8448; SSE-NEXT: movl %eax, %ecx
8449; SSE-NEXT: shll $15, %ecx
8450; SSE-NEXT: movl %eax, %edx
8451; SSE-NEXT: andl $2, %edx
8452; SSE-NEXT: shll $13, %edx
8453; SSE-NEXT: orl %ecx, %edx
8454; SSE-NEXT: movl %eax, %ecx
8455; SSE-NEXT: andl $4, %ecx
8456; SSE-NEXT: shll $11, %ecx
8457; SSE-NEXT: orl %edx, %ecx
8458; SSE-NEXT: movl %eax, %edx
8459; SSE-NEXT: andl $8, %edx
8460; SSE-NEXT: shll $9, %edx
8461; SSE-NEXT: orl %ecx, %edx
8462; SSE-NEXT: movl %eax, %esi
8463; SSE-NEXT: andl $16, %esi
8464; SSE-NEXT: shll $7, %esi
8465; SSE-NEXT: orl %edx, %esi
8466; SSE-NEXT: movl %eax, %ecx
8467; SSE-NEXT: andl $32, %ecx
8468; SSE-NEXT: shll $5, %ecx
8469; SSE-NEXT: orl %esi, %ecx
8470; SSE-NEXT: movl %eax, %edx
8471; SSE-NEXT: andl $64, %edx
8472; SSE-NEXT: shll $3, %edx
8473; SSE-NEXT: leal (%rax,%rax), %esi
8474; SSE-NEXT: andl $256, %esi # imm = 0x100
8475; SSE-NEXT: orl %edx, %esi
8476; SSE-NEXT: movl %eax, %edx
8477; SSE-NEXT: shrl %edx
8478; SSE-NEXT: andl $128, %edx
8479; SSE-NEXT: orl %esi, %edx
8480; SSE-NEXT: movl %eax, %esi
8481; SSE-NEXT: shrl $3, %esi
8482; SSE-NEXT: andl $64, %esi
8483; SSE-NEXT: orl %edx, %esi
8484; SSE-NEXT: movl %eax, %edx
8485; SSE-NEXT: shrl $5, %edx
8486; SSE-NEXT: andl $32, %edx
8487; SSE-NEXT: orl %esi, %edx
8488; SSE-NEXT: movl %eax, %esi
8489; SSE-NEXT: shrl $7, %esi
8490; SSE-NEXT: andl $16, %esi
8491; SSE-NEXT: orl %edx, %esi
8492; SSE-NEXT: movl %eax, %edx
8493; SSE-NEXT: shrl $9, %edx
8494; SSE-NEXT: andl $8, %edx
8495; SSE-NEXT: orl %esi, %edx
8496; SSE-NEXT: movl %eax, %esi
8497; SSE-NEXT: shrl $11, %esi
8498; SSE-NEXT: andl $4, %esi
8499; SSE-NEXT: orl %edx, %esi
8500; SSE-NEXT: movl %eax, %edx
8501; SSE-NEXT: shrl $13, %edx
8502; SSE-NEXT: andl $2, %edx
8503; SSE-NEXT: orl %esi, %edx
8504; SSE-NEXT: shrl $15, %eax
8505; SSE-NEXT: orl %edx, %eax
8506; SSE-NEXT: orl %ecx, %eax
8507; SSE-NEXT: movd %eax, %xmm4
8508; SSE-NEXT: movd %xmm1, %eax
8509; SSE-NEXT: movl %eax, %ecx
8510; SSE-NEXT: shll $15, %ecx
8511; SSE-NEXT: movl %eax, %edx
8512; SSE-NEXT: andl $2, %edx
8513; SSE-NEXT: shll $13, %edx
8514; SSE-NEXT: orl %ecx, %edx
8515; SSE-NEXT: movl %eax, %ecx
8516; SSE-NEXT: andl $4, %ecx
8517; SSE-NEXT: shll $11, %ecx
8518; SSE-NEXT: orl %edx, %ecx
8519; SSE-NEXT: movl %eax, %edx
8520; SSE-NEXT: andl $8, %edx
8521; SSE-NEXT: shll $9, %edx
8522; SSE-NEXT: orl %ecx, %edx
8523; SSE-NEXT: movl %eax, %esi
8524; SSE-NEXT: andl $16, %esi
8525; SSE-NEXT: shll $7, %esi
8526; SSE-NEXT: orl %edx, %esi
8527; SSE-NEXT: movl %eax, %ecx
8528; SSE-NEXT: andl $32, %ecx
8529; SSE-NEXT: shll $5, %ecx
8530; SSE-NEXT: orl %esi, %ecx
8531; SSE-NEXT: movl %eax, %edx
8532; SSE-NEXT: andl $64, %edx
8533; SSE-NEXT: shll $3, %edx
8534; SSE-NEXT: leal (%rax,%rax), %esi
8535; SSE-NEXT: andl $256, %esi # imm = 0x100
8536; SSE-NEXT: orl %edx, %esi
8537; SSE-NEXT: movl %eax, %edx
8538; SSE-NEXT: shrl %edx
8539; SSE-NEXT: andl $128, %edx
8540; SSE-NEXT: orl %esi, %edx
8541; SSE-NEXT: movl %eax, %esi
8542; SSE-NEXT: shrl $3, %esi
8543; SSE-NEXT: andl $64, %esi
8544; SSE-NEXT: orl %edx, %esi
8545; SSE-NEXT: movl %eax, %edx
8546; SSE-NEXT: shrl $5, %edx
8547; SSE-NEXT: andl $32, %edx
8548; SSE-NEXT: orl %esi, %edx
8549; SSE-NEXT: movl %eax, %esi
8550; SSE-NEXT: shrl $7, %esi
8551; SSE-NEXT: andl $16, %esi
8552; SSE-NEXT: orl %edx, %esi
8553; SSE-NEXT: movl %eax, %edx
8554; SSE-NEXT: shrl $9, %edx
8555; SSE-NEXT: andl $8, %edx
8556; SSE-NEXT: orl %esi, %edx
8557; SSE-NEXT: movl %eax, %esi
8558; SSE-NEXT: shrl $11, %esi
8559; SSE-NEXT: andl $4, %esi
8560; SSE-NEXT: orl %edx, %esi
8561; SSE-NEXT: movl %eax, %edx
8562; SSE-NEXT: shrl $13, %edx
8563; SSE-NEXT: andl $2, %edx
8564; SSE-NEXT: orl %esi, %edx
8565; SSE-NEXT: andl $32768, %eax # imm = 0x8000
8566; SSE-NEXT: shrl $15, %eax
8567; SSE-NEXT: orl %edx, %eax
8568; SSE-NEXT: orl %ecx, %eax
8569; SSE-NEXT: movd %eax, %xmm1
8570; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
8571; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
8572; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
8573; SSE-NEXT: retq
8574;
8575; AVX1-LABEL: test_bitreverse_v16i16:
8576; AVX1: # BB#0:
8577; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
8578; AVX1-NEXT: vpextrw $1, %xmm1, %eax
8579; AVX1-NEXT: movl %eax, %ecx
8580; AVX1-NEXT: shll $15, %ecx
8581; AVX1-NEXT: movl %eax, %edx
8582; AVX1-NEXT: andl $2, %edx
8583; AVX1-NEXT: shll $13, %edx
8584; AVX1-NEXT: orl %ecx, %edx
8585; AVX1-NEXT: movl %eax, %ecx
8586; AVX1-NEXT: andl $4, %ecx
8587; AVX1-NEXT: shll $11, %ecx
8588; AVX1-NEXT: orl %edx, %ecx
8589; AVX1-NEXT: movl %eax, %edx
8590; AVX1-NEXT: andl $8, %edx
8591; AVX1-NEXT: shll $9, %edx
8592; AVX1-NEXT: orl %ecx, %edx
8593; AVX1-NEXT: movl %eax, %esi
8594; AVX1-NEXT: andl $16, %esi
8595; AVX1-NEXT: shll $7, %esi
8596; AVX1-NEXT: orl %edx, %esi
8597; AVX1-NEXT: movl %eax, %ecx
8598; AVX1-NEXT: andl $32, %ecx
8599; AVX1-NEXT: shll $5, %ecx
8600; AVX1-NEXT: orl %esi, %ecx
8601; AVX1-NEXT: movl %eax, %edx
8602; AVX1-NEXT: andl $64, %edx
8603; AVX1-NEXT: shll $3, %edx
8604; AVX1-NEXT: leal (%rax,%rax), %esi
8605; AVX1-NEXT: andl $256, %esi # imm = 0x100
8606; AVX1-NEXT: orl %edx, %esi
8607; AVX1-NEXT: movl %eax, %edx
8608; AVX1-NEXT: shrl %edx
8609; AVX1-NEXT: andl $128, %edx
8610; AVX1-NEXT: orl %esi, %edx
8611; AVX1-NEXT: movl %eax, %esi
8612; AVX1-NEXT: shrl $3, %esi
8613; AVX1-NEXT: andl $64, %esi
8614; AVX1-NEXT: orl %edx, %esi
8615; AVX1-NEXT: movl %eax, %edx
8616; AVX1-NEXT: shrl $5, %edx
8617; AVX1-NEXT: andl $32, %edx
8618; AVX1-NEXT: orl %esi, %edx
8619; AVX1-NEXT: movl %eax, %esi
8620; AVX1-NEXT: shrl $7, %esi
8621; AVX1-NEXT: andl $16, %esi
8622; AVX1-NEXT: orl %edx, %esi
8623; AVX1-NEXT: movl %eax, %edx
8624; AVX1-NEXT: shrl $9, %edx
8625; AVX1-NEXT: andl $8, %edx
8626; AVX1-NEXT: orl %esi, %edx
8627; AVX1-NEXT: movl %eax, %esi
8628; AVX1-NEXT: shrl $11, %esi
8629; AVX1-NEXT: andl $4, %esi
8630; AVX1-NEXT: orl %edx, %esi
8631; AVX1-NEXT: movl %eax, %edx
8632; AVX1-NEXT: shrl $13, %edx
8633; AVX1-NEXT: andl $2, %edx
8634; AVX1-NEXT: orl %esi, %edx
8635; AVX1-NEXT: shrl $15, %eax
8636; AVX1-NEXT: orl %edx, %eax
8637; AVX1-NEXT: orl %ecx, %eax
8638; AVX1-NEXT: vmovd %xmm1, %ecx
8639; AVX1-NEXT: movl %ecx, %edx
8640; AVX1-NEXT: shll $15, %edx
8641; AVX1-NEXT: movl %ecx, %esi
8642; AVX1-NEXT: andl $2, %esi
8643; AVX1-NEXT: shll $13, %esi
8644; AVX1-NEXT: orl %edx, %esi
8645; AVX1-NEXT: movl %ecx, %edx
8646; AVX1-NEXT: andl $4, %edx
8647; AVX1-NEXT: shll $11, %edx
8648; AVX1-NEXT: orl %esi, %edx
8649; AVX1-NEXT: movl %ecx, %esi
8650; AVX1-NEXT: andl $8, %esi
8651; AVX1-NEXT: shll $9, %esi
8652; AVX1-NEXT: orl %edx, %esi
8653; AVX1-NEXT: movl %ecx, %edi
8654; AVX1-NEXT: andl $16, %edi
8655; AVX1-NEXT: shll $7, %edi
8656; AVX1-NEXT: orl %esi, %edi
8657; AVX1-NEXT: movl %ecx, %edx
8658; AVX1-NEXT: andl $32, %edx
8659; AVX1-NEXT: shll $5, %edx
8660; AVX1-NEXT: orl %edi, %edx
8661; AVX1-NEXT: movl %ecx, %esi
8662; AVX1-NEXT: andl $64, %esi
8663; AVX1-NEXT: shll $3, %esi
8664; AVX1-NEXT: leal (%rcx,%rcx), %edi
8665; AVX1-NEXT: andl $256, %edi # imm = 0x100
8666; AVX1-NEXT: orl %esi, %edi
8667; AVX1-NEXT: movl %ecx, %esi
8668; AVX1-NEXT: shrl %esi
8669; AVX1-NEXT: andl $128, %esi
8670; AVX1-NEXT: orl %edi, %esi
8671; AVX1-NEXT: movl %ecx, %edi
8672; AVX1-NEXT: shrl $3, %edi
8673; AVX1-NEXT: andl $64, %edi
8674; AVX1-NEXT: orl %esi, %edi
8675; AVX1-NEXT: movl %ecx, %esi
8676; AVX1-NEXT: shrl $5, %esi
8677; AVX1-NEXT: andl $32, %esi
8678; AVX1-NEXT: orl %edi, %esi
8679; AVX1-NEXT: movl %ecx, %edi
8680; AVX1-NEXT: shrl $7, %edi
8681; AVX1-NEXT: andl $16, %edi
8682; AVX1-NEXT: orl %esi, %edi
8683; AVX1-NEXT: movl %ecx, %esi
8684; AVX1-NEXT: shrl $9, %esi
8685; AVX1-NEXT: andl $8, %esi
8686; AVX1-NEXT: orl %edi, %esi
8687; AVX1-NEXT: movl %ecx, %edi
8688; AVX1-NEXT: shrl $11, %edi
8689; AVX1-NEXT: andl $4, %edi
8690; AVX1-NEXT: orl %esi, %edi
8691; AVX1-NEXT: movl %ecx, %esi
8692; AVX1-NEXT: shrl $13, %esi
8693; AVX1-NEXT: andl $2, %esi
8694; AVX1-NEXT: orl %edi, %esi
8695; AVX1-NEXT: andl $32768, %ecx # imm = 0x8000
8696; AVX1-NEXT: shrl $15, %ecx
8697; AVX1-NEXT: orl %esi, %ecx
8698; AVX1-NEXT: orl %edx, %ecx
8699; AVX1-NEXT: vmovd %ecx, %xmm2
8700; AVX1-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
8701; AVX1-NEXT: vpextrw $2, %xmm1, %eax
8702; AVX1-NEXT: movl %eax, %ecx
8703; AVX1-NEXT: shll $15, %ecx
8704; AVX1-NEXT: movl %eax, %edx
8705; AVX1-NEXT: andl $2, %edx
8706; AVX1-NEXT: shll $13, %edx
8707; AVX1-NEXT: orl %ecx, %edx
8708; AVX1-NEXT: movl %eax, %ecx
8709; AVX1-NEXT: andl $4, %ecx
8710; AVX1-NEXT: shll $11, %ecx
8711; AVX1-NEXT: orl %edx, %ecx
8712; AVX1-NEXT: movl %eax, %edx
8713; AVX1-NEXT: andl $8, %edx
8714; AVX1-NEXT: shll $9, %edx
8715; AVX1-NEXT: orl %ecx, %edx
8716; AVX1-NEXT: movl %eax, %esi
8717; AVX1-NEXT: andl $16, %esi
8718; AVX1-NEXT: shll $7, %esi
8719; AVX1-NEXT: orl %edx, %esi
8720; AVX1-NEXT: movl %eax, %ecx
8721; AVX1-NEXT: andl $32, %ecx
8722; AVX1-NEXT: shll $5, %ecx
8723; AVX1-NEXT: orl %esi, %ecx
8724; AVX1-NEXT: movl %eax, %edx
8725; AVX1-NEXT: andl $64, %edx
8726; AVX1-NEXT: shll $3, %edx
8727; AVX1-NEXT: leal (%rax,%rax), %esi
8728; AVX1-NEXT: andl $256, %esi # imm = 0x100
8729; AVX1-NEXT: orl %edx, %esi
8730; AVX1-NEXT: movl %eax, %edx
8731; AVX1-NEXT: shrl %edx
8732; AVX1-NEXT: andl $128, %edx
8733; AVX1-NEXT: orl %esi, %edx
8734; AVX1-NEXT: movl %eax, %esi
8735; AVX1-NEXT: shrl $3, %esi
8736; AVX1-NEXT: andl $64, %esi
8737; AVX1-NEXT: orl %edx, %esi
8738; AVX1-NEXT: movl %eax, %edx
8739; AVX1-NEXT: shrl $5, %edx
8740; AVX1-NEXT: andl $32, %edx
8741; AVX1-NEXT: orl %esi, %edx
8742; AVX1-NEXT: movl %eax, %esi
8743; AVX1-NEXT: shrl $7, %esi
8744; AVX1-NEXT: andl $16, %esi
8745; AVX1-NEXT: orl %edx, %esi
8746; AVX1-NEXT: movl %eax, %edx
8747; AVX1-NEXT: shrl $9, %edx
8748; AVX1-NEXT: andl $8, %edx
8749; AVX1-NEXT: orl %esi, %edx
8750; AVX1-NEXT: movl %eax, %esi
8751; AVX1-NEXT: shrl $11, %esi
8752; AVX1-NEXT: andl $4, %esi
8753; AVX1-NEXT: orl %edx, %esi
8754; AVX1-NEXT: movl %eax, %edx
8755; AVX1-NEXT: shrl $13, %edx
8756; AVX1-NEXT: andl $2, %edx
8757; AVX1-NEXT: orl %esi, %edx
8758; AVX1-NEXT: shrl $15, %eax
8759; AVX1-NEXT: orl %edx, %eax
8760; AVX1-NEXT: orl %ecx, %eax
8761; AVX1-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
8762; AVX1-NEXT: vpextrw $3, %xmm1, %eax
8763; AVX1-NEXT: movl %eax, %ecx
8764; AVX1-NEXT: shll $15, %ecx
8765; AVX1-NEXT: movl %eax, %edx
8766; AVX1-NEXT: andl $2, %edx
8767; AVX1-NEXT: shll $13, %edx
8768; AVX1-NEXT: orl %ecx, %edx
8769; AVX1-NEXT: movl %eax, %ecx
8770; AVX1-NEXT: andl $4, %ecx
8771; AVX1-NEXT: shll $11, %ecx
8772; AVX1-NEXT: orl %edx, %ecx
8773; AVX1-NEXT: movl %eax, %edx
8774; AVX1-NEXT: andl $8, %edx
8775; AVX1-NEXT: shll $9, %edx
8776; AVX1-NEXT: orl %ecx, %edx
8777; AVX1-NEXT: movl %eax, %esi
8778; AVX1-NEXT: andl $16, %esi
8779; AVX1-NEXT: shll $7, %esi
8780; AVX1-NEXT: orl %edx, %esi
8781; AVX1-NEXT: movl %eax, %ecx
8782; AVX1-NEXT: andl $32, %ecx
8783; AVX1-NEXT: shll $5, %ecx
8784; AVX1-NEXT: orl %esi, %ecx
8785; AVX1-NEXT: movl %eax, %edx
8786; AVX1-NEXT: andl $64, %edx
8787; AVX1-NEXT: shll $3, %edx
8788; AVX1-NEXT: leal (%rax,%rax), %esi
8789; AVX1-NEXT: andl $256, %esi # imm = 0x100
8790; AVX1-NEXT: orl %edx, %esi
8791; AVX1-NEXT: movl %eax, %edx
8792; AVX1-NEXT: shrl %edx
8793; AVX1-NEXT: andl $128, %edx
8794; AVX1-NEXT: orl %esi, %edx
8795; AVX1-NEXT: movl %eax, %esi
8796; AVX1-NEXT: shrl $3, %esi
8797; AVX1-NEXT: andl $64, %esi
8798; AVX1-NEXT: orl %edx, %esi
8799; AVX1-NEXT: movl %eax, %edx
8800; AVX1-NEXT: shrl $5, %edx
8801; AVX1-NEXT: andl $32, %edx
8802; AVX1-NEXT: orl %esi, %edx
8803; AVX1-NEXT: movl %eax, %esi
8804; AVX1-NEXT: shrl $7, %esi
8805; AVX1-NEXT: andl $16, %esi
8806; AVX1-NEXT: orl %edx, %esi
8807; AVX1-NEXT: movl %eax, %edx
8808; AVX1-NEXT: shrl $9, %edx
8809; AVX1-NEXT: andl $8, %edx
8810; AVX1-NEXT: orl %esi, %edx
8811; AVX1-NEXT: movl %eax, %esi
8812; AVX1-NEXT: shrl $11, %esi
8813; AVX1-NEXT: andl $4, %esi
8814; AVX1-NEXT: orl %edx, %esi
8815; AVX1-NEXT: movl %eax, %edx
8816; AVX1-NEXT: shrl $13, %edx
8817; AVX1-NEXT: andl $2, %edx
8818; AVX1-NEXT: orl %esi, %edx
8819; AVX1-NEXT: shrl $15, %eax
8820; AVX1-NEXT: orl %edx, %eax
8821; AVX1-NEXT: orl %ecx, %eax
8822; AVX1-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2
8823; AVX1-NEXT: vpextrw $4, %xmm1, %eax
8824; AVX1-NEXT: movl %eax, %ecx
8825; AVX1-NEXT: shll $15, %ecx
8826; AVX1-NEXT: movl %eax, %edx
8827; AVX1-NEXT: andl $2, %edx
8828; AVX1-NEXT: shll $13, %edx
8829; AVX1-NEXT: orl %ecx, %edx
8830; AVX1-NEXT: movl %eax, %ecx
8831; AVX1-NEXT: andl $4, %ecx
8832; AVX1-NEXT: shll $11, %ecx
8833; AVX1-NEXT: orl %edx, %ecx
8834; AVX1-NEXT: movl %eax, %edx
8835; AVX1-NEXT: andl $8, %edx
8836; AVX1-NEXT: shll $9, %edx
8837; AVX1-NEXT: orl %ecx, %edx
8838; AVX1-NEXT: movl %eax, %esi
8839; AVX1-NEXT: andl $16, %esi
8840; AVX1-NEXT: shll $7, %esi
8841; AVX1-NEXT: orl %edx, %esi
8842; AVX1-NEXT: movl %eax, %ecx
8843; AVX1-NEXT: andl $32, %ecx
8844; AVX1-NEXT: shll $5, %ecx
8845; AVX1-NEXT: orl %esi, %ecx
8846; AVX1-NEXT: movl %eax, %edx
8847; AVX1-NEXT: andl $64, %edx
8848; AVX1-NEXT: shll $3, %edx
8849; AVX1-NEXT: leal (%rax,%rax), %esi
8850; AVX1-NEXT: andl $256, %esi # imm = 0x100
8851; AVX1-NEXT: orl %edx, %esi
8852; AVX1-NEXT: movl %eax, %edx
8853; AVX1-NEXT: shrl %edx
8854; AVX1-NEXT: andl $128, %edx
8855; AVX1-NEXT: orl %esi, %edx
8856; AVX1-NEXT: movl %eax, %esi
8857; AVX1-NEXT: shrl $3, %esi
8858; AVX1-NEXT: andl $64, %esi
8859; AVX1-NEXT: orl %edx, %esi
8860; AVX1-NEXT: movl %eax, %edx
8861; AVX1-NEXT: shrl $5, %edx
8862; AVX1-NEXT: andl $32, %edx
8863; AVX1-NEXT: orl %esi, %edx
8864; AVX1-NEXT: movl %eax, %esi
8865; AVX1-NEXT: shrl $7, %esi
8866; AVX1-NEXT: andl $16, %esi
8867; AVX1-NEXT: orl %edx, %esi
8868; AVX1-NEXT: movl %eax, %edx
8869; AVX1-NEXT: shrl $9, %edx
8870; AVX1-NEXT: andl $8, %edx
8871; AVX1-NEXT: orl %esi, %edx
8872; AVX1-NEXT: movl %eax, %esi
8873; AVX1-NEXT: shrl $11, %esi
8874; AVX1-NEXT: andl $4, %esi
8875; AVX1-NEXT: orl %edx, %esi
8876; AVX1-NEXT: movl %eax, %edx
8877; AVX1-NEXT: shrl $13, %edx
8878; AVX1-NEXT: andl $2, %edx
8879; AVX1-NEXT: orl %esi, %edx
8880; AVX1-NEXT: shrl $15, %eax
8881; AVX1-NEXT: orl %edx, %eax
8882; AVX1-NEXT: orl %ecx, %eax
8883; AVX1-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
8884; AVX1-NEXT: vpextrw $5, %xmm1, %eax
8885; AVX1-NEXT: movl %eax, %ecx
8886; AVX1-NEXT: shll $15, %ecx
8887; AVX1-NEXT: movl %eax, %edx
8888; AVX1-NEXT: andl $2, %edx
8889; AVX1-NEXT: shll $13, %edx
8890; AVX1-NEXT: orl %ecx, %edx
8891; AVX1-NEXT: movl %eax, %ecx
8892; AVX1-NEXT: andl $4, %ecx
8893; AVX1-NEXT: shll $11, %ecx
8894; AVX1-NEXT: orl %edx, %ecx
8895; AVX1-NEXT: movl %eax, %edx
8896; AVX1-NEXT: andl $8, %edx
8897; AVX1-NEXT: shll $9, %edx
8898; AVX1-NEXT: orl %ecx, %edx
8899; AVX1-NEXT: movl %eax, %esi
8900; AVX1-NEXT: andl $16, %esi
8901; AVX1-NEXT: shll $7, %esi
8902; AVX1-NEXT: orl %edx, %esi
8903; AVX1-NEXT: movl %eax, %ecx
8904; AVX1-NEXT: andl $32, %ecx
8905; AVX1-NEXT: shll $5, %ecx
8906; AVX1-NEXT: orl %esi, %ecx
8907; AVX1-NEXT: movl %eax, %edx
8908; AVX1-NEXT: andl $64, %edx
8909; AVX1-NEXT: shll $3, %edx
8910; AVX1-NEXT: leal (%rax,%rax), %esi
8911; AVX1-NEXT: andl $256, %esi # imm = 0x100
8912; AVX1-NEXT: orl %edx, %esi
8913; AVX1-NEXT: movl %eax, %edx
8914; AVX1-NEXT: shrl %edx
8915; AVX1-NEXT: andl $128, %edx
8916; AVX1-NEXT: orl %esi, %edx
8917; AVX1-NEXT: movl %eax, %esi
8918; AVX1-NEXT: shrl $3, %esi
8919; AVX1-NEXT: andl $64, %esi
8920; AVX1-NEXT: orl %edx, %esi
8921; AVX1-NEXT: movl %eax, %edx
8922; AVX1-NEXT: shrl $5, %edx
8923; AVX1-NEXT: andl $32, %edx
8924; AVX1-NEXT: orl %esi, %edx
8925; AVX1-NEXT: movl %eax, %esi
8926; AVX1-NEXT: shrl $7, %esi
8927; AVX1-NEXT: andl $16, %esi
8928; AVX1-NEXT: orl %edx, %esi
8929; AVX1-NEXT: movl %eax, %edx
8930; AVX1-NEXT: shrl $9, %edx
8931; AVX1-NEXT: andl $8, %edx
8932; AVX1-NEXT: orl %esi, %edx
8933; AVX1-NEXT: movl %eax, %esi
8934; AVX1-NEXT: shrl $11, %esi
8935; AVX1-NEXT: andl $4, %esi
8936; AVX1-NEXT: orl %edx, %esi
8937; AVX1-NEXT: movl %eax, %edx
8938; AVX1-NEXT: shrl $13, %edx
8939; AVX1-NEXT: andl $2, %edx
8940; AVX1-NEXT: orl %esi, %edx
8941; AVX1-NEXT: shrl $15, %eax
8942; AVX1-NEXT: orl %edx, %eax
8943; AVX1-NEXT: orl %ecx, %eax
8944; AVX1-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2
8945; AVX1-NEXT: vpextrw $6, %xmm1, %eax
8946; AVX1-NEXT: movl %eax, %ecx
8947; AVX1-NEXT: shll $15, %ecx
8948; AVX1-NEXT: movl %eax, %edx
8949; AVX1-NEXT: andl $2, %edx
8950; AVX1-NEXT: shll $13, %edx
8951; AVX1-NEXT: orl %ecx, %edx
8952; AVX1-NEXT: movl %eax, %ecx
8953; AVX1-NEXT: andl $4, %ecx
8954; AVX1-NEXT: shll $11, %ecx
8955; AVX1-NEXT: orl %edx, %ecx
8956; AVX1-NEXT: movl %eax, %edx
8957; AVX1-NEXT: andl $8, %edx
8958; AVX1-NEXT: shll $9, %edx
8959; AVX1-NEXT: orl %ecx, %edx
8960; AVX1-NEXT: movl %eax, %esi
8961; AVX1-NEXT: andl $16, %esi
8962; AVX1-NEXT: shll $7, %esi
8963; AVX1-NEXT: orl %edx, %esi
8964; AVX1-NEXT: movl %eax, %ecx
8965; AVX1-NEXT: andl $32, %ecx
8966; AVX1-NEXT: shll $5, %ecx
8967; AVX1-NEXT: orl %esi, %ecx
8968; AVX1-NEXT: movl %eax, %edx
8969; AVX1-NEXT: andl $64, %edx
8970; AVX1-NEXT: shll $3, %edx
8971; AVX1-NEXT: leal (%rax,%rax), %esi
8972; AVX1-NEXT: andl $256, %esi # imm = 0x100
8973; AVX1-NEXT: orl %edx, %esi
8974; AVX1-NEXT: movl %eax, %edx
8975; AVX1-NEXT: shrl %edx
8976; AVX1-NEXT: andl $128, %edx
8977; AVX1-NEXT: orl %esi, %edx
8978; AVX1-NEXT: movl %eax, %esi
8979; AVX1-NEXT: shrl $3, %esi
8980; AVX1-NEXT: andl $64, %esi
8981; AVX1-NEXT: orl %edx, %esi
8982; AVX1-NEXT: movl %eax, %edx
8983; AVX1-NEXT: shrl $5, %edx
8984; AVX1-NEXT: andl $32, %edx
8985; AVX1-NEXT: orl %esi, %edx
8986; AVX1-NEXT: movl %eax, %esi
8987; AVX1-NEXT: shrl $7, %esi
8988; AVX1-NEXT: andl $16, %esi
8989; AVX1-NEXT: orl %edx, %esi
8990; AVX1-NEXT: movl %eax, %edx
8991; AVX1-NEXT: shrl $9, %edx
8992; AVX1-NEXT: andl $8, %edx
8993; AVX1-NEXT: orl %esi, %edx
8994; AVX1-NEXT: movl %eax, %esi
8995; AVX1-NEXT: shrl $11, %esi
8996; AVX1-NEXT: andl $4, %esi
8997; AVX1-NEXT: orl %edx, %esi
8998; AVX1-NEXT: movl %eax, %edx
8999; AVX1-NEXT: shrl $13, %edx
9000; AVX1-NEXT: andl $2, %edx
9001; AVX1-NEXT: orl %esi, %edx
9002; AVX1-NEXT: shrl $15, %eax
9003; AVX1-NEXT: orl %edx, %eax
9004; AVX1-NEXT: orl %ecx, %eax
9005; AVX1-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2
9006; AVX1-NEXT: vpextrw $7, %xmm1, %eax
9007; AVX1-NEXT: movl %eax, %ecx
9008; AVX1-NEXT: shll $15, %ecx
9009; AVX1-NEXT: movl %eax, %edx
9010; AVX1-NEXT: andl $2, %edx
9011; AVX1-NEXT: shll $13, %edx
9012; AVX1-NEXT: orl %ecx, %edx
9013; AVX1-NEXT: movl %eax, %ecx
9014; AVX1-NEXT: andl $4, %ecx
9015; AVX1-NEXT: shll $11, %ecx
9016; AVX1-NEXT: orl %edx, %ecx
9017; AVX1-NEXT: movl %eax, %edx
9018; AVX1-NEXT: andl $8, %edx
9019; AVX1-NEXT: shll $9, %edx
9020; AVX1-NEXT: orl %ecx, %edx
9021; AVX1-NEXT: movl %eax, %esi
9022; AVX1-NEXT: andl $16, %esi
9023; AVX1-NEXT: shll $7, %esi
9024; AVX1-NEXT: orl %edx, %esi
9025; AVX1-NEXT: movl %eax, %ecx
9026; AVX1-NEXT: andl $32, %ecx
9027; AVX1-NEXT: shll $5, %ecx
9028; AVX1-NEXT: orl %esi, %ecx
9029; AVX1-NEXT: movl %eax, %edx
9030; AVX1-NEXT: andl $64, %edx
9031; AVX1-NEXT: shll $3, %edx
9032; AVX1-NEXT: leal (%rax,%rax), %esi
9033; AVX1-NEXT: andl $256, %esi # imm = 0x100
9034; AVX1-NEXT: orl %edx, %esi
9035; AVX1-NEXT: movl %eax, %edx
9036; AVX1-NEXT: shrl %edx
9037; AVX1-NEXT: andl $128, %edx
9038; AVX1-NEXT: orl %esi, %edx
9039; AVX1-NEXT: movl %eax, %esi
9040; AVX1-NEXT: shrl $3, %esi
9041; AVX1-NEXT: andl $64, %esi
9042; AVX1-NEXT: orl %edx, %esi
9043; AVX1-NEXT: movl %eax, %edx
9044; AVX1-NEXT: shrl $5, %edx
9045; AVX1-NEXT: andl $32, %edx
9046; AVX1-NEXT: orl %esi, %edx
9047; AVX1-NEXT: movl %eax, %esi
9048; AVX1-NEXT: shrl $7, %esi
9049; AVX1-NEXT: andl $16, %esi
9050; AVX1-NEXT: orl %edx, %esi
9051; AVX1-NEXT: movl %eax, %edx
9052; AVX1-NEXT: shrl $9, %edx
9053; AVX1-NEXT: andl $8, %edx
9054; AVX1-NEXT: orl %esi, %edx
9055; AVX1-NEXT: movl %eax, %esi
9056; AVX1-NEXT: shrl $11, %esi
9057; AVX1-NEXT: andl $4, %esi
9058; AVX1-NEXT: orl %edx, %esi
9059; AVX1-NEXT: movl %eax, %edx
9060; AVX1-NEXT: shrl $13, %edx
9061; AVX1-NEXT: andl $2, %edx
9062; AVX1-NEXT: orl %esi, %edx
9063; AVX1-NEXT: shrl $15, %eax
9064; AVX1-NEXT: orl %edx, %eax
9065; AVX1-NEXT: orl %ecx, %eax
9066; AVX1-NEXT: vpinsrw $7, %eax, %xmm2, %xmm1
9067; AVX1-NEXT: vpextrw $1, %xmm0, %eax
9068; AVX1-NEXT: movl %eax, %ecx
9069; AVX1-NEXT: shll $15, %ecx
9070; AVX1-NEXT: movl %eax, %edx
9071; AVX1-NEXT: andl $2, %edx
9072; AVX1-NEXT: shll $13, %edx
9073; AVX1-NEXT: orl %ecx, %edx
9074; AVX1-NEXT: movl %eax, %ecx
9075; AVX1-NEXT: andl $4, %ecx
9076; AVX1-NEXT: shll $11, %ecx
9077; AVX1-NEXT: orl %edx, %ecx
9078; AVX1-NEXT: movl %eax, %edx
9079; AVX1-NEXT: andl $8, %edx
9080; AVX1-NEXT: shll $9, %edx
9081; AVX1-NEXT: orl %ecx, %edx
9082; AVX1-NEXT: movl %eax, %esi
9083; AVX1-NEXT: andl $16, %esi
9084; AVX1-NEXT: shll $7, %esi
9085; AVX1-NEXT: orl %edx, %esi
9086; AVX1-NEXT: movl %eax, %ecx
9087; AVX1-NEXT: andl $32, %ecx
9088; AVX1-NEXT: shll $5, %ecx
9089; AVX1-NEXT: orl %esi, %ecx
9090; AVX1-NEXT: movl %eax, %edx
9091; AVX1-NEXT: andl $64, %edx
9092; AVX1-NEXT: shll $3, %edx
9093; AVX1-NEXT: leal (%rax,%rax), %esi
9094; AVX1-NEXT: andl $256, %esi # imm = 0x100
9095; AVX1-NEXT: orl %edx, %esi
9096; AVX1-NEXT: movl %eax, %edx
9097; AVX1-NEXT: shrl %edx
9098; AVX1-NEXT: andl $128, %edx
9099; AVX1-NEXT: orl %esi, %edx
9100; AVX1-NEXT: movl %eax, %esi
9101; AVX1-NEXT: shrl $3, %esi
9102; AVX1-NEXT: andl $64, %esi
9103; AVX1-NEXT: orl %edx, %esi
9104; AVX1-NEXT: movl %eax, %edx
9105; AVX1-NEXT: shrl $5, %edx
9106; AVX1-NEXT: andl $32, %edx
9107; AVX1-NEXT: orl %esi, %edx
9108; AVX1-NEXT: movl %eax, %esi
9109; AVX1-NEXT: shrl $7, %esi
9110; AVX1-NEXT: andl $16, %esi
9111; AVX1-NEXT: orl %edx, %esi
9112; AVX1-NEXT: movl %eax, %edx
9113; AVX1-NEXT: shrl $9, %edx
9114; AVX1-NEXT: andl $8, %edx
9115; AVX1-NEXT: orl %esi, %edx
9116; AVX1-NEXT: movl %eax, %esi
9117; AVX1-NEXT: shrl $11, %esi
9118; AVX1-NEXT: andl $4, %esi
9119; AVX1-NEXT: orl %edx, %esi
9120; AVX1-NEXT: movl %eax, %edx
9121; AVX1-NEXT: shrl $13, %edx
9122; AVX1-NEXT: andl $2, %edx
9123; AVX1-NEXT: orl %esi, %edx
9124; AVX1-NEXT: shrl $15, %eax
9125; AVX1-NEXT: orl %edx, %eax
9126; AVX1-NEXT: orl %ecx, %eax
9127; AVX1-NEXT: vmovd %xmm0, %ecx
9128; AVX1-NEXT: movl %ecx, %edx
9129; AVX1-NEXT: shll $15, %edx
9130; AVX1-NEXT: movl %ecx, %esi
9131; AVX1-NEXT: andl $2, %esi
9132; AVX1-NEXT: shll $13, %esi
9133; AVX1-NEXT: orl %edx, %esi
9134; AVX1-NEXT: movl %ecx, %edx
9135; AVX1-NEXT: andl $4, %edx
9136; AVX1-NEXT: shll $11, %edx
9137; AVX1-NEXT: orl %esi, %edx
9138; AVX1-NEXT: movl %ecx, %esi
9139; AVX1-NEXT: andl $8, %esi
9140; AVX1-NEXT: shll $9, %esi
9141; AVX1-NEXT: orl %edx, %esi
9142; AVX1-NEXT: movl %ecx, %edi
9143; AVX1-NEXT: andl $16, %edi
9144; AVX1-NEXT: shll $7, %edi
9145; AVX1-NEXT: orl %esi, %edi
9146; AVX1-NEXT: movl %ecx, %edx
9147; AVX1-NEXT: andl $32, %edx
9148; AVX1-NEXT: shll $5, %edx
9149; AVX1-NEXT: orl %edi, %edx
9150; AVX1-NEXT: movl %ecx, %esi
9151; AVX1-NEXT: andl $64, %esi
9152; AVX1-NEXT: shll $3, %esi
9153; AVX1-NEXT: leal (%rcx,%rcx), %edi
9154; AVX1-NEXT: andl $256, %edi # imm = 0x100
9155; AVX1-NEXT: orl %esi, %edi
9156; AVX1-NEXT: movl %ecx, %esi
9157; AVX1-NEXT: shrl %esi
9158; AVX1-NEXT: andl $128, %esi
9159; AVX1-NEXT: orl %edi, %esi
9160; AVX1-NEXT: movl %ecx, %edi
9161; AVX1-NEXT: shrl $3, %edi
9162; AVX1-NEXT: andl $64, %edi
9163; AVX1-NEXT: orl %esi, %edi
9164; AVX1-NEXT: movl %ecx, %esi
9165; AVX1-NEXT: shrl $5, %esi
9166; AVX1-NEXT: andl $32, %esi
9167; AVX1-NEXT: orl %edi, %esi
9168; AVX1-NEXT: movl %ecx, %edi
9169; AVX1-NEXT: shrl $7, %edi
9170; AVX1-NEXT: andl $16, %edi
9171; AVX1-NEXT: orl %esi, %edi
9172; AVX1-NEXT: movl %ecx, %esi
9173; AVX1-NEXT: shrl $9, %esi
9174; AVX1-NEXT: andl $8, %esi
9175; AVX1-NEXT: orl %edi, %esi
9176; AVX1-NEXT: movl %ecx, %edi
9177; AVX1-NEXT: shrl $11, %edi
9178; AVX1-NEXT: andl $4, %edi
9179; AVX1-NEXT: orl %esi, %edi
9180; AVX1-NEXT: movl %ecx, %esi
9181; AVX1-NEXT: shrl $13, %esi
9182; AVX1-NEXT: andl $2, %esi
9183; AVX1-NEXT: orl %edi, %esi
9184; AVX1-NEXT: andl $32768, %ecx # imm = 0x8000
9185; AVX1-NEXT: shrl $15, %ecx
9186; AVX1-NEXT: orl %esi, %ecx
9187; AVX1-NEXT: orl %edx, %ecx
9188; AVX1-NEXT: vmovd %ecx, %xmm2
9189; AVX1-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
9190; AVX1-NEXT: vpextrw $2, %xmm0, %eax
9191; AVX1-NEXT: movl %eax, %ecx
9192; AVX1-NEXT: shll $15, %ecx
9193; AVX1-NEXT: movl %eax, %edx
9194; AVX1-NEXT: andl $2, %edx
9195; AVX1-NEXT: shll $13, %edx
9196; AVX1-NEXT: orl %ecx, %edx
9197; AVX1-NEXT: movl %eax, %ecx
9198; AVX1-NEXT: andl $4, %ecx
9199; AVX1-NEXT: shll $11, %ecx
9200; AVX1-NEXT: orl %edx, %ecx
9201; AVX1-NEXT: movl %eax, %edx
9202; AVX1-NEXT: andl $8, %edx
9203; AVX1-NEXT: shll $9, %edx
9204; AVX1-NEXT: orl %ecx, %edx
9205; AVX1-NEXT: movl %eax, %esi
9206; AVX1-NEXT: andl $16, %esi
9207; AVX1-NEXT: shll $7, %esi
9208; AVX1-NEXT: orl %edx, %esi
9209; AVX1-NEXT: movl %eax, %ecx
9210; AVX1-NEXT: andl $32, %ecx
9211; AVX1-NEXT: shll $5, %ecx
9212; AVX1-NEXT: orl %esi, %ecx
9213; AVX1-NEXT: movl %eax, %edx
9214; AVX1-NEXT: andl $64, %edx
9215; AVX1-NEXT: shll $3, %edx
9216; AVX1-NEXT: leal (%rax,%rax), %esi
9217; AVX1-NEXT: andl $256, %esi # imm = 0x100
9218; AVX1-NEXT: orl %edx, %esi
9219; AVX1-NEXT: movl %eax, %edx
9220; AVX1-NEXT: shrl %edx
9221; AVX1-NEXT: andl $128, %edx
9222; AVX1-NEXT: orl %esi, %edx
9223; AVX1-NEXT: movl %eax, %esi
9224; AVX1-NEXT: shrl $3, %esi
9225; AVX1-NEXT: andl $64, %esi
9226; AVX1-NEXT: orl %edx, %esi
9227; AVX1-NEXT: movl %eax, %edx
9228; AVX1-NEXT: shrl $5, %edx
9229; AVX1-NEXT: andl $32, %edx
9230; AVX1-NEXT: orl %esi, %edx
9231; AVX1-NEXT: movl %eax, %esi
9232; AVX1-NEXT: shrl $7, %esi
9233; AVX1-NEXT: andl $16, %esi
9234; AVX1-NEXT: orl %edx, %esi
9235; AVX1-NEXT: movl %eax, %edx
9236; AVX1-NEXT: shrl $9, %edx
9237; AVX1-NEXT: andl $8, %edx
9238; AVX1-NEXT: orl %esi, %edx
9239; AVX1-NEXT: movl %eax, %esi
9240; AVX1-NEXT: shrl $11, %esi
9241; AVX1-NEXT: andl $4, %esi
9242; AVX1-NEXT: orl %edx, %esi
9243; AVX1-NEXT: movl %eax, %edx
9244; AVX1-NEXT: shrl $13, %edx
9245; AVX1-NEXT: andl $2, %edx
9246; AVX1-NEXT: orl %esi, %edx
9247; AVX1-NEXT: shrl $15, %eax
9248; AVX1-NEXT: orl %edx, %eax
9249; AVX1-NEXT: orl %ecx, %eax
9250; AVX1-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
9251; AVX1-NEXT: vpextrw $3, %xmm0, %eax
9252; AVX1-NEXT: movl %eax, %ecx
9253; AVX1-NEXT: shll $15, %ecx
9254; AVX1-NEXT: movl %eax, %edx
9255; AVX1-NEXT: andl $2, %edx
9256; AVX1-NEXT: shll $13, %edx
9257; AVX1-NEXT: orl %ecx, %edx
9258; AVX1-NEXT: movl %eax, %ecx
9259; AVX1-NEXT: andl $4, %ecx
9260; AVX1-NEXT: shll $11, %ecx
9261; AVX1-NEXT: orl %edx, %ecx
9262; AVX1-NEXT: movl %eax, %edx
9263; AVX1-NEXT: andl $8, %edx
9264; AVX1-NEXT: shll $9, %edx
9265; AVX1-NEXT: orl %ecx, %edx
9266; AVX1-NEXT: movl %eax, %esi
9267; AVX1-NEXT: andl $16, %esi
9268; AVX1-NEXT: shll $7, %esi
9269; AVX1-NEXT: orl %edx, %esi
9270; AVX1-NEXT: movl %eax, %ecx
9271; AVX1-NEXT: andl $32, %ecx
9272; AVX1-NEXT: shll $5, %ecx
9273; AVX1-NEXT: orl %esi, %ecx
9274; AVX1-NEXT: movl %eax, %edx
9275; AVX1-NEXT: andl $64, %edx
9276; AVX1-NEXT: shll $3, %edx
9277; AVX1-NEXT: leal (%rax,%rax), %esi
9278; AVX1-NEXT: andl $256, %esi # imm = 0x100
9279; AVX1-NEXT: orl %edx, %esi
9280; AVX1-NEXT: movl %eax, %edx
9281; AVX1-NEXT: shrl %edx
9282; AVX1-NEXT: andl $128, %edx
9283; AVX1-NEXT: orl %esi, %edx
9284; AVX1-NEXT: movl %eax, %esi
9285; AVX1-NEXT: shrl $3, %esi
9286; AVX1-NEXT: andl $64, %esi
9287; AVX1-NEXT: orl %edx, %esi
9288; AVX1-NEXT: movl %eax, %edx
9289; AVX1-NEXT: shrl $5, %edx
9290; AVX1-NEXT: andl $32, %edx
9291; AVX1-NEXT: orl %esi, %edx
9292; AVX1-NEXT: movl %eax, %esi
9293; AVX1-NEXT: shrl $7, %esi
9294; AVX1-NEXT: andl $16, %esi
9295; AVX1-NEXT: orl %edx, %esi
9296; AVX1-NEXT: movl %eax, %edx
9297; AVX1-NEXT: shrl $9, %edx
9298; AVX1-NEXT: andl $8, %edx
9299; AVX1-NEXT: orl %esi, %edx
9300; AVX1-NEXT: movl %eax, %esi
9301; AVX1-NEXT: shrl $11, %esi
9302; AVX1-NEXT: andl $4, %esi
9303; AVX1-NEXT: orl %edx, %esi
9304; AVX1-NEXT: movl %eax, %edx
9305; AVX1-NEXT: shrl $13, %edx
9306; AVX1-NEXT: andl $2, %edx
9307; AVX1-NEXT: orl %esi, %edx
9308; AVX1-NEXT: shrl $15, %eax
9309; AVX1-NEXT: orl %edx, %eax
9310; AVX1-NEXT: orl %ecx, %eax
9311; AVX1-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2
9312; AVX1-NEXT: vpextrw $4, %xmm0, %eax
9313; AVX1-NEXT: movl %eax, %ecx
9314; AVX1-NEXT: shll $15, %ecx
9315; AVX1-NEXT: movl %eax, %edx
9316; AVX1-NEXT: andl $2, %edx
9317; AVX1-NEXT: shll $13, %edx
9318; AVX1-NEXT: orl %ecx, %edx
9319; AVX1-NEXT: movl %eax, %ecx
9320; AVX1-NEXT: andl $4, %ecx
9321; AVX1-NEXT: shll $11, %ecx
9322; AVX1-NEXT: orl %edx, %ecx
9323; AVX1-NEXT: movl %eax, %edx
9324; AVX1-NEXT: andl $8, %edx
9325; AVX1-NEXT: shll $9, %edx
9326; AVX1-NEXT: orl %ecx, %edx
9327; AVX1-NEXT: movl %eax, %esi
9328; AVX1-NEXT: andl $16, %esi
9329; AVX1-NEXT: shll $7, %esi
9330; AVX1-NEXT: orl %edx, %esi
9331; AVX1-NEXT: movl %eax, %ecx
9332; AVX1-NEXT: andl $32, %ecx
9333; AVX1-NEXT: shll $5, %ecx
9334; AVX1-NEXT: orl %esi, %ecx
9335; AVX1-NEXT: movl %eax, %edx
9336; AVX1-NEXT: andl $64, %edx
9337; AVX1-NEXT: shll $3, %edx
9338; AVX1-NEXT: leal (%rax,%rax), %esi
9339; AVX1-NEXT: andl $256, %esi # imm = 0x100
9340; AVX1-NEXT: orl %edx, %esi
9341; AVX1-NEXT: movl %eax, %edx
9342; AVX1-NEXT: shrl %edx
9343; AVX1-NEXT: andl $128, %edx
9344; AVX1-NEXT: orl %esi, %edx
9345; AVX1-NEXT: movl %eax, %esi
9346; AVX1-NEXT: shrl $3, %esi
9347; AVX1-NEXT: andl $64, %esi
9348; AVX1-NEXT: orl %edx, %esi
9349; AVX1-NEXT: movl %eax, %edx
9350; AVX1-NEXT: shrl $5, %edx
9351; AVX1-NEXT: andl $32, %edx
9352; AVX1-NEXT: orl %esi, %edx
9353; AVX1-NEXT: movl %eax, %esi
9354; AVX1-NEXT: shrl $7, %esi
9355; AVX1-NEXT: andl $16, %esi
9356; AVX1-NEXT: orl %edx, %esi
9357; AVX1-NEXT: movl %eax, %edx
9358; AVX1-NEXT: shrl $9, %edx
9359; AVX1-NEXT: andl $8, %edx
9360; AVX1-NEXT: orl %esi, %edx
9361; AVX1-NEXT: movl %eax, %esi
9362; AVX1-NEXT: shrl $11, %esi
9363; AVX1-NEXT: andl $4, %esi
9364; AVX1-NEXT: orl %edx, %esi
9365; AVX1-NEXT: movl %eax, %edx
9366; AVX1-NEXT: shrl $13, %edx
9367; AVX1-NEXT: andl $2, %edx
9368; AVX1-NEXT: orl %esi, %edx
9369; AVX1-NEXT: shrl $15, %eax
9370; AVX1-NEXT: orl %edx, %eax
9371; AVX1-NEXT: orl %ecx, %eax
9372; AVX1-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
9373; AVX1-NEXT: vpextrw $5, %xmm0, %eax
9374; AVX1-NEXT: movl %eax, %ecx
9375; AVX1-NEXT: shll $15, %ecx
9376; AVX1-NEXT: movl %eax, %edx
9377; AVX1-NEXT: andl $2, %edx
9378; AVX1-NEXT: shll $13, %edx
9379; AVX1-NEXT: orl %ecx, %edx
9380; AVX1-NEXT: movl %eax, %ecx
9381; AVX1-NEXT: andl $4, %ecx
9382; AVX1-NEXT: shll $11, %ecx
9383; AVX1-NEXT: orl %edx, %ecx
9384; AVX1-NEXT: movl %eax, %edx
9385; AVX1-NEXT: andl $8, %edx
9386; AVX1-NEXT: shll $9, %edx
9387; AVX1-NEXT: orl %ecx, %edx
9388; AVX1-NEXT: movl %eax, %esi
9389; AVX1-NEXT: andl $16, %esi
9390; AVX1-NEXT: shll $7, %esi
9391; AVX1-NEXT: orl %edx, %esi
9392; AVX1-NEXT: movl %eax, %ecx
9393; AVX1-NEXT: andl $32, %ecx
9394; AVX1-NEXT: shll $5, %ecx
9395; AVX1-NEXT: orl %esi, %ecx
9396; AVX1-NEXT: movl %eax, %edx
9397; AVX1-NEXT: andl $64, %edx
9398; AVX1-NEXT: shll $3, %edx
9399; AVX1-NEXT: leal (%rax,%rax), %esi
9400; AVX1-NEXT: andl $256, %esi # imm = 0x100
9401; AVX1-NEXT: orl %edx, %esi
9402; AVX1-NEXT: movl %eax, %edx
9403; AVX1-NEXT: shrl %edx
9404; AVX1-NEXT: andl $128, %edx
9405; AVX1-NEXT: orl %esi, %edx
9406; AVX1-NEXT: movl %eax, %esi
9407; AVX1-NEXT: shrl $3, %esi
9408; AVX1-NEXT: andl $64, %esi
9409; AVX1-NEXT: orl %edx, %esi
9410; AVX1-NEXT: movl %eax, %edx
9411; AVX1-NEXT: shrl $5, %edx
9412; AVX1-NEXT: andl $32, %edx
9413; AVX1-NEXT: orl %esi, %edx
9414; AVX1-NEXT: movl %eax, %esi
9415; AVX1-NEXT: shrl $7, %esi
9416; AVX1-NEXT: andl $16, %esi
9417; AVX1-NEXT: orl %edx, %esi
9418; AVX1-NEXT: movl %eax, %edx
9419; AVX1-NEXT: shrl $9, %edx
9420; AVX1-NEXT: andl $8, %edx
9421; AVX1-NEXT: orl %esi, %edx
9422; AVX1-NEXT: movl %eax, %esi
9423; AVX1-NEXT: shrl $11, %esi
9424; AVX1-NEXT: andl $4, %esi
9425; AVX1-NEXT: orl %edx, %esi
9426; AVX1-NEXT: movl %eax, %edx
9427; AVX1-NEXT: shrl $13, %edx
9428; AVX1-NEXT: andl $2, %edx
9429; AVX1-NEXT: orl %esi, %edx
9430; AVX1-NEXT: shrl $15, %eax
9431; AVX1-NEXT: orl %edx, %eax
9432; AVX1-NEXT: orl %ecx, %eax
9433; AVX1-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2
9434; AVX1-NEXT: vpextrw $6, %xmm0, %eax
9435; AVX1-NEXT: movl %eax, %ecx
9436; AVX1-NEXT: shll $15, %ecx
9437; AVX1-NEXT: movl %eax, %edx
9438; AVX1-NEXT: andl $2, %edx
9439; AVX1-NEXT: shll $13, %edx
9440; AVX1-NEXT: orl %ecx, %edx
9441; AVX1-NEXT: movl %eax, %ecx
9442; AVX1-NEXT: andl $4, %ecx
9443; AVX1-NEXT: shll $11, %ecx
9444; AVX1-NEXT: orl %edx, %ecx
9445; AVX1-NEXT: movl %eax, %edx
9446; AVX1-NEXT: andl $8, %edx
9447; AVX1-NEXT: shll $9, %edx
9448; AVX1-NEXT: orl %ecx, %edx
9449; AVX1-NEXT: movl %eax, %esi
9450; AVX1-NEXT: andl $16, %esi
9451; AVX1-NEXT: shll $7, %esi
9452; AVX1-NEXT: orl %edx, %esi
9453; AVX1-NEXT: movl %eax, %ecx
9454; AVX1-NEXT: andl $32, %ecx
9455; AVX1-NEXT: shll $5, %ecx
9456; AVX1-NEXT: orl %esi, %ecx
9457; AVX1-NEXT: movl %eax, %edx
9458; AVX1-NEXT: andl $64, %edx
9459; AVX1-NEXT: shll $3, %edx
9460; AVX1-NEXT: leal (%rax,%rax), %esi
9461; AVX1-NEXT: andl $256, %esi # imm = 0x100
9462; AVX1-NEXT: orl %edx, %esi
9463; AVX1-NEXT: movl %eax, %edx
9464; AVX1-NEXT: shrl %edx
9465; AVX1-NEXT: andl $128, %edx
9466; AVX1-NEXT: orl %esi, %edx
9467; AVX1-NEXT: movl %eax, %esi
9468; AVX1-NEXT: shrl $3, %esi
9469; AVX1-NEXT: andl $64, %esi
9470; AVX1-NEXT: orl %edx, %esi
9471; AVX1-NEXT: movl %eax, %edx
9472; AVX1-NEXT: shrl $5, %edx
9473; AVX1-NEXT: andl $32, %edx
9474; AVX1-NEXT: orl %esi, %edx
9475; AVX1-NEXT: movl %eax, %esi
9476; AVX1-NEXT: shrl $7, %esi
9477; AVX1-NEXT: andl $16, %esi
9478; AVX1-NEXT: orl %edx, %esi
9479; AVX1-NEXT: movl %eax, %edx
9480; AVX1-NEXT: shrl $9, %edx
9481; AVX1-NEXT: andl $8, %edx
9482; AVX1-NEXT: orl %esi, %edx
9483; AVX1-NEXT: movl %eax, %esi
9484; AVX1-NEXT: shrl $11, %esi
9485; AVX1-NEXT: andl $4, %esi
9486; AVX1-NEXT: orl %edx, %esi
9487; AVX1-NEXT: movl %eax, %edx
9488; AVX1-NEXT: shrl $13, %edx
9489; AVX1-NEXT: andl $2, %edx
9490; AVX1-NEXT: orl %esi, %edx
9491; AVX1-NEXT: shrl $15, %eax
9492; AVX1-NEXT: orl %edx, %eax
9493; AVX1-NEXT: orl %ecx, %eax
9494; AVX1-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2
9495; AVX1-NEXT: vpextrw $7, %xmm0, %eax
9496; AVX1-NEXT: movl %eax, %ecx
9497; AVX1-NEXT: shll $15, %ecx
9498; AVX1-NEXT: movl %eax, %edx
9499; AVX1-NEXT: andl $2, %edx
9500; AVX1-NEXT: shll $13, %edx
9501; AVX1-NEXT: orl %ecx, %edx
9502; AVX1-NEXT: movl %eax, %ecx
9503; AVX1-NEXT: andl $4, %ecx
9504; AVX1-NEXT: shll $11, %ecx
9505; AVX1-NEXT: orl %edx, %ecx
9506; AVX1-NEXT: movl %eax, %edx
9507; AVX1-NEXT: andl $8, %edx
9508; AVX1-NEXT: shll $9, %edx
9509; AVX1-NEXT: orl %ecx, %edx
9510; AVX1-NEXT: movl %eax, %esi
9511; AVX1-NEXT: andl $16, %esi
9512; AVX1-NEXT: shll $7, %esi
9513; AVX1-NEXT: orl %edx, %esi
9514; AVX1-NEXT: movl %eax, %ecx
9515; AVX1-NEXT: andl $32, %ecx
9516; AVX1-NEXT: shll $5, %ecx
9517; AVX1-NEXT: orl %esi, %ecx
9518; AVX1-NEXT: movl %eax, %edx
9519; AVX1-NEXT: andl $64, %edx
9520; AVX1-NEXT: shll $3, %edx
9521; AVX1-NEXT: leal (%rax,%rax), %esi
9522; AVX1-NEXT: andl $256, %esi # imm = 0x100
9523; AVX1-NEXT: orl %edx, %esi
9524; AVX1-NEXT: movl %eax, %edx
9525; AVX1-NEXT: shrl %edx
9526; AVX1-NEXT: andl $128, %edx
9527; AVX1-NEXT: orl %esi, %edx
9528; AVX1-NEXT: movl %eax, %esi
9529; AVX1-NEXT: shrl $3, %esi
9530; AVX1-NEXT: andl $64, %esi
9531; AVX1-NEXT: orl %edx, %esi
9532; AVX1-NEXT: movl %eax, %edx
9533; AVX1-NEXT: shrl $5, %edx
9534; AVX1-NEXT: andl $32, %edx
9535; AVX1-NEXT: orl %esi, %edx
9536; AVX1-NEXT: movl %eax, %esi
9537; AVX1-NEXT: shrl $7, %esi
9538; AVX1-NEXT: andl $16, %esi
9539; AVX1-NEXT: orl %edx, %esi
9540; AVX1-NEXT: movl %eax, %edx
9541; AVX1-NEXT: shrl $9, %edx
9542; AVX1-NEXT: andl $8, %edx
9543; AVX1-NEXT: orl %esi, %edx
9544; AVX1-NEXT: movl %eax, %esi
9545; AVX1-NEXT: shrl $11, %esi
9546; AVX1-NEXT: andl $4, %esi
9547; AVX1-NEXT: orl %edx, %esi
9548; AVX1-NEXT: movl %eax, %edx
9549; AVX1-NEXT: shrl $13, %edx
9550; AVX1-NEXT: andl $2, %edx
9551; AVX1-NEXT: orl %esi, %edx
9552; AVX1-NEXT: shrl $15, %eax
9553; AVX1-NEXT: orl %edx, %eax
9554; AVX1-NEXT: orl %ecx, %eax
9555; AVX1-NEXT: vpinsrw $7, %eax, %xmm2, %xmm0
9556; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
9557; AVX1-NEXT: retq
9558;
9559; AVX2-LABEL: test_bitreverse_v16i16:
9560; AVX2: # BB#0:
9561; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
9562; AVX2-NEXT: vpextrw $1, %xmm1, %eax
9563; AVX2-NEXT: movl %eax, %ecx
9564; AVX2-NEXT: shll $15, %ecx
9565; AVX2-NEXT: movl %eax, %edx
9566; AVX2-NEXT: andl $2, %edx
9567; AVX2-NEXT: shll $13, %edx
9568; AVX2-NEXT: orl %ecx, %edx
9569; AVX2-NEXT: movl %eax, %ecx
9570; AVX2-NEXT: andl $4, %ecx
9571; AVX2-NEXT: shll $11, %ecx
9572; AVX2-NEXT: orl %edx, %ecx
9573; AVX2-NEXT: movl %eax, %edx
9574; AVX2-NEXT: andl $8, %edx
9575; AVX2-NEXT: shll $9, %edx
9576; AVX2-NEXT: orl %ecx, %edx
9577; AVX2-NEXT: movl %eax, %esi
9578; AVX2-NEXT: andl $16, %esi
9579; AVX2-NEXT: shll $7, %esi
9580; AVX2-NEXT: orl %edx, %esi
9581; AVX2-NEXT: movl %eax, %ecx
9582; AVX2-NEXT: andl $32, %ecx
9583; AVX2-NEXT: shll $5, %ecx
9584; AVX2-NEXT: orl %esi, %ecx
9585; AVX2-NEXT: movl %eax, %edx
9586; AVX2-NEXT: andl $64, %edx
9587; AVX2-NEXT: shll $3, %edx
9588; AVX2-NEXT: leal (%rax,%rax), %esi
9589; AVX2-NEXT: andl $256, %esi # imm = 0x100
9590; AVX2-NEXT: orl %edx, %esi
9591; AVX2-NEXT: movl %eax, %edx
9592; AVX2-NEXT: shrl %edx
9593; AVX2-NEXT: andl $128, %edx
9594; AVX2-NEXT: orl %esi, %edx
9595; AVX2-NEXT: movl %eax, %esi
9596; AVX2-NEXT: shrl $3, %esi
9597; AVX2-NEXT: andl $64, %esi
9598; AVX2-NEXT: orl %edx, %esi
9599; AVX2-NEXT: movl %eax, %edx
9600; AVX2-NEXT: shrl $5, %edx
9601; AVX2-NEXT: andl $32, %edx
9602; AVX2-NEXT: orl %esi, %edx
9603; AVX2-NEXT: movl %eax, %esi
9604; AVX2-NEXT: shrl $7, %esi
9605; AVX2-NEXT: andl $16, %esi
9606; AVX2-NEXT: orl %edx, %esi
9607; AVX2-NEXT: movl %eax, %edx
9608; AVX2-NEXT: shrl $9, %edx
9609; AVX2-NEXT: andl $8, %edx
9610; AVX2-NEXT: orl %esi, %edx
9611; AVX2-NEXT: movl %eax, %esi
9612; AVX2-NEXT: shrl $11, %esi
9613; AVX2-NEXT: andl $4, %esi
9614; AVX2-NEXT: orl %edx, %esi
9615; AVX2-NEXT: movl %eax, %edx
9616; AVX2-NEXT: shrl $13, %edx
9617; AVX2-NEXT: andl $2, %edx
9618; AVX2-NEXT: orl %esi, %edx
9619; AVX2-NEXT: shrl $15, %eax
9620; AVX2-NEXT: orl %edx, %eax
9621; AVX2-NEXT: orl %ecx, %eax
9622; AVX2-NEXT: vmovd %xmm1, %ecx
9623; AVX2-NEXT: movl %ecx, %edx
9624; AVX2-NEXT: shll $15, %edx
9625; AVX2-NEXT: movl %ecx, %esi
9626; AVX2-NEXT: andl $2, %esi
9627; AVX2-NEXT: shll $13, %esi
9628; AVX2-NEXT: orl %edx, %esi
9629; AVX2-NEXT: movl %ecx, %edx
9630; AVX2-NEXT: andl $4, %edx
9631; AVX2-NEXT: shll $11, %edx
9632; AVX2-NEXT: orl %esi, %edx
9633; AVX2-NEXT: movl %ecx, %esi
9634; AVX2-NEXT: andl $8, %esi
9635; AVX2-NEXT: shll $9, %esi
9636; AVX2-NEXT: orl %edx, %esi
9637; AVX2-NEXT: movl %ecx, %edi
9638; AVX2-NEXT: andl $16, %edi
9639; AVX2-NEXT: shll $7, %edi
9640; AVX2-NEXT: orl %esi, %edi
9641; AVX2-NEXT: movl %ecx, %edx
9642; AVX2-NEXT: andl $32, %edx
9643; AVX2-NEXT: shll $5, %edx
9644; AVX2-NEXT: orl %edi, %edx
9645; AVX2-NEXT: movl %ecx, %esi
9646; AVX2-NEXT: andl $64, %esi
9647; AVX2-NEXT: shll $3, %esi
9648; AVX2-NEXT: leal (%rcx,%rcx), %edi
9649; AVX2-NEXT: andl $256, %edi # imm = 0x100
9650; AVX2-NEXT: orl %esi, %edi
9651; AVX2-NEXT: movl %ecx, %esi
9652; AVX2-NEXT: shrl %esi
9653; AVX2-NEXT: andl $128, %esi
9654; AVX2-NEXT: orl %edi, %esi
9655; AVX2-NEXT: movl %ecx, %edi
9656; AVX2-NEXT: shrl $3, %edi
9657; AVX2-NEXT: andl $64, %edi
9658; AVX2-NEXT: orl %esi, %edi
9659; AVX2-NEXT: movl %ecx, %esi
9660; AVX2-NEXT: shrl $5, %esi
9661; AVX2-NEXT: andl $32, %esi
9662; AVX2-NEXT: orl %edi, %esi
9663; AVX2-NEXT: movl %ecx, %edi
9664; AVX2-NEXT: shrl $7, %edi
9665; AVX2-NEXT: andl $16, %edi
9666; AVX2-NEXT: orl %esi, %edi
9667; AVX2-NEXT: movl %ecx, %esi
9668; AVX2-NEXT: shrl $9, %esi
9669; AVX2-NEXT: andl $8, %esi
9670; AVX2-NEXT: orl %edi, %esi
9671; AVX2-NEXT: movl %ecx, %edi
9672; AVX2-NEXT: shrl $11, %edi
9673; AVX2-NEXT: andl $4, %edi
9674; AVX2-NEXT: orl %esi, %edi
9675; AVX2-NEXT: movl %ecx, %esi
9676; AVX2-NEXT: shrl $13, %esi
9677; AVX2-NEXT: andl $2, %esi
9678; AVX2-NEXT: orl %edi, %esi
9679; AVX2-NEXT: andl $32768, %ecx # imm = 0x8000
9680; AVX2-NEXT: shrl $15, %ecx
9681; AVX2-NEXT: orl %esi, %ecx
9682; AVX2-NEXT: orl %edx, %ecx
9683; AVX2-NEXT: vmovd %ecx, %xmm2
9684; AVX2-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
9685; AVX2-NEXT: vpextrw $2, %xmm1, %eax
9686; AVX2-NEXT: movl %eax, %ecx
9687; AVX2-NEXT: shll $15, %ecx
9688; AVX2-NEXT: movl %eax, %edx
9689; AVX2-NEXT: andl $2, %edx
9690; AVX2-NEXT: shll $13, %edx
9691; AVX2-NEXT: orl %ecx, %edx
9692; AVX2-NEXT: movl %eax, %ecx
9693; AVX2-NEXT: andl $4, %ecx
9694; AVX2-NEXT: shll $11, %ecx
9695; AVX2-NEXT: orl %edx, %ecx
9696; AVX2-NEXT: movl %eax, %edx
9697; AVX2-NEXT: andl $8, %edx
9698; AVX2-NEXT: shll $9, %edx
9699; AVX2-NEXT: orl %ecx, %edx
9700; AVX2-NEXT: movl %eax, %esi
9701; AVX2-NEXT: andl $16, %esi
9702; AVX2-NEXT: shll $7, %esi
9703; AVX2-NEXT: orl %edx, %esi
9704; AVX2-NEXT: movl %eax, %ecx
9705; AVX2-NEXT: andl $32, %ecx
9706; AVX2-NEXT: shll $5, %ecx
9707; AVX2-NEXT: orl %esi, %ecx
9708; AVX2-NEXT: movl %eax, %edx
9709; AVX2-NEXT: andl $64, %edx
9710; AVX2-NEXT: shll $3, %edx
9711; AVX2-NEXT: leal (%rax,%rax), %esi
9712; AVX2-NEXT: andl $256, %esi # imm = 0x100
9713; AVX2-NEXT: orl %edx, %esi
9714; AVX2-NEXT: movl %eax, %edx
9715; AVX2-NEXT: shrl %edx
9716; AVX2-NEXT: andl $128, %edx
9717; AVX2-NEXT: orl %esi, %edx
9718; AVX2-NEXT: movl %eax, %esi
9719; AVX2-NEXT: shrl $3, %esi
9720; AVX2-NEXT: andl $64, %esi
9721; AVX2-NEXT: orl %edx, %esi
9722; AVX2-NEXT: movl %eax, %edx
9723; AVX2-NEXT: shrl $5, %edx
9724; AVX2-NEXT: andl $32, %edx
9725; AVX2-NEXT: orl %esi, %edx
9726; AVX2-NEXT: movl %eax, %esi
9727; AVX2-NEXT: shrl $7, %esi
9728; AVX2-NEXT: andl $16, %esi
9729; AVX2-NEXT: orl %edx, %esi
9730; AVX2-NEXT: movl %eax, %edx
9731; AVX2-NEXT: shrl $9, %edx
9732; AVX2-NEXT: andl $8, %edx
9733; AVX2-NEXT: orl %esi, %edx
9734; AVX2-NEXT: movl %eax, %esi
9735; AVX2-NEXT: shrl $11, %esi
9736; AVX2-NEXT: andl $4, %esi
9737; AVX2-NEXT: orl %edx, %esi
9738; AVX2-NEXT: movl %eax, %edx
9739; AVX2-NEXT: shrl $13, %edx
9740; AVX2-NEXT: andl $2, %edx
9741; AVX2-NEXT: orl %esi, %edx
9742; AVX2-NEXT: shrl $15, %eax
9743; AVX2-NEXT: orl %edx, %eax
9744; AVX2-NEXT: orl %ecx, %eax
9745; AVX2-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
9746; AVX2-NEXT: vpextrw $3, %xmm1, %eax
9747; AVX2-NEXT: movl %eax, %ecx
9748; AVX2-NEXT: shll $15, %ecx
9749; AVX2-NEXT: movl %eax, %edx
9750; AVX2-NEXT: andl $2, %edx
9751; AVX2-NEXT: shll $13, %edx
9752; AVX2-NEXT: orl %ecx, %edx
9753; AVX2-NEXT: movl %eax, %ecx
9754; AVX2-NEXT: andl $4, %ecx
9755; AVX2-NEXT: shll $11, %ecx
9756; AVX2-NEXT: orl %edx, %ecx
9757; AVX2-NEXT: movl %eax, %edx
9758; AVX2-NEXT: andl $8, %edx
9759; AVX2-NEXT: shll $9, %edx
9760; AVX2-NEXT: orl %ecx, %edx
9761; AVX2-NEXT: movl %eax, %esi
9762; AVX2-NEXT: andl $16, %esi
9763; AVX2-NEXT: shll $7, %esi
9764; AVX2-NEXT: orl %edx, %esi
9765; AVX2-NEXT: movl %eax, %ecx
9766; AVX2-NEXT: andl $32, %ecx
9767; AVX2-NEXT: shll $5, %ecx
9768; AVX2-NEXT: orl %esi, %ecx
9769; AVX2-NEXT: movl %eax, %edx
9770; AVX2-NEXT: andl $64, %edx
9771; AVX2-NEXT: shll $3, %edx
9772; AVX2-NEXT: leal (%rax,%rax), %esi
9773; AVX2-NEXT: andl $256, %esi # imm = 0x100
9774; AVX2-NEXT: orl %edx, %esi
9775; AVX2-NEXT: movl %eax, %edx
9776; AVX2-NEXT: shrl %edx
9777; AVX2-NEXT: andl $128, %edx
9778; AVX2-NEXT: orl %esi, %edx
9779; AVX2-NEXT: movl %eax, %esi
9780; AVX2-NEXT: shrl $3, %esi
9781; AVX2-NEXT: andl $64, %esi
9782; AVX2-NEXT: orl %edx, %esi
9783; AVX2-NEXT: movl %eax, %edx
9784; AVX2-NEXT: shrl $5, %edx
9785; AVX2-NEXT: andl $32, %edx
9786; AVX2-NEXT: orl %esi, %edx
9787; AVX2-NEXT: movl %eax, %esi
9788; AVX2-NEXT: shrl $7, %esi
9789; AVX2-NEXT: andl $16, %esi
9790; AVX2-NEXT: orl %edx, %esi
9791; AVX2-NEXT: movl %eax, %edx
9792; AVX2-NEXT: shrl $9, %edx
9793; AVX2-NEXT: andl $8, %edx
9794; AVX2-NEXT: orl %esi, %edx
9795; AVX2-NEXT: movl %eax, %esi
9796; AVX2-NEXT: shrl $11, %esi
9797; AVX2-NEXT: andl $4, %esi
9798; AVX2-NEXT: orl %edx, %esi
9799; AVX2-NEXT: movl %eax, %edx
9800; AVX2-NEXT: shrl $13, %edx
9801; AVX2-NEXT: andl $2, %edx
9802; AVX2-NEXT: orl %esi, %edx
9803; AVX2-NEXT: shrl $15, %eax
9804; AVX2-NEXT: orl %edx, %eax
9805; AVX2-NEXT: orl %ecx, %eax
9806; AVX2-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2
9807; AVX2-NEXT: vpextrw $4, %xmm1, %eax
9808; AVX2-NEXT: movl %eax, %ecx
9809; AVX2-NEXT: shll $15, %ecx
9810; AVX2-NEXT: movl %eax, %edx
9811; AVX2-NEXT: andl $2, %edx
9812; AVX2-NEXT: shll $13, %edx
9813; AVX2-NEXT: orl %ecx, %edx
9814; AVX2-NEXT: movl %eax, %ecx
9815; AVX2-NEXT: andl $4, %ecx
9816; AVX2-NEXT: shll $11, %ecx
9817; AVX2-NEXT: orl %edx, %ecx
9818; AVX2-NEXT: movl %eax, %edx
9819; AVX2-NEXT: andl $8, %edx
9820; AVX2-NEXT: shll $9, %edx
9821; AVX2-NEXT: orl %ecx, %edx
9822; AVX2-NEXT: movl %eax, %esi
9823; AVX2-NEXT: andl $16, %esi
9824; AVX2-NEXT: shll $7, %esi
9825; AVX2-NEXT: orl %edx, %esi
9826; AVX2-NEXT: movl %eax, %ecx
9827; AVX2-NEXT: andl $32, %ecx
9828; AVX2-NEXT: shll $5, %ecx
9829; AVX2-NEXT: orl %esi, %ecx
9830; AVX2-NEXT: movl %eax, %edx
9831; AVX2-NEXT: andl $64, %edx
9832; AVX2-NEXT: shll $3, %edx
9833; AVX2-NEXT: leal (%rax,%rax), %esi
9834; AVX2-NEXT: andl $256, %esi # imm = 0x100
9835; AVX2-NEXT: orl %edx, %esi
9836; AVX2-NEXT: movl %eax, %edx
9837; AVX2-NEXT: shrl %edx
9838; AVX2-NEXT: andl $128, %edx
9839; AVX2-NEXT: orl %esi, %edx
9840; AVX2-NEXT: movl %eax, %esi
9841; AVX2-NEXT: shrl $3, %esi
9842; AVX2-NEXT: andl $64, %esi
9843; AVX2-NEXT: orl %edx, %esi
9844; AVX2-NEXT: movl %eax, %edx
9845; AVX2-NEXT: shrl $5, %edx
9846; AVX2-NEXT: andl $32, %edx
9847; AVX2-NEXT: orl %esi, %edx
9848; AVX2-NEXT: movl %eax, %esi
9849; AVX2-NEXT: shrl $7, %esi
9850; AVX2-NEXT: andl $16, %esi
9851; AVX2-NEXT: orl %edx, %esi
9852; AVX2-NEXT: movl %eax, %edx
9853; AVX2-NEXT: shrl $9, %edx
9854; AVX2-NEXT: andl $8, %edx
9855; AVX2-NEXT: orl %esi, %edx
9856; AVX2-NEXT: movl %eax, %esi
9857; AVX2-NEXT: shrl $11, %esi
9858; AVX2-NEXT: andl $4, %esi
9859; AVX2-NEXT: orl %edx, %esi
9860; AVX2-NEXT: movl %eax, %edx
9861; AVX2-NEXT: shrl $13, %edx
9862; AVX2-NEXT: andl $2, %edx
9863; AVX2-NEXT: orl %esi, %edx
9864; AVX2-NEXT: shrl $15, %eax
9865; AVX2-NEXT: orl %edx, %eax
9866; AVX2-NEXT: orl %ecx, %eax
9867; AVX2-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
9868; AVX2-NEXT: vpextrw $5, %xmm1, %eax
9869; AVX2-NEXT: movl %eax, %ecx
9870; AVX2-NEXT: shll $15, %ecx
9871; AVX2-NEXT: movl %eax, %edx
9872; AVX2-NEXT: andl $2, %edx
9873; AVX2-NEXT: shll $13, %edx
9874; AVX2-NEXT: orl %ecx, %edx
9875; AVX2-NEXT: movl %eax, %ecx
9876; AVX2-NEXT: andl $4, %ecx
9877; AVX2-NEXT: shll $11, %ecx
9878; AVX2-NEXT: orl %edx, %ecx
9879; AVX2-NEXT: movl %eax, %edx
9880; AVX2-NEXT: andl $8, %edx
9881; AVX2-NEXT: shll $9, %edx
9882; AVX2-NEXT: orl %ecx, %edx
9883; AVX2-NEXT: movl %eax, %esi
9884; AVX2-NEXT: andl $16, %esi
9885; AVX2-NEXT: shll $7, %esi
9886; AVX2-NEXT: orl %edx, %esi
9887; AVX2-NEXT: movl %eax, %ecx
9888; AVX2-NEXT: andl $32, %ecx
9889; AVX2-NEXT: shll $5, %ecx
9890; AVX2-NEXT: orl %esi, %ecx
9891; AVX2-NEXT: movl %eax, %edx
9892; AVX2-NEXT: andl $64, %edx
9893; AVX2-NEXT: shll $3, %edx
9894; AVX2-NEXT: leal (%rax,%rax), %esi
9895; AVX2-NEXT: andl $256, %esi # imm = 0x100
9896; AVX2-NEXT: orl %edx, %esi
9897; AVX2-NEXT: movl %eax, %edx
9898; AVX2-NEXT: shrl %edx
9899; AVX2-NEXT: andl $128, %edx
9900; AVX2-NEXT: orl %esi, %edx
9901; AVX2-NEXT: movl %eax, %esi
9902; AVX2-NEXT: shrl $3, %esi
9903; AVX2-NEXT: andl $64, %esi
9904; AVX2-NEXT: orl %edx, %esi
9905; AVX2-NEXT: movl %eax, %edx
9906; AVX2-NEXT: shrl $5, %edx
9907; AVX2-NEXT: andl $32, %edx
9908; AVX2-NEXT: orl %esi, %edx
9909; AVX2-NEXT: movl %eax, %esi
9910; AVX2-NEXT: shrl $7, %esi
9911; AVX2-NEXT: andl $16, %esi
9912; AVX2-NEXT: orl %edx, %esi
9913; AVX2-NEXT: movl %eax, %edx
9914; AVX2-NEXT: shrl $9, %edx
9915; AVX2-NEXT: andl $8, %edx
9916; AVX2-NEXT: orl %esi, %edx
9917; AVX2-NEXT: movl %eax, %esi
9918; AVX2-NEXT: shrl $11, %esi
9919; AVX2-NEXT: andl $4, %esi
9920; AVX2-NEXT: orl %edx, %esi
9921; AVX2-NEXT: movl %eax, %edx
9922; AVX2-NEXT: shrl $13, %edx
9923; AVX2-NEXT: andl $2, %edx
9924; AVX2-NEXT: orl %esi, %edx
9925; AVX2-NEXT: shrl $15, %eax
9926; AVX2-NEXT: orl %edx, %eax
9927; AVX2-NEXT: orl %ecx, %eax
9928; AVX2-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2
9929; AVX2-NEXT: vpextrw $6, %xmm1, %eax
9930; AVX2-NEXT: movl %eax, %ecx
9931; AVX2-NEXT: shll $15, %ecx
9932; AVX2-NEXT: movl %eax, %edx
9933; AVX2-NEXT: andl $2, %edx
9934; AVX2-NEXT: shll $13, %edx
9935; AVX2-NEXT: orl %ecx, %edx
9936; AVX2-NEXT: movl %eax, %ecx
9937; AVX2-NEXT: andl $4, %ecx
9938; AVX2-NEXT: shll $11, %ecx
9939; AVX2-NEXT: orl %edx, %ecx
9940; AVX2-NEXT: movl %eax, %edx
9941; AVX2-NEXT: andl $8, %edx
9942; AVX2-NEXT: shll $9, %edx
9943; AVX2-NEXT: orl %ecx, %edx
9944; AVX2-NEXT: movl %eax, %esi
9945; AVX2-NEXT: andl $16, %esi
9946; AVX2-NEXT: shll $7, %esi
9947; AVX2-NEXT: orl %edx, %esi
9948; AVX2-NEXT: movl %eax, %ecx
9949; AVX2-NEXT: andl $32, %ecx
9950; AVX2-NEXT: shll $5, %ecx
9951; AVX2-NEXT: orl %esi, %ecx
9952; AVX2-NEXT: movl %eax, %edx
9953; AVX2-NEXT: andl $64, %edx
9954; AVX2-NEXT: shll $3, %edx
9955; AVX2-NEXT: leal (%rax,%rax), %esi
9956; AVX2-NEXT: andl $256, %esi # imm = 0x100
9957; AVX2-NEXT: orl %edx, %esi
9958; AVX2-NEXT: movl %eax, %edx
9959; AVX2-NEXT: shrl %edx
9960; AVX2-NEXT: andl $128, %edx
9961; AVX2-NEXT: orl %esi, %edx
9962; AVX2-NEXT: movl %eax, %esi
9963; AVX2-NEXT: shrl $3, %esi
9964; AVX2-NEXT: andl $64, %esi
9965; AVX2-NEXT: orl %edx, %esi
9966; AVX2-NEXT: movl %eax, %edx
9967; AVX2-NEXT: shrl $5, %edx
9968; AVX2-NEXT: andl $32, %edx
9969; AVX2-NEXT: orl %esi, %edx
9970; AVX2-NEXT: movl %eax, %esi
9971; AVX2-NEXT: shrl $7, %esi
9972; AVX2-NEXT: andl $16, %esi
9973; AVX2-NEXT: orl %edx, %esi
9974; AVX2-NEXT: movl %eax, %edx
9975; AVX2-NEXT: shrl $9, %edx
9976; AVX2-NEXT: andl $8, %edx
9977; AVX2-NEXT: orl %esi, %edx
9978; AVX2-NEXT: movl %eax, %esi
9979; AVX2-NEXT: shrl $11, %esi
9980; AVX2-NEXT: andl $4, %esi
9981; AVX2-NEXT: orl %edx, %esi
9982; AVX2-NEXT: movl %eax, %edx
9983; AVX2-NEXT: shrl $13, %edx
9984; AVX2-NEXT: andl $2, %edx
9985; AVX2-NEXT: orl %esi, %edx
9986; AVX2-NEXT: shrl $15, %eax
9987; AVX2-NEXT: orl %edx, %eax
9988; AVX2-NEXT: orl %ecx, %eax
9989; AVX2-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2
9990; AVX2-NEXT: vpextrw $7, %xmm1, %eax
9991; AVX2-NEXT: movl %eax, %ecx
9992; AVX2-NEXT: shll $15, %ecx
9993; AVX2-NEXT: movl %eax, %edx
9994; AVX2-NEXT: andl $2, %edx
9995; AVX2-NEXT: shll $13, %edx
9996; AVX2-NEXT: orl %ecx, %edx
9997; AVX2-NEXT: movl %eax, %ecx
9998; AVX2-NEXT: andl $4, %ecx
9999; AVX2-NEXT: shll $11, %ecx
10000; AVX2-NEXT: orl %edx, %ecx
10001; AVX2-NEXT: movl %eax, %edx
10002; AVX2-NEXT: andl $8, %edx
10003; AVX2-NEXT: shll $9, %edx
10004; AVX2-NEXT: orl %ecx, %edx
10005; AVX2-NEXT: movl %eax, %esi
10006; AVX2-NEXT: andl $16, %esi
10007; AVX2-NEXT: shll $7, %esi
10008; AVX2-NEXT: orl %edx, %esi
10009; AVX2-NEXT: movl %eax, %ecx
10010; AVX2-NEXT: andl $32, %ecx
10011; AVX2-NEXT: shll $5, %ecx
10012; AVX2-NEXT: orl %esi, %ecx
10013; AVX2-NEXT: movl %eax, %edx
10014; AVX2-NEXT: andl $64, %edx
10015; AVX2-NEXT: shll $3, %edx
10016; AVX2-NEXT: leal (%rax,%rax), %esi
10017; AVX2-NEXT: andl $256, %esi # imm = 0x100
10018; AVX2-NEXT: orl %edx, %esi
10019; AVX2-NEXT: movl %eax, %edx
10020; AVX2-NEXT: shrl %edx
10021; AVX2-NEXT: andl $128, %edx
10022; AVX2-NEXT: orl %esi, %edx
10023; AVX2-NEXT: movl %eax, %esi
10024; AVX2-NEXT: shrl $3, %esi
10025; AVX2-NEXT: andl $64, %esi
10026; AVX2-NEXT: orl %edx, %esi
10027; AVX2-NEXT: movl %eax, %edx
10028; AVX2-NEXT: shrl $5, %edx
10029; AVX2-NEXT: andl $32, %edx
10030; AVX2-NEXT: orl %esi, %edx
10031; AVX2-NEXT: movl %eax, %esi
10032; AVX2-NEXT: shrl $7, %esi
10033; AVX2-NEXT: andl $16, %esi
10034; AVX2-NEXT: orl %edx, %esi
10035; AVX2-NEXT: movl %eax, %edx
10036; AVX2-NEXT: shrl $9, %edx
10037; AVX2-NEXT: andl $8, %edx
10038; AVX2-NEXT: orl %esi, %edx
10039; AVX2-NEXT: movl %eax, %esi
10040; AVX2-NEXT: shrl $11, %esi
10041; AVX2-NEXT: andl $4, %esi
10042; AVX2-NEXT: orl %edx, %esi
10043; AVX2-NEXT: movl %eax, %edx
10044; AVX2-NEXT: shrl $13, %edx
10045; AVX2-NEXT: andl $2, %edx
10046; AVX2-NEXT: orl %esi, %edx
10047; AVX2-NEXT: shrl $15, %eax
10048; AVX2-NEXT: orl %edx, %eax
10049; AVX2-NEXT: orl %ecx, %eax
10050; AVX2-NEXT: vpinsrw $7, %eax, %xmm2, %xmm1
10051; AVX2-NEXT: vpextrw $1, %xmm0, %eax
10052; AVX2-NEXT: movl %eax, %ecx
10053; AVX2-NEXT: shll $15, %ecx
10054; AVX2-NEXT: movl %eax, %edx
10055; AVX2-NEXT: andl $2, %edx
10056; AVX2-NEXT: shll $13, %edx
10057; AVX2-NEXT: orl %ecx, %edx
10058; AVX2-NEXT: movl %eax, %ecx
10059; AVX2-NEXT: andl $4, %ecx
10060; AVX2-NEXT: shll $11, %ecx
10061; AVX2-NEXT: orl %edx, %ecx
10062; AVX2-NEXT: movl %eax, %edx
10063; AVX2-NEXT: andl $8, %edx
10064; AVX2-NEXT: shll $9, %edx
10065; AVX2-NEXT: orl %ecx, %edx
10066; AVX2-NEXT: movl %eax, %esi
10067; AVX2-NEXT: andl $16, %esi
10068; AVX2-NEXT: shll $7, %esi
10069; AVX2-NEXT: orl %edx, %esi
10070; AVX2-NEXT: movl %eax, %ecx
10071; AVX2-NEXT: andl $32, %ecx
10072; AVX2-NEXT: shll $5, %ecx
10073; AVX2-NEXT: orl %esi, %ecx
10074; AVX2-NEXT: movl %eax, %edx
10075; AVX2-NEXT: andl $64, %edx
10076; AVX2-NEXT: shll $3, %edx
10077; AVX2-NEXT: leal (%rax,%rax), %esi
10078; AVX2-NEXT: andl $256, %esi # imm = 0x100
10079; AVX2-NEXT: orl %edx, %esi
10080; AVX2-NEXT: movl %eax, %edx
10081; AVX2-NEXT: shrl %edx
10082; AVX2-NEXT: andl $128, %edx
10083; AVX2-NEXT: orl %esi, %edx
10084; AVX2-NEXT: movl %eax, %esi
10085; AVX2-NEXT: shrl $3, %esi
10086; AVX2-NEXT: andl $64, %esi
10087; AVX2-NEXT: orl %edx, %esi
10088; AVX2-NEXT: movl %eax, %edx
10089; AVX2-NEXT: shrl $5, %edx
10090; AVX2-NEXT: andl $32, %edx
10091; AVX2-NEXT: orl %esi, %edx
10092; AVX2-NEXT: movl %eax, %esi
10093; AVX2-NEXT: shrl $7, %esi
10094; AVX2-NEXT: andl $16, %esi
10095; AVX2-NEXT: orl %edx, %esi
10096; AVX2-NEXT: movl %eax, %edx
10097; AVX2-NEXT: shrl $9, %edx
10098; AVX2-NEXT: andl $8, %edx
10099; AVX2-NEXT: orl %esi, %edx
10100; AVX2-NEXT: movl %eax, %esi
10101; AVX2-NEXT: shrl $11, %esi
10102; AVX2-NEXT: andl $4, %esi
10103; AVX2-NEXT: orl %edx, %esi
10104; AVX2-NEXT: movl %eax, %edx
10105; AVX2-NEXT: shrl $13, %edx
10106; AVX2-NEXT: andl $2, %edx
10107; AVX2-NEXT: orl %esi, %edx
10108; AVX2-NEXT: shrl $15, %eax
10109; AVX2-NEXT: orl %edx, %eax
10110; AVX2-NEXT: orl %ecx, %eax
10111; AVX2-NEXT: vmovd %xmm0, %ecx
10112; AVX2-NEXT: movl %ecx, %edx
10113; AVX2-NEXT: shll $15, %edx
10114; AVX2-NEXT: movl %ecx, %esi
10115; AVX2-NEXT: andl $2, %esi
10116; AVX2-NEXT: shll $13, %esi
10117; AVX2-NEXT: orl %edx, %esi
10118; AVX2-NEXT: movl %ecx, %edx
10119; AVX2-NEXT: andl $4, %edx
10120; AVX2-NEXT: shll $11, %edx
10121; AVX2-NEXT: orl %esi, %edx
10122; AVX2-NEXT: movl %ecx, %esi
10123; AVX2-NEXT: andl $8, %esi
10124; AVX2-NEXT: shll $9, %esi
10125; AVX2-NEXT: orl %edx, %esi
10126; AVX2-NEXT: movl %ecx, %edi
10127; AVX2-NEXT: andl $16, %edi
10128; AVX2-NEXT: shll $7, %edi
10129; AVX2-NEXT: orl %esi, %edi
10130; AVX2-NEXT: movl %ecx, %edx
10131; AVX2-NEXT: andl $32, %edx
10132; AVX2-NEXT: shll $5, %edx
10133; AVX2-NEXT: orl %edi, %edx
10134; AVX2-NEXT: movl %ecx, %esi
10135; AVX2-NEXT: andl $64, %esi
10136; AVX2-NEXT: shll $3, %esi
10137; AVX2-NEXT: leal (%rcx,%rcx), %edi
10138; AVX2-NEXT: andl $256, %edi # imm = 0x100
10139; AVX2-NEXT: orl %esi, %edi
10140; AVX2-NEXT: movl %ecx, %esi
10141; AVX2-NEXT: shrl %esi
10142; AVX2-NEXT: andl $128, %esi
10143; AVX2-NEXT: orl %edi, %esi
10144; AVX2-NEXT: movl %ecx, %edi
10145; AVX2-NEXT: shrl $3, %edi
10146; AVX2-NEXT: andl $64, %edi
10147; AVX2-NEXT: orl %esi, %edi
10148; AVX2-NEXT: movl %ecx, %esi
10149; AVX2-NEXT: shrl $5, %esi
10150; AVX2-NEXT: andl $32, %esi
10151; AVX2-NEXT: orl %edi, %esi
10152; AVX2-NEXT: movl %ecx, %edi
10153; AVX2-NEXT: shrl $7, %edi
10154; AVX2-NEXT: andl $16, %edi
10155; AVX2-NEXT: orl %esi, %edi
10156; AVX2-NEXT: movl %ecx, %esi
10157; AVX2-NEXT: shrl $9, %esi
10158; AVX2-NEXT: andl $8, %esi
10159; AVX2-NEXT: orl %edi, %esi
10160; AVX2-NEXT: movl %ecx, %edi
10161; AVX2-NEXT: shrl $11, %edi
10162; AVX2-NEXT: andl $4, %edi
10163; AVX2-NEXT: orl %esi, %edi
10164; AVX2-NEXT: movl %ecx, %esi
10165; AVX2-NEXT: shrl $13, %esi
10166; AVX2-NEXT: andl $2, %esi
10167; AVX2-NEXT: orl %edi, %esi
10168; AVX2-NEXT: andl $32768, %ecx # imm = 0x8000
10169; AVX2-NEXT: shrl $15, %ecx
10170; AVX2-NEXT: orl %esi, %ecx
10171; AVX2-NEXT: orl %edx, %ecx
10172; AVX2-NEXT: vmovd %ecx, %xmm2
10173; AVX2-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
10174; AVX2-NEXT: vpextrw $2, %xmm0, %eax
10175; AVX2-NEXT: movl %eax, %ecx
10176; AVX2-NEXT: shll $15, %ecx
10177; AVX2-NEXT: movl %eax, %edx
10178; AVX2-NEXT: andl $2, %edx
10179; AVX2-NEXT: shll $13, %edx
10180; AVX2-NEXT: orl %ecx, %edx
10181; AVX2-NEXT: movl %eax, %ecx
10182; AVX2-NEXT: andl $4, %ecx
10183; AVX2-NEXT: shll $11, %ecx
10184; AVX2-NEXT: orl %edx, %ecx
10185; AVX2-NEXT: movl %eax, %edx
10186; AVX2-NEXT: andl $8, %edx
10187; AVX2-NEXT: shll $9, %edx
10188; AVX2-NEXT: orl %ecx, %edx
10189; AVX2-NEXT: movl %eax, %esi
10190; AVX2-NEXT: andl $16, %esi
10191; AVX2-NEXT: shll $7, %esi
10192; AVX2-NEXT: orl %edx, %esi
10193; AVX2-NEXT: movl %eax, %ecx
10194; AVX2-NEXT: andl $32, %ecx
10195; AVX2-NEXT: shll $5, %ecx
10196; AVX2-NEXT: orl %esi, %ecx
10197; AVX2-NEXT: movl %eax, %edx
10198; AVX2-NEXT: andl $64, %edx
10199; AVX2-NEXT: shll $3, %edx
10200; AVX2-NEXT: leal (%rax,%rax), %esi
10201; AVX2-NEXT: andl $256, %esi # imm = 0x100
10202; AVX2-NEXT: orl %edx, %esi
10203; AVX2-NEXT: movl %eax, %edx
10204; AVX2-NEXT: shrl %edx
10205; AVX2-NEXT: andl $128, %edx
10206; AVX2-NEXT: orl %esi, %edx
10207; AVX2-NEXT: movl %eax, %esi
10208; AVX2-NEXT: shrl $3, %esi
10209; AVX2-NEXT: andl $64, %esi
10210; AVX2-NEXT: orl %edx, %esi
10211; AVX2-NEXT: movl %eax, %edx
10212; AVX2-NEXT: shrl $5, %edx
10213; AVX2-NEXT: andl $32, %edx
10214; AVX2-NEXT: orl %esi, %edx
10215; AVX2-NEXT: movl %eax, %esi
10216; AVX2-NEXT: shrl $7, %esi
10217; AVX2-NEXT: andl $16, %esi
10218; AVX2-NEXT: orl %edx, %esi
10219; AVX2-NEXT: movl %eax, %edx
10220; AVX2-NEXT: shrl $9, %edx
10221; AVX2-NEXT: andl $8, %edx
10222; AVX2-NEXT: orl %esi, %edx
10223; AVX2-NEXT: movl %eax, %esi
10224; AVX2-NEXT: shrl $11, %esi
10225; AVX2-NEXT: andl $4, %esi
10226; AVX2-NEXT: orl %edx, %esi
10227; AVX2-NEXT: movl %eax, %edx
10228; AVX2-NEXT: shrl $13, %edx
10229; AVX2-NEXT: andl $2, %edx
10230; AVX2-NEXT: orl %esi, %edx
10231; AVX2-NEXT: shrl $15, %eax
10232; AVX2-NEXT: orl %edx, %eax
10233; AVX2-NEXT: orl %ecx, %eax
10234; AVX2-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
10235; AVX2-NEXT: vpextrw $3, %xmm0, %eax
10236; AVX2-NEXT: movl %eax, %ecx
10237; AVX2-NEXT: shll $15, %ecx
10238; AVX2-NEXT: movl %eax, %edx
10239; AVX2-NEXT: andl $2, %edx
10240; AVX2-NEXT: shll $13, %edx
10241; AVX2-NEXT: orl %ecx, %edx
10242; AVX2-NEXT: movl %eax, %ecx
10243; AVX2-NEXT: andl $4, %ecx
10244; AVX2-NEXT: shll $11, %ecx
10245; AVX2-NEXT: orl %edx, %ecx
10246; AVX2-NEXT: movl %eax, %edx
10247; AVX2-NEXT: andl $8, %edx
10248; AVX2-NEXT: shll $9, %edx
10249; AVX2-NEXT: orl %ecx, %edx
10250; AVX2-NEXT: movl %eax, %esi
10251; AVX2-NEXT: andl $16, %esi
10252; AVX2-NEXT: shll $7, %esi
10253; AVX2-NEXT: orl %edx, %esi
10254; AVX2-NEXT: movl %eax, %ecx
10255; AVX2-NEXT: andl $32, %ecx
10256; AVX2-NEXT: shll $5, %ecx
10257; AVX2-NEXT: orl %esi, %ecx
10258; AVX2-NEXT: movl %eax, %edx
10259; AVX2-NEXT: andl $64, %edx
10260; AVX2-NEXT: shll $3, %edx
10261; AVX2-NEXT: leal (%rax,%rax), %esi
10262; AVX2-NEXT: andl $256, %esi # imm = 0x100
10263; AVX2-NEXT: orl %edx, %esi
10264; AVX2-NEXT: movl %eax, %edx
10265; AVX2-NEXT: shrl %edx
10266; AVX2-NEXT: andl $128, %edx
10267; AVX2-NEXT: orl %esi, %edx
10268; AVX2-NEXT: movl %eax, %esi
10269; AVX2-NEXT: shrl $3, %esi
10270; AVX2-NEXT: andl $64, %esi
10271; AVX2-NEXT: orl %edx, %esi
10272; AVX2-NEXT: movl %eax, %edx
10273; AVX2-NEXT: shrl $5, %edx
10274; AVX2-NEXT: andl $32, %edx
10275; AVX2-NEXT: orl %esi, %edx
10276; AVX2-NEXT: movl %eax, %esi
10277; AVX2-NEXT: shrl $7, %esi
10278; AVX2-NEXT: andl $16, %esi
10279; AVX2-NEXT: orl %edx, %esi
10280; AVX2-NEXT: movl %eax, %edx
10281; AVX2-NEXT: shrl $9, %edx
10282; AVX2-NEXT: andl $8, %edx
10283; AVX2-NEXT: orl %esi, %edx
10284; AVX2-NEXT: movl %eax, %esi
10285; AVX2-NEXT: shrl $11, %esi
10286; AVX2-NEXT: andl $4, %esi
10287; AVX2-NEXT: orl %edx, %esi
10288; AVX2-NEXT: movl %eax, %edx
10289; AVX2-NEXT: shrl $13, %edx
10290; AVX2-NEXT: andl $2, %edx
10291; AVX2-NEXT: orl %esi, %edx
10292; AVX2-NEXT: shrl $15, %eax
10293; AVX2-NEXT: orl %edx, %eax
10294; AVX2-NEXT: orl %ecx, %eax
10295; AVX2-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2
10296; AVX2-NEXT: vpextrw $4, %xmm0, %eax
10297; AVX2-NEXT: movl %eax, %ecx
10298; AVX2-NEXT: shll $15, %ecx
10299; AVX2-NEXT: movl %eax, %edx
10300; AVX2-NEXT: andl $2, %edx
10301; AVX2-NEXT: shll $13, %edx
10302; AVX2-NEXT: orl %ecx, %edx
10303; AVX2-NEXT: movl %eax, %ecx
10304; AVX2-NEXT: andl $4, %ecx
10305; AVX2-NEXT: shll $11, %ecx
10306; AVX2-NEXT: orl %edx, %ecx
10307; AVX2-NEXT: movl %eax, %edx
10308; AVX2-NEXT: andl $8, %edx
10309; AVX2-NEXT: shll $9, %edx
10310; AVX2-NEXT: orl %ecx, %edx
10311; AVX2-NEXT: movl %eax, %esi
10312; AVX2-NEXT: andl $16, %esi
10313; AVX2-NEXT: shll $7, %esi
10314; AVX2-NEXT: orl %edx, %esi
10315; AVX2-NEXT: movl %eax, %ecx
10316; AVX2-NEXT: andl $32, %ecx
10317; AVX2-NEXT: shll $5, %ecx
10318; AVX2-NEXT: orl %esi, %ecx
10319; AVX2-NEXT: movl %eax, %edx
10320; AVX2-NEXT: andl $64, %edx
10321; AVX2-NEXT: shll $3, %edx
10322; AVX2-NEXT: leal (%rax,%rax), %esi
10323; AVX2-NEXT: andl $256, %esi # imm = 0x100
10324; AVX2-NEXT: orl %edx, %esi
10325; AVX2-NEXT: movl %eax, %edx
10326; AVX2-NEXT: shrl %edx
10327; AVX2-NEXT: andl $128, %edx
10328; AVX2-NEXT: orl %esi, %edx
10329; AVX2-NEXT: movl %eax, %esi
10330; AVX2-NEXT: shrl $3, %esi
10331; AVX2-NEXT: andl $64, %esi
10332; AVX2-NEXT: orl %edx, %esi
10333; AVX2-NEXT: movl %eax, %edx
10334; AVX2-NEXT: shrl $5, %edx
10335; AVX2-NEXT: andl $32, %edx
10336; AVX2-NEXT: orl %esi, %edx
10337; AVX2-NEXT: movl %eax, %esi
10338; AVX2-NEXT: shrl $7, %esi
10339; AVX2-NEXT: andl $16, %esi
10340; AVX2-NEXT: orl %edx, %esi
10341; AVX2-NEXT: movl %eax, %edx
10342; AVX2-NEXT: shrl $9, %edx
10343; AVX2-NEXT: andl $8, %edx
10344; AVX2-NEXT: orl %esi, %edx
10345; AVX2-NEXT: movl %eax, %esi
10346; AVX2-NEXT: shrl $11, %esi
10347; AVX2-NEXT: andl $4, %esi
10348; AVX2-NEXT: orl %edx, %esi
10349; AVX2-NEXT: movl %eax, %edx
10350; AVX2-NEXT: shrl $13, %edx
10351; AVX2-NEXT: andl $2, %edx
10352; AVX2-NEXT: orl %esi, %edx
10353; AVX2-NEXT: shrl $15, %eax
10354; AVX2-NEXT: orl %edx, %eax
10355; AVX2-NEXT: orl %ecx, %eax
10356; AVX2-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
10357; AVX2-NEXT: vpextrw $5, %xmm0, %eax
10358; AVX2-NEXT: movl %eax, %ecx
10359; AVX2-NEXT: shll $15, %ecx
10360; AVX2-NEXT: movl %eax, %edx
10361; AVX2-NEXT: andl $2, %edx
10362; AVX2-NEXT: shll $13, %edx
10363; AVX2-NEXT: orl %ecx, %edx
10364; AVX2-NEXT: movl %eax, %ecx
10365; AVX2-NEXT: andl $4, %ecx
10366; AVX2-NEXT: shll $11, %ecx
10367; AVX2-NEXT: orl %edx, %ecx
10368; AVX2-NEXT: movl %eax, %edx
10369; AVX2-NEXT: andl $8, %edx
10370; AVX2-NEXT: shll $9, %edx
10371; AVX2-NEXT: orl %ecx, %edx
10372; AVX2-NEXT: movl %eax, %esi
10373; AVX2-NEXT: andl $16, %esi
10374; AVX2-NEXT: shll $7, %esi
10375; AVX2-NEXT: orl %edx, %esi
10376; AVX2-NEXT: movl %eax, %ecx
10377; AVX2-NEXT: andl $32, %ecx
10378; AVX2-NEXT: shll $5, %ecx
10379; AVX2-NEXT: orl %esi, %ecx
10380; AVX2-NEXT: movl %eax, %edx
10381; AVX2-NEXT: andl $64, %edx
10382; AVX2-NEXT: shll $3, %edx
10383; AVX2-NEXT: leal (%rax,%rax), %esi
10384; AVX2-NEXT: andl $256, %esi # imm = 0x100
10385; AVX2-NEXT: orl %edx, %esi
10386; AVX2-NEXT: movl %eax, %edx
10387; AVX2-NEXT: shrl %edx
10388; AVX2-NEXT: andl $128, %edx
10389; AVX2-NEXT: orl %esi, %edx
10390; AVX2-NEXT: movl %eax, %esi
10391; AVX2-NEXT: shrl $3, %esi
10392; AVX2-NEXT: andl $64, %esi
10393; AVX2-NEXT: orl %edx, %esi
10394; AVX2-NEXT: movl %eax, %edx
10395; AVX2-NEXT: shrl $5, %edx
10396; AVX2-NEXT: andl $32, %edx
10397; AVX2-NEXT: orl %esi, %edx
10398; AVX2-NEXT: movl %eax, %esi
10399; AVX2-NEXT: shrl $7, %esi
10400; AVX2-NEXT: andl $16, %esi
10401; AVX2-NEXT: orl %edx, %esi
10402; AVX2-NEXT: movl %eax, %edx
10403; AVX2-NEXT: shrl $9, %edx
10404; AVX2-NEXT: andl $8, %edx
10405; AVX2-NEXT: orl %esi, %edx
10406; AVX2-NEXT: movl %eax, %esi
10407; AVX2-NEXT: shrl $11, %esi
10408; AVX2-NEXT: andl $4, %esi
10409; AVX2-NEXT: orl %edx, %esi
10410; AVX2-NEXT: movl %eax, %edx
10411; AVX2-NEXT: shrl $13, %edx
10412; AVX2-NEXT: andl $2, %edx
10413; AVX2-NEXT: orl %esi, %edx
10414; AVX2-NEXT: shrl $15, %eax
10415; AVX2-NEXT: orl %edx, %eax
10416; AVX2-NEXT: orl %ecx, %eax
10417; AVX2-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2
10418; AVX2-NEXT: vpextrw $6, %xmm0, %eax
10419; AVX2-NEXT: movl %eax, %ecx
10420; AVX2-NEXT: shll $15, %ecx
10421; AVX2-NEXT: movl %eax, %edx
10422; AVX2-NEXT: andl $2, %edx
10423; AVX2-NEXT: shll $13, %edx
10424; AVX2-NEXT: orl %ecx, %edx
10425; AVX2-NEXT: movl %eax, %ecx
10426; AVX2-NEXT: andl $4, %ecx
10427; AVX2-NEXT: shll $11, %ecx
10428; AVX2-NEXT: orl %edx, %ecx
10429; AVX2-NEXT: movl %eax, %edx
10430; AVX2-NEXT: andl $8, %edx
10431; AVX2-NEXT: shll $9, %edx
10432; AVX2-NEXT: orl %ecx, %edx
10433; AVX2-NEXT: movl %eax, %esi
10434; AVX2-NEXT: andl $16, %esi
10435; AVX2-NEXT: shll $7, %esi
10436; AVX2-NEXT: orl %edx, %esi
10437; AVX2-NEXT: movl %eax, %ecx
10438; AVX2-NEXT: andl $32, %ecx
10439; AVX2-NEXT: shll $5, %ecx
10440; AVX2-NEXT: orl %esi, %ecx
10441; AVX2-NEXT: movl %eax, %edx
10442; AVX2-NEXT: andl $64, %edx
10443; AVX2-NEXT: shll $3, %edx
10444; AVX2-NEXT: leal (%rax,%rax), %esi
10445; AVX2-NEXT: andl $256, %esi # imm = 0x100
10446; AVX2-NEXT: orl %edx, %esi
10447; AVX2-NEXT: movl %eax, %edx
10448; AVX2-NEXT: shrl %edx
10449; AVX2-NEXT: andl $128, %edx
10450; AVX2-NEXT: orl %esi, %edx
10451; AVX2-NEXT: movl %eax, %esi
10452; AVX2-NEXT: shrl $3, %esi
10453; AVX2-NEXT: andl $64, %esi
10454; AVX2-NEXT: orl %edx, %esi
10455; AVX2-NEXT: movl %eax, %edx
10456; AVX2-NEXT: shrl $5, %edx
10457; AVX2-NEXT: andl $32, %edx
10458; AVX2-NEXT: orl %esi, %edx
10459; AVX2-NEXT: movl %eax, %esi
10460; AVX2-NEXT: shrl $7, %esi
10461; AVX2-NEXT: andl $16, %esi
10462; AVX2-NEXT: orl %edx, %esi
10463; AVX2-NEXT: movl %eax, %edx
10464; AVX2-NEXT: shrl $9, %edx
10465; AVX2-NEXT: andl $8, %edx
10466; AVX2-NEXT: orl %esi, %edx
10467; AVX2-NEXT: movl %eax, %esi
10468; AVX2-NEXT: shrl $11, %esi
10469; AVX2-NEXT: andl $4, %esi
10470; AVX2-NEXT: orl %edx, %esi
10471; AVX2-NEXT: movl %eax, %edx
10472; AVX2-NEXT: shrl $13, %edx
10473; AVX2-NEXT: andl $2, %edx
10474; AVX2-NEXT: orl %esi, %edx
10475; AVX2-NEXT: shrl $15, %eax
10476; AVX2-NEXT: orl %edx, %eax
10477; AVX2-NEXT: orl %ecx, %eax
10478; AVX2-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2
10479; AVX2-NEXT: vpextrw $7, %xmm0, %eax
10480; AVX2-NEXT: movl %eax, %ecx
10481; AVX2-NEXT: shll $15, %ecx
10482; AVX2-NEXT: movl %eax, %edx
10483; AVX2-NEXT: andl $2, %edx
10484; AVX2-NEXT: shll $13, %edx
10485; AVX2-NEXT: orl %ecx, %edx
10486; AVX2-NEXT: movl %eax, %ecx
10487; AVX2-NEXT: andl $4, %ecx
10488; AVX2-NEXT: shll $11, %ecx
10489; AVX2-NEXT: orl %edx, %ecx
10490; AVX2-NEXT: movl %eax, %edx
10491; AVX2-NEXT: andl $8, %edx
10492; AVX2-NEXT: shll $9, %edx
10493; AVX2-NEXT: orl %ecx, %edx
10494; AVX2-NEXT: movl %eax, %esi
10495; AVX2-NEXT: andl $16, %esi
10496; AVX2-NEXT: shll $7, %esi
10497; AVX2-NEXT: orl %edx, %esi
10498; AVX2-NEXT: movl %eax, %ecx
10499; AVX2-NEXT: andl $32, %ecx
10500; AVX2-NEXT: shll $5, %ecx
10501; AVX2-NEXT: orl %esi, %ecx
10502; AVX2-NEXT: movl %eax, %edx
10503; AVX2-NEXT: andl $64, %edx
10504; AVX2-NEXT: shll $3, %edx
10505; AVX2-NEXT: leal (%rax,%rax), %esi
10506; AVX2-NEXT: andl $256, %esi # imm = 0x100
10507; AVX2-NEXT: orl %edx, %esi
10508; AVX2-NEXT: movl %eax, %edx
10509; AVX2-NEXT: shrl %edx
10510; AVX2-NEXT: andl $128, %edx
10511; AVX2-NEXT: orl %esi, %edx
10512; AVX2-NEXT: movl %eax, %esi
10513; AVX2-NEXT: shrl $3, %esi
10514; AVX2-NEXT: andl $64, %esi
10515; AVX2-NEXT: orl %edx, %esi
10516; AVX2-NEXT: movl %eax, %edx
10517; AVX2-NEXT: shrl $5, %edx
10518; AVX2-NEXT: andl $32, %edx
10519; AVX2-NEXT: orl %esi, %edx
10520; AVX2-NEXT: movl %eax, %esi
10521; AVX2-NEXT: shrl $7, %esi
10522; AVX2-NEXT: andl $16, %esi
10523; AVX2-NEXT: orl %edx, %esi
10524; AVX2-NEXT: movl %eax, %edx
10525; AVX2-NEXT: shrl $9, %edx
10526; AVX2-NEXT: andl $8, %edx
10527; AVX2-NEXT: orl %esi, %edx
10528; AVX2-NEXT: movl %eax, %esi
10529; AVX2-NEXT: shrl $11, %esi
10530; AVX2-NEXT: andl $4, %esi
10531; AVX2-NEXT: orl %edx, %esi
10532; AVX2-NEXT: movl %eax, %edx
10533; AVX2-NEXT: shrl $13, %edx
10534; AVX2-NEXT: andl $2, %edx
10535; AVX2-NEXT: orl %esi, %edx
10536; AVX2-NEXT: shrl $15, %eax
10537; AVX2-NEXT: orl %edx, %eax
10538; AVX2-NEXT: orl %ecx, %eax
10539; AVX2-NEXT: vpinsrw $7, %eax, %xmm2, %xmm0
10540; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
10541; AVX2-NEXT: retq
10542;
10543; XOPAVX1-LABEL: test_bitreverse_v16i16:
10544; XOPAVX1: # BB#0:
10545; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
10546; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [81,80,83,82,85,84,87,86,89,88,91,90,93,92,95,94]
10547; XOPAVX1-NEXT: vpperm %xmm2, %xmm1, %xmm0, %xmm1
10548; XOPAVX1-NEXT: vpperm %xmm2, %xmm0, %xmm0, %xmm0
10549; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
10550; XOPAVX1-NEXT: retq
10551;
10552; XOPAVX2-LABEL: test_bitreverse_v16i16:
10553; XOPAVX2: # BB#0:
10554; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
10555; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [81,80,83,82,85,84,87,86,89,88,91,90,93,92,95,94]
10556; XOPAVX2-NEXT: vpperm %xmm2, %xmm1, %xmm0, %xmm1
10557; XOPAVX2-NEXT: vpperm %xmm2, %xmm0, %xmm0, %xmm0
10558; XOPAVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
10559; XOPAVX2-NEXT: retq
10560 %b = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a)
10561 ret <16 x i16> %b
10562}
10563
10564define <8 x i32> @test_bitreverse_v8i32(<8 x i32> %a) nounwind {
10565; SSE-LABEL: test_bitreverse_v8i32:
10566; SSE: # BB#0:
10567; SSE-NEXT: movdqa %xmm0, %xmm2
10568; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[3,1,2,3]
10569; SSE-NEXT: movd %xmm0, %eax
10570; SSE-NEXT: movl %eax, %ecx
10571; SSE-NEXT: shll $31, %ecx
10572; SSE-NEXT: movl %eax, %edx
10573; SSE-NEXT: andl $2, %edx
10574; SSE-NEXT: shll $29, %edx
10575; SSE-NEXT: orl %ecx, %edx
10576; SSE-NEXT: movl %eax, %ecx
10577; SSE-NEXT: andl $4, %ecx
10578; SSE-NEXT: shll $27, %ecx
10579; SSE-NEXT: orl %edx, %ecx
10580; SSE-NEXT: movl %eax, %edx
10581; SSE-NEXT: andl $8, %edx
10582; SSE-NEXT: shll $25, %edx
10583; SSE-NEXT: orl %ecx, %edx
10584; SSE-NEXT: movl %eax, %esi
10585; SSE-NEXT: andl $16, %esi
10586; SSE-NEXT: shll $23, %esi
10587; SSE-NEXT: orl %edx, %esi
10588; SSE-NEXT: movl %eax, %ecx
10589; SSE-NEXT: andl $32, %ecx
10590; SSE-NEXT: shll $21, %ecx
10591; SSE-NEXT: orl %esi, %ecx
10592; SSE-NEXT: movl %eax, %edx
10593; SSE-NEXT: andl $64, %edx
10594; SSE-NEXT: shll $19, %edx
10595; SSE-NEXT: movl %eax, %esi
10596; SSE-NEXT: shll $17, %esi
10597; SSE-NEXT: andl $16777216, %esi # imm = 0x1000000
10598; SSE-NEXT: orl %edx, %esi
10599; SSE-NEXT: movl %eax, %edx
10600; SSE-NEXT: shll $15, %edx
10601; SSE-NEXT: andl $8388608, %edx # imm = 0x800000
10602; SSE-NEXT: orl %esi, %edx
10603; SSE-NEXT: movl %eax, %esi
10604; SSE-NEXT: shll $13, %esi
10605; SSE-NEXT: andl $4194304, %esi # imm = 0x400000
10606; SSE-NEXT: orl %edx, %esi
10607; SSE-NEXT: movl %eax, %edx
10608; SSE-NEXT: shll $11, %edx
10609; SSE-NEXT: andl $2097152, %edx # imm = 0x200000
10610; SSE-NEXT: orl %esi, %edx
10611; SSE-NEXT: movl %eax, %esi
10612; SSE-NEXT: shll $9, %esi
10613; SSE-NEXT: andl $1048576, %esi # imm = 0x100000
10614; SSE-NEXT: orl %edx, %esi
10615; SSE-NEXT: movl %eax, %edx
10616; SSE-NEXT: shll $7, %edx
10617; SSE-NEXT: andl $524288, %edx # imm = 0x80000
10618; SSE-NEXT: orl %esi, %edx
10619; SSE-NEXT: movl %eax, %esi
10620; SSE-NEXT: shll $5, %esi
10621; SSE-NEXT: andl $262144, %esi # imm = 0x40000
10622; SSE-NEXT: orl %edx, %esi
10623; SSE-NEXT: leal (,%rax,8), %edx
10624; SSE-NEXT: andl $131072, %edx # imm = 0x20000
10625; SSE-NEXT: orl %esi, %edx
10626; SSE-NEXT: leal (%rax,%rax), %esi
10627; SSE-NEXT: andl $65536, %esi # imm = 0x10000
10628; SSE-NEXT: orl %edx, %esi
10629; SSE-NEXT: movl %eax, %edx
10630; SSE-NEXT: shrl %edx
10631; SSE-NEXT: andl $32768, %edx # imm = 0x8000
10632; SSE-NEXT: orl %esi, %edx
10633; SSE-NEXT: movl %eax, %esi
10634; SSE-NEXT: shrl $3, %esi
10635; SSE-NEXT: andl $16384, %esi # imm = 0x4000
10636; SSE-NEXT: orl %edx, %esi
10637; SSE-NEXT: movl %eax, %edx
10638; SSE-NEXT: shrl $5, %edx
10639; SSE-NEXT: andl $8192, %edx # imm = 0x2000
10640; SSE-NEXT: orl %esi, %edx
10641; SSE-NEXT: movl %eax, %esi
10642; SSE-NEXT: shrl $7, %esi
10643; SSE-NEXT: andl $4096, %esi # imm = 0x1000
10644; SSE-NEXT: orl %edx, %esi
10645; SSE-NEXT: movl %eax, %edx
10646; SSE-NEXT: shrl $9, %edx
10647; SSE-NEXT: andl $2048, %edx # imm = 0x800
10648; SSE-NEXT: orl %esi, %edx
10649; SSE-NEXT: movl %eax, %esi
10650; SSE-NEXT: shrl $11, %esi
10651; SSE-NEXT: andl $1024, %esi # imm = 0x400
10652; SSE-NEXT: orl %edx, %esi
10653; SSE-NEXT: movl %eax, %edx
10654; SSE-NEXT: shrl $13, %edx
10655; SSE-NEXT: andl $512, %edx # imm = 0x200
10656; SSE-NEXT: orl %esi, %edx
10657; SSE-NEXT: movl %eax, %esi
10658; SSE-NEXT: shrl $15, %esi
10659; SSE-NEXT: andl $256, %esi # imm = 0x100
10660; SSE-NEXT: orl %edx, %esi
10661; SSE-NEXT: movl %eax, %edx
10662; SSE-NEXT: shrl $17, %edx
10663; SSE-NEXT: andl $128, %edx
10664; SSE-NEXT: orl %esi, %edx
10665; SSE-NEXT: movl %eax, %esi
10666; SSE-NEXT: shrl $19, %esi
10667; SSE-NEXT: andl $64, %esi
10668; SSE-NEXT: orl %edx, %esi
10669; SSE-NEXT: movl %eax, %edx
10670; SSE-NEXT: shrl $21, %edx
10671; SSE-NEXT: andl $32, %edx
10672; SSE-NEXT: orl %esi, %edx
10673; SSE-NEXT: movl %eax, %esi
10674; SSE-NEXT: shrl $23, %esi
10675; SSE-NEXT: andl $16, %esi
10676; SSE-NEXT: orl %edx, %esi
10677; SSE-NEXT: movl %eax, %edx
10678; SSE-NEXT: shrl $25, %edx
10679; SSE-NEXT: andl $8, %edx
10680; SSE-NEXT: orl %esi, %edx
10681; SSE-NEXT: movl %eax, %esi
10682; SSE-NEXT: shrl $27, %esi
10683; SSE-NEXT: andl $4, %esi
10684; SSE-NEXT: orl %edx, %esi
10685; SSE-NEXT: movl %eax, %edx
10686; SSE-NEXT: shrl $29, %edx
10687; SSE-NEXT: andl $2, %edx
10688; SSE-NEXT: orl %esi, %edx
10689; SSE-NEXT: shrl $31, %eax
10690; SSE-NEXT: orl %edx, %eax
10691; SSE-NEXT: orl %ecx, %eax
10692; SSE-NEXT: movd %eax, %xmm0
10693; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,2,3]
10694; SSE-NEXT: movd %xmm3, %eax
10695; SSE-NEXT: movl %eax, %ecx
10696; SSE-NEXT: shll $31, %ecx
10697; SSE-NEXT: movl %eax, %edx
10698; SSE-NEXT: andl $2, %edx
10699; SSE-NEXT: shll $29, %edx
10700; SSE-NEXT: orl %ecx, %edx
10701; SSE-NEXT: movl %eax, %ecx
10702; SSE-NEXT: andl $4, %ecx
10703; SSE-NEXT: shll $27, %ecx
10704; SSE-NEXT: orl %edx, %ecx
10705; SSE-NEXT: movl %eax, %edx
10706; SSE-NEXT: andl $8, %edx
10707; SSE-NEXT: shll $25, %edx
10708; SSE-NEXT: orl %ecx, %edx
10709; SSE-NEXT: movl %eax, %esi
10710; SSE-NEXT: andl $16, %esi
10711; SSE-NEXT: shll $23, %esi
10712; SSE-NEXT: orl %edx, %esi
10713; SSE-NEXT: movl %eax, %ecx
10714; SSE-NEXT: andl $32, %ecx
10715; SSE-NEXT: shll $21, %ecx
10716; SSE-NEXT: orl %esi, %ecx
10717; SSE-NEXT: movl %eax, %edx
10718; SSE-NEXT: andl $64, %edx
10719; SSE-NEXT: shll $19, %edx
10720; SSE-NEXT: movl %eax, %esi
10721; SSE-NEXT: shll $17, %esi
10722; SSE-NEXT: andl $16777216, %esi # imm = 0x1000000
10723; SSE-NEXT: orl %edx, %esi
10724; SSE-NEXT: movl %eax, %edx
10725; SSE-NEXT: shll $15, %edx
10726; SSE-NEXT: andl $8388608, %edx # imm = 0x800000
10727; SSE-NEXT: orl %esi, %edx
10728; SSE-NEXT: movl %eax, %esi
10729; SSE-NEXT: shll $13, %esi
10730; SSE-NEXT: andl $4194304, %esi # imm = 0x400000
10731; SSE-NEXT: orl %edx, %esi
10732; SSE-NEXT: movl %eax, %edx
10733; SSE-NEXT: shll $11, %edx
10734; SSE-NEXT: andl $2097152, %edx # imm = 0x200000
10735; SSE-NEXT: orl %esi, %edx
10736; SSE-NEXT: movl %eax, %esi
10737; SSE-NEXT: shll $9, %esi
10738; SSE-NEXT: andl $1048576, %esi # imm = 0x100000
10739; SSE-NEXT: orl %edx, %esi
10740; SSE-NEXT: movl %eax, %edx
10741; SSE-NEXT: shll $7, %edx
10742; SSE-NEXT: andl $524288, %edx # imm = 0x80000
10743; SSE-NEXT: orl %esi, %edx
10744; SSE-NEXT: movl %eax, %esi
10745; SSE-NEXT: shll $5, %esi
10746; SSE-NEXT: andl $262144, %esi # imm = 0x40000
10747; SSE-NEXT: orl %edx, %esi
10748; SSE-NEXT: leal (,%rax,8), %edx
10749; SSE-NEXT: andl $131072, %edx # imm = 0x20000
10750; SSE-NEXT: orl %esi, %edx
10751; SSE-NEXT: leal (%rax,%rax), %esi
10752; SSE-NEXT: andl $65536, %esi # imm = 0x10000
10753; SSE-NEXT: orl %edx, %esi
10754; SSE-NEXT: movl %eax, %edx
10755; SSE-NEXT: shrl %edx
10756; SSE-NEXT: andl $32768, %edx # imm = 0x8000
10757; SSE-NEXT: orl %esi, %edx
10758; SSE-NEXT: movl %eax, %esi
10759; SSE-NEXT: shrl $3, %esi
10760; SSE-NEXT: andl $16384, %esi # imm = 0x4000
10761; SSE-NEXT: orl %edx, %esi
10762; SSE-NEXT: movl %eax, %edx
10763; SSE-NEXT: shrl $5, %edx
10764; SSE-NEXT: andl $8192, %edx # imm = 0x2000
10765; SSE-NEXT: orl %esi, %edx
10766; SSE-NEXT: movl %eax, %esi
10767; SSE-NEXT: shrl $7, %esi
10768; SSE-NEXT: andl $4096, %esi # imm = 0x1000
10769; SSE-NEXT: orl %edx, %esi
10770; SSE-NEXT: movl %eax, %edx
10771; SSE-NEXT: shrl $9, %edx
10772; SSE-NEXT: andl $2048, %edx # imm = 0x800
10773; SSE-NEXT: orl %esi, %edx
10774; SSE-NEXT: movl %eax, %esi
10775; SSE-NEXT: shrl $11, %esi
10776; SSE-NEXT: andl $1024, %esi # imm = 0x400
10777; SSE-NEXT: orl %edx, %esi
10778; SSE-NEXT: movl %eax, %edx
10779; SSE-NEXT: shrl $13, %edx
10780; SSE-NEXT: andl $512, %edx # imm = 0x200
10781; SSE-NEXT: orl %esi, %edx
10782; SSE-NEXT: movl %eax, %esi
10783; SSE-NEXT: shrl $15, %esi
10784; SSE-NEXT: andl $256, %esi # imm = 0x100
10785; SSE-NEXT: orl %edx, %esi
10786; SSE-NEXT: movl %eax, %edx
10787; SSE-NEXT: shrl $17, %edx
10788; SSE-NEXT: andl $128, %edx
10789; SSE-NEXT: orl %esi, %edx
10790; SSE-NEXT: movl %eax, %esi
10791; SSE-NEXT: shrl $19, %esi
10792; SSE-NEXT: andl $64, %esi
10793; SSE-NEXT: orl %edx, %esi
10794; SSE-NEXT: movl %eax, %edx
10795; SSE-NEXT: shrl $21, %edx
10796; SSE-NEXT: andl $32, %edx
10797; SSE-NEXT: orl %esi, %edx
10798; SSE-NEXT: movl %eax, %esi
10799; SSE-NEXT: shrl $23, %esi
10800; SSE-NEXT: andl $16, %esi
10801; SSE-NEXT: orl %edx, %esi
10802; SSE-NEXT: movl %eax, %edx
10803; SSE-NEXT: shrl $25, %edx
10804; SSE-NEXT: andl $8, %edx
10805; SSE-NEXT: orl %esi, %edx
10806; SSE-NEXT: movl %eax, %esi
10807; SSE-NEXT: shrl $27, %esi
10808; SSE-NEXT: andl $4, %esi
10809; SSE-NEXT: orl %edx, %esi
10810; SSE-NEXT: movl %eax, %edx
10811; SSE-NEXT: shrl $29, %edx
10812; SSE-NEXT: andl $2, %edx
10813; SSE-NEXT: orl %esi, %edx
10814; SSE-NEXT: shrl $31, %eax
10815; SSE-NEXT: orl %edx, %eax
10816; SSE-NEXT: orl %ecx, %eax
10817; SSE-NEXT: movd %eax, %xmm3
10818; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
10819; SSE-NEXT: movd %xmm2, %eax
10820; SSE-NEXT: movl %eax, %ecx
10821; SSE-NEXT: shll $31, %ecx
10822; SSE-NEXT: movl %eax, %edx
10823; SSE-NEXT: andl $2, %edx
10824; SSE-NEXT: shll $29, %edx
10825; SSE-NEXT: orl %ecx, %edx
10826; SSE-NEXT: movl %eax, %ecx
10827; SSE-NEXT: andl $4, %ecx
10828; SSE-NEXT: shll $27, %ecx
10829; SSE-NEXT: orl %edx, %ecx
10830; SSE-NEXT: movl %eax, %edx
10831; SSE-NEXT: andl $8, %edx
10832; SSE-NEXT: shll $25, %edx
10833; SSE-NEXT: orl %ecx, %edx
10834; SSE-NEXT: movl %eax, %esi
10835; SSE-NEXT: andl $16, %esi
10836; SSE-NEXT: shll $23, %esi
10837; SSE-NEXT: orl %edx, %esi
10838; SSE-NEXT: movl %eax, %ecx
10839; SSE-NEXT: andl $32, %ecx
10840; SSE-NEXT: shll $21, %ecx
10841; SSE-NEXT: orl %esi, %ecx
10842; SSE-NEXT: movl %eax, %edx
10843; SSE-NEXT: andl $64, %edx
10844; SSE-NEXT: shll $19, %edx
10845; SSE-NEXT: movl %eax, %esi
10846; SSE-NEXT: shll $17, %esi
10847; SSE-NEXT: andl $16777216, %esi # imm = 0x1000000
10848; SSE-NEXT: orl %edx, %esi
10849; SSE-NEXT: movl %eax, %edx
10850; SSE-NEXT: shll $15, %edx
10851; SSE-NEXT: andl $8388608, %edx # imm = 0x800000
10852; SSE-NEXT: orl %esi, %edx
10853; SSE-NEXT: movl %eax, %esi
10854; SSE-NEXT: shll $13, %esi
10855; SSE-NEXT: andl $4194304, %esi # imm = 0x400000
10856; SSE-NEXT: orl %edx, %esi
10857; SSE-NEXT: movl %eax, %edx
10858; SSE-NEXT: shll $11, %edx
10859; SSE-NEXT: andl $2097152, %edx # imm = 0x200000
10860; SSE-NEXT: orl %esi, %edx
10861; SSE-NEXT: movl %eax, %esi
10862; SSE-NEXT: shll $9, %esi
10863; SSE-NEXT: andl $1048576, %esi # imm = 0x100000
10864; SSE-NEXT: orl %edx, %esi
10865; SSE-NEXT: movl %eax, %edx
10866; SSE-NEXT: shll $7, %edx
10867; SSE-NEXT: andl $524288, %edx # imm = 0x80000
10868; SSE-NEXT: orl %esi, %edx
10869; SSE-NEXT: movl %eax, %esi
10870; SSE-NEXT: shll $5, %esi
10871; SSE-NEXT: andl $262144, %esi # imm = 0x40000
10872; SSE-NEXT: orl %edx, %esi
10873; SSE-NEXT: leal (,%rax,8), %edx
10874; SSE-NEXT: andl $131072, %edx # imm = 0x20000
10875; SSE-NEXT: orl %esi, %edx
10876; SSE-NEXT: leal (%rax,%rax), %esi
10877; SSE-NEXT: andl $65536, %esi # imm = 0x10000
10878; SSE-NEXT: orl %edx, %esi
10879; SSE-NEXT: movl %eax, %edx
10880; SSE-NEXT: shrl %edx
10881; SSE-NEXT: andl $32768, %edx # imm = 0x8000
10882; SSE-NEXT: orl %esi, %edx
10883; SSE-NEXT: movl %eax, %esi
10884; SSE-NEXT: shrl $3, %esi
10885; SSE-NEXT: andl $16384, %esi # imm = 0x4000
10886; SSE-NEXT: orl %edx, %esi
10887; SSE-NEXT: movl %eax, %edx
10888; SSE-NEXT: shrl $5, %edx
10889; SSE-NEXT: andl $8192, %edx # imm = 0x2000
10890; SSE-NEXT: orl %esi, %edx
10891; SSE-NEXT: movl %eax, %esi
10892; SSE-NEXT: shrl $7, %esi
10893; SSE-NEXT: andl $4096, %esi # imm = 0x1000
10894; SSE-NEXT: orl %edx, %esi
10895; SSE-NEXT: movl %eax, %edx
10896; SSE-NEXT: shrl $9, %edx
10897; SSE-NEXT: andl $2048, %edx # imm = 0x800
10898; SSE-NEXT: orl %esi, %edx
10899; SSE-NEXT: movl %eax, %esi
10900; SSE-NEXT: shrl $11, %esi
10901; SSE-NEXT: andl $1024, %esi # imm = 0x400
10902; SSE-NEXT: orl %edx, %esi
10903; SSE-NEXT: movl %eax, %edx
10904; SSE-NEXT: shrl $13, %edx
10905; SSE-NEXT: andl $512, %edx # imm = 0x200
10906; SSE-NEXT: orl %esi, %edx
10907; SSE-NEXT: movl %eax, %esi
10908; SSE-NEXT: shrl $15, %esi
10909; SSE-NEXT: andl $256, %esi # imm = 0x100
10910; SSE-NEXT: orl %edx, %esi
10911; SSE-NEXT: movl %eax, %edx
10912; SSE-NEXT: shrl $17, %edx
10913; SSE-NEXT: andl $128, %edx
10914; SSE-NEXT: orl %esi, %edx
10915; SSE-NEXT: movl %eax, %esi
10916; SSE-NEXT: shrl $19, %esi
10917; SSE-NEXT: andl $64, %esi
10918; SSE-NEXT: orl %edx, %esi
10919; SSE-NEXT: movl %eax, %edx
10920; SSE-NEXT: shrl $21, %edx
10921; SSE-NEXT: andl $32, %edx
10922; SSE-NEXT: orl %esi, %edx
10923; SSE-NEXT: movl %eax, %esi
10924; SSE-NEXT: shrl $23, %esi
10925; SSE-NEXT: andl $16, %esi
10926; SSE-NEXT: orl %edx, %esi
10927; SSE-NEXT: movl %eax, %edx
10928; SSE-NEXT: shrl $25, %edx
10929; SSE-NEXT: andl $8, %edx
10930; SSE-NEXT: orl %esi, %edx
10931; SSE-NEXT: movl %eax, %esi
10932; SSE-NEXT: shrl $27, %esi
10933; SSE-NEXT: andl $4, %esi
10934; SSE-NEXT: orl %edx, %esi
10935; SSE-NEXT: movl %eax, %edx
10936; SSE-NEXT: shrl $29, %edx
10937; SSE-NEXT: andl $2, %edx
10938; SSE-NEXT: orl %esi, %edx
10939; SSE-NEXT: shrl $31, %eax
10940; SSE-NEXT: orl %edx, %eax
10941; SSE-NEXT: orl %ecx, %eax
10942; SSE-NEXT: movd %eax, %xmm0
10943; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
10944; SSE-NEXT: movd %xmm2, %eax
10945; SSE-NEXT: movl %eax, %ecx
10946; SSE-NEXT: shll $31, %ecx
10947; SSE-NEXT: movl %eax, %edx
10948; SSE-NEXT: andl $2, %edx
10949; SSE-NEXT: shll $29, %edx
10950; SSE-NEXT: orl %ecx, %edx
10951; SSE-NEXT: movl %eax, %ecx
10952; SSE-NEXT: andl $4, %ecx
10953; SSE-NEXT: shll $27, %ecx
10954; SSE-NEXT: orl %edx, %ecx
10955; SSE-NEXT: movl %eax, %edx
10956; SSE-NEXT: andl $8, %edx
10957; SSE-NEXT: shll $25, %edx
10958; SSE-NEXT: orl %ecx, %edx
10959; SSE-NEXT: movl %eax, %esi
10960; SSE-NEXT: andl $16, %esi
10961; SSE-NEXT: shll $23, %esi
10962; SSE-NEXT: orl %edx, %esi
10963; SSE-NEXT: movl %eax, %ecx
10964; SSE-NEXT: andl $32, %ecx
10965; SSE-NEXT: shll $21, %ecx
10966; SSE-NEXT: orl %esi, %ecx
10967; SSE-NEXT: movl %eax, %edx
10968; SSE-NEXT: andl $64, %edx
10969; SSE-NEXT: shll $19, %edx
10970; SSE-NEXT: movl %eax, %esi
10971; SSE-NEXT: shll $17, %esi
10972; SSE-NEXT: andl $16777216, %esi # imm = 0x1000000
10973; SSE-NEXT: orl %edx, %esi
10974; SSE-NEXT: movl %eax, %edx
10975; SSE-NEXT: shll $15, %edx
10976; SSE-NEXT: andl $8388608, %edx # imm = 0x800000
10977; SSE-NEXT: orl %esi, %edx
10978; SSE-NEXT: movl %eax, %esi
10979; SSE-NEXT: shll $13, %esi
10980; SSE-NEXT: andl $4194304, %esi # imm = 0x400000
10981; SSE-NEXT: orl %edx, %esi
10982; SSE-NEXT: movl %eax, %edx
10983; SSE-NEXT: shll $11, %edx
10984; SSE-NEXT: andl $2097152, %edx # imm = 0x200000
10985; SSE-NEXT: orl %esi, %edx
10986; SSE-NEXT: movl %eax, %esi
10987; SSE-NEXT: shll $9, %esi
10988; SSE-NEXT: andl $1048576, %esi # imm = 0x100000
10989; SSE-NEXT: orl %edx, %esi
10990; SSE-NEXT: movl %eax, %edx
10991; SSE-NEXT: shll $7, %edx
10992; SSE-NEXT: andl $524288, %edx # imm = 0x80000
10993; SSE-NEXT: orl %esi, %edx
10994; SSE-NEXT: movl %eax, %esi
10995; SSE-NEXT: shll $5, %esi
10996; SSE-NEXT: andl $262144, %esi # imm = 0x40000
10997; SSE-NEXT: orl %edx, %esi
10998; SSE-NEXT: leal (,%rax,8), %edx
10999; SSE-NEXT: andl $131072, %edx # imm = 0x20000
11000; SSE-NEXT: orl %esi, %edx
11001; SSE-NEXT: leal (%rax,%rax), %esi
11002; SSE-NEXT: andl $65536, %esi # imm = 0x10000
11003; SSE-NEXT: orl %edx, %esi
11004; SSE-NEXT: movl %eax, %edx
11005; SSE-NEXT: shrl %edx
11006; SSE-NEXT: andl $32768, %edx # imm = 0x8000
11007; SSE-NEXT: orl %esi, %edx
11008; SSE-NEXT: movl %eax, %esi
11009; SSE-NEXT: shrl $3, %esi
11010; SSE-NEXT: andl $16384, %esi # imm = 0x4000
11011; SSE-NEXT: orl %edx, %esi
11012; SSE-NEXT: movl %eax, %edx
11013; SSE-NEXT: shrl $5, %edx
11014; SSE-NEXT: andl $8192, %edx # imm = 0x2000
11015; SSE-NEXT: orl %esi, %edx
11016; SSE-NEXT: movl %eax, %esi
11017; SSE-NEXT: shrl $7, %esi
11018; SSE-NEXT: andl $4096, %esi # imm = 0x1000
11019; SSE-NEXT: orl %edx, %esi
11020; SSE-NEXT: movl %eax, %edx
11021; SSE-NEXT: shrl $9, %edx
11022; SSE-NEXT: andl $2048, %edx # imm = 0x800
11023; SSE-NEXT: orl %esi, %edx
11024; SSE-NEXT: movl %eax, %esi
11025; SSE-NEXT: shrl $11, %esi
11026; SSE-NEXT: andl $1024, %esi # imm = 0x400
11027; SSE-NEXT: orl %edx, %esi
11028; SSE-NEXT: movl %eax, %edx
11029; SSE-NEXT: shrl $13, %edx
11030; SSE-NEXT: andl $512, %edx # imm = 0x200
11031; SSE-NEXT: orl %esi, %edx
11032; SSE-NEXT: movl %eax, %esi
11033; SSE-NEXT: shrl $15, %esi
11034; SSE-NEXT: andl $256, %esi # imm = 0x100
11035; SSE-NEXT: orl %edx, %esi
11036; SSE-NEXT: movl %eax, %edx
11037; SSE-NEXT: shrl $17, %edx
11038; SSE-NEXT: andl $128, %edx
11039; SSE-NEXT: orl %esi, %edx
11040; SSE-NEXT: movl %eax, %esi
11041; SSE-NEXT: shrl $19, %esi
11042; SSE-NEXT: andl $64, %esi
11043; SSE-NEXT: orl %edx, %esi
11044; SSE-NEXT: movl %eax, %edx
11045; SSE-NEXT: shrl $21, %edx
11046; SSE-NEXT: andl $32, %edx
11047; SSE-NEXT: orl %esi, %edx
11048; SSE-NEXT: movl %eax, %esi
11049; SSE-NEXT: shrl $23, %esi
11050; SSE-NEXT: andl $16, %esi
11051; SSE-NEXT: orl %edx, %esi
11052; SSE-NEXT: movl %eax, %edx
11053; SSE-NEXT: shrl $25, %edx
11054; SSE-NEXT: andl $8, %edx
11055; SSE-NEXT: orl %esi, %edx
11056; SSE-NEXT: movl %eax, %esi
11057; SSE-NEXT: shrl $27, %esi
11058; SSE-NEXT: andl $4, %esi
11059; SSE-NEXT: orl %edx, %esi
11060; SSE-NEXT: movl %eax, %edx
11061; SSE-NEXT: shrl $29, %edx
11062; SSE-NEXT: andl $2, %edx
11063; SSE-NEXT: orl %esi, %edx
11064; SSE-NEXT: shrl $31, %eax
11065; SSE-NEXT: orl %edx, %eax
11066; SSE-NEXT: orl %ecx, %eax
11067; SSE-NEXT: movd %eax, %xmm2
11068; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
11069; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
11070; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,1,2,3]
11071; SSE-NEXT: movd %xmm2, %eax
11072; SSE-NEXT: movl %eax, %ecx
11073; SSE-NEXT: shll $31, %ecx
11074; SSE-NEXT: movl %eax, %edx
11075; SSE-NEXT: andl $2, %edx
11076; SSE-NEXT: shll $29, %edx
11077; SSE-NEXT: orl %ecx, %edx
11078; SSE-NEXT: movl %eax, %ecx
11079; SSE-NEXT: andl $4, %ecx
11080; SSE-NEXT: shll $27, %ecx
11081; SSE-NEXT: orl %edx, %ecx
11082; SSE-NEXT: movl %eax, %edx
11083; SSE-NEXT: andl $8, %edx
11084; SSE-NEXT: shll $25, %edx
11085; SSE-NEXT: orl %ecx, %edx
11086; SSE-NEXT: movl %eax, %esi
11087; SSE-NEXT: andl $16, %esi
11088; SSE-NEXT: shll $23, %esi
11089; SSE-NEXT: orl %edx, %esi
11090; SSE-NEXT: movl %eax, %ecx
11091; SSE-NEXT: andl $32, %ecx
11092; SSE-NEXT: shll $21, %ecx
11093; SSE-NEXT: orl %esi, %ecx
11094; SSE-NEXT: movl %eax, %edx
11095; SSE-NEXT: andl $64, %edx
11096; SSE-NEXT: shll $19, %edx
11097; SSE-NEXT: movl %eax, %esi
11098; SSE-NEXT: shll $17, %esi
11099; SSE-NEXT: andl $16777216, %esi # imm = 0x1000000
11100; SSE-NEXT: orl %edx, %esi
11101; SSE-NEXT: movl %eax, %edx
11102; SSE-NEXT: shll $15, %edx
11103; SSE-NEXT: andl $8388608, %edx # imm = 0x800000
11104; SSE-NEXT: orl %esi, %edx
11105; SSE-NEXT: movl %eax, %esi
11106; SSE-NEXT: shll $13, %esi
11107; SSE-NEXT: andl $4194304, %esi # imm = 0x400000
11108; SSE-NEXT: orl %edx, %esi
11109; SSE-NEXT: movl %eax, %edx
11110; SSE-NEXT: shll $11, %edx
11111; SSE-NEXT: andl $2097152, %edx # imm = 0x200000
11112; SSE-NEXT: orl %esi, %edx
11113; SSE-NEXT: movl %eax, %esi
11114; SSE-NEXT: shll $9, %esi
11115; SSE-NEXT: andl $1048576, %esi # imm = 0x100000
11116; SSE-NEXT: orl %edx, %esi
11117; SSE-NEXT: movl %eax, %edx
11118; SSE-NEXT: shll $7, %edx
11119; SSE-NEXT: andl $524288, %edx # imm = 0x80000
11120; SSE-NEXT: orl %esi, %edx
11121; SSE-NEXT: movl %eax, %esi
11122; SSE-NEXT: shll $5, %esi
11123; SSE-NEXT: andl $262144, %esi # imm = 0x40000
11124; SSE-NEXT: orl %edx, %esi
11125; SSE-NEXT: leal (,%rax,8), %edx
11126; SSE-NEXT: andl $131072, %edx # imm = 0x20000
11127; SSE-NEXT: orl %esi, %edx
11128; SSE-NEXT: leal (%rax,%rax), %esi
11129; SSE-NEXT: andl $65536, %esi # imm = 0x10000
11130; SSE-NEXT: orl %edx, %esi
11131; SSE-NEXT: movl %eax, %edx
11132; SSE-NEXT: shrl %edx
11133; SSE-NEXT: andl $32768, %edx # imm = 0x8000
11134; SSE-NEXT: orl %esi, %edx
11135; SSE-NEXT: movl %eax, %esi
11136; SSE-NEXT: shrl $3, %esi
11137; SSE-NEXT: andl $16384, %esi # imm = 0x4000
11138; SSE-NEXT: orl %edx, %esi
11139; SSE-NEXT: movl %eax, %edx
11140; SSE-NEXT: shrl $5, %edx
11141; SSE-NEXT: andl $8192, %edx # imm = 0x2000
11142; SSE-NEXT: orl %esi, %edx
11143; SSE-NEXT: movl %eax, %esi
11144; SSE-NEXT: shrl $7, %esi
11145; SSE-NEXT: andl $4096, %esi # imm = 0x1000
11146; SSE-NEXT: orl %edx, %esi
11147; SSE-NEXT: movl %eax, %edx
11148; SSE-NEXT: shrl $9, %edx
11149; SSE-NEXT: andl $2048, %edx # imm = 0x800
11150; SSE-NEXT: orl %esi, %edx
11151; SSE-NEXT: movl %eax, %esi
11152; SSE-NEXT: shrl $11, %esi
11153; SSE-NEXT: andl $1024, %esi # imm = 0x400
11154; SSE-NEXT: orl %edx, %esi
11155; SSE-NEXT: movl %eax, %edx
11156; SSE-NEXT: shrl $13, %edx
11157; SSE-NEXT: andl $512, %edx # imm = 0x200
11158; SSE-NEXT: orl %esi, %edx
11159; SSE-NEXT: movl %eax, %esi
11160; SSE-NEXT: shrl $15, %esi
11161; SSE-NEXT: andl $256, %esi # imm = 0x100
11162; SSE-NEXT: orl %edx, %esi
11163; SSE-NEXT: movl %eax, %edx
11164; SSE-NEXT: shrl $17, %edx
11165; SSE-NEXT: andl $128, %edx
11166; SSE-NEXT: orl %esi, %edx
11167; SSE-NEXT: movl %eax, %esi
11168; SSE-NEXT: shrl $19, %esi
11169; SSE-NEXT: andl $64, %esi
11170; SSE-NEXT: orl %edx, %esi
11171; SSE-NEXT: movl %eax, %edx
11172; SSE-NEXT: shrl $21, %edx
11173; SSE-NEXT: andl $32, %edx
11174; SSE-NEXT: orl %esi, %edx
11175; SSE-NEXT: movl %eax, %esi
11176; SSE-NEXT: shrl $23, %esi
11177; SSE-NEXT: andl $16, %esi
11178; SSE-NEXT: orl %edx, %esi
11179; SSE-NEXT: movl %eax, %edx
11180; SSE-NEXT: shrl $25, %edx
11181; SSE-NEXT: andl $8, %edx
11182; SSE-NEXT: orl %esi, %edx
11183; SSE-NEXT: movl %eax, %esi
11184; SSE-NEXT: shrl $27, %esi
11185; SSE-NEXT: andl $4, %esi
11186; SSE-NEXT: orl %edx, %esi
11187; SSE-NEXT: movl %eax, %edx
11188; SSE-NEXT: shrl $29, %edx
11189; SSE-NEXT: andl $2, %edx
11190; SSE-NEXT: orl %esi, %edx
11191; SSE-NEXT: shrl $31, %eax
11192; SSE-NEXT: orl %edx, %eax
11193; SSE-NEXT: orl %ecx, %eax
11194; SSE-NEXT: movd %eax, %xmm2
11195; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,2,3]
11196; SSE-NEXT: movd %xmm3, %eax
11197; SSE-NEXT: movl %eax, %ecx
11198; SSE-NEXT: shll $31, %ecx
11199; SSE-NEXT: movl %eax, %edx
11200; SSE-NEXT: andl $2, %edx
11201; SSE-NEXT: shll $29, %edx
11202; SSE-NEXT: orl %ecx, %edx
11203; SSE-NEXT: movl %eax, %ecx
11204; SSE-NEXT: andl $4, %ecx
11205; SSE-NEXT: shll $27, %ecx
11206; SSE-NEXT: orl %edx, %ecx
11207; SSE-NEXT: movl %eax, %edx
11208; SSE-NEXT: andl $8, %edx
11209; SSE-NEXT: shll $25, %edx
11210; SSE-NEXT: orl %ecx, %edx
11211; SSE-NEXT: movl %eax, %esi
11212; SSE-NEXT: andl $16, %esi
11213; SSE-NEXT: shll $23, %esi
11214; SSE-NEXT: orl %edx, %esi
11215; SSE-NEXT: movl %eax, %ecx
11216; SSE-NEXT: andl $32, %ecx
11217; SSE-NEXT: shll $21, %ecx
11218; SSE-NEXT: orl %esi, %ecx
11219; SSE-NEXT: movl %eax, %edx
11220; SSE-NEXT: andl $64, %edx
11221; SSE-NEXT: shll $19, %edx
11222; SSE-NEXT: movl %eax, %esi
11223; SSE-NEXT: shll $17, %esi
11224; SSE-NEXT: andl $16777216, %esi # imm = 0x1000000
11225; SSE-NEXT: orl %edx, %esi
11226; SSE-NEXT: movl %eax, %edx
11227; SSE-NEXT: shll $15, %edx
11228; SSE-NEXT: andl $8388608, %edx # imm = 0x800000
11229; SSE-NEXT: orl %esi, %edx
11230; SSE-NEXT: movl %eax, %esi
11231; SSE-NEXT: shll $13, %esi
11232; SSE-NEXT: andl $4194304, %esi # imm = 0x400000
11233; SSE-NEXT: orl %edx, %esi
11234; SSE-NEXT: movl %eax, %edx
11235; SSE-NEXT: shll $11, %edx
11236; SSE-NEXT: andl $2097152, %edx # imm = 0x200000
11237; SSE-NEXT: orl %esi, %edx
11238; SSE-NEXT: movl %eax, %esi
11239; SSE-NEXT: shll $9, %esi
11240; SSE-NEXT: andl $1048576, %esi # imm = 0x100000
11241; SSE-NEXT: orl %edx, %esi
11242; SSE-NEXT: movl %eax, %edx
11243; SSE-NEXT: shll $7, %edx
11244; SSE-NEXT: andl $524288, %edx # imm = 0x80000
11245; SSE-NEXT: orl %esi, %edx
11246; SSE-NEXT: movl %eax, %esi
11247; SSE-NEXT: shll $5, %esi
11248; SSE-NEXT: andl $262144, %esi # imm = 0x40000
11249; SSE-NEXT: orl %edx, %esi
11250; SSE-NEXT: leal (,%rax,8), %edx
11251; SSE-NEXT: andl $131072, %edx # imm = 0x20000
11252; SSE-NEXT: orl %esi, %edx
11253; SSE-NEXT: leal (%rax,%rax), %esi
11254; SSE-NEXT: andl $65536, %esi # imm = 0x10000
11255; SSE-NEXT: orl %edx, %esi
11256; SSE-NEXT: movl %eax, %edx
11257; SSE-NEXT: shrl %edx
11258; SSE-NEXT: andl $32768, %edx # imm = 0x8000
11259; SSE-NEXT: orl %esi, %edx
11260; SSE-NEXT: movl %eax, %esi
11261; SSE-NEXT: shrl $3, %esi
11262; SSE-NEXT: andl $16384, %esi # imm = 0x4000
11263; SSE-NEXT: orl %edx, %esi
11264; SSE-NEXT: movl %eax, %edx
11265; SSE-NEXT: shrl $5, %edx
11266; SSE-NEXT: andl $8192, %edx # imm = 0x2000
11267; SSE-NEXT: orl %esi, %edx
11268; SSE-NEXT: movl %eax, %esi
11269; SSE-NEXT: shrl $7, %esi
11270; SSE-NEXT: andl $4096, %esi # imm = 0x1000
11271; SSE-NEXT: orl %edx, %esi
11272; SSE-NEXT: movl %eax, %edx
11273; SSE-NEXT: shrl $9, %edx
11274; SSE-NEXT: andl $2048, %edx # imm = 0x800
11275; SSE-NEXT: orl %esi, %edx
11276; SSE-NEXT: movl %eax, %esi
11277; SSE-NEXT: shrl $11, %esi
11278; SSE-NEXT: andl $1024, %esi # imm = 0x400
11279; SSE-NEXT: orl %edx, %esi
11280; SSE-NEXT: movl %eax, %edx
11281; SSE-NEXT: shrl $13, %edx
11282; SSE-NEXT: andl $512, %edx # imm = 0x200
11283; SSE-NEXT: orl %esi, %edx
11284; SSE-NEXT: movl %eax, %esi
11285; SSE-NEXT: shrl $15, %esi
11286; SSE-NEXT: andl $256, %esi # imm = 0x100
11287; SSE-NEXT: orl %edx, %esi
11288; SSE-NEXT: movl %eax, %edx
11289; SSE-NEXT: shrl $17, %edx
11290; SSE-NEXT: andl $128, %edx
11291; SSE-NEXT: orl %esi, %edx
11292; SSE-NEXT: movl %eax, %esi
11293; SSE-NEXT: shrl $19, %esi
11294; SSE-NEXT: andl $64, %esi
11295; SSE-NEXT: orl %edx, %esi
11296; SSE-NEXT: movl %eax, %edx
11297; SSE-NEXT: shrl $21, %edx
11298; SSE-NEXT: andl $32, %edx
11299; SSE-NEXT: orl %esi, %edx
11300; SSE-NEXT: movl %eax, %esi
11301; SSE-NEXT: shrl $23, %esi
11302; SSE-NEXT: andl $16, %esi
11303; SSE-NEXT: orl %edx, %esi
11304; SSE-NEXT: movl %eax, %edx
11305; SSE-NEXT: shrl $25, %edx
11306; SSE-NEXT: andl $8, %edx
11307; SSE-NEXT: orl %esi, %edx
11308; SSE-NEXT: movl %eax, %esi
11309; SSE-NEXT: shrl $27, %esi
11310; SSE-NEXT: andl $4, %esi
11311; SSE-NEXT: orl %edx, %esi
11312; SSE-NEXT: movl %eax, %edx
11313; SSE-NEXT: shrl $29, %edx
11314; SSE-NEXT: andl $2, %edx
11315; SSE-NEXT: orl %esi, %edx
11316; SSE-NEXT: shrl $31, %eax
11317; SSE-NEXT: orl %edx, %eax
11318; SSE-NEXT: orl %ecx, %eax
11319; SSE-NEXT: movd %eax, %xmm3
11320; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
11321; SSE-NEXT: movd %xmm1, %eax
11322; SSE-NEXT: movl %eax, %ecx
11323; SSE-NEXT: shll $31, %ecx
11324; SSE-NEXT: movl %eax, %edx
11325; SSE-NEXT: andl $2, %edx
11326; SSE-NEXT: shll $29, %edx
11327; SSE-NEXT: orl %ecx, %edx
11328; SSE-NEXT: movl %eax, %ecx
11329; SSE-NEXT: andl $4, %ecx
11330; SSE-NEXT: shll $27, %ecx
11331; SSE-NEXT: orl %edx, %ecx
11332; SSE-NEXT: movl %eax, %edx
11333; SSE-NEXT: andl $8, %edx
11334; SSE-NEXT: shll $25, %edx
11335; SSE-NEXT: orl %ecx, %edx
11336; SSE-NEXT: movl %eax, %esi
11337; SSE-NEXT: andl $16, %esi
11338; SSE-NEXT: shll $23, %esi
11339; SSE-NEXT: orl %edx, %esi
11340; SSE-NEXT: movl %eax, %ecx
11341; SSE-NEXT: andl $32, %ecx
11342; SSE-NEXT: shll $21, %ecx
11343; SSE-NEXT: orl %esi, %ecx
11344; SSE-NEXT: movl %eax, %edx
11345; SSE-NEXT: andl $64, %edx
11346; SSE-NEXT: shll $19, %edx
11347; SSE-NEXT: movl %eax, %esi
11348; SSE-NEXT: shll $17, %esi
11349; SSE-NEXT: andl $16777216, %esi # imm = 0x1000000
11350; SSE-NEXT: orl %edx, %esi
11351; SSE-NEXT: movl %eax, %edx
11352; SSE-NEXT: shll $15, %edx
11353; SSE-NEXT: andl $8388608, %edx # imm = 0x800000
11354; SSE-NEXT: orl %esi, %edx
11355; SSE-NEXT: movl %eax, %esi
11356; SSE-NEXT: shll $13, %esi
11357; SSE-NEXT: andl $4194304, %esi # imm = 0x400000
11358; SSE-NEXT: orl %edx, %esi
11359; SSE-NEXT: movl %eax, %edx
11360; SSE-NEXT: shll $11, %edx
11361; SSE-NEXT: andl $2097152, %edx # imm = 0x200000
11362; SSE-NEXT: orl %esi, %edx
11363; SSE-NEXT: movl %eax, %esi
11364; SSE-NEXT: shll $9, %esi
11365; SSE-NEXT: andl $1048576, %esi # imm = 0x100000
11366; SSE-NEXT: orl %edx, %esi
11367; SSE-NEXT: movl %eax, %edx
11368; SSE-NEXT: shll $7, %edx
11369; SSE-NEXT: andl $524288, %edx # imm = 0x80000
11370; SSE-NEXT: orl %esi, %edx
11371; SSE-NEXT: movl %eax, %esi
11372; SSE-NEXT: shll $5, %esi
11373; SSE-NEXT: andl $262144, %esi # imm = 0x40000
11374; SSE-NEXT: orl %edx, %esi
11375; SSE-NEXT: leal (,%rax,8), %edx
11376; SSE-NEXT: andl $131072, %edx # imm = 0x20000
11377; SSE-NEXT: orl %esi, %edx
11378; SSE-NEXT: leal (%rax,%rax), %esi
11379; SSE-NEXT: andl $65536, %esi # imm = 0x10000
11380; SSE-NEXT: orl %edx, %esi
11381; SSE-NEXT: movl %eax, %edx
11382; SSE-NEXT: shrl %edx
11383; SSE-NEXT: andl $32768, %edx # imm = 0x8000
11384; SSE-NEXT: orl %esi, %edx
11385; SSE-NEXT: movl %eax, %esi
11386; SSE-NEXT: shrl $3, %esi
11387; SSE-NEXT: andl $16384, %esi # imm = 0x4000
11388; SSE-NEXT: orl %edx, %esi
11389; SSE-NEXT: movl %eax, %edx
11390; SSE-NEXT: shrl $5, %edx
11391; SSE-NEXT: andl $8192, %edx # imm = 0x2000
11392; SSE-NEXT: orl %esi, %edx
11393; SSE-NEXT: movl %eax, %esi
11394; SSE-NEXT: shrl $7, %esi
11395; SSE-NEXT: andl $4096, %esi # imm = 0x1000
11396; SSE-NEXT: orl %edx, %esi
11397; SSE-NEXT: movl %eax, %edx
11398; SSE-NEXT: shrl $9, %edx
11399; SSE-NEXT: andl $2048, %edx # imm = 0x800
11400; SSE-NEXT: orl %esi, %edx
11401; SSE-NEXT: movl %eax, %esi
11402; SSE-NEXT: shrl $11, %esi
11403; SSE-NEXT: andl $1024, %esi # imm = 0x400
11404; SSE-NEXT: orl %edx, %esi
11405; SSE-NEXT: movl %eax, %edx
11406; SSE-NEXT: shrl $13, %edx
11407; SSE-NEXT: andl $512, %edx # imm = 0x200
11408; SSE-NEXT: orl %esi, %edx
11409; SSE-NEXT: movl %eax, %esi
11410; SSE-NEXT: shrl $15, %esi
11411; SSE-NEXT: andl $256, %esi # imm = 0x100
11412; SSE-NEXT: orl %edx, %esi
11413; SSE-NEXT: movl %eax, %edx
11414; SSE-NEXT: shrl $17, %edx
11415; SSE-NEXT: andl $128, %edx
11416; SSE-NEXT: orl %esi, %edx
11417; SSE-NEXT: movl %eax, %esi
11418; SSE-NEXT: shrl $19, %esi
11419; SSE-NEXT: andl $64, %esi
11420; SSE-NEXT: orl %edx, %esi
11421; SSE-NEXT: movl %eax, %edx
11422; SSE-NEXT: shrl $21, %edx
11423; SSE-NEXT: andl $32, %edx
11424; SSE-NEXT: orl %esi, %edx
11425; SSE-NEXT: movl %eax, %esi
11426; SSE-NEXT: shrl $23, %esi
11427; SSE-NEXT: andl $16, %esi
11428; SSE-NEXT: orl %edx, %esi
11429; SSE-NEXT: movl %eax, %edx
11430; SSE-NEXT: shrl $25, %edx
11431; SSE-NEXT: andl $8, %edx
11432; SSE-NEXT: orl %esi, %edx
11433; SSE-NEXT: movl %eax, %esi
11434; SSE-NEXT: shrl $27, %esi
11435; SSE-NEXT: andl $4, %esi
11436; SSE-NEXT: orl %edx, %esi
11437; SSE-NEXT: movl %eax, %edx
11438; SSE-NEXT: shrl $29, %edx
11439; SSE-NEXT: andl $2, %edx
11440; SSE-NEXT: orl %esi, %edx
11441; SSE-NEXT: shrl $31, %eax
11442; SSE-NEXT: orl %edx, %eax
11443; SSE-NEXT: orl %ecx, %eax
11444; SSE-NEXT: movd %eax, %xmm2
11445; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
11446; SSE-NEXT: movd %xmm1, %eax
11447; SSE-NEXT: movl %eax, %ecx
11448; SSE-NEXT: shll $31, %ecx
11449; SSE-NEXT: movl %eax, %edx
11450; SSE-NEXT: andl $2, %edx
11451; SSE-NEXT: shll $29, %edx
11452; SSE-NEXT: orl %ecx, %edx
11453; SSE-NEXT: movl %eax, %ecx
11454; SSE-NEXT: andl $4, %ecx
11455; SSE-NEXT: shll $27, %ecx
11456; SSE-NEXT: orl %edx, %ecx
11457; SSE-NEXT: movl %eax, %edx
11458; SSE-NEXT: andl $8, %edx
11459; SSE-NEXT: shll $25, %edx
11460; SSE-NEXT: orl %ecx, %edx
11461; SSE-NEXT: movl %eax, %esi
11462; SSE-NEXT: andl $16, %esi
11463; SSE-NEXT: shll $23, %esi
11464; SSE-NEXT: orl %edx, %esi
11465; SSE-NEXT: movl %eax, %ecx
11466; SSE-NEXT: andl $32, %ecx
11467; SSE-NEXT: shll $21, %ecx
11468; SSE-NEXT: orl %esi, %ecx
11469; SSE-NEXT: movl %eax, %edx
11470; SSE-NEXT: andl $64, %edx
11471; SSE-NEXT: shll $19, %edx
11472; SSE-NEXT: movl %eax, %esi
11473; SSE-NEXT: shll $17, %esi
11474; SSE-NEXT: andl $16777216, %esi # imm = 0x1000000
11475; SSE-NEXT: orl %edx, %esi
11476; SSE-NEXT: movl %eax, %edx
11477; SSE-NEXT: shll $15, %edx
11478; SSE-NEXT: andl $8388608, %edx # imm = 0x800000
11479; SSE-NEXT: orl %esi, %edx
11480; SSE-NEXT: movl %eax, %esi
11481; SSE-NEXT: shll $13, %esi
11482; SSE-NEXT: andl $4194304, %esi # imm = 0x400000
11483; SSE-NEXT: orl %edx, %esi
11484; SSE-NEXT: movl %eax, %edx
11485; SSE-NEXT: shll $11, %edx
11486; SSE-NEXT: andl $2097152, %edx # imm = 0x200000
11487; SSE-NEXT: orl %esi, %edx
11488; SSE-NEXT: movl %eax, %esi
11489; SSE-NEXT: shll $9, %esi
11490; SSE-NEXT: andl $1048576, %esi # imm = 0x100000
11491; SSE-NEXT: orl %edx, %esi
11492; SSE-NEXT: movl %eax, %edx
11493; SSE-NEXT: shll $7, %edx
11494; SSE-NEXT: andl $524288, %edx # imm = 0x80000
11495; SSE-NEXT: orl %esi, %edx
11496; SSE-NEXT: movl %eax, %esi
11497; SSE-NEXT: shll $5, %esi
11498; SSE-NEXT: andl $262144, %esi # imm = 0x40000
11499; SSE-NEXT: orl %edx, %esi
11500; SSE-NEXT: leal (,%rax,8), %edx
11501; SSE-NEXT: andl $131072, %edx # imm = 0x20000
11502; SSE-NEXT: orl %esi, %edx
11503; SSE-NEXT: leal (%rax,%rax), %esi
11504; SSE-NEXT: andl $65536, %esi # imm = 0x10000
11505; SSE-NEXT: orl %edx, %esi
11506; SSE-NEXT: movl %eax, %edx
11507; SSE-NEXT: shrl %edx
11508; SSE-NEXT: andl $32768, %edx # imm = 0x8000
11509; SSE-NEXT: orl %esi, %edx
11510; SSE-NEXT: movl %eax, %esi
11511; SSE-NEXT: shrl $3, %esi
11512; SSE-NEXT: andl $16384, %esi # imm = 0x4000
11513; SSE-NEXT: orl %edx, %esi
11514; SSE-NEXT: movl %eax, %edx
11515; SSE-NEXT: shrl $5, %edx
11516; SSE-NEXT: andl $8192, %edx # imm = 0x2000
11517; SSE-NEXT: orl %esi, %edx
11518; SSE-NEXT: movl %eax, %esi
11519; SSE-NEXT: shrl $7, %esi
11520; SSE-NEXT: andl $4096, %esi # imm = 0x1000
11521; SSE-NEXT: orl %edx, %esi
11522; SSE-NEXT: movl %eax, %edx
11523; SSE-NEXT: shrl $9, %edx
11524; SSE-NEXT: andl $2048, %edx # imm = 0x800
11525; SSE-NEXT: orl %esi, %edx
11526; SSE-NEXT: movl %eax, %esi
11527; SSE-NEXT: shrl $11, %esi
11528; SSE-NEXT: andl $1024, %esi # imm = 0x400
11529; SSE-NEXT: orl %edx, %esi
11530; SSE-NEXT: movl %eax, %edx
11531; SSE-NEXT: shrl $13, %edx
11532; SSE-NEXT: andl $512, %edx # imm = 0x200
11533; SSE-NEXT: orl %esi, %edx
11534; SSE-NEXT: movl %eax, %esi
11535; SSE-NEXT: shrl $15, %esi
11536; SSE-NEXT: andl $256, %esi # imm = 0x100
11537; SSE-NEXT: orl %edx, %esi
11538; SSE-NEXT: movl %eax, %edx
11539; SSE-NEXT: shrl $17, %edx
11540; SSE-NEXT: andl $128, %edx
11541; SSE-NEXT: orl %esi, %edx
11542; SSE-NEXT: movl %eax, %esi
11543; SSE-NEXT: shrl $19, %esi
11544; SSE-NEXT: andl $64, %esi
11545; SSE-NEXT: orl %edx, %esi
11546; SSE-NEXT: movl %eax, %edx
11547; SSE-NEXT: shrl $21, %edx
11548; SSE-NEXT: andl $32, %edx
11549; SSE-NEXT: orl %esi, %edx
11550; SSE-NEXT: movl %eax, %esi
11551; SSE-NEXT: shrl $23, %esi
11552; SSE-NEXT: andl $16, %esi
11553; SSE-NEXT: orl %edx, %esi
11554; SSE-NEXT: movl %eax, %edx
11555; SSE-NEXT: shrl $25, %edx
11556; SSE-NEXT: andl $8, %edx
11557; SSE-NEXT: orl %esi, %edx
11558; SSE-NEXT: movl %eax, %esi
11559; SSE-NEXT: shrl $27, %esi
11560; SSE-NEXT: andl $4, %esi
11561; SSE-NEXT: orl %edx, %esi
11562; SSE-NEXT: movl %eax, %edx
11563; SSE-NEXT: shrl $29, %edx
11564; SSE-NEXT: andl $2, %edx
11565; SSE-NEXT: orl %esi, %edx
11566; SSE-NEXT: shrl $31, %eax
11567; SSE-NEXT: orl %edx, %eax
11568; SSE-NEXT: orl %ecx, %eax
11569; SSE-NEXT: movd %eax, %xmm1
11570; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
11571; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
11572; SSE-NEXT: movdqa %xmm2, %xmm1
11573; SSE-NEXT: retq
11574;
11575; AVX1-LABEL: test_bitreverse_v8i32:
11576; AVX1: # BB#0:
11577; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
11578; AVX1-NEXT: vpextrd $1, %xmm1, %eax
11579; AVX1-NEXT: movl %eax, %ecx
11580; AVX1-NEXT: shll $31, %ecx
11581; AVX1-NEXT: movl %eax, %edx
11582; AVX1-NEXT: andl $2, %edx
11583; AVX1-NEXT: shll $29, %edx
11584; AVX1-NEXT: orl %ecx, %edx
11585; AVX1-NEXT: movl %eax, %ecx
11586; AVX1-NEXT: andl $4, %ecx
11587; AVX1-NEXT: shll $27, %ecx
11588; AVX1-NEXT: orl %edx, %ecx
11589; AVX1-NEXT: movl %eax, %edx
11590; AVX1-NEXT: andl $8, %edx
11591; AVX1-NEXT: shll $25, %edx
11592; AVX1-NEXT: orl %ecx, %edx
11593; AVX1-NEXT: movl %eax, %esi
11594; AVX1-NEXT: andl $16, %esi
11595; AVX1-NEXT: shll $23, %esi
11596; AVX1-NEXT: orl %edx, %esi
11597; AVX1-NEXT: movl %eax, %ecx
11598; AVX1-NEXT: andl $32, %ecx
11599; AVX1-NEXT: shll $21, %ecx
11600; AVX1-NEXT: orl %esi, %ecx
11601; AVX1-NEXT: movl %eax, %edx
11602; AVX1-NEXT: andl $64, %edx
11603; AVX1-NEXT: shll $19, %edx
11604; AVX1-NEXT: movl %eax, %esi
11605; AVX1-NEXT: shll $17, %esi
11606; AVX1-NEXT: andl $16777216, %esi # imm = 0x1000000
11607; AVX1-NEXT: orl %edx, %esi
11608; AVX1-NEXT: movl %eax, %edx
11609; AVX1-NEXT: shll $15, %edx
11610; AVX1-NEXT: andl $8388608, %edx # imm = 0x800000
11611; AVX1-NEXT: orl %esi, %edx
11612; AVX1-NEXT: movl %eax, %esi
11613; AVX1-NEXT: shll $13, %esi
11614; AVX1-NEXT: andl $4194304, %esi # imm = 0x400000
11615; AVX1-NEXT: orl %edx, %esi
11616; AVX1-NEXT: movl %eax, %edx
11617; AVX1-NEXT: shll $11, %edx
11618; AVX1-NEXT: andl $2097152, %edx # imm = 0x200000
11619; AVX1-NEXT: orl %esi, %edx
11620; AVX1-NEXT: movl %eax, %esi
11621; AVX1-NEXT: shll $9, %esi
11622; AVX1-NEXT: andl $1048576, %esi # imm = 0x100000
11623; AVX1-NEXT: orl %edx, %esi
11624; AVX1-NEXT: movl %eax, %edx
11625; AVX1-NEXT: shll $7, %edx
11626; AVX1-NEXT: andl $524288, %edx # imm = 0x80000
11627; AVX1-NEXT: orl %esi, %edx
11628; AVX1-NEXT: movl %eax, %esi
11629; AVX1-NEXT: shll $5, %esi
11630; AVX1-NEXT: andl $262144, %esi # imm = 0x40000
11631; AVX1-NEXT: orl %edx, %esi
11632; AVX1-NEXT: leal (,%rax,8), %edx
11633; AVX1-NEXT: andl $131072, %edx # imm = 0x20000
11634; AVX1-NEXT: orl %esi, %edx
11635; AVX1-NEXT: leal (%rax,%rax), %esi
11636; AVX1-NEXT: andl $65536, %esi # imm = 0x10000
11637; AVX1-NEXT: orl %edx, %esi
11638; AVX1-NEXT: movl %eax, %edx
11639; AVX1-NEXT: shrl %edx
11640; AVX1-NEXT: andl $32768, %edx # imm = 0x8000
11641; AVX1-NEXT: orl %esi, %edx
11642; AVX1-NEXT: movl %eax, %esi
11643; AVX1-NEXT: shrl $3, %esi
11644; AVX1-NEXT: andl $16384, %esi # imm = 0x4000
11645; AVX1-NEXT: orl %edx, %esi
11646; AVX1-NEXT: movl %eax, %edx
11647; AVX1-NEXT: shrl $5, %edx
11648; AVX1-NEXT: andl $8192, %edx # imm = 0x2000
11649; AVX1-NEXT: orl %esi, %edx
11650; AVX1-NEXT: movl %eax, %esi
11651; AVX1-NEXT: shrl $7, %esi
11652; AVX1-NEXT: andl $4096, %esi # imm = 0x1000
11653; AVX1-NEXT: orl %edx, %esi
11654; AVX1-NEXT: movl %eax, %edx
11655; AVX1-NEXT: shrl $9, %edx
11656; AVX1-NEXT: andl $2048, %edx # imm = 0x800
11657; AVX1-NEXT: orl %esi, %edx
11658; AVX1-NEXT: movl %eax, %esi
11659; AVX1-NEXT: shrl $11, %esi
11660; AVX1-NEXT: andl $1024, %esi # imm = 0x400
11661; AVX1-NEXT: orl %edx, %esi
11662; AVX1-NEXT: movl %eax, %edx
11663; AVX1-NEXT: shrl $13, %edx
11664; AVX1-NEXT: andl $512, %edx # imm = 0x200
11665; AVX1-NEXT: orl %esi, %edx
11666; AVX1-NEXT: movl %eax, %esi
11667; AVX1-NEXT: shrl $15, %esi
11668; AVX1-NEXT: andl $256, %esi # imm = 0x100
11669; AVX1-NEXT: orl %edx, %esi
11670; AVX1-NEXT: movl %eax, %edx
11671; AVX1-NEXT: shrl $17, %edx
11672; AVX1-NEXT: andl $128, %edx
11673; AVX1-NEXT: orl %esi, %edx
11674; AVX1-NEXT: movl %eax, %esi
11675; AVX1-NEXT: shrl $19, %esi
11676; AVX1-NEXT: andl $64, %esi
11677; AVX1-NEXT: orl %edx, %esi
11678; AVX1-NEXT: movl %eax, %edx
11679; AVX1-NEXT: shrl $21, %edx
11680; AVX1-NEXT: andl $32, %edx
11681; AVX1-NEXT: orl %esi, %edx
11682; AVX1-NEXT: movl %eax, %esi
11683; AVX1-NEXT: shrl $23, %esi
11684; AVX1-NEXT: andl $16, %esi
11685; AVX1-NEXT: orl %edx, %esi
11686; AVX1-NEXT: movl %eax, %edx
11687; AVX1-NEXT: shrl $25, %edx
11688; AVX1-NEXT: andl $8, %edx
11689; AVX1-NEXT: orl %esi, %edx
11690; AVX1-NEXT: movl %eax, %esi
11691; AVX1-NEXT: shrl $27, %esi
11692; AVX1-NEXT: andl $4, %esi
11693; AVX1-NEXT: orl %edx, %esi
11694; AVX1-NEXT: movl %eax, %edx
11695; AVX1-NEXT: shrl $29, %edx
11696; AVX1-NEXT: andl $2, %edx
11697; AVX1-NEXT: orl %esi, %edx
11698; AVX1-NEXT: shrl $31, %eax
11699; AVX1-NEXT: orl %edx, %eax
11700; AVX1-NEXT: orl %ecx, %eax
11701; AVX1-NEXT: vmovd %xmm1, %ecx
11702; AVX1-NEXT: movl %ecx, %edx
11703; AVX1-NEXT: shll $31, %edx
11704; AVX1-NEXT: movl %ecx, %esi
11705; AVX1-NEXT: andl $2, %esi
11706; AVX1-NEXT: shll $29, %esi
11707; AVX1-NEXT: orl %edx, %esi
11708; AVX1-NEXT: movl %ecx, %edx
11709; AVX1-NEXT: andl $4, %edx
11710; AVX1-NEXT: shll $27, %edx
11711; AVX1-NEXT: orl %esi, %edx
11712; AVX1-NEXT: movl %ecx, %esi
11713; AVX1-NEXT: andl $8, %esi
11714; AVX1-NEXT: shll $25, %esi
11715; AVX1-NEXT: orl %edx, %esi
11716; AVX1-NEXT: movl %ecx, %edi
11717; AVX1-NEXT: andl $16, %edi
11718; AVX1-NEXT: shll $23, %edi
11719; AVX1-NEXT: orl %esi, %edi
11720; AVX1-NEXT: movl %ecx, %edx
11721; AVX1-NEXT: andl $32, %edx
11722; AVX1-NEXT: shll $21, %edx
11723; AVX1-NEXT: orl %edi, %edx
11724; AVX1-NEXT: movl %ecx, %esi
11725; AVX1-NEXT: andl $64, %esi
11726; AVX1-NEXT: shll $19, %esi
11727; AVX1-NEXT: movl %ecx, %edi
11728; AVX1-NEXT: shll $17, %edi
11729; AVX1-NEXT: andl $16777216, %edi # imm = 0x1000000
11730; AVX1-NEXT: orl %esi, %edi
11731; AVX1-NEXT: movl %ecx, %esi
11732; AVX1-NEXT: shll $15, %esi
11733; AVX1-NEXT: andl $8388608, %esi # imm = 0x800000
11734; AVX1-NEXT: orl %edi, %esi
11735; AVX1-NEXT: movl %ecx, %edi
11736; AVX1-NEXT: shll $13, %edi
11737; AVX1-NEXT: andl $4194304, %edi # imm = 0x400000
11738; AVX1-NEXT: orl %esi, %edi
11739; AVX1-NEXT: movl %ecx, %esi
11740; AVX1-NEXT: shll $11, %esi
11741; AVX1-NEXT: andl $2097152, %esi # imm = 0x200000
11742; AVX1-NEXT: orl %edi, %esi
11743; AVX1-NEXT: movl %ecx, %edi
11744; AVX1-NEXT: shll $9, %edi
11745; AVX1-NEXT: andl $1048576, %edi # imm = 0x100000
11746; AVX1-NEXT: orl %esi, %edi
11747; AVX1-NEXT: movl %ecx, %esi
11748; AVX1-NEXT: shll $7, %esi
11749; AVX1-NEXT: andl $524288, %esi # imm = 0x80000
11750; AVX1-NEXT: orl %edi, %esi
11751; AVX1-NEXT: movl %ecx, %edi
11752; AVX1-NEXT: shll $5, %edi
11753; AVX1-NEXT: andl $262144, %edi # imm = 0x40000
11754; AVX1-NEXT: orl %esi, %edi
11755; AVX1-NEXT: leal (,%rcx,8), %esi
11756; AVX1-NEXT: andl $131072, %esi # imm = 0x20000
11757; AVX1-NEXT: orl %edi, %esi
11758; AVX1-NEXT: leal (%rcx,%rcx), %edi
11759; AVX1-NEXT: andl $65536, %edi # imm = 0x10000
11760; AVX1-NEXT: orl %esi, %edi
11761; AVX1-NEXT: movl %ecx, %esi
11762; AVX1-NEXT: shrl %esi
11763; AVX1-NEXT: andl $32768, %esi # imm = 0x8000
11764; AVX1-NEXT: orl %edi, %esi
11765; AVX1-NEXT: movl %ecx, %edi
11766; AVX1-NEXT: shrl $3, %edi
11767; AVX1-NEXT: andl $16384, %edi # imm = 0x4000
11768; AVX1-NEXT: orl %esi, %edi
11769; AVX1-NEXT: movl %ecx, %esi
11770; AVX1-NEXT: shrl $5, %esi
11771; AVX1-NEXT: andl $8192, %esi # imm = 0x2000
11772; AVX1-NEXT: orl %edi, %esi
11773; AVX1-NEXT: movl %ecx, %edi
11774; AVX1-NEXT: shrl $7, %edi
11775; AVX1-NEXT: andl $4096, %edi # imm = 0x1000
11776; AVX1-NEXT: orl %esi, %edi
11777; AVX1-NEXT: movl %ecx, %esi
11778; AVX1-NEXT: shrl $9, %esi
11779; AVX1-NEXT: andl $2048, %esi # imm = 0x800
11780; AVX1-NEXT: orl %edi, %esi
11781; AVX1-NEXT: movl %ecx, %edi
11782; AVX1-NEXT: shrl $11, %edi
11783; AVX1-NEXT: andl $1024, %edi # imm = 0x400
11784; AVX1-NEXT: orl %esi, %edi
11785; AVX1-NEXT: movl %ecx, %esi
11786; AVX1-NEXT: shrl $13, %esi
11787; AVX1-NEXT: andl $512, %esi # imm = 0x200
11788; AVX1-NEXT: orl %edi, %esi
11789; AVX1-NEXT: movl %ecx, %edi
11790; AVX1-NEXT: shrl $15, %edi
11791; AVX1-NEXT: andl $256, %edi # imm = 0x100
11792; AVX1-NEXT: orl %esi, %edi
11793; AVX1-NEXT: movl %ecx, %esi
11794; AVX1-NEXT: shrl $17, %esi
11795; AVX1-NEXT: andl $128, %esi
11796; AVX1-NEXT: orl %edi, %esi
11797; AVX1-NEXT: movl %ecx, %edi
11798; AVX1-NEXT: shrl $19, %edi
11799; AVX1-NEXT: andl $64, %edi
11800; AVX1-NEXT: orl %esi, %edi
11801; AVX1-NEXT: movl %ecx, %esi
11802; AVX1-NEXT: shrl $21, %esi
11803; AVX1-NEXT: andl $32, %esi
11804; AVX1-NEXT: orl %edi, %esi
11805; AVX1-NEXT: movl %ecx, %edi
11806; AVX1-NEXT: shrl $23, %edi
11807; AVX1-NEXT: andl $16, %edi
11808; AVX1-NEXT: orl %esi, %edi
11809; AVX1-NEXT: movl %ecx, %esi
11810; AVX1-NEXT: shrl $25, %esi
11811; AVX1-NEXT: andl $8, %esi
11812; AVX1-NEXT: orl %edi, %esi
11813; AVX1-NEXT: movl %ecx, %edi
11814; AVX1-NEXT: shrl $27, %edi
11815; AVX1-NEXT: andl $4, %edi
11816; AVX1-NEXT: orl %esi, %edi
11817; AVX1-NEXT: movl %ecx, %esi
11818; AVX1-NEXT: shrl $29, %esi
11819; AVX1-NEXT: andl $2, %esi
11820; AVX1-NEXT: orl %edi, %esi
11821; AVX1-NEXT: shrl $31, %ecx
11822; AVX1-NEXT: orl %esi, %ecx
11823; AVX1-NEXT: orl %edx, %ecx
11824; AVX1-NEXT: vmovd %ecx, %xmm2
11825; AVX1-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2
11826; AVX1-NEXT: vpextrd $2, %xmm1, %eax
11827; AVX1-NEXT: movl %eax, %ecx
11828; AVX1-NEXT: shll $31, %ecx
11829; AVX1-NEXT: movl %eax, %edx
11830; AVX1-NEXT: andl $2, %edx
11831; AVX1-NEXT: shll $29, %edx
11832; AVX1-NEXT: orl %ecx, %edx
11833; AVX1-NEXT: movl %eax, %ecx
11834; AVX1-NEXT: andl $4, %ecx
11835; AVX1-NEXT: shll $27, %ecx
11836; AVX1-NEXT: orl %edx, %ecx
11837; AVX1-NEXT: movl %eax, %edx
11838; AVX1-NEXT: andl $8, %edx
11839; AVX1-NEXT: shll $25, %edx
11840; AVX1-NEXT: orl %ecx, %edx
11841; AVX1-NEXT: movl %eax, %esi
11842; AVX1-NEXT: andl $16, %esi
11843; AVX1-NEXT: shll $23, %esi
11844; AVX1-NEXT: orl %edx, %esi
11845; AVX1-NEXT: movl %eax, %ecx
11846; AVX1-NEXT: andl $32, %ecx
11847; AVX1-NEXT: shll $21, %ecx
11848; AVX1-NEXT: orl %esi, %ecx
11849; AVX1-NEXT: movl %eax, %edx
11850; AVX1-NEXT: andl $64, %edx
11851; AVX1-NEXT: shll $19, %edx
11852; AVX1-NEXT: movl %eax, %esi
11853; AVX1-NEXT: shll $17, %esi
11854; AVX1-NEXT: andl $16777216, %esi # imm = 0x1000000
11855; AVX1-NEXT: orl %edx, %esi
11856; AVX1-NEXT: movl %eax, %edx
11857; AVX1-NEXT: shll $15, %edx
11858; AVX1-NEXT: andl $8388608, %edx # imm = 0x800000
11859; AVX1-NEXT: orl %esi, %edx
11860; AVX1-NEXT: movl %eax, %esi
11861; AVX1-NEXT: shll $13, %esi
11862; AVX1-NEXT: andl $4194304, %esi # imm = 0x400000
11863; AVX1-NEXT: orl %edx, %esi
11864; AVX1-NEXT: movl %eax, %edx
11865; AVX1-NEXT: shll $11, %edx
11866; AVX1-NEXT: andl $2097152, %edx # imm = 0x200000
11867; AVX1-NEXT: orl %esi, %edx
11868; AVX1-NEXT: movl %eax, %esi
11869; AVX1-NEXT: shll $9, %esi
11870; AVX1-NEXT: andl $1048576, %esi # imm = 0x100000
11871; AVX1-NEXT: orl %edx, %esi
11872; AVX1-NEXT: movl %eax, %edx
11873; AVX1-NEXT: shll $7, %edx
11874; AVX1-NEXT: andl $524288, %edx # imm = 0x80000
11875; AVX1-NEXT: orl %esi, %edx
11876; AVX1-NEXT: movl %eax, %esi
11877; AVX1-NEXT: shll $5, %esi
11878; AVX1-NEXT: andl $262144, %esi # imm = 0x40000
11879; AVX1-NEXT: orl %edx, %esi
11880; AVX1-NEXT: leal (,%rax,8), %edx
11881; AVX1-NEXT: andl $131072, %edx # imm = 0x20000
11882; AVX1-NEXT: orl %esi, %edx
11883; AVX1-NEXT: leal (%rax,%rax), %esi
11884; AVX1-NEXT: andl $65536, %esi # imm = 0x10000
11885; AVX1-NEXT: orl %edx, %esi
11886; AVX1-NEXT: movl %eax, %edx
11887; AVX1-NEXT: shrl %edx
11888; AVX1-NEXT: andl $32768, %edx # imm = 0x8000
11889; AVX1-NEXT: orl %esi, %edx
11890; AVX1-NEXT: movl %eax, %esi
11891; AVX1-NEXT: shrl $3, %esi
11892; AVX1-NEXT: andl $16384, %esi # imm = 0x4000
11893; AVX1-NEXT: orl %edx, %esi
11894; AVX1-NEXT: movl %eax, %edx
11895; AVX1-NEXT: shrl $5, %edx
11896; AVX1-NEXT: andl $8192, %edx # imm = 0x2000
11897; AVX1-NEXT: orl %esi, %edx
11898; AVX1-NEXT: movl %eax, %esi
11899; AVX1-NEXT: shrl $7, %esi
11900; AVX1-NEXT: andl $4096, %esi # imm = 0x1000
11901; AVX1-NEXT: orl %edx, %esi
11902; AVX1-NEXT: movl %eax, %edx
11903; AVX1-NEXT: shrl $9, %edx
11904; AVX1-NEXT: andl $2048, %edx # imm = 0x800
11905; AVX1-NEXT: orl %esi, %edx
11906; AVX1-NEXT: movl %eax, %esi
11907; AVX1-NEXT: shrl $11, %esi
11908; AVX1-NEXT: andl $1024, %esi # imm = 0x400
11909; AVX1-NEXT: orl %edx, %esi
11910; AVX1-NEXT: movl %eax, %edx
11911; AVX1-NEXT: shrl $13, %edx
11912; AVX1-NEXT: andl $512, %edx # imm = 0x200
11913; AVX1-NEXT: orl %esi, %edx
11914; AVX1-NEXT: movl %eax, %esi
11915; AVX1-NEXT: shrl $15, %esi
11916; AVX1-NEXT: andl $256, %esi # imm = 0x100
11917; AVX1-NEXT: orl %edx, %esi
11918; AVX1-NEXT: movl %eax, %edx
11919; AVX1-NEXT: shrl $17, %edx
11920; AVX1-NEXT: andl $128, %edx
11921; AVX1-NEXT: orl %esi, %edx
11922; AVX1-NEXT: movl %eax, %esi
11923; AVX1-NEXT: shrl $19, %esi
11924; AVX1-NEXT: andl $64, %esi
11925; AVX1-NEXT: orl %edx, %esi
11926; AVX1-NEXT: movl %eax, %edx
11927; AVX1-NEXT: shrl $21, %edx
11928; AVX1-NEXT: andl $32, %edx
11929; AVX1-NEXT: orl %esi, %edx
11930; AVX1-NEXT: movl %eax, %esi
11931; AVX1-NEXT: shrl $23, %esi
11932; AVX1-NEXT: andl $16, %esi
11933; AVX1-NEXT: orl %edx, %esi
11934; AVX1-NEXT: movl %eax, %edx
11935; AVX1-NEXT: shrl $25, %edx
11936; AVX1-NEXT: andl $8, %edx
11937; AVX1-NEXT: orl %esi, %edx
11938; AVX1-NEXT: movl %eax, %esi
11939; AVX1-NEXT: shrl $27, %esi
11940; AVX1-NEXT: andl $4, %esi
11941; AVX1-NEXT: orl %edx, %esi
11942; AVX1-NEXT: movl %eax, %edx
11943; AVX1-NEXT: shrl $29, %edx
11944; AVX1-NEXT: andl $2, %edx
11945; AVX1-NEXT: orl %esi, %edx
11946; AVX1-NEXT: shrl $31, %eax
11947; AVX1-NEXT: orl %edx, %eax
11948; AVX1-NEXT: orl %ecx, %eax
11949; AVX1-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
11950; AVX1-NEXT: vpextrd $3, %xmm1, %eax
11951; AVX1-NEXT: movl %eax, %ecx
11952; AVX1-NEXT: shll $31, %ecx
11953; AVX1-NEXT: movl %eax, %edx
11954; AVX1-NEXT: andl $2, %edx
11955; AVX1-NEXT: shll $29, %edx
11956; AVX1-NEXT: orl %ecx, %edx
11957; AVX1-NEXT: movl %eax, %ecx
11958; AVX1-NEXT: andl $4, %ecx
11959; AVX1-NEXT: shll $27, %ecx
11960; AVX1-NEXT: orl %edx, %ecx
11961; AVX1-NEXT: movl %eax, %edx
11962; AVX1-NEXT: andl $8, %edx
11963; AVX1-NEXT: shll $25, %edx
11964; AVX1-NEXT: orl %ecx, %edx
11965; AVX1-NEXT: movl %eax, %esi
11966; AVX1-NEXT: andl $16, %esi
11967; AVX1-NEXT: shll $23, %esi
11968; AVX1-NEXT: orl %edx, %esi
11969; AVX1-NEXT: movl %eax, %ecx
11970; AVX1-NEXT: andl $32, %ecx
11971; AVX1-NEXT: shll $21, %ecx
11972; AVX1-NEXT: orl %esi, %ecx
11973; AVX1-NEXT: movl %eax, %edx
11974; AVX1-NEXT: andl $64, %edx
11975; AVX1-NEXT: shll $19, %edx
11976; AVX1-NEXT: movl %eax, %esi
11977; AVX1-NEXT: shll $17, %esi
11978; AVX1-NEXT: andl $16777216, %esi # imm = 0x1000000
11979; AVX1-NEXT: orl %edx, %esi
11980; AVX1-NEXT: movl %eax, %edx
11981; AVX1-NEXT: shll $15, %edx
11982; AVX1-NEXT: andl $8388608, %edx # imm = 0x800000
11983; AVX1-NEXT: orl %esi, %edx
11984; AVX1-NEXT: movl %eax, %esi
11985; AVX1-NEXT: shll $13, %esi
11986; AVX1-NEXT: andl $4194304, %esi # imm = 0x400000
11987; AVX1-NEXT: orl %edx, %esi
11988; AVX1-NEXT: movl %eax, %edx
11989; AVX1-NEXT: shll $11, %edx
11990; AVX1-NEXT: andl $2097152, %edx # imm = 0x200000
11991; AVX1-NEXT: orl %esi, %edx
11992; AVX1-NEXT: movl %eax, %esi
11993; AVX1-NEXT: shll $9, %esi
11994; AVX1-NEXT: andl $1048576, %esi # imm = 0x100000
11995; AVX1-NEXT: orl %edx, %esi
11996; AVX1-NEXT: movl %eax, %edx
11997; AVX1-NEXT: shll $7, %edx
11998; AVX1-NEXT: andl $524288, %edx # imm = 0x80000
11999; AVX1-NEXT: orl %esi, %edx
12000; AVX1-NEXT: movl %eax, %esi
12001; AVX1-NEXT: shll $5, %esi
12002; AVX1-NEXT: andl $262144, %esi # imm = 0x40000
12003; AVX1-NEXT: orl %edx, %esi
12004; AVX1-NEXT: leal (,%rax,8), %edx
12005; AVX1-NEXT: andl $131072, %edx # imm = 0x20000
12006; AVX1-NEXT: orl %esi, %edx
12007; AVX1-NEXT: leal (%rax,%rax), %esi
12008; AVX1-NEXT: andl $65536, %esi # imm = 0x10000
12009; AVX1-NEXT: orl %edx, %esi
12010; AVX1-NEXT: movl %eax, %edx
12011; AVX1-NEXT: shrl %edx
12012; AVX1-NEXT: andl $32768, %edx # imm = 0x8000
12013; AVX1-NEXT: orl %esi, %edx
12014; AVX1-NEXT: movl %eax, %esi
12015; AVX1-NEXT: shrl $3, %esi
12016; AVX1-NEXT: andl $16384, %esi # imm = 0x4000
12017; AVX1-NEXT: orl %edx, %esi
12018; AVX1-NEXT: movl %eax, %edx
12019; AVX1-NEXT: shrl $5, %edx
12020; AVX1-NEXT: andl $8192, %edx # imm = 0x2000
12021; AVX1-NEXT: orl %esi, %edx
12022; AVX1-NEXT: movl %eax, %esi
12023; AVX1-NEXT: shrl $7, %esi
12024; AVX1-NEXT: andl $4096, %esi # imm = 0x1000
12025; AVX1-NEXT: orl %edx, %esi
12026; AVX1-NEXT: movl %eax, %edx
12027; AVX1-NEXT: shrl $9, %edx
12028; AVX1-NEXT: andl $2048, %edx # imm = 0x800
12029; AVX1-NEXT: orl %esi, %edx
12030; AVX1-NEXT: movl %eax, %esi
12031; AVX1-NEXT: shrl $11, %esi
12032; AVX1-NEXT: andl $1024, %esi # imm = 0x400
12033; AVX1-NEXT: orl %edx, %esi
12034; AVX1-NEXT: movl %eax, %edx
12035; AVX1-NEXT: shrl $13, %edx
12036; AVX1-NEXT: andl $512, %edx # imm = 0x200
12037; AVX1-NEXT: orl %esi, %edx
12038; AVX1-NEXT: movl %eax, %esi
12039; AVX1-NEXT: shrl $15, %esi
12040; AVX1-NEXT: andl $256, %esi # imm = 0x100
12041; AVX1-NEXT: orl %edx, %esi
12042; AVX1-NEXT: movl %eax, %edx
12043; AVX1-NEXT: shrl $17, %edx
12044; AVX1-NEXT: andl $128, %edx
12045; AVX1-NEXT: orl %esi, %edx
12046; AVX1-NEXT: movl %eax, %esi
12047; AVX1-NEXT: shrl $19, %esi
12048; AVX1-NEXT: andl $64, %esi
12049; AVX1-NEXT: orl %edx, %esi
12050; AVX1-NEXT: movl %eax, %edx
12051; AVX1-NEXT: shrl $21, %edx
12052; AVX1-NEXT: andl $32, %edx
12053; AVX1-NEXT: orl %esi, %edx
12054; AVX1-NEXT: movl %eax, %esi
12055; AVX1-NEXT: shrl $23, %esi
12056; AVX1-NEXT: andl $16, %esi
12057; AVX1-NEXT: orl %edx, %esi
12058; AVX1-NEXT: movl %eax, %edx
12059; AVX1-NEXT: shrl $25, %edx
12060; AVX1-NEXT: andl $8, %edx
12061; AVX1-NEXT: orl %esi, %edx
12062; AVX1-NEXT: movl %eax, %esi
12063; AVX1-NEXT: shrl $27, %esi
12064; AVX1-NEXT: andl $4, %esi
12065; AVX1-NEXT: orl %edx, %esi
12066; AVX1-NEXT: movl %eax, %edx
12067; AVX1-NEXT: shrl $29, %edx
12068; AVX1-NEXT: andl $2, %edx
12069; AVX1-NEXT: orl %esi, %edx
12070; AVX1-NEXT: shrl $31, %eax
12071; AVX1-NEXT: orl %edx, %eax
12072; AVX1-NEXT: orl %ecx, %eax
12073; AVX1-NEXT: vpinsrd $3, %eax, %xmm2, %xmm1
12074; AVX1-NEXT: vpextrd $1, %xmm0, %eax
12075; AVX1-NEXT: movl %eax, %ecx
12076; AVX1-NEXT: shll $31, %ecx
12077; AVX1-NEXT: movl %eax, %edx
12078; AVX1-NEXT: andl $2, %edx
12079; AVX1-NEXT: shll $29, %edx
12080; AVX1-NEXT: orl %ecx, %edx
12081; AVX1-NEXT: movl %eax, %ecx
12082; AVX1-NEXT: andl $4, %ecx
12083; AVX1-NEXT: shll $27, %ecx
12084; AVX1-NEXT: orl %edx, %ecx
12085; AVX1-NEXT: movl %eax, %edx
12086; AVX1-NEXT: andl $8, %edx
12087; AVX1-NEXT: shll $25, %edx
12088; AVX1-NEXT: orl %ecx, %edx
12089; AVX1-NEXT: movl %eax, %esi
12090; AVX1-NEXT: andl $16, %esi
12091; AVX1-NEXT: shll $23, %esi
12092; AVX1-NEXT: orl %edx, %esi
12093; AVX1-NEXT: movl %eax, %ecx
12094; AVX1-NEXT: andl $32, %ecx
12095; AVX1-NEXT: shll $21, %ecx
12096; AVX1-NEXT: orl %esi, %ecx
12097; AVX1-NEXT: movl %eax, %edx
12098; AVX1-NEXT: andl $64, %edx
12099; AVX1-NEXT: shll $19, %edx
12100; AVX1-NEXT: movl %eax, %esi
12101; AVX1-NEXT: shll $17, %esi
12102; AVX1-NEXT: andl $16777216, %esi # imm = 0x1000000
12103; AVX1-NEXT: orl %edx, %esi
12104; AVX1-NEXT: movl %eax, %edx
12105; AVX1-NEXT: shll $15, %edx
12106; AVX1-NEXT: andl $8388608, %edx # imm = 0x800000
12107; AVX1-NEXT: orl %esi, %edx
12108; AVX1-NEXT: movl %eax, %esi
12109; AVX1-NEXT: shll $13, %esi
12110; AVX1-NEXT: andl $4194304, %esi # imm = 0x400000
12111; AVX1-NEXT: orl %edx, %esi
12112; AVX1-NEXT: movl %eax, %edx
12113; AVX1-NEXT: shll $11, %edx
12114; AVX1-NEXT: andl $2097152, %edx # imm = 0x200000
12115; AVX1-NEXT: orl %esi, %edx
12116; AVX1-NEXT: movl %eax, %esi
12117; AVX1-NEXT: shll $9, %esi
12118; AVX1-NEXT: andl $1048576, %esi # imm = 0x100000
12119; AVX1-NEXT: orl %edx, %esi
12120; AVX1-NEXT: movl %eax, %edx
12121; AVX1-NEXT: shll $7, %edx
12122; AVX1-NEXT: andl $524288, %edx # imm = 0x80000
12123; AVX1-NEXT: orl %esi, %edx
12124; AVX1-NEXT: movl %eax, %esi
12125; AVX1-NEXT: shll $5, %esi
12126; AVX1-NEXT: andl $262144, %esi # imm = 0x40000
12127; AVX1-NEXT: orl %edx, %esi
12128; AVX1-NEXT: leal (,%rax,8), %edx
12129; AVX1-NEXT: andl $131072, %edx # imm = 0x20000
12130; AVX1-NEXT: orl %esi, %edx
12131; AVX1-NEXT: leal (%rax,%rax), %esi
12132; AVX1-NEXT: andl $65536, %esi # imm = 0x10000
12133; AVX1-NEXT: orl %edx, %esi
12134; AVX1-NEXT: movl %eax, %edx
12135; AVX1-NEXT: shrl %edx
12136; AVX1-NEXT: andl $32768, %edx # imm = 0x8000
12137; AVX1-NEXT: orl %esi, %edx
12138; AVX1-NEXT: movl %eax, %esi
12139; AVX1-NEXT: shrl $3, %esi
12140; AVX1-NEXT: andl $16384, %esi # imm = 0x4000
12141; AVX1-NEXT: orl %edx, %esi
12142; AVX1-NEXT: movl %eax, %edx
12143; AVX1-NEXT: shrl $5, %edx
12144; AVX1-NEXT: andl $8192, %edx # imm = 0x2000
12145; AVX1-NEXT: orl %esi, %edx
12146; AVX1-NEXT: movl %eax, %esi
12147; AVX1-NEXT: shrl $7, %esi
12148; AVX1-NEXT: andl $4096, %esi # imm = 0x1000
12149; AVX1-NEXT: orl %edx, %esi
12150; AVX1-NEXT: movl %eax, %edx
12151; AVX1-NEXT: shrl $9, %edx
12152; AVX1-NEXT: andl $2048, %edx # imm = 0x800
12153; AVX1-NEXT: orl %esi, %edx
12154; AVX1-NEXT: movl %eax, %esi
12155; AVX1-NEXT: shrl $11, %esi
12156; AVX1-NEXT: andl $1024, %esi # imm = 0x400
12157; AVX1-NEXT: orl %edx, %esi
12158; AVX1-NEXT: movl %eax, %edx
12159; AVX1-NEXT: shrl $13, %edx
12160; AVX1-NEXT: andl $512, %edx # imm = 0x200
12161; AVX1-NEXT: orl %esi, %edx
12162; AVX1-NEXT: movl %eax, %esi
12163; AVX1-NEXT: shrl $15, %esi
12164; AVX1-NEXT: andl $256, %esi # imm = 0x100
12165; AVX1-NEXT: orl %edx, %esi
12166; AVX1-NEXT: movl %eax, %edx
12167; AVX1-NEXT: shrl $17, %edx
12168; AVX1-NEXT: andl $128, %edx
12169; AVX1-NEXT: orl %esi, %edx
12170; AVX1-NEXT: movl %eax, %esi
12171; AVX1-NEXT: shrl $19, %esi
12172; AVX1-NEXT: andl $64, %esi
12173; AVX1-NEXT: orl %edx, %esi
12174; AVX1-NEXT: movl %eax, %edx
12175; AVX1-NEXT: shrl $21, %edx
12176; AVX1-NEXT: andl $32, %edx
12177; AVX1-NEXT: orl %esi, %edx
12178; AVX1-NEXT: movl %eax, %esi
12179; AVX1-NEXT: shrl $23, %esi
12180; AVX1-NEXT: andl $16, %esi
12181; AVX1-NEXT: orl %edx, %esi
12182; AVX1-NEXT: movl %eax, %edx
12183; AVX1-NEXT: shrl $25, %edx
12184; AVX1-NEXT: andl $8, %edx
12185; AVX1-NEXT: orl %esi, %edx
12186; AVX1-NEXT: movl %eax, %esi
12187; AVX1-NEXT: shrl $27, %esi
12188; AVX1-NEXT: andl $4, %esi
12189; AVX1-NEXT: orl %edx, %esi
12190; AVX1-NEXT: movl %eax, %edx
12191; AVX1-NEXT: shrl $29, %edx
12192; AVX1-NEXT: andl $2, %edx
12193; AVX1-NEXT: orl %esi, %edx
12194; AVX1-NEXT: shrl $31, %eax
12195; AVX1-NEXT: orl %edx, %eax
12196; AVX1-NEXT: orl %ecx, %eax
12197; AVX1-NEXT: vmovd %xmm0, %ecx
12198; AVX1-NEXT: movl %ecx, %edx
12199; AVX1-NEXT: shll $31, %edx
12200; AVX1-NEXT: movl %ecx, %esi
12201; AVX1-NEXT: andl $2, %esi
12202; AVX1-NEXT: shll $29, %esi
12203; AVX1-NEXT: orl %edx, %esi
12204; AVX1-NEXT: movl %ecx, %edx
12205; AVX1-NEXT: andl $4, %edx
12206; AVX1-NEXT: shll $27, %edx
12207; AVX1-NEXT: orl %esi, %edx
12208; AVX1-NEXT: movl %ecx, %esi
12209; AVX1-NEXT: andl $8, %esi
12210; AVX1-NEXT: shll $25, %esi
12211; AVX1-NEXT: orl %edx, %esi
12212; AVX1-NEXT: movl %ecx, %edi
12213; AVX1-NEXT: andl $16, %edi
12214; AVX1-NEXT: shll $23, %edi
12215; AVX1-NEXT: orl %esi, %edi
12216; AVX1-NEXT: movl %ecx, %edx
12217; AVX1-NEXT: andl $32, %edx
12218; AVX1-NEXT: shll $21, %edx
12219; AVX1-NEXT: orl %edi, %edx
12220; AVX1-NEXT: movl %ecx, %esi
12221; AVX1-NEXT: andl $64, %esi
12222; AVX1-NEXT: shll $19, %esi
12223; AVX1-NEXT: movl %ecx, %edi
12224; AVX1-NEXT: shll $17, %edi
12225; AVX1-NEXT: andl $16777216, %edi # imm = 0x1000000
12226; AVX1-NEXT: orl %esi, %edi
12227; AVX1-NEXT: movl %ecx, %esi
12228; AVX1-NEXT: shll $15, %esi
12229; AVX1-NEXT: andl $8388608, %esi # imm = 0x800000
12230; AVX1-NEXT: orl %edi, %esi
12231; AVX1-NEXT: movl %ecx, %edi
12232; AVX1-NEXT: shll $13, %edi
12233; AVX1-NEXT: andl $4194304, %edi # imm = 0x400000
12234; AVX1-NEXT: orl %esi, %edi
12235; AVX1-NEXT: movl %ecx, %esi
12236; AVX1-NEXT: shll $11, %esi
12237; AVX1-NEXT: andl $2097152, %esi # imm = 0x200000
12238; AVX1-NEXT: orl %edi, %esi
12239; AVX1-NEXT: movl %ecx, %edi
12240; AVX1-NEXT: shll $9, %edi
12241; AVX1-NEXT: andl $1048576, %edi # imm = 0x100000
12242; AVX1-NEXT: orl %esi, %edi
12243; AVX1-NEXT: movl %ecx, %esi
12244; AVX1-NEXT: shll $7, %esi
12245; AVX1-NEXT: andl $524288, %esi # imm = 0x80000
12246; AVX1-NEXT: orl %edi, %esi
12247; AVX1-NEXT: movl %ecx, %edi
12248; AVX1-NEXT: shll $5, %edi
12249; AVX1-NEXT: andl $262144, %edi # imm = 0x40000
12250; AVX1-NEXT: orl %esi, %edi
12251; AVX1-NEXT: leal (,%rcx,8), %esi
12252; AVX1-NEXT: andl $131072, %esi # imm = 0x20000
12253; AVX1-NEXT: orl %edi, %esi
12254; AVX1-NEXT: leal (%rcx,%rcx), %edi
12255; AVX1-NEXT: andl $65536, %edi # imm = 0x10000
12256; AVX1-NEXT: orl %esi, %edi
12257; AVX1-NEXT: movl %ecx, %esi
12258; AVX1-NEXT: shrl %esi
12259; AVX1-NEXT: andl $32768, %esi # imm = 0x8000
12260; AVX1-NEXT: orl %edi, %esi
12261; AVX1-NEXT: movl %ecx, %edi
12262; AVX1-NEXT: shrl $3, %edi
12263; AVX1-NEXT: andl $16384, %edi # imm = 0x4000
12264; AVX1-NEXT: orl %esi, %edi
12265; AVX1-NEXT: movl %ecx, %esi
12266; AVX1-NEXT: shrl $5, %esi
12267; AVX1-NEXT: andl $8192, %esi # imm = 0x2000
12268; AVX1-NEXT: orl %edi, %esi
12269; AVX1-NEXT: movl %ecx, %edi
12270; AVX1-NEXT: shrl $7, %edi
12271; AVX1-NEXT: andl $4096, %edi # imm = 0x1000
12272; AVX1-NEXT: orl %esi, %edi
12273; AVX1-NEXT: movl %ecx, %esi
12274; AVX1-NEXT: shrl $9, %esi
12275; AVX1-NEXT: andl $2048, %esi # imm = 0x800
12276; AVX1-NEXT: orl %edi, %esi
12277; AVX1-NEXT: movl %ecx, %edi
12278; AVX1-NEXT: shrl $11, %edi
12279; AVX1-NEXT: andl $1024, %edi # imm = 0x400
12280; AVX1-NEXT: orl %esi, %edi
12281; AVX1-NEXT: movl %ecx, %esi
12282; AVX1-NEXT: shrl $13, %esi
12283; AVX1-NEXT: andl $512, %esi # imm = 0x200
12284; AVX1-NEXT: orl %edi, %esi
12285; AVX1-NEXT: movl %ecx, %edi
12286; AVX1-NEXT: shrl $15, %edi
12287; AVX1-NEXT: andl $256, %edi # imm = 0x100
12288; AVX1-NEXT: orl %esi, %edi
12289; AVX1-NEXT: movl %ecx, %esi
12290; AVX1-NEXT: shrl $17, %esi
12291; AVX1-NEXT: andl $128, %esi
12292; AVX1-NEXT: orl %edi, %esi
12293; AVX1-NEXT: movl %ecx, %edi
12294; AVX1-NEXT: shrl $19, %edi
12295; AVX1-NEXT: andl $64, %edi
12296; AVX1-NEXT: orl %esi, %edi
12297; AVX1-NEXT: movl %ecx, %esi
12298; AVX1-NEXT: shrl $21, %esi
12299; AVX1-NEXT: andl $32, %esi
12300; AVX1-NEXT: orl %edi, %esi
12301; AVX1-NEXT: movl %ecx, %edi
12302; AVX1-NEXT: shrl $23, %edi
12303; AVX1-NEXT: andl $16, %edi
12304; AVX1-NEXT: orl %esi, %edi
12305; AVX1-NEXT: movl %ecx, %esi
12306; AVX1-NEXT: shrl $25, %esi
12307; AVX1-NEXT: andl $8, %esi
12308; AVX1-NEXT: orl %edi, %esi
12309; AVX1-NEXT: movl %ecx, %edi
12310; AVX1-NEXT: shrl $27, %edi
12311; AVX1-NEXT: andl $4, %edi
12312; AVX1-NEXT: orl %esi, %edi
12313; AVX1-NEXT: movl %ecx, %esi
12314; AVX1-NEXT: shrl $29, %esi
12315; AVX1-NEXT: andl $2, %esi
12316; AVX1-NEXT: orl %edi, %esi
12317; AVX1-NEXT: shrl $31, %ecx
12318; AVX1-NEXT: orl %esi, %ecx
12319; AVX1-NEXT: orl %edx, %ecx
12320; AVX1-NEXT: vmovd %ecx, %xmm2
12321; AVX1-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2
12322; AVX1-NEXT: vpextrd $2, %xmm0, %eax
12323; AVX1-NEXT: movl %eax, %ecx
12324; AVX1-NEXT: shll $31, %ecx
12325; AVX1-NEXT: movl %eax, %edx
12326; AVX1-NEXT: andl $2, %edx
12327; AVX1-NEXT: shll $29, %edx
12328; AVX1-NEXT: orl %ecx, %edx
12329; AVX1-NEXT: movl %eax, %ecx
12330; AVX1-NEXT: andl $4, %ecx
12331; AVX1-NEXT: shll $27, %ecx
12332; AVX1-NEXT: orl %edx, %ecx
12333; AVX1-NEXT: movl %eax, %edx
12334; AVX1-NEXT: andl $8, %edx
12335; AVX1-NEXT: shll $25, %edx
12336; AVX1-NEXT: orl %ecx, %edx
12337; AVX1-NEXT: movl %eax, %esi
12338; AVX1-NEXT: andl $16, %esi
12339; AVX1-NEXT: shll $23, %esi
12340; AVX1-NEXT: orl %edx, %esi
12341; AVX1-NEXT: movl %eax, %ecx
12342; AVX1-NEXT: andl $32, %ecx
12343; AVX1-NEXT: shll $21, %ecx
12344; AVX1-NEXT: orl %esi, %ecx
12345; AVX1-NEXT: movl %eax, %edx
12346; AVX1-NEXT: andl $64, %edx
12347; AVX1-NEXT: shll $19, %edx
12348; AVX1-NEXT: movl %eax, %esi
12349; AVX1-NEXT: shll $17, %esi
12350; AVX1-NEXT: andl $16777216, %esi # imm = 0x1000000
12351; AVX1-NEXT: orl %edx, %esi
12352; AVX1-NEXT: movl %eax, %edx
12353; AVX1-NEXT: shll $15, %edx
12354; AVX1-NEXT: andl $8388608, %edx # imm = 0x800000
12355; AVX1-NEXT: orl %esi, %edx
12356; AVX1-NEXT: movl %eax, %esi
12357; AVX1-NEXT: shll $13, %esi
12358; AVX1-NEXT: andl $4194304, %esi # imm = 0x400000
12359; AVX1-NEXT: orl %edx, %esi
12360; AVX1-NEXT: movl %eax, %edx
12361; AVX1-NEXT: shll $11, %edx
12362; AVX1-NEXT: andl $2097152, %edx # imm = 0x200000
12363; AVX1-NEXT: orl %esi, %edx
12364; AVX1-NEXT: movl %eax, %esi
12365; AVX1-NEXT: shll $9, %esi
12366; AVX1-NEXT: andl $1048576, %esi # imm = 0x100000
12367; AVX1-NEXT: orl %edx, %esi
12368; AVX1-NEXT: movl %eax, %edx
12369; AVX1-NEXT: shll $7, %edx
12370; AVX1-NEXT: andl $524288, %edx # imm = 0x80000
12371; AVX1-NEXT: orl %esi, %edx
12372; AVX1-NEXT: movl %eax, %esi
12373; AVX1-NEXT: shll $5, %esi
12374; AVX1-NEXT: andl $262144, %esi # imm = 0x40000
12375; AVX1-NEXT: orl %edx, %esi
12376; AVX1-NEXT: leal (,%rax,8), %edx
12377; AVX1-NEXT: andl $131072, %edx # imm = 0x20000
12378; AVX1-NEXT: orl %esi, %edx
12379; AVX1-NEXT: leal (%rax,%rax), %esi
12380; AVX1-NEXT: andl $65536, %esi # imm = 0x10000
12381; AVX1-NEXT: orl %edx, %esi
12382; AVX1-NEXT: movl %eax, %edx
12383; AVX1-NEXT: shrl %edx
12384; AVX1-NEXT: andl $32768, %edx # imm = 0x8000
12385; AVX1-NEXT: orl %esi, %edx
12386; AVX1-NEXT: movl %eax, %esi
12387; AVX1-NEXT: shrl $3, %esi
12388; AVX1-NEXT: andl $16384, %esi # imm = 0x4000
12389; AVX1-NEXT: orl %edx, %esi
12390; AVX1-NEXT: movl %eax, %edx
12391; AVX1-NEXT: shrl $5, %edx
12392; AVX1-NEXT: andl $8192, %edx # imm = 0x2000
12393; AVX1-NEXT: orl %esi, %edx
12394; AVX1-NEXT: movl %eax, %esi
12395; AVX1-NEXT: shrl $7, %esi
12396; AVX1-NEXT: andl $4096, %esi # imm = 0x1000
12397; AVX1-NEXT: orl %edx, %esi
12398; AVX1-NEXT: movl %eax, %edx
12399; AVX1-NEXT: shrl $9, %edx
12400; AVX1-NEXT: andl $2048, %edx # imm = 0x800
12401; AVX1-NEXT: orl %esi, %edx
12402; AVX1-NEXT: movl %eax, %esi
12403; AVX1-NEXT: shrl $11, %esi
12404; AVX1-NEXT: andl $1024, %esi # imm = 0x400
12405; AVX1-NEXT: orl %edx, %esi
12406; AVX1-NEXT: movl %eax, %edx
12407; AVX1-NEXT: shrl $13, %edx
12408; AVX1-NEXT: andl $512, %edx # imm = 0x200
12409; AVX1-NEXT: orl %esi, %edx
12410; AVX1-NEXT: movl %eax, %esi
12411; AVX1-NEXT: shrl $15, %esi
12412; AVX1-NEXT: andl $256, %esi # imm = 0x100
12413; AVX1-NEXT: orl %edx, %esi
12414; AVX1-NEXT: movl %eax, %edx
12415; AVX1-NEXT: shrl $17, %edx
12416; AVX1-NEXT: andl $128, %edx
12417; AVX1-NEXT: orl %esi, %edx
12418; AVX1-NEXT: movl %eax, %esi
12419; AVX1-NEXT: shrl $19, %esi
12420; AVX1-NEXT: andl $64, %esi
12421; AVX1-NEXT: orl %edx, %esi
12422; AVX1-NEXT: movl %eax, %edx
12423; AVX1-NEXT: shrl $21, %edx
12424; AVX1-NEXT: andl $32, %edx
12425; AVX1-NEXT: orl %esi, %edx
12426; AVX1-NEXT: movl %eax, %esi
12427; AVX1-NEXT: shrl $23, %esi
12428; AVX1-NEXT: andl $16, %esi
12429; AVX1-NEXT: orl %edx, %esi
12430; AVX1-NEXT: movl %eax, %edx
12431; AVX1-NEXT: shrl $25, %edx
12432; AVX1-NEXT: andl $8, %edx
12433; AVX1-NEXT: orl %esi, %edx
12434; AVX1-NEXT: movl %eax, %esi
12435; AVX1-NEXT: shrl $27, %esi
12436; AVX1-NEXT: andl $4, %esi
12437; AVX1-NEXT: orl %edx, %esi
12438; AVX1-NEXT: movl %eax, %edx
12439; AVX1-NEXT: shrl $29, %edx
12440; AVX1-NEXT: andl $2, %edx
12441; AVX1-NEXT: orl %esi, %edx
12442; AVX1-NEXT: shrl $31, %eax
12443; AVX1-NEXT: orl %edx, %eax
12444; AVX1-NEXT: orl %ecx, %eax
12445; AVX1-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
12446; AVX1-NEXT: vpextrd $3, %xmm0, %eax
12447; AVX1-NEXT: movl %eax, %ecx
12448; AVX1-NEXT: shll $31, %ecx
12449; AVX1-NEXT: movl %eax, %edx
12450; AVX1-NEXT: andl $2, %edx
12451; AVX1-NEXT: shll $29, %edx
12452; AVX1-NEXT: orl %ecx, %edx
12453; AVX1-NEXT: movl %eax, %ecx
12454; AVX1-NEXT: andl $4, %ecx
12455; AVX1-NEXT: shll $27, %ecx
12456; AVX1-NEXT: orl %edx, %ecx
12457; AVX1-NEXT: movl %eax, %edx
12458; AVX1-NEXT: andl $8, %edx
12459; AVX1-NEXT: shll $25, %edx
12460; AVX1-NEXT: orl %ecx, %edx
12461; AVX1-NEXT: movl %eax, %esi
12462; AVX1-NEXT: andl $16, %esi
12463; AVX1-NEXT: shll $23, %esi
12464; AVX1-NEXT: orl %edx, %esi
12465; AVX1-NEXT: movl %eax, %ecx
12466; AVX1-NEXT: andl $32, %ecx
12467; AVX1-NEXT: shll $21, %ecx
12468; AVX1-NEXT: orl %esi, %ecx
12469; AVX1-NEXT: movl %eax, %edx
12470; AVX1-NEXT: andl $64, %edx
12471; AVX1-NEXT: shll $19, %edx
12472; AVX1-NEXT: movl %eax, %esi
12473; AVX1-NEXT: shll $17, %esi
12474; AVX1-NEXT: andl $16777216, %esi # imm = 0x1000000
12475; AVX1-NEXT: orl %edx, %esi
12476; AVX1-NEXT: movl %eax, %edx
12477; AVX1-NEXT: shll $15, %edx
12478; AVX1-NEXT: andl $8388608, %edx # imm = 0x800000
12479; AVX1-NEXT: orl %esi, %edx
12480; AVX1-NEXT: movl %eax, %esi
12481; AVX1-NEXT: shll $13, %esi
12482; AVX1-NEXT: andl $4194304, %esi # imm = 0x400000
12483; AVX1-NEXT: orl %edx, %esi
12484; AVX1-NEXT: movl %eax, %edx
12485; AVX1-NEXT: shll $11, %edx
12486; AVX1-NEXT: andl $2097152, %edx # imm = 0x200000
12487; AVX1-NEXT: orl %esi, %edx
12488; AVX1-NEXT: movl %eax, %esi
12489; AVX1-NEXT: shll $9, %esi
12490; AVX1-NEXT: andl $1048576, %esi # imm = 0x100000
12491; AVX1-NEXT: orl %edx, %esi
12492; AVX1-NEXT: movl %eax, %edx
12493; AVX1-NEXT: shll $7, %edx
12494; AVX1-NEXT: andl $524288, %edx # imm = 0x80000
12495; AVX1-NEXT: orl %esi, %edx
12496; AVX1-NEXT: movl %eax, %esi
12497; AVX1-NEXT: shll $5, %esi
12498; AVX1-NEXT: andl $262144, %esi # imm = 0x40000
12499; AVX1-NEXT: orl %edx, %esi
12500; AVX1-NEXT: leal (,%rax,8), %edx
12501; AVX1-NEXT: andl $131072, %edx # imm = 0x20000
12502; AVX1-NEXT: orl %esi, %edx
12503; AVX1-NEXT: leal (%rax,%rax), %esi
12504; AVX1-NEXT: andl $65536, %esi # imm = 0x10000
12505; AVX1-NEXT: orl %edx, %esi
12506; AVX1-NEXT: movl %eax, %edx
12507; AVX1-NEXT: shrl %edx
12508; AVX1-NEXT: andl $32768, %edx # imm = 0x8000
12509; AVX1-NEXT: orl %esi, %edx
12510; AVX1-NEXT: movl %eax, %esi
12511; AVX1-NEXT: shrl $3, %esi
12512; AVX1-NEXT: andl $16384, %esi # imm = 0x4000
12513; AVX1-NEXT: orl %edx, %esi
12514; AVX1-NEXT: movl %eax, %edx
12515; AVX1-NEXT: shrl $5, %edx
12516; AVX1-NEXT: andl $8192, %edx # imm = 0x2000
12517; AVX1-NEXT: orl %esi, %edx
12518; AVX1-NEXT: movl %eax, %esi
12519; AVX1-NEXT: shrl $7, %esi
12520; AVX1-NEXT: andl $4096, %esi # imm = 0x1000
12521; AVX1-NEXT: orl %edx, %esi
12522; AVX1-NEXT: movl %eax, %edx
12523; AVX1-NEXT: shrl $9, %edx
12524; AVX1-NEXT: andl $2048, %edx # imm = 0x800
12525; AVX1-NEXT: orl %esi, %edx
12526; AVX1-NEXT: movl %eax, %esi
12527; AVX1-NEXT: shrl $11, %esi
12528; AVX1-NEXT: andl $1024, %esi # imm = 0x400
12529; AVX1-NEXT: orl %edx, %esi
12530; AVX1-NEXT: movl %eax, %edx
12531; AVX1-NEXT: shrl $13, %edx
12532; AVX1-NEXT: andl $512, %edx # imm = 0x200
12533; AVX1-NEXT: orl %esi, %edx
12534; AVX1-NEXT: movl %eax, %esi
12535; AVX1-NEXT: shrl $15, %esi
12536; AVX1-NEXT: andl $256, %esi # imm = 0x100
12537; AVX1-NEXT: orl %edx, %esi
12538; AVX1-NEXT: movl %eax, %edx
12539; AVX1-NEXT: shrl $17, %edx
12540; AVX1-NEXT: andl $128, %edx
12541; AVX1-NEXT: orl %esi, %edx
12542; AVX1-NEXT: movl %eax, %esi
12543; AVX1-NEXT: shrl $19, %esi
12544; AVX1-NEXT: andl $64, %esi
12545; AVX1-NEXT: orl %edx, %esi
12546; AVX1-NEXT: movl %eax, %edx
12547; AVX1-NEXT: shrl $21, %edx
12548; AVX1-NEXT: andl $32, %edx
12549; AVX1-NEXT: orl %esi, %edx
12550; AVX1-NEXT: movl %eax, %esi
12551; AVX1-NEXT: shrl $23, %esi
12552; AVX1-NEXT: andl $16, %esi
12553; AVX1-NEXT: orl %edx, %esi
12554; AVX1-NEXT: movl %eax, %edx
12555; AVX1-NEXT: shrl $25, %edx
12556; AVX1-NEXT: andl $8, %edx
12557; AVX1-NEXT: orl %esi, %edx
12558; AVX1-NEXT: movl %eax, %esi
12559; AVX1-NEXT: shrl $27, %esi
12560; AVX1-NEXT: andl $4, %esi
12561; AVX1-NEXT: orl %edx, %esi
12562; AVX1-NEXT: movl %eax, %edx
12563; AVX1-NEXT: shrl $29, %edx
12564; AVX1-NEXT: andl $2, %edx
12565; AVX1-NEXT: orl %esi, %edx
12566; AVX1-NEXT: shrl $31, %eax
12567; AVX1-NEXT: orl %edx, %eax
12568; AVX1-NEXT: orl %ecx, %eax
12569; AVX1-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0
12570; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
12571; AVX1-NEXT: retq
12572;
12573; AVX2-LABEL: test_bitreverse_v8i32:
12574; AVX2: # BB#0:
12575; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
12576; AVX2-NEXT: vpextrd $1, %xmm1, %eax
12577; AVX2-NEXT: movl %eax, %ecx
12578; AVX2-NEXT: shll $31, %ecx
12579; AVX2-NEXT: movl %eax, %edx
12580; AVX2-NEXT: andl $2, %edx
12581; AVX2-NEXT: shll $29, %edx
12582; AVX2-NEXT: orl %ecx, %edx
12583; AVX2-NEXT: movl %eax, %ecx
12584; AVX2-NEXT: andl $4, %ecx
12585; AVX2-NEXT: shll $27, %ecx
12586; AVX2-NEXT: orl %edx, %ecx
12587; AVX2-NEXT: movl %eax, %edx
12588; AVX2-NEXT: andl $8, %edx
12589; AVX2-NEXT: shll $25, %edx
12590; AVX2-NEXT: orl %ecx, %edx
12591; AVX2-NEXT: movl %eax, %esi
12592; AVX2-NEXT: andl $16, %esi
12593; AVX2-NEXT: shll $23, %esi
12594; AVX2-NEXT: orl %edx, %esi
12595; AVX2-NEXT: movl %eax, %ecx
12596; AVX2-NEXT: andl $32, %ecx
12597; AVX2-NEXT: shll $21, %ecx
12598; AVX2-NEXT: orl %esi, %ecx
12599; AVX2-NEXT: movl %eax, %edx
12600; AVX2-NEXT: andl $64, %edx
12601; AVX2-NEXT: shll $19, %edx
12602; AVX2-NEXT: movl %eax, %esi
12603; AVX2-NEXT: shll $17, %esi
12604; AVX2-NEXT: andl $16777216, %esi # imm = 0x1000000
12605; AVX2-NEXT: orl %edx, %esi
12606; AVX2-NEXT: movl %eax, %edx
12607; AVX2-NEXT: shll $15, %edx
12608; AVX2-NEXT: andl $8388608, %edx # imm = 0x800000
12609; AVX2-NEXT: orl %esi, %edx
12610; AVX2-NEXT: movl %eax, %esi
12611; AVX2-NEXT: shll $13, %esi
12612; AVX2-NEXT: andl $4194304, %esi # imm = 0x400000
12613; AVX2-NEXT: orl %edx, %esi
12614; AVX2-NEXT: movl %eax, %edx
12615; AVX2-NEXT: shll $11, %edx
12616; AVX2-NEXT: andl $2097152, %edx # imm = 0x200000
12617; AVX2-NEXT: orl %esi, %edx
12618; AVX2-NEXT: movl %eax, %esi
12619; AVX2-NEXT: shll $9, %esi
12620; AVX2-NEXT: andl $1048576, %esi # imm = 0x100000
12621; AVX2-NEXT: orl %edx, %esi
12622; AVX2-NEXT: movl %eax, %edx
12623; AVX2-NEXT: shll $7, %edx
12624; AVX2-NEXT: andl $524288, %edx # imm = 0x80000
12625; AVX2-NEXT: orl %esi, %edx
12626; AVX2-NEXT: movl %eax, %esi
12627; AVX2-NEXT: shll $5, %esi
12628; AVX2-NEXT: andl $262144, %esi # imm = 0x40000
12629; AVX2-NEXT: orl %edx, %esi
12630; AVX2-NEXT: leal (,%rax,8), %edx
12631; AVX2-NEXT: andl $131072, %edx # imm = 0x20000
12632; AVX2-NEXT: orl %esi, %edx
12633; AVX2-NEXT: leal (%rax,%rax), %esi
12634; AVX2-NEXT: andl $65536, %esi # imm = 0x10000
12635; AVX2-NEXT: orl %edx, %esi
12636; AVX2-NEXT: movl %eax, %edx
12637; AVX2-NEXT: shrl %edx
12638; AVX2-NEXT: andl $32768, %edx # imm = 0x8000
12639; AVX2-NEXT: orl %esi, %edx
12640; AVX2-NEXT: movl %eax, %esi
12641; AVX2-NEXT: shrl $3, %esi
12642; AVX2-NEXT: andl $16384, %esi # imm = 0x4000
12643; AVX2-NEXT: orl %edx, %esi
12644; AVX2-NEXT: movl %eax, %edx
12645; AVX2-NEXT: shrl $5, %edx
12646; AVX2-NEXT: andl $8192, %edx # imm = 0x2000
12647; AVX2-NEXT: orl %esi, %edx
12648; AVX2-NEXT: movl %eax, %esi
12649; AVX2-NEXT: shrl $7, %esi
12650; AVX2-NEXT: andl $4096, %esi # imm = 0x1000
12651; AVX2-NEXT: orl %edx, %esi
12652; AVX2-NEXT: movl %eax, %edx
12653; AVX2-NEXT: shrl $9, %edx
12654; AVX2-NEXT: andl $2048, %edx # imm = 0x800
12655; AVX2-NEXT: orl %esi, %edx
12656; AVX2-NEXT: movl %eax, %esi
12657; AVX2-NEXT: shrl $11, %esi
12658; AVX2-NEXT: andl $1024, %esi # imm = 0x400
12659; AVX2-NEXT: orl %edx, %esi
12660; AVX2-NEXT: movl %eax, %edx
12661; AVX2-NEXT: shrl $13, %edx
12662; AVX2-NEXT: andl $512, %edx # imm = 0x200
12663; AVX2-NEXT: orl %esi, %edx
12664; AVX2-NEXT: movl %eax, %esi
12665; AVX2-NEXT: shrl $15, %esi
12666; AVX2-NEXT: andl $256, %esi # imm = 0x100
12667; AVX2-NEXT: orl %edx, %esi
12668; AVX2-NEXT: movl %eax, %edx
12669; AVX2-NEXT: shrl $17, %edx
12670; AVX2-NEXT: andl $128, %edx
12671; AVX2-NEXT: orl %esi, %edx
12672; AVX2-NEXT: movl %eax, %esi
12673; AVX2-NEXT: shrl $19, %esi
12674; AVX2-NEXT: andl $64, %esi
12675; AVX2-NEXT: orl %edx, %esi
12676; AVX2-NEXT: movl %eax, %edx
12677; AVX2-NEXT: shrl $21, %edx
12678; AVX2-NEXT: andl $32, %edx
12679; AVX2-NEXT: orl %esi, %edx
12680; AVX2-NEXT: movl %eax, %esi
12681; AVX2-NEXT: shrl $23, %esi
12682; AVX2-NEXT: andl $16, %esi
12683; AVX2-NEXT: orl %edx, %esi
12684; AVX2-NEXT: movl %eax, %edx
12685; AVX2-NEXT: shrl $25, %edx
12686; AVX2-NEXT: andl $8, %edx
12687; AVX2-NEXT: orl %esi, %edx
12688; AVX2-NEXT: movl %eax, %esi
12689; AVX2-NEXT: shrl $27, %esi
12690; AVX2-NEXT: andl $4, %esi
12691; AVX2-NEXT: orl %edx, %esi
12692; AVX2-NEXT: movl %eax, %edx
12693; AVX2-NEXT: shrl $29, %edx
12694; AVX2-NEXT: andl $2, %edx
12695; AVX2-NEXT: orl %esi, %edx
12696; AVX2-NEXT: shrl $31, %eax
12697; AVX2-NEXT: orl %edx, %eax
12698; AVX2-NEXT: orl %ecx, %eax
12699; AVX2-NEXT: vmovd %xmm1, %ecx
12700; AVX2-NEXT: movl %ecx, %edx
12701; AVX2-NEXT: shll $31, %edx
12702; AVX2-NEXT: movl %ecx, %esi
12703; AVX2-NEXT: andl $2, %esi
12704; AVX2-NEXT: shll $29, %esi
12705; AVX2-NEXT: orl %edx, %esi
12706; AVX2-NEXT: movl %ecx, %edx
12707; AVX2-NEXT: andl $4, %edx
12708; AVX2-NEXT: shll $27, %edx
12709; AVX2-NEXT: orl %esi, %edx
12710; AVX2-NEXT: movl %ecx, %esi
12711; AVX2-NEXT: andl $8, %esi
12712; AVX2-NEXT: shll $25, %esi
12713; AVX2-NEXT: orl %edx, %esi
12714; AVX2-NEXT: movl %ecx, %edi
12715; AVX2-NEXT: andl $16, %edi
12716; AVX2-NEXT: shll $23, %edi
12717; AVX2-NEXT: orl %esi, %edi
12718; AVX2-NEXT: movl %ecx, %edx
12719; AVX2-NEXT: andl $32, %edx
12720; AVX2-NEXT: shll $21, %edx
12721; AVX2-NEXT: orl %edi, %edx
12722; AVX2-NEXT: movl %ecx, %esi
12723; AVX2-NEXT: andl $64, %esi
12724; AVX2-NEXT: shll $19, %esi
12725; AVX2-NEXT: movl %ecx, %edi
12726; AVX2-NEXT: shll $17, %edi
12727; AVX2-NEXT: andl $16777216, %edi # imm = 0x1000000
12728; AVX2-NEXT: orl %esi, %edi
12729; AVX2-NEXT: movl %ecx, %esi
12730; AVX2-NEXT: shll $15, %esi
12731; AVX2-NEXT: andl $8388608, %esi # imm = 0x800000
12732; AVX2-NEXT: orl %edi, %esi
12733; AVX2-NEXT: movl %ecx, %edi
12734; AVX2-NEXT: shll $13, %edi
12735; AVX2-NEXT: andl $4194304, %edi # imm = 0x400000
12736; AVX2-NEXT: orl %esi, %edi
12737; AVX2-NEXT: movl %ecx, %esi
12738; AVX2-NEXT: shll $11, %esi
12739; AVX2-NEXT: andl $2097152, %esi # imm = 0x200000
12740; AVX2-NEXT: orl %edi, %esi
12741; AVX2-NEXT: movl %ecx, %edi
12742; AVX2-NEXT: shll $9, %edi
12743; AVX2-NEXT: andl $1048576, %edi # imm = 0x100000
12744; AVX2-NEXT: orl %esi, %edi
12745; AVX2-NEXT: movl %ecx, %esi
12746; AVX2-NEXT: shll $7, %esi
12747; AVX2-NEXT: andl $524288, %esi # imm = 0x80000
12748; AVX2-NEXT: orl %edi, %esi
12749; AVX2-NEXT: movl %ecx, %edi
12750; AVX2-NEXT: shll $5, %edi
12751; AVX2-NEXT: andl $262144, %edi # imm = 0x40000
12752; AVX2-NEXT: orl %esi, %edi
12753; AVX2-NEXT: leal (,%rcx,8), %esi
12754; AVX2-NEXT: andl $131072, %esi # imm = 0x20000
12755; AVX2-NEXT: orl %edi, %esi
12756; AVX2-NEXT: leal (%rcx,%rcx), %edi
12757; AVX2-NEXT: andl $65536, %edi # imm = 0x10000
12758; AVX2-NEXT: orl %esi, %edi
12759; AVX2-NEXT: movl %ecx, %esi
12760; AVX2-NEXT: shrl %esi
12761; AVX2-NEXT: andl $32768, %esi # imm = 0x8000
12762; AVX2-NEXT: orl %edi, %esi
12763; AVX2-NEXT: movl %ecx, %edi
12764; AVX2-NEXT: shrl $3, %edi
12765; AVX2-NEXT: andl $16384, %edi # imm = 0x4000
12766; AVX2-NEXT: orl %esi, %edi
12767; AVX2-NEXT: movl %ecx, %esi
12768; AVX2-NEXT: shrl $5, %esi
12769; AVX2-NEXT: andl $8192, %esi # imm = 0x2000
12770; AVX2-NEXT: orl %edi, %esi
12771; AVX2-NEXT: movl %ecx, %edi
12772; AVX2-NEXT: shrl $7, %edi
12773; AVX2-NEXT: andl $4096, %edi # imm = 0x1000
12774; AVX2-NEXT: orl %esi, %edi
12775; AVX2-NEXT: movl %ecx, %esi
12776; AVX2-NEXT: shrl $9, %esi
12777; AVX2-NEXT: andl $2048, %esi # imm = 0x800
12778; AVX2-NEXT: orl %edi, %esi
12779; AVX2-NEXT: movl %ecx, %edi
12780; AVX2-NEXT: shrl $11, %edi
12781; AVX2-NEXT: andl $1024, %edi # imm = 0x400
12782; AVX2-NEXT: orl %esi, %edi
12783; AVX2-NEXT: movl %ecx, %esi
12784; AVX2-NEXT: shrl $13, %esi
12785; AVX2-NEXT: andl $512, %esi # imm = 0x200
12786; AVX2-NEXT: orl %edi, %esi
12787; AVX2-NEXT: movl %ecx, %edi
12788; AVX2-NEXT: shrl $15, %edi
12789; AVX2-NEXT: andl $256, %edi # imm = 0x100
12790; AVX2-NEXT: orl %esi, %edi
12791; AVX2-NEXT: movl %ecx, %esi
12792; AVX2-NEXT: shrl $17, %esi
12793; AVX2-NEXT: andl $128, %esi
12794; AVX2-NEXT: orl %edi, %esi
12795; AVX2-NEXT: movl %ecx, %edi
12796; AVX2-NEXT: shrl $19, %edi
12797; AVX2-NEXT: andl $64, %edi
12798; AVX2-NEXT: orl %esi, %edi
12799; AVX2-NEXT: movl %ecx, %esi
12800; AVX2-NEXT: shrl $21, %esi
12801; AVX2-NEXT: andl $32, %esi
12802; AVX2-NEXT: orl %edi, %esi
12803; AVX2-NEXT: movl %ecx, %edi
12804; AVX2-NEXT: shrl $23, %edi
12805; AVX2-NEXT: andl $16, %edi
12806; AVX2-NEXT: orl %esi, %edi
12807; AVX2-NEXT: movl %ecx, %esi
12808; AVX2-NEXT: shrl $25, %esi
12809; AVX2-NEXT: andl $8, %esi
12810; AVX2-NEXT: orl %edi, %esi
12811; AVX2-NEXT: movl %ecx, %edi
12812; AVX2-NEXT: shrl $27, %edi
12813; AVX2-NEXT: andl $4, %edi
12814; AVX2-NEXT: orl %esi, %edi
12815; AVX2-NEXT: movl %ecx, %esi
12816; AVX2-NEXT: shrl $29, %esi
12817; AVX2-NEXT: andl $2, %esi
12818; AVX2-NEXT: orl %edi, %esi
12819; AVX2-NEXT: shrl $31, %ecx
12820; AVX2-NEXT: orl %esi, %ecx
12821; AVX2-NEXT: orl %edx, %ecx
12822; AVX2-NEXT: vmovd %ecx, %xmm2
12823; AVX2-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2
12824; AVX2-NEXT: vpextrd $2, %xmm1, %eax
12825; AVX2-NEXT: movl %eax, %ecx
12826; AVX2-NEXT: shll $31, %ecx
12827; AVX2-NEXT: movl %eax, %edx
12828; AVX2-NEXT: andl $2, %edx
12829; AVX2-NEXT: shll $29, %edx
12830; AVX2-NEXT: orl %ecx, %edx
12831; AVX2-NEXT: movl %eax, %ecx
12832; AVX2-NEXT: andl $4, %ecx
12833; AVX2-NEXT: shll $27, %ecx
12834; AVX2-NEXT: orl %edx, %ecx
12835; AVX2-NEXT: movl %eax, %edx
12836; AVX2-NEXT: andl $8, %edx
12837; AVX2-NEXT: shll $25, %edx
12838; AVX2-NEXT: orl %ecx, %edx
12839; AVX2-NEXT: movl %eax, %esi
12840; AVX2-NEXT: andl $16, %esi
12841; AVX2-NEXT: shll $23, %esi
12842; AVX2-NEXT: orl %edx, %esi
12843; AVX2-NEXT: movl %eax, %ecx
12844; AVX2-NEXT: andl $32, %ecx
12845; AVX2-NEXT: shll $21, %ecx
12846; AVX2-NEXT: orl %esi, %ecx
12847; AVX2-NEXT: movl %eax, %edx
12848; AVX2-NEXT: andl $64, %edx
12849; AVX2-NEXT: shll $19, %edx
12850; AVX2-NEXT: movl %eax, %esi
12851; AVX2-NEXT: shll $17, %esi
12852; AVX2-NEXT: andl $16777216, %esi # imm = 0x1000000
12853; AVX2-NEXT: orl %edx, %esi
12854; AVX2-NEXT: movl %eax, %edx
12855; AVX2-NEXT: shll $15, %edx
12856; AVX2-NEXT: andl $8388608, %edx # imm = 0x800000
12857; AVX2-NEXT: orl %esi, %edx
12858; AVX2-NEXT: movl %eax, %esi
12859; AVX2-NEXT: shll $13, %esi
12860; AVX2-NEXT: andl $4194304, %esi # imm = 0x400000
12861; AVX2-NEXT: orl %edx, %esi
12862; AVX2-NEXT: movl %eax, %edx
12863; AVX2-NEXT: shll $11, %edx
12864; AVX2-NEXT: andl $2097152, %edx # imm = 0x200000
12865; AVX2-NEXT: orl %esi, %edx
12866; AVX2-NEXT: movl %eax, %esi
12867; AVX2-NEXT: shll $9, %esi
12868; AVX2-NEXT: andl $1048576, %esi # imm = 0x100000
12869; AVX2-NEXT: orl %edx, %esi
12870; AVX2-NEXT: movl %eax, %edx
12871; AVX2-NEXT: shll $7, %edx
12872; AVX2-NEXT: andl $524288, %edx # imm = 0x80000
12873; AVX2-NEXT: orl %esi, %edx
12874; AVX2-NEXT: movl %eax, %esi
12875; AVX2-NEXT: shll $5, %esi
12876; AVX2-NEXT: andl $262144, %esi # imm = 0x40000
12877; AVX2-NEXT: orl %edx, %esi
12878; AVX2-NEXT: leal (,%rax,8), %edx
12879; AVX2-NEXT: andl $131072, %edx # imm = 0x20000
12880; AVX2-NEXT: orl %esi, %edx
12881; AVX2-NEXT: leal (%rax,%rax), %esi
12882; AVX2-NEXT: andl $65536, %esi # imm = 0x10000
12883; AVX2-NEXT: orl %edx, %esi
12884; AVX2-NEXT: movl %eax, %edx
12885; AVX2-NEXT: shrl %edx
12886; AVX2-NEXT: andl $32768, %edx # imm = 0x8000
12887; AVX2-NEXT: orl %esi, %edx
12888; AVX2-NEXT: movl %eax, %esi
12889; AVX2-NEXT: shrl $3, %esi
12890; AVX2-NEXT: andl $16384, %esi # imm = 0x4000
12891; AVX2-NEXT: orl %edx, %esi
12892; AVX2-NEXT: movl %eax, %edx
12893; AVX2-NEXT: shrl $5, %edx
12894; AVX2-NEXT: andl $8192, %edx # imm = 0x2000
12895; AVX2-NEXT: orl %esi, %edx
12896; AVX2-NEXT: movl %eax, %esi
12897; AVX2-NEXT: shrl $7, %esi
12898; AVX2-NEXT: andl $4096, %esi # imm = 0x1000
12899; AVX2-NEXT: orl %edx, %esi
12900; AVX2-NEXT: movl %eax, %edx
12901; AVX2-NEXT: shrl $9, %edx
12902; AVX2-NEXT: andl $2048, %edx # imm = 0x800
12903; AVX2-NEXT: orl %esi, %edx
12904; AVX2-NEXT: movl %eax, %esi
12905; AVX2-NEXT: shrl $11, %esi
12906; AVX2-NEXT: andl $1024, %esi # imm = 0x400
12907; AVX2-NEXT: orl %edx, %esi
12908; AVX2-NEXT: movl %eax, %edx
12909; AVX2-NEXT: shrl $13, %edx
12910; AVX2-NEXT: andl $512, %edx # imm = 0x200
12911; AVX2-NEXT: orl %esi, %edx
12912; AVX2-NEXT: movl %eax, %esi
12913; AVX2-NEXT: shrl $15, %esi
12914; AVX2-NEXT: andl $256, %esi # imm = 0x100
12915; AVX2-NEXT: orl %edx, %esi
12916; AVX2-NEXT: movl %eax, %edx
12917; AVX2-NEXT: shrl $17, %edx
12918; AVX2-NEXT: andl $128, %edx
12919; AVX2-NEXT: orl %esi, %edx
12920; AVX2-NEXT: movl %eax, %esi
12921; AVX2-NEXT: shrl $19, %esi
12922; AVX2-NEXT: andl $64, %esi
12923; AVX2-NEXT: orl %edx, %esi
12924; AVX2-NEXT: movl %eax, %edx
12925; AVX2-NEXT: shrl $21, %edx
12926; AVX2-NEXT: andl $32, %edx
12927; AVX2-NEXT: orl %esi, %edx
12928; AVX2-NEXT: movl %eax, %esi
12929; AVX2-NEXT: shrl $23, %esi
12930; AVX2-NEXT: andl $16, %esi
12931; AVX2-NEXT: orl %edx, %esi
12932; AVX2-NEXT: movl %eax, %edx
12933; AVX2-NEXT: shrl $25, %edx
12934; AVX2-NEXT: andl $8, %edx
12935; AVX2-NEXT: orl %esi, %edx
12936; AVX2-NEXT: movl %eax, %esi
12937; AVX2-NEXT: shrl $27, %esi
12938; AVX2-NEXT: andl $4, %esi
12939; AVX2-NEXT: orl %edx, %esi
12940; AVX2-NEXT: movl %eax, %edx
12941; AVX2-NEXT: shrl $29, %edx
12942; AVX2-NEXT: andl $2, %edx
12943; AVX2-NEXT: orl %esi, %edx
12944; AVX2-NEXT: shrl $31, %eax
12945; AVX2-NEXT: orl %edx, %eax
12946; AVX2-NEXT: orl %ecx, %eax
12947; AVX2-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
12948; AVX2-NEXT: vpextrd $3, %xmm1, %eax
12949; AVX2-NEXT: movl %eax, %ecx
12950; AVX2-NEXT: shll $31, %ecx
12951; AVX2-NEXT: movl %eax, %edx
12952; AVX2-NEXT: andl $2, %edx
12953; AVX2-NEXT: shll $29, %edx
12954; AVX2-NEXT: orl %ecx, %edx
12955; AVX2-NEXT: movl %eax, %ecx
12956; AVX2-NEXT: andl $4, %ecx
12957; AVX2-NEXT: shll $27, %ecx
12958; AVX2-NEXT: orl %edx, %ecx
12959; AVX2-NEXT: movl %eax, %edx
12960; AVX2-NEXT: andl $8, %edx
12961; AVX2-NEXT: shll $25, %edx
12962; AVX2-NEXT: orl %ecx, %edx
12963; AVX2-NEXT: movl %eax, %esi
12964; AVX2-NEXT: andl $16, %esi
12965; AVX2-NEXT: shll $23, %esi
12966; AVX2-NEXT: orl %edx, %esi
12967; AVX2-NEXT: movl %eax, %ecx
12968; AVX2-NEXT: andl $32, %ecx
12969; AVX2-NEXT: shll $21, %ecx
12970; AVX2-NEXT: orl %esi, %ecx
12971; AVX2-NEXT: movl %eax, %edx
12972; AVX2-NEXT: andl $64, %edx
12973; AVX2-NEXT: shll $19, %edx
12974; AVX2-NEXT: movl %eax, %esi
12975; AVX2-NEXT: shll $17, %esi
12976; AVX2-NEXT: andl $16777216, %esi # imm = 0x1000000
12977; AVX2-NEXT: orl %edx, %esi
12978; AVX2-NEXT: movl %eax, %edx
12979; AVX2-NEXT: shll $15, %edx
12980; AVX2-NEXT: andl $8388608, %edx # imm = 0x800000
12981; AVX2-NEXT: orl %esi, %edx
12982; AVX2-NEXT: movl %eax, %esi
12983; AVX2-NEXT: shll $13, %esi
12984; AVX2-NEXT: andl $4194304, %esi # imm = 0x400000
12985; AVX2-NEXT: orl %edx, %esi
12986; AVX2-NEXT: movl %eax, %edx
12987; AVX2-NEXT: shll $11, %edx
12988; AVX2-NEXT: andl $2097152, %edx # imm = 0x200000
12989; AVX2-NEXT: orl %esi, %edx
12990; AVX2-NEXT: movl %eax, %esi
12991; AVX2-NEXT: shll $9, %esi
12992; AVX2-NEXT: andl $1048576, %esi # imm = 0x100000
12993; AVX2-NEXT: orl %edx, %esi
12994; AVX2-NEXT: movl %eax, %edx
12995; AVX2-NEXT: shll $7, %edx
12996; AVX2-NEXT: andl $524288, %edx # imm = 0x80000
12997; AVX2-NEXT: orl %esi, %edx
12998; AVX2-NEXT: movl %eax, %esi
12999; AVX2-NEXT: shll $5, %esi
13000; AVX2-NEXT: andl $262144, %esi # imm = 0x40000
13001; AVX2-NEXT: orl %edx, %esi
13002; AVX2-NEXT: leal (,%rax,8), %edx
13003; AVX2-NEXT: andl $131072, %edx # imm = 0x20000
13004; AVX2-NEXT: orl %esi, %edx
13005; AVX2-NEXT: leal (%rax,%rax), %esi
13006; AVX2-NEXT: andl $65536, %esi # imm = 0x10000
13007; AVX2-NEXT: orl %edx, %esi
13008; AVX2-NEXT: movl %eax, %edx
13009; AVX2-NEXT: shrl %edx
13010; AVX2-NEXT: andl $32768, %edx # imm = 0x8000
13011; AVX2-NEXT: orl %esi, %edx
13012; AVX2-NEXT: movl %eax, %esi
13013; AVX2-NEXT: shrl $3, %esi
13014; AVX2-NEXT: andl $16384, %esi # imm = 0x4000
13015; AVX2-NEXT: orl %edx, %esi
13016; AVX2-NEXT: movl %eax, %edx
13017; AVX2-NEXT: shrl $5, %edx
13018; AVX2-NEXT: andl $8192, %edx # imm = 0x2000
13019; AVX2-NEXT: orl %esi, %edx
13020; AVX2-NEXT: movl %eax, %esi
13021; AVX2-NEXT: shrl $7, %esi
13022; AVX2-NEXT: andl $4096, %esi # imm = 0x1000
13023; AVX2-NEXT: orl %edx, %esi
13024; AVX2-NEXT: movl %eax, %edx
13025; AVX2-NEXT: shrl $9, %edx
13026; AVX2-NEXT: andl $2048, %edx # imm = 0x800
13027; AVX2-NEXT: orl %esi, %edx
13028; AVX2-NEXT: movl %eax, %esi
13029; AVX2-NEXT: shrl $11, %esi
13030; AVX2-NEXT: andl $1024, %esi # imm = 0x400
13031; AVX2-NEXT: orl %edx, %esi
13032; AVX2-NEXT: movl %eax, %edx
13033; AVX2-NEXT: shrl $13, %edx
13034; AVX2-NEXT: andl $512, %edx # imm = 0x200
13035; AVX2-NEXT: orl %esi, %edx
13036; AVX2-NEXT: movl %eax, %esi
13037; AVX2-NEXT: shrl $15, %esi
13038; AVX2-NEXT: andl $256, %esi # imm = 0x100
13039; AVX2-NEXT: orl %edx, %esi
13040; AVX2-NEXT: movl %eax, %edx
13041; AVX2-NEXT: shrl $17, %edx
13042; AVX2-NEXT: andl $128, %edx
13043; AVX2-NEXT: orl %esi, %edx
13044; AVX2-NEXT: movl %eax, %esi
13045; AVX2-NEXT: shrl $19, %esi
13046; AVX2-NEXT: andl $64, %esi
13047; AVX2-NEXT: orl %edx, %esi
13048; AVX2-NEXT: movl %eax, %edx
13049; AVX2-NEXT: shrl $21, %edx
13050; AVX2-NEXT: andl $32, %edx
13051; AVX2-NEXT: orl %esi, %edx
13052; AVX2-NEXT: movl %eax, %esi
13053; AVX2-NEXT: shrl $23, %esi
13054; AVX2-NEXT: andl $16, %esi
13055; AVX2-NEXT: orl %edx, %esi
13056; AVX2-NEXT: movl %eax, %edx
13057; AVX2-NEXT: shrl $25, %edx
13058; AVX2-NEXT: andl $8, %edx
13059; AVX2-NEXT: orl %esi, %edx
13060; AVX2-NEXT: movl %eax, %esi
13061; AVX2-NEXT: shrl $27, %esi
13062; AVX2-NEXT: andl $4, %esi
13063; AVX2-NEXT: orl %edx, %esi
13064; AVX2-NEXT: movl %eax, %edx
13065; AVX2-NEXT: shrl $29, %edx
13066; AVX2-NEXT: andl $2, %edx
13067; AVX2-NEXT: orl %esi, %edx
13068; AVX2-NEXT: shrl $31, %eax
13069; AVX2-NEXT: orl %edx, %eax
13070; AVX2-NEXT: orl %ecx, %eax
13071; AVX2-NEXT: vpinsrd $3, %eax, %xmm2, %xmm1
13072; AVX2-NEXT: vpextrd $1, %xmm0, %eax
13073; AVX2-NEXT: movl %eax, %ecx
13074; AVX2-NEXT: shll $31, %ecx
13075; AVX2-NEXT: movl %eax, %edx
13076; AVX2-NEXT: andl $2, %edx
13077; AVX2-NEXT: shll $29, %edx
13078; AVX2-NEXT: orl %ecx, %edx
13079; AVX2-NEXT: movl %eax, %ecx
13080; AVX2-NEXT: andl $4, %ecx
13081; AVX2-NEXT: shll $27, %ecx
13082; AVX2-NEXT: orl %edx, %ecx
13083; AVX2-NEXT: movl %eax, %edx
13084; AVX2-NEXT: andl $8, %edx
13085; AVX2-NEXT: shll $25, %edx
13086; AVX2-NEXT: orl %ecx, %edx
13087; AVX2-NEXT: movl %eax, %esi
13088; AVX2-NEXT: andl $16, %esi
13089; AVX2-NEXT: shll $23, %esi
13090; AVX2-NEXT: orl %edx, %esi
13091; AVX2-NEXT: movl %eax, %ecx
13092; AVX2-NEXT: andl $32, %ecx
13093; AVX2-NEXT: shll $21, %ecx
13094; AVX2-NEXT: orl %esi, %ecx
13095; AVX2-NEXT: movl %eax, %edx
13096; AVX2-NEXT: andl $64, %edx
13097; AVX2-NEXT: shll $19, %edx
13098; AVX2-NEXT: movl %eax, %esi
13099; AVX2-NEXT: shll $17, %esi
13100; AVX2-NEXT: andl $16777216, %esi # imm = 0x1000000
13101; AVX2-NEXT: orl %edx, %esi
13102; AVX2-NEXT: movl %eax, %edx
13103; AVX2-NEXT: shll $15, %edx
13104; AVX2-NEXT: andl $8388608, %edx # imm = 0x800000
13105; AVX2-NEXT: orl %esi, %edx
13106; AVX2-NEXT: movl %eax, %esi
13107; AVX2-NEXT: shll $13, %esi
13108; AVX2-NEXT: andl $4194304, %esi # imm = 0x400000
13109; AVX2-NEXT: orl %edx, %esi
13110; AVX2-NEXT: movl %eax, %edx
13111; AVX2-NEXT: shll $11, %edx
13112; AVX2-NEXT: andl $2097152, %edx # imm = 0x200000
13113; AVX2-NEXT: orl %esi, %edx
13114; AVX2-NEXT: movl %eax, %esi
13115; AVX2-NEXT: shll $9, %esi
13116; AVX2-NEXT: andl $1048576, %esi # imm = 0x100000
13117; AVX2-NEXT: orl %edx, %esi
13118; AVX2-NEXT: movl %eax, %edx
13119; AVX2-NEXT: shll $7, %edx
13120; AVX2-NEXT: andl $524288, %edx # imm = 0x80000
13121; AVX2-NEXT: orl %esi, %edx
13122; AVX2-NEXT: movl %eax, %esi
13123; AVX2-NEXT: shll $5, %esi
13124; AVX2-NEXT: andl $262144, %esi # imm = 0x40000
13125; AVX2-NEXT: orl %edx, %esi
13126; AVX2-NEXT: leal (,%rax,8), %edx
13127; AVX2-NEXT: andl $131072, %edx # imm = 0x20000
13128; AVX2-NEXT: orl %esi, %edx
13129; AVX2-NEXT: leal (%rax,%rax), %esi
13130; AVX2-NEXT: andl $65536, %esi # imm = 0x10000
13131; AVX2-NEXT: orl %edx, %esi
13132; AVX2-NEXT: movl %eax, %edx
13133; AVX2-NEXT: shrl %edx
13134; AVX2-NEXT: andl $32768, %edx # imm = 0x8000
13135; AVX2-NEXT: orl %esi, %edx
13136; AVX2-NEXT: movl %eax, %esi
13137; AVX2-NEXT: shrl $3, %esi
13138; AVX2-NEXT: andl $16384, %esi # imm = 0x4000
13139; AVX2-NEXT: orl %edx, %esi
13140; AVX2-NEXT: movl %eax, %edx
13141; AVX2-NEXT: shrl $5, %edx
13142; AVX2-NEXT: andl $8192, %edx # imm = 0x2000
13143; AVX2-NEXT: orl %esi, %edx
13144; AVX2-NEXT: movl %eax, %esi
13145; AVX2-NEXT: shrl $7, %esi
13146; AVX2-NEXT: andl $4096, %esi # imm = 0x1000
13147; AVX2-NEXT: orl %edx, %esi
13148; AVX2-NEXT: movl %eax, %edx
13149; AVX2-NEXT: shrl $9, %edx
13150; AVX2-NEXT: andl $2048, %edx # imm = 0x800
13151; AVX2-NEXT: orl %esi, %edx
13152; AVX2-NEXT: movl %eax, %esi
13153; AVX2-NEXT: shrl $11, %esi
13154; AVX2-NEXT: andl $1024, %esi # imm = 0x400
13155; AVX2-NEXT: orl %edx, %esi
13156; AVX2-NEXT: movl %eax, %edx
13157; AVX2-NEXT: shrl $13, %edx
13158; AVX2-NEXT: andl $512, %edx # imm = 0x200
13159; AVX2-NEXT: orl %esi, %edx
13160; AVX2-NEXT: movl %eax, %esi
13161; AVX2-NEXT: shrl $15, %esi
13162; AVX2-NEXT: andl $256, %esi # imm = 0x100
13163; AVX2-NEXT: orl %edx, %esi
13164; AVX2-NEXT: movl %eax, %edx
13165; AVX2-NEXT: shrl $17, %edx
13166; AVX2-NEXT: andl $128, %edx
13167; AVX2-NEXT: orl %esi, %edx
13168; AVX2-NEXT: movl %eax, %esi
13169; AVX2-NEXT: shrl $19, %esi
13170; AVX2-NEXT: andl $64, %esi
13171; AVX2-NEXT: orl %edx, %esi
13172; AVX2-NEXT: movl %eax, %edx
13173; AVX2-NEXT: shrl $21, %edx
13174; AVX2-NEXT: andl $32, %edx
13175; AVX2-NEXT: orl %esi, %edx
13176; AVX2-NEXT: movl %eax, %esi
13177; AVX2-NEXT: shrl $23, %esi
13178; AVX2-NEXT: andl $16, %esi
13179; AVX2-NEXT: orl %edx, %esi
13180; AVX2-NEXT: movl %eax, %edx
13181; AVX2-NEXT: shrl $25, %edx
13182; AVX2-NEXT: andl $8, %edx
13183; AVX2-NEXT: orl %esi, %edx
13184; AVX2-NEXT: movl %eax, %esi
13185; AVX2-NEXT: shrl $27, %esi
13186; AVX2-NEXT: andl $4, %esi
13187; AVX2-NEXT: orl %edx, %esi
13188; AVX2-NEXT: movl %eax, %edx
13189; AVX2-NEXT: shrl $29, %edx
13190; AVX2-NEXT: andl $2, %edx
13191; AVX2-NEXT: orl %esi, %edx
13192; AVX2-NEXT: shrl $31, %eax
13193; AVX2-NEXT: orl %edx, %eax
13194; AVX2-NEXT: orl %ecx, %eax
13195; AVX2-NEXT: vmovd %xmm0, %ecx
13196; AVX2-NEXT: movl %ecx, %edx
13197; AVX2-NEXT: shll $31, %edx
13198; AVX2-NEXT: movl %ecx, %esi
13199; AVX2-NEXT: andl $2, %esi
13200; AVX2-NEXT: shll $29, %esi
13201; AVX2-NEXT: orl %edx, %esi
13202; AVX2-NEXT: movl %ecx, %edx
13203; AVX2-NEXT: andl $4, %edx
13204; AVX2-NEXT: shll $27, %edx
13205; AVX2-NEXT: orl %esi, %edx
13206; AVX2-NEXT: movl %ecx, %esi
13207; AVX2-NEXT: andl $8, %esi
13208; AVX2-NEXT: shll $25, %esi
13209; AVX2-NEXT: orl %edx, %esi
13210; AVX2-NEXT: movl %ecx, %edi
13211; AVX2-NEXT: andl $16, %edi
13212; AVX2-NEXT: shll $23, %edi
13213; AVX2-NEXT: orl %esi, %edi
13214; AVX2-NEXT: movl %ecx, %edx
13215; AVX2-NEXT: andl $32, %edx
13216; AVX2-NEXT: shll $21, %edx
13217; AVX2-NEXT: orl %edi, %edx
13218; AVX2-NEXT: movl %ecx, %esi
13219; AVX2-NEXT: andl $64, %esi
13220; AVX2-NEXT: shll $19, %esi
13221; AVX2-NEXT: movl %ecx, %edi
13222; AVX2-NEXT: shll $17, %edi
13223; AVX2-NEXT: andl $16777216, %edi # imm = 0x1000000
13224; AVX2-NEXT: orl %esi, %edi
13225; AVX2-NEXT: movl %ecx, %esi
13226; AVX2-NEXT: shll $15, %esi
13227; AVX2-NEXT: andl $8388608, %esi # imm = 0x800000
13228; AVX2-NEXT: orl %edi, %esi
13229; AVX2-NEXT: movl %ecx, %edi
13230; AVX2-NEXT: shll $13, %edi
13231; AVX2-NEXT: andl $4194304, %edi # imm = 0x400000
13232; AVX2-NEXT: orl %esi, %edi
13233; AVX2-NEXT: movl %ecx, %esi
13234; AVX2-NEXT: shll $11, %esi
13235; AVX2-NEXT: andl $2097152, %esi # imm = 0x200000
13236; AVX2-NEXT: orl %edi, %esi
13237; AVX2-NEXT: movl %ecx, %edi
13238; AVX2-NEXT: shll $9, %edi
13239; AVX2-NEXT: andl $1048576, %edi # imm = 0x100000
13240; AVX2-NEXT: orl %esi, %edi
13241; AVX2-NEXT: movl %ecx, %esi
13242; AVX2-NEXT: shll $7, %esi
13243; AVX2-NEXT: andl $524288, %esi # imm = 0x80000
13244; AVX2-NEXT: orl %edi, %esi
13245; AVX2-NEXT: movl %ecx, %edi
13246; AVX2-NEXT: shll $5, %edi
13247; AVX2-NEXT: andl $262144, %edi # imm = 0x40000
13248; AVX2-NEXT: orl %esi, %edi
13249; AVX2-NEXT: leal (,%rcx,8), %esi
13250; AVX2-NEXT: andl $131072, %esi # imm = 0x20000
13251; AVX2-NEXT: orl %edi, %esi
13252; AVX2-NEXT: leal (%rcx,%rcx), %edi
13253; AVX2-NEXT: andl $65536, %edi # imm = 0x10000
13254; AVX2-NEXT: orl %esi, %edi
13255; AVX2-NEXT: movl %ecx, %esi
13256; AVX2-NEXT: shrl %esi
13257; AVX2-NEXT: andl $32768, %esi # imm = 0x8000
13258; AVX2-NEXT: orl %edi, %esi
13259; AVX2-NEXT: movl %ecx, %edi
13260; AVX2-NEXT: shrl $3, %edi
13261; AVX2-NEXT: andl $16384, %edi # imm = 0x4000
13262; AVX2-NEXT: orl %esi, %edi
13263; AVX2-NEXT: movl %ecx, %esi
13264; AVX2-NEXT: shrl $5, %esi
13265; AVX2-NEXT: andl $8192, %esi # imm = 0x2000
13266; AVX2-NEXT: orl %edi, %esi
13267; AVX2-NEXT: movl %ecx, %edi
13268; AVX2-NEXT: shrl $7, %edi
13269; AVX2-NEXT: andl $4096, %edi # imm = 0x1000
13270; AVX2-NEXT: orl %esi, %edi
13271; AVX2-NEXT: movl %ecx, %esi
13272; AVX2-NEXT: shrl $9, %esi
13273; AVX2-NEXT: andl $2048, %esi # imm = 0x800
13274; AVX2-NEXT: orl %edi, %esi
13275; AVX2-NEXT: movl %ecx, %edi
13276; AVX2-NEXT: shrl $11, %edi
13277; AVX2-NEXT: andl $1024, %edi # imm = 0x400
13278; AVX2-NEXT: orl %esi, %edi
13279; AVX2-NEXT: movl %ecx, %esi
13280; AVX2-NEXT: shrl $13, %esi
13281; AVX2-NEXT: andl $512, %esi # imm = 0x200
13282; AVX2-NEXT: orl %edi, %esi
13283; AVX2-NEXT: movl %ecx, %edi
13284; AVX2-NEXT: shrl $15, %edi
13285; AVX2-NEXT: andl $256, %edi # imm = 0x100
13286; AVX2-NEXT: orl %esi, %edi
13287; AVX2-NEXT: movl %ecx, %esi
13288; AVX2-NEXT: shrl $17, %esi
13289; AVX2-NEXT: andl $128, %esi
13290; AVX2-NEXT: orl %edi, %esi
13291; AVX2-NEXT: movl %ecx, %edi
13292; AVX2-NEXT: shrl $19, %edi
13293; AVX2-NEXT: andl $64, %edi
13294; AVX2-NEXT: orl %esi, %edi
13295; AVX2-NEXT: movl %ecx, %esi
13296; AVX2-NEXT: shrl $21, %esi
13297; AVX2-NEXT: andl $32, %esi
13298; AVX2-NEXT: orl %edi, %esi
13299; AVX2-NEXT: movl %ecx, %edi
13300; AVX2-NEXT: shrl $23, %edi
13301; AVX2-NEXT: andl $16, %edi
13302; AVX2-NEXT: orl %esi, %edi
13303; AVX2-NEXT: movl %ecx, %esi
13304; AVX2-NEXT: shrl $25, %esi
13305; AVX2-NEXT: andl $8, %esi
13306; AVX2-NEXT: orl %edi, %esi
13307; AVX2-NEXT: movl %ecx, %edi
13308; AVX2-NEXT: shrl $27, %edi
13309; AVX2-NEXT: andl $4, %edi
13310; AVX2-NEXT: orl %esi, %edi
13311; AVX2-NEXT: movl %ecx, %esi
13312; AVX2-NEXT: shrl $29, %esi
13313; AVX2-NEXT: andl $2, %esi
13314; AVX2-NEXT: orl %edi, %esi
13315; AVX2-NEXT: shrl $31, %ecx
13316; AVX2-NEXT: orl %esi, %ecx
13317; AVX2-NEXT: orl %edx, %ecx
13318; AVX2-NEXT: vmovd %ecx, %xmm2
13319; AVX2-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2
13320; AVX2-NEXT: vpextrd $2, %xmm0, %eax
13321; AVX2-NEXT: movl %eax, %ecx
13322; AVX2-NEXT: shll $31, %ecx
13323; AVX2-NEXT: movl %eax, %edx
13324; AVX2-NEXT: andl $2, %edx
13325; AVX2-NEXT: shll $29, %edx
13326; AVX2-NEXT: orl %ecx, %edx
13327; AVX2-NEXT: movl %eax, %ecx
13328; AVX2-NEXT: andl $4, %ecx
13329; AVX2-NEXT: shll $27, %ecx
13330; AVX2-NEXT: orl %edx, %ecx
13331; AVX2-NEXT: movl %eax, %edx
13332; AVX2-NEXT: andl $8, %edx
13333; AVX2-NEXT: shll $25, %edx
13334; AVX2-NEXT: orl %ecx, %edx
13335; AVX2-NEXT: movl %eax, %esi
13336; AVX2-NEXT: andl $16, %esi
13337; AVX2-NEXT: shll $23, %esi
13338; AVX2-NEXT: orl %edx, %esi
13339; AVX2-NEXT: movl %eax, %ecx
13340; AVX2-NEXT: andl $32, %ecx
13341; AVX2-NEXT: shll $21, %ecx
13342; AVX2-NEXT: orl %esi, %ecx
13343; AVX2-NEXT: movl %eax, %edx
13344; AVX2-NEXT: andl $64, %edx
13345; AVX2-NEXT: shll $19, %edx
13346; AVX2-NEXT: movl %eax, %esi
13347; AVX2-NEXT: shll $17, %esi
13348; AVX2-NEXT: andl $16777216, %esi # imm = 0x1000000
13349; AVX2-NEXT: orl %edx, %esi
13350; AVX2-NEXT: movl %eax, %edx
13351; AVX2-NEXT: shll $15, %edx
13352; AVX2-NEXT: andl $8388608, %edx # imm = 0x800000
13353; AVX2-NEXT: orl %esi, %edx
13354; AVX2-NEXT: movl %eax, %esi
13355; AVX2-NEXT: shll $13, %esi
13356; AVX2-NEXT: andl $4194304, %esi # imm = 0x400000
13357; AVX2-NEXT: orl %edx, %esi
13358; AVX2-NEXT: movl %eax, %edx
13359; AVX2-NEXT: shll $11, %edx
13360; AVX2-NEXT: andl $2097152, %edx # imm = 0x200000
13361; AVX2-NEXT: orl %esi, %edx
13362; AVX2-NEXT: movl %eax, %esi
13363; AVX2-NEXT: shll $9, %esi
13364; AVX2-NEXT: andl $1048576, %esi # imm = 0x100000
13365; AVX2-NEXT: orl %edx, %esi
13366; AVX2-NEXT: movl %eax, %edx
13367; AVX2-NEXT: shll $7, %edx
13368; AVX2-NEXT: andl $524288, %edx # imm = 0x80000
13369; AVX2-NEXT: orl %esi, %edx
13370; AVX2-NEXT: movl %eax, %esi
13371; AVX2-NEXT: shll $5, %esi
13372; AVX2-NEXT: andl $262144, %esi # imm = 0x40000
13373; AVX2-NEXT: orl %edx, %esi
13374; AVX2-NEXT: leal (,%rax,8), %edx
13375; AVX2-NEXT: andl $131072, %edx # imm = 0x20000
13376; AVX2-NEXT: orl %esi, %edx
13377; AVX2-NEXT: leal (%rax,%rax), %esi
13378; AVX2-NEXT: andl $65536, %esi # imm = 0x10000
13379; AVX2-NEXT: orl %edx, %esi
13380; AVX2-NEXT: movl %eax, %edx
13381; AVX2-NEXT: shrl %edx
13382; AVX2-NEXT: andl $32768, %edx # imm = 0x8000
13383; AVX2-NEXT: orl %esi, %edx
13384; AVX2-NEXT: movl %eax, %esi
13385; AVX2-NEXT: shrl $3, %esi
13386; AVX2-NEXT: andl $16384, %esi # imm = 0x4000
13387; AVX2-NEXT: orl %edx, %esi
13388; AVX2-NEXT: movl %eax, %edx
13389; AVX2-NEXT: shrl $5, %edx
13390; AVX2-NEXT: andl $8192, %edx # imm = 0x2000
13391; AVX2-NEXT: orl %esi, %edx
13392; AVX2-NEXT: movl %eax, %esi
13393; AVX2-NEXT: shrl $7, %esi
13394; AVX2-NEXT: andl $4096, %esi # imm = 0x1000
13395; AVX2-NEXT: orl %edx, %esi
13396; AVX2-NEXT: movl %eax, %edx
13397; AVX2-NEXT: shrl $9, %edx
13398; AVX2-NEXT: andl $2048, %edx # imm = 0x800
13399; AVX2-NEXT: orl %esi, %edx
13400; AVX2-NEXT: movl %eax, %esi
13401; AVX2-NEXT: shrl $11, %esi
13402; AVX2-NEXT: andl $1024, %esi # imm = 0x400
13403; AVX2-NEXT: orl %edx, %esi
13404; AVX2-NEXT: movl %eax, %edx
13405; AVX2-NEXT: shrl $13, %edx
13406; AVX2-NEXT: andl $512, %edx # imm = 0x200
13407; AVX2-NEXT: orl %esi, %edx
13408; AVX2-NEXT: movl %eax, %esi
13409; AVX2-NEXT: shrl $15, %esi
13410; AVX2-NEXT: andl $256, %esi # imm = 0x100
13411; AVX2-NEXT: orl %edx, %esi
13412; AVX2-NEXT: movl %eax, %edx
13413; AVX2-NEXT: shrl $17, %edx
13414; AVX2-NEXT: andl $128, %edx
13415; AVX2-NEXT: orl %esi, %edx
13416; AVX2-NEXT: movl %eax, %esi
13417; AVX2-NEXT: shrl $19, %esi
13418; AVX2-NEXT: andl $64, %esi
13419; AVX2-NEXT: orl %edx, %esi
13420; AVX2-NEXT: movl %eax, %edx
13421; AVX2-NEXT: shrl $21, %edx
13422; AVX2-NEXT: andl $32, %edx
13423; AVX2-NEXT: orl %esi, %edx
13424; AVX2-NEXT: movl %eax, %esi
13425; AVX2-NEXT: shrl $23, %esi
13426; AVX2-NEXT: andl $16, %esi
13427; AVX2-NEXT: orl %edx, %esi
13428; AVX2-NEXT: movl %eax, %edx
13429; AVX2-NEXT: shrl $25, %edx
13430; AVX2-NEXT: andl $8, %edx
13431; AVX2-NEXT: orl %esi, %edx
13432; AVX2-NEXT: movl %eax, %esi
13433; AVX2-NEXT: shrl $27, %esi
13434; AVX2-NEXT: andl $4, %esi
13435; AVX2-NEXT: orl %edx, %esi
13436; AVX2-NEXT: movl %eax, %edx
13437; AVX2-NEXT: shrl $29, %edx
13438; AVX2-NEXT: andl $2, %edx
13439; AVX2-NEXT: orl %esi, %edx
13440; AVX2-NEXT: shrl $31, %eax
13441; AVX2-NEXT: orl %edx, %eax
13442; AVX2-NEXT: orl %ecx, %eax
13443; AVX2-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
13444; AVX2-NEXT: vpextrd $3, %xmm0, %eax
13445; AVX2-NEXT: movl %eax, %ecx
13446; AVX2-NEXT: shll $31, %ecx
13447; AVX2-NEXT: movl %eax, %edx
13448; AVX2-NEXT: andl $2, %edx
13449; AVX2-NEXT: shll $29, %edx
13450; AVX2-NEXT: orl %ecx, %edx
13451; AVX2-NEXT: movl %eax, %ecx
13452; AVX2-NEXT: andl $4, %ecx
13453; AVX2-NEXT: shll $27, %ecx
13454; AVX2-NEXT: orl %edx, %ecx
13455; AVX2-NEXT: movl %eax, %edx
13456; AVX2-NEXT: andl $8, %edx
13457; AVX2-NEXT: shll $25, %edx
13458; AVX2-NEXT: orl %ecx, %edx
13459; AVX2-NEXT: movl %eax, %esi
13460; AVX2-NEXT: andl $16, %esi
13461; AVX2-NEXT: shll $23, %esi
13462; AVX2-NEXT: orl %edx, %esi
13463; AVX2-NEXT: movl %eax, %ecx
13464; AVX2-NEXT: andl $32, %ecx
13465; AVX2-NEXT: shll $21, %ecx
13466; AVX2-NEXT: orl %esi, %ecx
13467; AVX2-NEXT: movl %eax, %edx
13468; AVX2-NEXT: andl $64, %edx
13469; AVX2-NEXT: shll $19, %edx
13470; AVX2-NEXT: movl %eax, %esi
13471; AVX2-NEXT: shll $17, %esi
13472; AVX2-NEXT: andl $16777216, %esi # imm = 0x1000000
13473; AVX2-NEXT: orl %edx, %esi
13474; AVX2-NEXT: movl %eax, %edx
13475; AVX2-NEXT: shll $15, %edx
13476; AVX2-NEXT: andl $8388608, %edx # imm = 0x800000
13477; AVX2-NEXT: orl %esi, %edx
13478; AVX2-NEXT: movl %eax, %esi
13479; AVX2-NEXT: shll $13, %esi
13480; AVX2-NEXT: andl $4194304, %esi # imm = 0x400000
13481; AVX2-NEXT: orl %edx, %esi
13482; AVX2-NEXT: movl %eax, %edx
13483; AVX2-NEXT: shll $11, %edx
13484; AVX2-NEXT: andl $2097152, %edx # imm = 0x200000
13485; AVX2-NEXT: orl %esi, %edx
13486; AVX2-NEXT: movl %eax, %esi
13487; AVX2-NEXT: shll $9, %esi
13488; AVX2-NEXT: andl $1048576, %esi # imm = 0x100000
13489; AVX2-NEXT: orl %edx, %esi
13490; AVX2-NEXT: movl %eax, %edx
13491; AVX2-NEXT: shll $7, %edx
13492; AVX2-NEXT: andl $524288, %edx # imm = 0x80000
13493; AVX2-NEXT: orl %esi, %edx
13494; AVX2-NEXT: movl %eax, %esi
13495; AVX2-NEXT: shll $5, %esi
13496; AVX2-NEXT: andl $262144, %esi # imm = 0x40000
13497; AVX2-NEXT: orl %edx, %esi
13498; AVX2-NEXT: leal (,%rax,8), %edx
13499; AVX2-NEXT: andl $131072, %edx # imm = 0x20000
13500; AVX2-NEXT: orl %esi, %edx
13501; AVX2-NEXT: leal (%rax,%rax), %esi
13502; AVX2-NEXT: andl $65536, %esi # imm = 0x10000
13503; AVX2-NEXT: orl %edx, %esi
13504; AVX2-NEXT: movl %eax, %edx
13505; AVX2-NEXT: shrl %edx
13506; AVX2-NEXT: andl $32768, %edx # imm = 0x8000
13507; AVX2-NEXT: orl %esi, %edx
13508; AVX2-NEXT: movl %eax, %esi
13509; AVX2-NEXT: shrl $3, %esi
13510; AVX2-NEXT: andl $16384, %esi # imm = 0x4000
13511; AVX2-NEXT: orl %edx, %esi
13512; AVX2-NEXT: movl %eax, %edx
13513; AVX2-NEXT: shrl $5, %edx
13514; AVX2-NEXT: andl $8192, %edx # imm = 0x2000
13515; AVX2-NEXT: orl %esi, %edx
13516; AVX2-NEXT: movl %eax, %esi
13517; AVX2-NEXT: shrl $7, %esi
13518; AVX2-NEXT: andl $4096, %esi # imm = 0x1000
13519; AVX2-NEXT: orl %edx, %esi
13520; AVX2-NEXT: movl %eax, %edx
13521; AVX2-NEXT: shrl $9, %edx
13522; AVX2-NEXT: andl $2048, %edx # imm = 0x800
13523; AVX2-NEXT: orl %esi, %edx
13524; AVX2-NEXT: movl %eax, %esi
13525; AVX2-NEXT: shrl $11, %esi
13526; AVX2-NEXT: andl $1024, %esi # imm = 0x400
13527; AVX2-NEXT: orl %edx, %esi
13528; AVX2-NEXT: movl %eax, %edx
13529; AVX2-NEXT: shrl $13, %edx
13530; AVX2-NEXT: andl $512, %edx # imm = 0x200
13531; AVX2-NEXT: orl %esi, %edx
13532; AVX2-NEXT: movl %eax, %esi
13533; AVX2-NEXT: shrl $15, %esi
13534; AVX2-NEXT: andl $256, %esi # imm = 0x100
13535; AVX2-NEXT: orl %edx, %esi
13536; AVX2-NEXT: movl %eax, %edx
13537; AVX2-NEXT: shrl $17, %edx
13538; AVX2-NEXT: andl $128, %edx
13539; AVX2-NEXT: orl %esi, %edx
13540; AVX2-NEXT: movl %eax, %esi
13541; AVX2-NEXT: shrl $19, %esi
13542; AVX2-NEXT: andl $64, %esi
13543; AVX2-NEXT: orl %edx, %esi
13544; AVX2-NEXT: movl %eax, %edx
13545; AVX2-NEXT: shrl $21, %edx
13546; AVX2-NEXT: andl $32, %edx
13547; AVX2-NEXT: orl %esi, %edx
13548; AVX2-NEXT: movl %eax, %esi
13549; AVX2-NEXT: shrl $23, %esi
13550; AVX2-NEXT: andl $16, %esi
13551; AVX2-NEXT: orl %edx, %esi
13552; AVX2-NEXT: movl %eax, %edx
13553; AVX2-NEXT: shrl $25, %edx
13554; AVX2-NEXT: andl $8, %edx
13555; AVX2-NEXT: orl %esi, %edx
13556; AVX2-NEXT: movl %eax, %esi
13557; AVX2-NEXT: shrl $27, %esi
13558; AVX2-NEXT: andl $4, %esi
13559; AVX2-NEXT: orl %edx, %esi
13560; AVX2-NEXT: movl %eax, %edx
13561; AVX2-NEXT: shrl $29, %edx
13562; AVX2-NEXT: andl $2, %edx
13563; AVX2-NEXT: orl %esi, %edx
13564; AVX2-NEXT: shrl $31, %eax
13565; AVX2-NEXT: orl %edx, %eax
13566; AVX2-NEXT: orl %ecx, %eax
13567; AVX2-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0
13568; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
13569; AVX2-NEXT: retq
13570;
13571; XOPAVX1-LABEL: test_bitreverse_v8i32:
13572; XOPAVX1: # BB#0:
13573; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
13574; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [83,82,81,80,87,86,85,84,91,90,89,88,95,94,93,92]
13575; XOPAVX1-NEXT: vpperm %xmm2, %xmm1, %xmm0, %xmm1
13576; XOPAVX1-NEXT: vpperm %xmm2, %xmm0, %xmm0, %xmm0
13577; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
13578; XOPAVX1-NEXT: retq
13579;
13580; XOPAVX2-LABEL: test_bitreverse_v8i32:
13581; XOPAVX2: # BB#0:
13582; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
13583; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [83,82,81,80,87,86,85,84,91,90,89,88,95,94,93,92]
13584; XOPAVX2-NEXT: vpperm %xmm2, %xmm1, %xmm0, %xmm1
13585; XOPAVX2-NEXT: vpperm %xmm2, %xmm0, %xmm0, %xmm0
13586; XOPAVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
13587; XOPAVX2-NEXT: retq
13588 %b = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a)
13589 ret <8 x i32> %b
13590}
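; NOTE (editorial annotation, not autogenerated): the XOPAVX1/XOPAVX2 checks above
; rely on VPPERM's per-byte operation field. Selector bytes in the 80-95 (0x50-0x5F)
; range request a bit-reversed copy of the addressed source byte, so one vpperm with a
; selector such as [83,82,81,80,...] reverses the bits inside every byte and, at the
; same time, reverses the byte order within each 32-bit element, giving a full i32
; bitreverse with no shift/mask chain. The operand-routing details stated here are an
; assumption based on the XOP VPPERM encoding, not something the generated checks spell out.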
13591
13592define <4 x i64> @test_bitreverse_v4i64(<4 x i64> %a) nounwind {
13593; SSE-LABEL: test_bitreverse_v4i64:
13594; SSE: # BB#0:
13595; SSE-NEXT: movdqa %xmm1, %xmm5
13596; SSE-NEXT: movdqa %xmm0, %xmm1
13597; SSE-NEXT: movdqa %xmm1, %xmm2
13598; SSE-NEXT: psllq $61, %xmm2
13599; SSE-NEXT: pand {{.*}}(%rip), %xmm2
13600; SSE-NEXT: movdqa %xmm1, %xmm3
13601; SSE-NEXT: psllq $59, %xmm3
13602; SSE-NEXT: pand {{.*}}(%rip), %xmm3
13603; SSE-NEXT: por %xmm2, %xmm3
13604; SSE-NEXT: movdqa %xmm1, %xmm2
13605; SSE-NEXT: psllq $57, %xmm2
13606; SSE-NEXT: pand {{.*}}(%rip), %xmm2
13607; SSE-NEXT: por %xmm3, %xmm2
13608; SSE-NEXT: movdqa %xmm1, %xmm3
13609; SSE-NEXT: psllq $55, %xmm3
13610; SSE-NEXT: pand {{.*}}(%rip), %xmm3
13611; SSE-NEXT: por %xmm2, %xmm3
13612; SSE-NEXT: movdqa %xmm1, %xmm2
13613; SSE-NEXT: psllq $53, %xmm2
13614; SSE-NEXT: pand {{.*}}(%rip), %xmm2
13615; SSE-NEXT: por %xmm3, %xmm2
13616; SSE-NEXT: movdqa %xmm1, %xmm3
13617; SSE-NEXT: psllq $51, %xmm3
13618; SSE-NEXT: pand {{.*}}(%rip), %xmm3
13619; SSE-NEXT: por %xmm2, %xmm3
13620; SSE-NEXT: movdqa %xmm1, %xmm2
13621; SSE-NEXT: psllq $49, %xmm2
13622; SSE-NEXT: pand {{.*}}(%rip), %xmm2
13623; SSE-NEXT: por %xmm3, %xmm2
13624; SSE-NEXT: movdqa %xmm1, %xmm3
13625; SSE-NEXT: psllq $47, %xmm3
13626; SSE-NEXT: pand {{.*}}(%rip), %xmm3
13627; SSE-NEXT: por %xmm2, %xmm3
13628; SSE-NEXT: movdqa %xmm1, %xmm2
13629; SSE-NEXT: psllq $45, %xmm2
13630; SSE-NEXT: pand {{.*}}(%rip), %xmm2
13631; SSE-NEXT: por %xmm3, %xmm2
13632; SSE-NEXT: movdqa %xmm1, %xmm3
13633; SSE-NEXT: psllq $43, %xmm3
13634; SSE-NEXT: pand {{.*}}(%rip), %xmm3
13635; SSE-NEXT: por %xmm2, %xmm3
13636; SSE-NEXT: movdqa %xmm1, %xmm2
13637; SSE-NEXT: psllq $41, %xmm2
13638; SSE-NEXT: pand {{.*}}(%rip), %xmm2
13639; SSE-NEXT: por %xmm3, %xmm2
13640; SSE-NEXT: movdqa %xmm1, %xmm3
13641; SSE-NEXT: psllq $39, %xmm3
13642; SSE-NEXT: pand {{.*}}(%rip), %xmm3
13643; SSE-NEXT: por %xmm2, %xmm3
13644; SSE-NEXT: movdqa %xmm1, %xmm2
13645; SSE-NEXT: psllq $37, %xmm2
13646; SSE-NEXT: pand {{.*}}(%rip), %xmm2
13647; SSE-NEXT: por %xmm3, %xmm2
13648; SSE-NEXT: movdqa %xmm1, %xmm3
13649; SSE-NEXT: psllq $35, %xmm3
13650; SSE-NEXT: pand {{.*}}(%rip), %xmm3
13651; SSE-NEXT: por %xmm2, %xmm3
13652; SSE-NEXT: movdqa %xmm1, %xmm2
13653; SSE-NEXT: psllq $33, %xmm2
13654; SSE-NEXT: pand {{.*}}(%rip), %xmm2
13655; SSE-NEXT: por %xmm3, %xmm2
13656; SSE-NEXT: movdqa %xmm1, %xmm3
13657; SSE-NEXT: psllq $31, %xmm3
13658; SSE-NEXT: pand {{.*}}(%rip), %xmm3
13659; SSE-NEXT: por %xmm2, %xmm3
13660; SSE-NEXT: movdqa %xmm1, %xmm2
13661; SSE-NEXT: psllq $29, %xmm2
13662; SSE-NEXT: pand {{.*}}(%rip), %xmm2
13663; SSE-NEXT: por %xmm3, %xmm2
13664; SSE-NEXT: movdqa %xmm1, %xmm3
13665; SSE-NEXT: psllq $27, %xmm3
13666; SSE-NEXT: pand {{.*}}(%rip), %xmm3
13667; SSE-NEXT: por %xmm2, %xmm3
13668; SSE-NEXT: movdqa %xmm1, %xmm2
13669; SSE-NEXT: psllq $25, %xmm2
13670; SSE-NEXT: pand {{.*}}(%rip), %xmm2
13671; SSE-NEXT: por %xmm3, %xmm2
13672; SSE-NEXT: movdqa %xmm1, %xmm3
13673; SSE-NEXT: psllq $23, %xmm3
13674; SSE-NEXT: pand {{.*}}(%rip), %xmm3
13675; SSE-NEXT: por %xmm2, %xmm3
13676; SSE-NEXT: movdqa %xmm1, %xmm2
13677; SSE-NEXT: psllq $21, %xmm2
13678; SSE-NEXT: pand {{.*}}(%rip), %xmm2
13679; SSE-NEXT: por %xmm3, %xmm2
13680; SSE-NEXT: movdqa %xmm1, %xmm3
13681; SSE-NEXT: psllq $19, %xmm3
13682; SSE-NEXT: pand {{.*}}(%rip), %xmm3
13683; SSE-NEXT: por %xmm2, %xmm3
13684; SSE-NEXT: movdqa %xmm1, %xmm2
13685; SSE-NEXT: psllq $17, %xmm2
13686; SSE-NEXT: pand {{.*}}(%rip), %xmm2
13687; SSE-NEXT: por %xmm3, %xmm2
13688; SSE-NEXT: movdqa %xmm1, %xmm3
13689; SSE-NEXT: psllq $15, %xmm3
13690; SSE-NEXT: pand {{.*}}(%rip), %xmm3
13691; SSE-NEXT: por %xmm2, %xmm3
13692; SSE-NEXT: movdqa %xmm1, %xmm2
13693; SSE-NEXT: psllq $13, %xmm2
13694; SSE-NEXT: pand {{.*}}(%rip), %xmm2
13695; SSE-NEXT: por %xmm3, %xmm2
13696; SSE-NEXT: movdqa %xmm1, %xmm3
13697; SSE-NEXT: psllq $11, %xmm3
13698; SSE-NEXT: pand {{.*}}(%rip), %xmm3
13699; SSE-NEXT: por %xmm2, %xmm3
13700; SSE-NEXT: movdqa %xmm1, %xmm2
13701; SSE-NEXT: psllq $9, %xmm2
13702; SSE-NEXT: pand {{.*}}(%rip), %xmm2
13703; SSE-NEXT: por %xmm3, %xmm2
13704; SSE-NEXT: movdqa %xmm1, %xmm3
13705; SSE-NEXT: psllq $7, %xmm3
13706; SSE-NEXT: pand {{.*}}(%rip), %xmm3
13707; SSE-NEXT: por %xmm2, %xmm3
13708; SSE-NEXT: movdqa %xmm1, %xmm2
13709; SSE-NEXT: psllq $5, %xmm2
13710; SSE-NEXT: pand {{.*}}(%rip), %xmm2
13711; SSE-NEXT: por %xmm3, %xmm2
13712; SSE-NEXT: movdqa %xmm1, %xmm3
13713; SSE-NEXT: psllq $3, %xmm3
13714; SSE-NEXT: pand {{.*}}(%rip), %xmm3
13715; SSE-NEXT: por %xmm2, %xmm3
13716; SSE-NEXT: movdqa %xmm1, %xmm2
13717; SSE-NEXT: psllq $1, %xmm2
13718; SSE-NEXT: pand {{.*}}(%rip), %xmm2
13719; SSE-NEXT: por %xmm3, %xmm2
13720; SSE-NEXT: movdqa %xmm1, %xmm3
13721; SSE-NEXT: psrlq $1, %xmm3
13722; SSE-NEXT: pand {{.*}}(%rip), %xmm3
13723; SSE-NEXT: por %xmm2, %xmm3
13724; SSE-NEXT: movdqa %xmm1, %xmm2
13725; SSE-NEXT: psrlq $3, %xmm2
13726; SSE-NEXT: pand {{.*}}(%rip), %xmm2
13727; SSE-NEXT: por %xmm3, %xmm2
13728; SSE-NEXT: movdqa %xmm1, %xmm3
13729; SSE-NEXT: psrlq $5, %xmm3
13730; SSE-NEXT: pand {{.*}}(%rip), %xmm3
13731; SSE-NEXT: por %xmm2, %xmm3
13732; SSE-NEXT: movdqa %xmm1, %xmm2
13733; SSE-NEXT: psrlq $7, %xmm2
13734; SSE-NEXT: pand {{.*}}(%rip), %xmm2
13735; SSE-NEXT: por %xmm3, %xmm2
13736; SSE-NEXT: movdqa %xmm1, %xmm3
13737; SSE-NEXT: psrlq $9, %xmm3
13738; SSE-NEXT: pand {{.*}}(%rip), %xmm3
13739; SSE-NEXT: por %xmm2, %xmm3
13740; SSE-NEXT: movdqa %xmm1, %xmm2
13741; SSE-NEXT: psrlq $11, %xmm2
13742; SSE-NEXT: pand {{.*}}(%rip), %xmm2
13743; SSE-NEXT: por %xmm3, %xmm2
13744; SSE-NEXT: movdqa %xmm1, %xmm3
13745; SSE-NEXT: psrlq $13, %xmm3
13746; SSE-NEXT: pand {{.*}}(%rip), %xmm3
13747; SSE-NEXT: por %xmm2, %xmm3
13748; SSE-NEXT: movdqa %xmm1, %xmm2
13749; SSE-NEXT: psrlq $15, %xmm2
13750; SSE-NEXT: pand {{.*}}(%rip), %xmm2
13751; SSE-NEXT: por %xmm3, %xmm2
13752; SSE-NEXT: movdqa %xmm1, %xmm3
13753; SSE-NEXT: psrlq $17, %xmm3
13754; SSE-NEXT: pand {{.*}}(%rip), %xmm3
13755; SSE-NEXT: por %xmm2, %xmm3
13756; SSE-NEXT: movdqa %xmm1, %xmm2
13757; SSE-NEXT: psrlq $19, %xmm2
13758; SSE-NEXT: pand {{.*}}(%rip), %xmm2
13759; SSE-NEXT: por %xmm3, %xmm2
13760; SSE-NEXT: movdqa %xmm1, %xmm3
13761; SSE-NEXT: psrlq $21, %xmm3
13762; SSE-NEXT: pand {{.*}}(%rip), %xmm3
13763; SSE-NEXT: por %xmm2, %xmm3
13764; SSE-NEXT: movdqa %xmm1, %xmm2
13765; SSE-NEXT: psrlq $23, %xmm2
13766; SSE-NEXT: pand {{.*}}(%rip), %xmm2
13767; SSE-NEXT: por %xmm3, %xmm2
13768; SSE-NEXT: movdqa %xmm1, %xmm3
13769; SSE-NEXT: psrlq $25, %xmm3
13770; SSE-NEXT: pand {{.*}}(%rip), %xmm3
13771; SSE-NEXT: por %xmm2, %xmm3
13772; SSE-NEXT: movdqa %xmm1, %xmm2
13773; SSE-NEXT: psrlq $27, %xmm2
13774; SSE-NEXT: pand {{.*}}(%rip), %xmm2
13775; SSE-NEXT: por %xmm3, %xmm2
13776; SSE-NEXT: movdqa %xmm1, %xmm3
13777; SSE-NEXT: psrlq $29, %xmm3
13778; SSE-NEXT: pand {{.*}}(%rip), %xmm3
13779; SSE-NEXT: por %xmm2, %xmm3
13780; SSE-NEXT: movdqa %xmm1, %xmm2
13781; SSE-NEXT: psrlq $31, %xmm2
13782; SSE-NEXT: pand {{.*}}(%rip), %xmm2
13783; SSE-NEXT: por %xmm3, %xmm2
13784; SSE-NEXT: movdqa %xmm1, %xmm3
13785; SSE-NEXT: psrlq $33, %xmm3
13786; SSE-NEXT: pand {{.*}}(%rip), %xmm3
13787; SSE-NEXT: por %xmm2, %xmm3
13788; SSE-NEXT: movdqa %xmm1, %xmm2
13789; SSE-NEXT: psrlq $35, %xmm2
13790; SSE-NEXT: pand {{.*}}(%rip), %xmm2
13791; SSE-NEXT: por %xmm3, %xmm2
13792; SSE-NEXT: movdqa %xmm1, %xmm3
13793; SSE-NEXT: psrlq $37, %xmm3
13794; SSE-NEXT: pand {{.*}}(%rip), %xmm3
13795; SSE-NEXT: por %xmm2, %xmm3
13796; SSE-NEXT: movdqa %xmm1, %xmm2
13797; SSE-NEXT: psrlq $39, %xmm2
13798; SSE-NEXT: pand {{.*}}(%rip), %xmm2
13799; SSE-NEXT: por %xmm3, %xmm2
13800; SSE-NEXT: movdqa %xmm1, %xmm3
13801; SSE-NEXT: psrlq $41, %xmm3
13802; SSE-NEXT: pand {{.*}}(%rip), %xmm3
13803; SSE-NEXT: por %xmm2, %xmm3
13804; SSE-NEXT: movdqa %xmm1, %xmm2
13805; SSE-NEXT: psrlq $43, %xmm2
13806; SSE-NEXT: movdqa {{.*#+}} xmm15 = [1024,1024]
13807; SSE-NEXT: pand %xmm15, %xmm2
13808; SSE-NEXT: por %xmm3, %xmm2
13809; SSE-NEXT: movdqa %xmm1, %xmm3
13810; SSE-NEXT: psrlq $45, %xmm3
13811; SSE-NEXT: movdqa {{.*#+}} xmm14 = [512,512]
13812; SSE-NEXT: pand %xmm14, %xmm3
13813; SSE-NEXT: por %xmm2, %xmm3
13814; SSE-NEXT: movdqa %xmm1, %xmm2
13815; SSE-NEXT: psrlq $47, %xmm2
13816; SSE-NEXT: movdqa {{.*#+}} xmm13 = [256,256]
13817; SSE-NEXT: pand %xmm13, %xmm2
13818; SSE-NEXT: por %xmm3, %xmm2
13819; SSE-NEXT: movdqa %xmm1, %xmm3
13820; SSE-NEXT: psrlq $49, %xmm3
13821; SSE-NEXT: movdqa {{.*#+}} xmm12 = [128,128]
13822; SSE-NEXT: pand %xmm12, %xmm3
13823; SSE-NEXT: por %xmm2, %xmm3
13824; SSE-NEXT: movdqa %xmm1, %xmm2
13825; SSE-NEXT: psrlq $51, %xmm2
13826; SSE-NEXT: movdqa {{.*#+}} xmm11 = [64,64]
13827; SSE-NEXT: pand %xmm11, %xmm2
13828; SSE-NEXT: por %xmm3, %xmm2
13829; SSE-NEXT: movdqa %xmm1, %xmm3
13830; SSE-NEXT: psrlq $53, %xmm3
13831; SSE-NEXT: movdqa {{.*#+}} xmm10 = [32,32]
13832; SSE-NEXT: pand %xmm10, %xmm3
13833; SSE-NEXT: por %xmm2, %xmm3
13834; SSE-NEXT: movdqa %xmm1, %xmm2
13835; SSE-NEXT: psrlq $55, %xmm2
13836; SSE-NEXT: movdqa {{.*#+}} xmm9 = [16,16]
13837; SSE-NEXT: pand %xmm9, %xmm2
13838; SSE-NEXT: por %xmm3, %xmm2
13839; SSE-NEXT: movdqa %xmm1, %xmm3
13840; SSE-NEXT: psrlq $57, %xmm3
13841; SSE-NEXT: movdqa {{.*#+}} xmm8 = [8,8]
13842; SSE-NEXT: pand %xmm8, %xmm3
13843; SSE-NEXT: por %xmm2, %xmm3
13844; SSE-NEXT: movdqa %xmm1, %xmm2
13845; SSE-NEXT: psrlq $59, %xmm2
13846; SSE-NEXT: movdqa {{.*#+}} xmm7 = [4,4]
13847; SSE-NEXT: pand %xmm7, %xmm2
13848; SSE-NEXT: por %xmm3, %xmm2
13849; SSE-NEXT: psrlq $61, %xmm0
13850; SSE-NEXT: movdqa {{.*#+}} xmm6 = [2,2]
13851; SSE-NEXT: pand %xmm6, %xmm0
13852; SSE-NEXT: por %xmm2, %xmm0
13853; SSE-NEXT: movdqa %xmm1, %xmm2
13854; SSE-NEXT: psrlq $63, %xmm1
13855; SSE-NEXT: movdqa {{.*#+}} xmm3 = [1,1]
13856; SSE-NEXT: pand %xmm3, %xmm1
13857; SSE-NEXT: por %xmm0, %xmm1
13858; SSE-NEXT: psllq $63, %xmm2
13859; SSE-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
13860; SSE-NEXT: pand %xmm0, %xmm2
13861; SSE-NEXT: por %xmm2, %xmm1
13862; SSE-NEXT: movdqa %xmm5, %xmm4
13863; SSE-NEXT: psllq $61, %xmm4
13864; SSE-NEXT: pand {{.*}}(%rip), %xmm4
13865; SSE-NEXT: movdqa %xmm5, %xmm2
13866; SSE-NEXT: psllq $63, %xmm2
13867; SSE-NEXT: pand %xmm0, %xmm2
13868; SSE-NEXT: movdqa %xmm5, %xmm0
13869; SSE-NEXT: psllq $59, %xmm0
13870; SSE-NEXT: pand {{.*}}(%rip), %xmm0
13871; SSE-NEXT: por %xmm4, %xmm0
13872; SSE-NEXT: movdqa %xmm5, %xmm4
13873; SSE-NEXT: psllq $57, %xmm4
13874; SSE-NEXT: pand {{.*}}(%rip), %xmm4
13875; SSE-NEXT: por %xmm0, %xmm4
13876; SSE-NEXT: movdqa %xmm5, %xmm0
13877; SSE-NEXT: psllq $55, %xmm0
13878; SSE-NEXT: pand {{.*}}(%rip), %xmm0
13879; SSE-NEXT: por %xmm4, %xmm0
13880; SSE-NEXT: movdqa %xmm5, %xmm4
13881; SSE-NEXT: psllq $53, %xmm4
13882; SSE-NEXT: pand {{.*}}(%rip), %xmm4
13883; SSE-NEXT: por %xmm0, %xmm4
13884; SSE-NEXT: movdqa %xmm5, %xmm0
13885; SSE-NEXT: psllq $51, %xmm0
13886; SSE-NEXT: pand {{.*}}(%rip), %xmm0
13887; SSE-NEXT: por %xmm4, %xmm0
13888; SSE-NEXT: movdqa %xmm5, %xmm4
13889; SSE-NEXT: psllq $49, %xmm4
13890; SSE-NEXT: pand {{.*}}(%rip), %xmm4
13891; SSE-NEXT: por %xmm0, %xmm4
13892; SSE-NEXT: movdqa %xmm5, %xmm0
13893; SSE-NEXT: psllq $47, %xmm0
13894; SSE-NEXT: pand {{.*}}(%rip), %xmm0
13895; SSE-NEXT: por %xmm4, %xmm0
13896; SSE-NEXT: movdqa %xmm5, %xmm4
13897; SSE-NEXT: psllq $45, %xmm4
13898; SSE-NEXT: pand {{.*}}(%rip), %xmm4
13899; SSE-NEXT: por %xmm0, %xmm4
13900; SSE-NEXT: movdqa %xmm5, %xmm0
13901; SSE-NEXT: psllq $43, %xmm0
13902; SSE-NEXT: pand {{.*}}(%rip), %xmm0
13903; SSE-NEXT: por %xmm4, %xmm0
13904; SSE-NEXT: movdqa %xmm5, %xmm4
13905; SSE-NEXT: psllq $41, %xmm4
13906; SSE-NEXT: pand {{.*}}(%rip), %xmm4
13907; SSE-NEXT: por %xmm0, %xmm4
13908; SSE-NEXT: movdqa %xmm5, %xmm0
13909; SSE-NEXT: psllq $39, %xmm0
13910; SSE-NEXT: pand {{.*}}(%rip), %xmm0
13911; SSE-NEXT: por %xmm4, %xmm0
13912; SSE-NEXT: movdqa %xmm5, %xmm4
13913; SSE-NEXT: psllq $37, %xmm4
13914; SSE-NEXT: pand {{.*}}(%rip), %xmm4
13915; SSE-NEXT: por %xmm0, %xmm4
13916; SSE-NEXT: movdqa %xmm5, %xmm0
13917; SSE-NEXT: psllq $35, %xmm0
13918; SSE-NEXT: pand {{.*}}(%rip), %xmm0
13919; SSE-NEXT: por %xmm4, %xmm0
13920; SSE-NEXT: movdqa %xmm5, %xmm4
13921; SSE-NEXT: psllq $33, %xmm4
13922; SSE-NEXT: pand {{.*}}(%rip), %xmm4
13923; SSE-NEXT: por %xmm0, %xmm4
13924; SSE-NEXT: movdqa %xmm5, %xmm0
13925; SSE-NEXT: psllq $31, %xmm0
13926; SSE-NEXT: pand {{.*}}(%rip), %xmm0
13927; SSE-NEXT: por %xmm4, %xmm0
13928; SSE-NEXT: movdqa %xmm5, %xmm4
13929; SSE-NEXT: psllq $29, %xmm4
13930; SSE-NEXT: pand {{.*}}(%rip), %xmm4
13931; SSE-NEXT: por %xmm0, %xmm4
13932; SSE-NEXT: movdqa %xmm5, %xmm0
13933; SSE-NEXT: psllq $27, %xmm0
13934; SSE-NEXT: pand {{.*}}(%rip), %xmm0
13935; SSE-NEXT: por %xmm4, %xmm0
13936; SSE-NEXT: movdqa %xmm5, %xmm4
13937; SSE-NEXT: psllq $25, %xmm4
13938; SSE-NEXT: pand {{.*}}(%rip), %xmm4
13939; SSE-NEXT: por %xmm0, %xmm4
13940; SSE-NEXT: movdqa %xmm5, %xmm0
13941; SSE-NEXT: psllq $23, %xmm0
13942; SSE-NEXT: pand {{.*}}(%rip), %xmm0
13943; SSE-NEXT: por %xmm4, %xmm0
13944; SSE-NEXT: movdqa %xmm5, %xmm4
13945; SSE-NEXT: psllq $21, %xmm4
13946; SSE-NEXT: pand {{.*}}(%rip), %xmm4
13947; SSE-NEXT: por %xmm0, %xmm4
13948; SSE-NEXT: movdqa %xmm5, %xmm0
13949; SSE-NEXT: psllq $19, %xmm0
13950; SSE-NEXT: pand {{.*}}(%rip), %xmm0
13951; SSE-NEXT: por %xmm4, %xmm0
13952; SSE-NEXT: movdqa %xmm5, %xmm4
13953; SSE-NEXT: psllq $17, %xmm4
13954; SSE-NEXT: pand {{.*}}(%rip), %xmm4
13955; SSE-NEXT: por %xmm0, %xmm4
13956; SSE-NEXT: movdqa %xmm5, %xmm0
13957; SSE-NEXT: psllq $15, %xmm0
13958; SSE-NEXT: pand {{.*}}(%rip), %xmm0
13959; SSE-NEXT: por %xmm4, %xmm0
13960; SSE-NEXT: movdqa %xmm5, %xmm4
13961; SSE-NEXT: psllq $13, %xmm4
13962; SSE-NEXT: pand {{.*}}(%rip), %xmm4
13963; SSE-NEXT: por %xmm0, %xmm4
13964; SSE-NEXT: movdqa %xmm5, %xmm0
13965; SSE-NEXT: psllq $11, %xmm0
13966; SSE-NEXT: pand {{.*}}(%rip), %xmm0
13967; SSE-NEXT: por %xmm4, %xmm0
13968; SSE-NEXT: movdqa %xmm5, %xmm4
13969; SSE-NEXT: psllq $9, %xmm4
13970; SSE-NEXT: pand {{.*}}(%rip), %xmm4
13971; SSE-NEXT: por %xmm0, %xmm4
13972; SSE-NEXT: movdqa %xmm5, %xmm0
13973; SSE-NEXT: psllq $7, %xmm0
13974; SSE-NEXT: pand {{.*}}(%rip), %xmm0
13975; SSE-NEXT: por %xmm4, %xmm0
13976; SSE-NEXT: movdqa %xmm5, %xmm4
13977; SSE-NEXT: psllq $5, %xmm4
13978; SSE-NEXT: pand {{.*}}(%rip), %xmm4
13979; SSE-NEXT: por %xmm0, %xmm4
13980; SSE-NEXT: movdqa %xmm5, %xmm0
13981; SSE-NEXT: psllq $3, %xmm0
13982; SSE-NEXT: pand {{.*}}(%rip), %xmm0
13983; SSE-NEXT: por %xmm4, %xmm0
13984; SSE-NEXT: movdqa %xmm5, %xmm4
13985; SSE-NEXT: psllq $1, %xmm4
13986; SSE-NEXT: pand {{.*}}(%rip), %xmm4
13987; SSE-NEXT: por %xmm0, %xmm4
13988; SSE-NEXT: movdqa %xmm5, %xmm0
13989; SSE-NEXT: psrlq $1, %xmm0
13990; SSE-NEXT: pand {{.*}}(%rip), %xmm0
13991; SSE-NEXT: por %xmm4, %xmm0
13992; SSE-NEXT: movdqa %xmm5, %xmm4
13993; SSE-NEXT: psrlq $3, %xmm4
13994; SSE-NEXT: pand {{.*}}(%rip), %xmm4
13995; SSE-NEXT: por %xmm0, %xmm4
13996; SSE-NEXT: movdqa %xmm5, %xmm0
13997; SSE-NEXT: psrlq $5, %xmm0
13998; SSE-NEXT: pand {{.*}}(%rip), %xmm0
13999; SSE-NEXT: por %xmm4, %xmm0
14000; SSE-NEXT: movdqa %xmm5, %xmm4
14001; SSE-NEXT: psrlq $7, %xmm4
14002; SSE-NEXT: pand {{.*}}(%rip), %xmm4
14003; SSE-NEXT: por %xmm0, %xmm4
14004; SSE-NEXT: movdqa %xmm5, %xmm0
14005; SSE-NEXT: psrlq $9, %xmm0
14006; SSE-NEXT: pand {{.*}}(%rip), %xmm0
14007; SSE-NEXT: por %xmm4, %xmm0
14008; SSE-NEXT: movdqa %xmm5, %xmm4
14009; SSE-NEXT: psrlq $11, %xmm4
14010; SSE-NEXT: pand {{.*}}(%rip), %xmm4
14011; SSE-NEXT: por %xmm0, %xmm4
14012; SSE-NEXT: movdqa %xmm5, %xmm0
14013; SSE-NEXT: psrlq $13, %xmm0
14014; SSE-NEXT: pand {{.*}}(%rip), %xmm0
14015; SSE-NEXT: por %xmm4, %xmm0
14016; SSE-NEXT: movdqa %xmm5, %xmm4
14017; SSE-NEXT: psrlq $15, %xmm4
14018; SSE-NEXT: pand {{.*}}(%rip), %xmm4
14019; SSE-NEXT: por %xmm0, %xmm4
14020; SSE-NEXT: movdqa %xmm5, %xmm0
14021; SSE-NEXT: psrlq $17, %xmm0
14022; SSE-NEXT: pand {{.*}}(%rip), %xmm0
14023; SSE-NEXT: por %xmm4, %xmm0
14024; SSE-NEXT: movdqa %xmm5, %xmm4
14025; SSE-NEXT: psrlq $19, %xmm4
14026; SSE-NEXT: pand {{.*}}(%rip), %xmm4
14027; SSE-NEXT: por %xmm0, %xmm4
14028; SSE-NEXT: movdqa %xmm5, %xmm0
14029; SSE-NEXT: psrlq $21, %xmm0
14030; SSE-NEXT: pand {{.*}}(%rip), %xmm0
14031; SSE-NEXT: por %xmm4, %xmm0
14032; SSE-NEXT: movdqa %xmm5, %xmm4
14033; SSE-NEXT: psrlq $23, %xmm4
14034; SSE-NEXT: pand {{.*}}(%rip), %xmm4
14035; SSE-NEXT: por %xmm0, %xmm4
14036; SSE-NEXT: movdqa %xmm5, %xmm0
14037; SSE-NEXT: psrlq $25, %xmm0
14038; SSE-NEXT: pand {{.*}}(%rip), %xmm0
14039; SSE-NEXT: por %xmm4, %xmm0
14040; SSE-NEXT: movdqa %xmm5, %xmm4
14041; SSE-NEXT: psrlq $27, %xmm4
14042; SSE-NEXT: pand {{.*}}(%rip), %xmm4
14043; SSE-NEXT: por %xmm0, %xmm4
14044; SSE-NEXT: movdqa %xmm5, %xmm0
14045; SSE-NEXT: psrlq $29, %xmm0
14046; SSE-NEXT: pand {{.*}}(%rip), %xmm0
14047; SSE-NEXT: por %xmm4, %xmm0
14048; SSE-NEXT: movdqa %xmm5, %xmm4
14049; SSE-NEXT: psrlq $31, %xmm4
14050; SSE-NEXT: pand {{.*}}(%rip), %xmm4
14051; SSE-NEXT: por %xmm0, %xmm4
14052; SSE-NEXT: movdqa %xmm5, %xmm0
14053; SSE-NEXT: psrlq $33, %xmm0
14054; SSE-NEXT: pand {{.*}}(%rip), %xmm0
14055; SSE-NEXT: por %xmm4, %xmm0
14056; SSE-NEXT: movdqa %xmm5, %xmm4
14057; SSE-NEXT: psrlq $35, %xmm4
14058; SSE-NEXT: pand {{.*}}(%rip), %xmm4
14059; SSE-NEXT: por %xmm0, %xmm4
14060; SSE-NEXT: movdqa %xmm5, %xmm0
14061; SSE-NEXT: psrlq $37, %xmm0
14062; SSE-NEXT: pand {{.*}}(%rip), %xmm0
14063; SSE-NEXT: por %xmm4, %xmm0
14064; SSE-NEXT: movdqa %xmm5, %xmm4
14065; SSE-NEXT: psrlq $39, %xmm4
14066; SSE-NEXT: pand {{.*}}(%rip), %xmm4
14067; SSE-NEXT: por %xmm0, %xmm4
14068; SSE-NEXT: movdqa %xmm5, %xmm0
14069; SSE-NEXT: psrlq $41, %xmm0
14070; SSE-NEXT: pand {{.*}}(%rip), %xmm0
14071; SSE-NEXT: por %xmm4, %xmm0
14072; SSE-NEXT: movdqa %xmm5, %xmm4
14073; SSE-NEXT: psrlq $43, %xmm4
14074; SSE-NEXT: pand %xmm15, %xmm4
14075; SSE-NEXT: por %xmm0, %xmm4
14076; SSE-NEXT: movdqa %xmm5, %xmm0
14077; SSE-NEXT: psrlq $45, %xmm0
14078; SSE-NEXT: pand %xmm14, %xmm0
14079; SSE-NEXT: por %xmm4, %xmm0
14080; SSE-NEXT: movdqa %xmm5, %xmm4
14081; SSE-NEXT: psrlq $47, %xmm4
14082; SSE-NEXT: pand %xmm13, %xmm4
14083; SSE-NEXT: por %xmm0, %xmm4
14084; SSE-NEXT: movdqa %xmm5, %xmm0
14085; SSE-NEXT: psrlq $49, %xmm0
14086; SSE-NEXT: pand %xmm12, %xmm0
14087; SSE-NEXT: por %xmm4, %xmm0
14088; SSE-NEXT: movdqa %xmm5, %xmm4
14089; SSE-NEXT: psrlq $51, %xmm4
14090; SSE-NEXT: pand %xmm11, %xmm4
14091; SSE-NEXT: por %xmm0, %xmm4
14092; SSE-NEXT: movdqa %xmm5, %xmm0
14093; SSE-NEXT: psrlq $53, %xmm0
14094; SSE-NEXT: pand %xmm10, %xmm0
14095; SSE-NEXT: por %xmm4, %xmm0
14096; SSE-NEXT: movdqa %xmm5, %xmm4
14097; SSE-NEXT: psrlq $55, %xmm4
14098; SSE-NEXT: pand %xmm9, %xmm4
14099; SSE-NEXT: por %xmm0, %xmm4
14100; SSE-NEXT: movdqa %xmm5, %xmm0
14101; SSE-NEXT: psrlq $57, %xmm0
14102; SSE-NEXT: pand %xmm8, %xmm0
14103; SSE-NEXT: por %xmm4, %xmm0
14104; SSE-NEXT: movdqa %xmm5, %xmm4
14105; SSE-NEXT: psrlq $59, %xmm4
14106; SSE-NEXT: pand %xmm7, %xmm4
14107; SSE-NEXT: por %xmm0, %xmm4
14108; SSE-NEXT: movdqa %xmm5, %xmm0
14109; SSE-NEXT: psrlq $61, %xmm0
14110; SSE-NEXT: pand %xmm6, %xmm0
14111; SSE-NEXT: por %xmm4, %xmm0
14112; SSE-NEXT: psrlq $63, %xmm5
14113; SSE-NEXT: pand %xmm3, %xmm5
14114; SSE-NEXT: por %xmm0, %xmm5
14115; SSE-NEXT: por %xmm2, %xmm5
14116; SSE-NEXT: movdqa %xmm1, %xmm0
14117; SSE-NEXT: movdqa %xmm5, %xmm1
14118; SSE-NEXT: retq
14119;
14120; AVX1-LABEL: test_bitreverse_v4i64:
14121; AVX1: # BB#0:
14122; AVX1-NEXT: vpsllq $61, %xmm0, %xmm2
14123; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
14124; AVX1-NEXT: vpsllq $61, %xmm1, %xmm3
14125; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
14126; AVX1-NEXT: vandps {{.*}}(%rip), %ymm2, %ymm2
14127; AVX1-NEXT: vpsllq $63, %xmm0, %xmm3
14128; AVX1-NEXT: vpsllq $63, %xmm1, %xmm4
14129; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14130; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14131; AVX1-NEXT: vorps %ymm2, %ymm3, %ymm2
14132; AVX1-NEXT: vpsllq $59, %xmm0, %xmm3
14133; AVX1-NEXT: vpsllq $59, %xmm1, %xmm4
14134; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14135; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14136; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14137; AVX1-NEXT: vpsllq $57, %xmm0, %xmm3
14138; AVX1-NEXT: vpsllq $57, %xmm1, %xmm4
14139; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14140; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14141; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14142; AVX1-NEXT: vpsllq $55, %xmm0, %xmm3
14143; AVX1-NEXT: vpsllq $55, %xmm1, %xmm4
14144; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14145; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14146; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14147; AVX1-NEXT: vpsllq $53, %xmm0, %xmm3
14148; AVX1-NEXT: vpsllq $53, %xmm1, %xmm4
14149; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14150; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14151; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14152; AVX1-NEXT: vpsllq $51, %xmm0, %xmm3
14153; AVX1-NEXT: vpsllq $51, %xmm1, %xmm4
14154; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14155; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14156; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14157; AVX1-NEXT: vpsllq $49, %xmm0, %xmm3
14158; AVX1-NEXT: vpsllq $49, %xmm1, %xmm4
14159; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14160; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14161; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14162; AVX1-NEXT: vpsllq $47, %xmm0, %xmm3
14163; AVX1-NEXT: vpsllq $47, %xmm1, %xmm4
14164; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14165; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14166; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14167; AVX1-NEXT: vpsllq $45, %xmm0, %xmm3
14168; AVX1-NEXT: vpsllq $45, %xmm1, %xmm4
14169; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14170; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14171; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14172; AVX1-NEXT: vpsllq $43, %xmm0, %xmm3
14173; AVX1-NEXT: vpsllq $43, %xmm1, %xmm4
14174; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14175; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14176; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14177; AVX1-NEXT: vpsllq $41, %xmm0, %xmm3
14178; AVX1-NEXT: vpsllq $41, %xmm1, %xmm4
14179; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14180; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14181; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14182; AVX1-NEXT: vpsllq $39, %xmm0, %xmm3
14183; AVX1-NEXT: vpsllq $39, %xmm1, %xmm4
14184; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14185; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14186; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14187; AVX1-NEXT: vpsllq $37, %xmm0, %xmm3
14188; AVX1-NEXT: vpsllq $37, %xmm1, %xmm4
14189; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14190; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14191; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14192; AVX1-NEXT: vpsllq $35, %xmm0, %xmm3
14193; AVX1-NEXT: vpsllq $35, %xmm1, %xmm4
14194; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14195; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14196; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14197; AVX1-NEXT: vpsllq $33, %xmm0, %xmm3
14198; AVX1-NEXT: vpsllq $33, %xmm1, %xmm4
14199; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14200; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14201; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14202; AVX1-NEXT: vpsllq $31, %xmm0, %xmm3
14203; AVX1-NEXT: vpsllq $31, %xmm1, %xmm4
14204; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14205; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14206; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14207; AVX1-NEXT: vpsllq $29, %xmm0, %xmm3
14208; AVX1-NEXT: vpsllq $29, %xmm1, %xmm4
14209; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14210; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14211; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14212; AVX1-NEXT: vpsllq $27, %xmm0, %xmm3
14213; AVX1-NEXT: vpsllq $27, %xmm1, %xmm4
14214; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14215; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14216; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14217; AVX1-NEXT: vpsllq $25, %xmm0, %xmm3
14218; AVX1-NEXT: vpsllq $25, %xmm1, %xmm4
14219; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14220; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14221; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14222; AVX1-NEXT: vpsllq $23, %xmm0, %xmm3
14223; AVX1-NEXT: vpsllq $23, %xmm1, %xmm4
14224; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14225; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14226; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14227; AVX1-NEXT: vpsllq $21, %xmm0, %xmm3
14228; AVX1-NEXT: vpsllq $21, %xmm1, %xmm4
14229; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14230; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14231; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14232; AVX1-NEXT: vpsllq $19, %xmm0, %xmm3
14233; AVX1-NEXT: vpsllq $19, %xmm1, %xmm4
14234; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14235; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14236; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14237; AVX1-NEXT: vpsllq $17, %xmm0, %xmm3
14238; AVX1-NEXT: vpsllq $17, %xmm1, %xmm4
14239; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14240; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14241; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14242; AVX1-NEXT: vpsllq $15, %xmm0, %xmm3
14243; AVX1-NEXT: vpsllq $15, %xmm1, %xmm4
14244; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14245; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14246; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14247; AVX1-NEXT: vpsllq $13, %xmm0, %xmm3
14248; AVX1-NEXT: vpsllq $13, %xmm1, %xmm4
14249; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14250; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14251; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14252; AVX1-NEXT: vpsllq $11, %xmm0, %xmm3
14253; AVX1-NEXT: vpsllq $11, %xmm1, %xmm4
14254; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14255; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14256; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14257; AVX1-NEXT: vpsllq $9, %xmm0, %xmm3
14258; AVX1-NEXT: vpsllq $9, %xmm1, %xmm4
14259; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14260; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14261; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14262; AVX1-NEXT: vpsllq $7, %xmm0, %xmm3
14263; AVX1-NEXT: vpsllq $7, %xmm1, %xmm4
14264; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14265; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14266; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14267; AVX1-NEXT: vpsllq $5, %xmm0, %xmm3
14268; AVX1-NEXT: vpsllq $5, %xmm1, %xmm4
14269; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14270; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14271; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14272; AVX1-NEXT: vpsllq $3, %xmm0, %xmm3
14273; AVX1-NEXT: vpsllq $3, %xmm1, %xmm4
14274; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14275; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14276; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14277; AVX1-NEXT: vpsllq $1, %xmm0, %xmm3
14278; AVX1-NEXT: vpsllq $1, %xmm1, %xmm4
14279; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14280; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14281; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14282; AVX1-NEXT: vpsrlq $1, %xmm0, %xmm3
14283; AVX1-NEXT: vpsrlq $1, %xmm1, %xmm4
14284; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14285; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14286; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14287; AVX1-NEXT: vpsrlq $3, %xmm0, %xmm3
14288; AVX1-NEXT: vpsrlq $3, %xmm1, %xmm4
14289; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14290; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14291; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14292; AVX1-NEXT: vpsrlq $5, %xmm0, %xmm3
14293; AVX1-NEXT: vpsrlq $5, %xmm1, %xmm4
14294; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14295; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14296; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14297; AVX1-NEXT: vpsrlq $7, %xmm0, %xmm3
14298; AVX1-NEXT: vpsrlq $7, %xmm1, %xmm4
14299; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14300; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14301; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14302; AVX1-NEXT: vpsrlq $9, %xmm0, %xmm3
14303; AVX1-NEXT: vpsrlq $9, %xmm1, %xmm4
14304; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14305; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14306; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14307; AVX1-NEXT: vpsrlq $11, %xmm0, %xmm3
14308; AVX1-NEXT: vpsrlq $11, %xmm1, %xmm4
14309; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14310; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14311; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14312; AVX1-NEXT: vpsrlq $13, %xmm0, %xmm3
14313; AVX1-NEXT: vpsrlq $13, %xmm1, %xmm4
14314; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14315; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14316; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14317; AVX1-NEXT: vpsrlq $15, %xmm0, %xmm3
14318; AVX1-NEXT: vpsrlq $15, %xmm1, %xmm4
14319; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14320; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14321; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14322; AVX1-NEXT: vpsrlq $17, %xmm0, %xmm3
14323; AVX1-NEXT: vpsrlq $17, %xmm1, %xmm4
14324; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14325; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14326; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14327; AVX1-NEXT: vpsrlq $19, %xmm0, %xmm3
14328; AVX1-NEXT: vpsrlq $19, %xmm1, %xmm4
14329; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14330; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14331; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14332; AVX1-NEXT: vpsrlq $21, %xmm0, %xmm3
14333; AVX1-NEXT: vpsrlq $21, %xmm1, %xmm4
14334; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14335; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14336; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14337; AVX1-NEXT: vpsrlq $23, %xmm0, %xmm3
14338; AVX1-NEXT: vpsrlq $23, %xmm1, %xmm4
14339; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14340; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14341; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14342; AVX1-NEXT: vpsrlq $25, %xmm0, %xmm3
14343; AVX1-NEXT: vpsrlq $25, %xmm1, %xmm4
14344; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14345; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14346; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14347; AVX1-NEXT: vpsrlq $27, %xmm0, %xmm3
14348; AVX1-NEXT: vpsrlq $27, %xmm1, %xmm4
14349; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14350; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14351; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14352; AVX1-NEXT: vpsrlq $29, %xmm0, %xmm3
14353; AVX1-NEXT: vpsrlq $29, %xmm1, %xmm4
14354; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14355; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14356; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14357; AVX1-NEXT: vpsrlq $31, %xmm0, %xmm3
14358; AVX1-NEXT: vpsrlq $31, %xmm1, %xmm4
14359; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14360; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14361; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14362; AVX1-NEXT: vpsrlq $33, %xmm0, %xmm3
14363; AVX1-NEXT: vpsrlq $33, %xmm1, %xmm4
14364; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14365; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14366; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14367; AVX1-NEXT: vpsrlq $35, %xmm0, %xmm3
14368; AVX1-NEXT: vpsrlq $35, %xmm1, %xmm4
14369; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14370; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14371; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14372; AVX1-NEXT: vpsrlq $37, %xmm0, %xmm3
14373; AVX1-NEXT: vpsrlq $37, %xmm1, %xmm4
14374; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14375; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14376; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14377; AVX1-NEXT: vpsrlq $39, %xmm0, %xmm3
14378; AVX1-NEXT: vpsrlq $39, %xmm1, %xmm4
14379; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14380; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14381; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14382; AVX1-NEXT: vpsrlq $41, %xmm0, %xmm3
14383; AVX1-NEXT: vpsrlq $41, %xmm1, %xmm4
14384; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14385; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14386; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14387; AVX1-NEXT: vpsrlq $43, %xmm0, %xmm3
14388; AVX1-NEXT: vpsrlq $43, %xmm1, %xmm4
14389; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14390; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14391; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14392; AVX1-NEXT: vpsrlq $45, %xmm0, %xmm3
14393; AVX1-NEXT: vpsrlq $45, %xmm1, %xmm4
14394; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14395; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14396; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14397; AVX1-NEXT: vpsrlq $47, %xmm0, %xmm3
14398; AVX1-NEXT: vpsrlq $47, %xmm1, %xmm4
14399; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14400; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14401; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14402; AVX1-NEXT: vpsrlq $49, %xmm0, %xmm3
14403; AVX1-NEXT: vpsrlq $49, %xmm1, %xmm4
14404; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14405; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14406; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14407; AVX1-NEXT: vpsrlq $51, %xmm0, %xmm3
14408; AVX1-NEXT: vpsrlq $51, %xmm1, %xmm4
14409; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14410; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14411; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14412; AVX1-NEXT: vpsrlq $53, %xmm0, %xmm3
14413; AVX1-NEXT: vpsrlq $53, %xmm1, %xmm4
14414; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14415; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14416; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14417; AVX1-NEXT: vpsrlq $55, %xmm0, %xmm3
14418; AVX1-NEXT: vpsrlq $55, %xmm1, %xmm4
14419; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14420; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14421; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14422; AVX1-NEXT: vpsrlq $57, %xmm0, %xmm3
14423; AVX1-NEXT: vpsrlq $57, %xmm1, %xmm4
14424; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14425; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14426; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14427; AVX1-NEXT: vpsrlq $59, %xmm0, %xmm3
14428; AVX1-NEXT: vpsrlq $59, %xmm1, %xmm4
14429; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14430; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14431; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14432; AVX1-NEXT: vpsrlq $61, %xmm0, %xmm3
14433; AVX1-NEXT: vpsrlq $61, %xmm1, %xmm4
14434; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
14435; AVX1-NEXT: vandps {{.*}}(%rip), %ymm3, %ymm3
14436; AVX1-NEXT: vorps %ymm3, %ymm2, %ymm2
14437; AVX1-NEXT: vpsrlq $63, %xmm0, %xmm0
14438; AVX1-NEXT: vpsrlq $63, %xmm1, %xmm1
14439; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
14440; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
14441; AVX1-NEXT: vorps %ymm0, %ymm2, %ymm0
14442; AVX1-NEXT: retq
14443;
14444; AVX2-LABEL: test_bitreverse_v4i64:
14445; AVX2: # BB#0:
14446; AVX2-NEXT: vpsllq $61, %ymm0, %ymm1
14447; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
14448; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm2
14449; AVX2-NEXT: vpsllq $63, %ymm0, %ymm1
14450; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm3
14451; AVX2-NEXT: vpand %ymm3, %ymm1, %ymm1
14452; AVX2-NEXT: vpsllq $59, %ymm0, %ymm3
14453; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14454; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14455; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14456; AVX2-NEXT: vpsllq $57, %ymm0, %ymm3
14457; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14458; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14459; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14460; AVX2-NEXT: vpsllq $55, %ymm0, %ymm3
14461; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14462; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14463; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14464; AVX2-NEXT: vpsllq $53, %ymm0, %ymm3
14465; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14466; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14467; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14468; AVX2-NEXT: vpsllq $51, %ymm0, %ymm3
14469; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14470; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14471; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14472; AVX2-NEXT: vpsllq $49, %ymm0, %ymm3
14473; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14474; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14475; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14476; AVX2-NEXT: vpsllq $47, %ymm0, %ymm3
14477; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14478; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14479; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14480; AVX2-NEXT: vpsllq $45, %ymm0, %ymm3
14481; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14482; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14483; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14484; AVX2-NEXT: vpsllq $43, %ymm0, %ymm3
14485; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14486; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14487; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14488; AVX2-NEXT: vpsllq $41, %ymm0, %ymm3
14489; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14490; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14491; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14492; AVX2-NEXT: vpsllq $39, %ymm0, %ymm3
14493; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14494; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14495; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14496; AVX2-NEXT: vpsllq $37, %ymm0, %ymm3
14497; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14498; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14499; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14500; AVX2-NEXT: vpsllq $35, %ymm0, %ymm3
14501; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14502; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14503; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14504; AVX2-NEXT: vpsllq $33, %ymm0, %ymm3
14505; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14506; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14507; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14508; AVX2-NEXT: vpsllq $31, %ymm0, %ymm3
14509; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14510; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14511; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14512; AVX2-NEXT: vpsllq $29, %ymm0, %ymm3
14513; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14514; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14515; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14516; AVX2-NEXT: vpsllq $27, %ymm0, %ymm3
14517; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14518; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14519; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14520; AVX2-NEXT: vpsllq $25, %ymm0, %ymm3
14521; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14522; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14523; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14524; AVX2-NEXT: vpsllq $23, %ymm0, %ymm3
14525; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14526; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14527; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14528; AVX2-NEXT: vpsllq $21, %ymm0, %ymm3
14529; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14530; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14531; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14532; AVX2-NEXT: vpsllq $19, %ymm0, %ymm3
14533; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14534; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14535; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14536; AVX2-NEXT: vpsllq $17, %ymm0, %ymm3
14537; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14538; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14539; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14540; AVX2-NEXT: vpsllq $15, %ymm0, %ymm3
14541; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14542; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14543; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14544; AVX2-NEXT: vpsllq $13, %ymm0, %ymm3
14545; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14546; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14547; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14548; AVX2-NEXT: vpsllq $11, %ymm0, %ymm3
14549; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14550; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14551; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14552; AVX2-NEXT: vpsllq $9, %ymm0, %ymm3
14553; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14554; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14555; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14556; AVX2-NEXT: vpsllq $7, %ymm0, %ymm3
14557; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14558; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14559; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14560; AVX2-NEXT: vpsllq $5, %ymm0, %ymm3
14561; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14562; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14563; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14564; AVX2-NEXT: vpsllq $3, %ymm0, %ymm3
14565; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14566; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14567; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14568; AVX2-NEXT: vpsllq $1, %ymm0, %ymm3
14569; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14570; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14571; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14572; AVX2-NEXT: vpsrlq $1, %ymm0, %ymm3
14573; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14574; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14575; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14576; AVX2-NEXT: vpsrlq $3, %ymm0, %ymm3
14577; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14578; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14579; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14580; AVX2-NEXT: vpsrlq $5, %ymm0, %ymm3
14581; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14582; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14583; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14584; AVX2-NEXT: vpsrlq $7, %ymm0, %ymm3
14585; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14586; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14587; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14588; AVX2-NEXT: vpsrlq $9, %ymm0, %ymm3
14589; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14590; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14591; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14592; AVX2-NEXT: vpsrlq $11, %ymm0, %ymm3
14593; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14594; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14595; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14596; AVX2-NEXT: vpsrlq $13, %ymm0, %ymm3
14597; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14598; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14599; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14600; AVX2-NEXT: vpsrlq $15, %ymm0, %ymm3
14601; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14602; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14603; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14604; AVX2-NEXT: vpsrlq $17, %ymm0, %ymm3
14605; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14606; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14607; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14608; AVX2-NEXT: vpsrlq $19, %ymm0, %ymm3
14609; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14610; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14611; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14612; AVX2-NEXT: vpsrlq $21, %ymm0, %ymm3
14613; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14614; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14615; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14616; AVX2-NEXT: vpsrlq $23, %ymm0, %ymm3
14617; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14618; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14619; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14620; AVX2-NEXT: vpsrlq $25, %ymm0, %ymm3
14621; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14622; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14623; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14624; AVX2-NEXT: vpsrlq $27, %ymm0, %ymm3
14625; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14626; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14627; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14628; AVX2-NEXT: vpsrlq $29, %ymm0, %ymm3
14629; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14630; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14631; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14632; AVX2-NEXT: vpsrlq $31, %ymm0, %ymm3
14633; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14634; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14635; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14636; AVX2-NEXT: vpsrlq $33, %ymm0, %ymm3
14637; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14638; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14639; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14640; AVX2-NEXT: vpsrlq $35, %ymm0, %ymm3
14641; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14642; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14643; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14644; AVX2-NEXT: vpsrlq $37, %ymm0, %ymm3
14645; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14646; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14647; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14648; AVX2-NEXT: vpsrlq $39, %ymm0, %ymm3
14649; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14650; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14651; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14652; AVX2-NEXT: vpsrlq $41, %ymm0, %ymm3
14653; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14654; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14655; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14656; AVX2-NEXT: vpsrlq $43, %ymm0, %ymm3
14657; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14658; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14659; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14660; AVX2-NEXT: vpsrlq $45, %ymm0, %ymm3
14661; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14662; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14663; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14664; AVX2-NEXT: vpsrlq $47, %ymm0, %ymm3
14665; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14666; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14667; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14668; AVX2-NEXT: vpsrlq $49, %ymm0, %ymm3
14669; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14670; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14671; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14672; AVX2-NEXT: vpsrlq $51, %ymm0, %ymm3
14673; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14674; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14675; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14676; AVX2-NEXT: vpsrlq $53, %ymm0, %ymm3
14677; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14678; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14679; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14680; AVX2-NEXT: vpsrlq $55, %ymm0, %ymm3
14681; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14682; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14683; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14684; AVX2-NEXT: vpsrlq $57, %ymm0, %ymm3
14685; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14686; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14687; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14688; AVX2-NEXT: vpsrlq $59, %ymm0, %ymm3
14689; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14690; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14691; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14692; AVX2-NEXT: vpsrlq $61, %ymm0, %ymm3
14693; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
14694; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
14695; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm2
14696; AVX2-NEXT: vpsrlq $63, %ymm0, %ymm0
14697; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm3
14698; AVX2-NEXT: vpand %ymm3, %ymm0, %ymm0
14699; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0
14700; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
14701; AVX2-NEXT: retq
14702;
14703; XOPAVX1-LABEL: test_bitreverse_v4i64:
14704; XOPAVX1: # BB#0:
14705; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
14706; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [87,86,85,84,83,82,81,80,95,94,93,92,91,90,89,88]
14707; XOPAVX1-NEXT: vpperm %xmm2, %xmm1, %xmm0, %xmm1
14708; XOPAVX1-NEXT: vpperm %xmm2, %xmm0, %xmm0, %xmm0
14709; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
14710; XOPAVX1-NEXT: retq
14711;
14712; XOPAVX2-LABEL: test_bitreverse_v4i64:
14713; XOPAVX2: # BB#0:
14714; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
14715; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [87,86,85,84,83,82,81,80,95,94,93,92,91,90,89,88]
14716; XOPAVX2-NEXT: vpperm %xmm2, %xmm1, %xmm0, %xmm1
14717; XOPAVX2-NEXT: vpperm %xmm2, %xmm0, %xmm0, %xmm0
14718; XOPAVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
14719; XOPAVX2-NEXT: retq
14720 %b = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a)
14721 ret <4 x i64> %b
14722}
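; NOTE (editorial annotation, not autogenerated): without XOP, the v4i64 expansion above
; is built one bit position at a time: a psllq/psrlq (or the AVX2 ymm forms) moves the
; bit into place, a pand against a single-bit-per-element constant isolates it, and a
; por folds it into the running result, for all 64 positions, which is why the check
; sequences are so long. For orientation only, the same value can be computed with a
; logarithmic swap network on a scalar i64 x (a hand-written sketch, not what the
; backend emits for these checks):
;   x = ((x & 0x5555555555555555) << 1) | ((x >> 1) & 0x5555555555555555)
;   x = ((x & 0x3333333333333333) << 2) | ((x >> 2) & 0x3333333333333333)
;   x = ((x & 0x0f0f0f0f0f0f0f0f) << 4) | ((x >> 4) & 0x0f0f0f0f0f0f0f0f)
;   x = byte swap of x (e.g. via llvm.bswap.i64)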
14723
14724declare i8 @llvm.bitreverse.i8(i8) readnone
14725declare i16 @llvm.bitreverse.i16(i16) readnone
14726declare i32 @llvm.bitreverse.i32(i32) readnone
14727declare i64 @llvm.bitreverse.i64(i64) readnone
14728
14729declare <16 x i8> @llvm.bitreverse.v16i8(<16 x i8>) readnone
14730declare <8 x i16> @llvm.bitreverse.v8i16(<8 x i16>) readnone
14731declare <4 x i32> @llvm.bitreverse.v4i32(<4 x i32>) readnone
14732declare <2 x i64> @llvm.bitreverse.v2i64(<2 x i64>) readnone
14733
14734declare <32 x i8> @llvm.bitreverse.v32i8(<32 x i8>) readnone
14735declare <16 x i16> @llvm.bitreverse.v16i16(<16 x i16>) readnone
14736declare <8 x i32> @llvm.bitreverse.v8i32(<8 x i32>) readnone
14737declare <4 x i64> @llvm.bitreverse.v4i64(<4 x i64>) readnone