; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512VL

; Overwrite element 0 of a <2 x i64> with all-ones (-1); element 1 is taken from %a.
define <2 x i64> @insert_v2i64_x1(<2 x i64> %a) {
; SSE2-LABEL: insert_v2i64_x1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v2i64_x1:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v2i64_x1:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v2i64_x1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: insert_v2i64_x1:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_v2i64_x1:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX2-NEXT:    retq
;
; AVX512-LABEL: insert_v2i64_x1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX512-NEXT:    retq
  %1 = insertelement <2 x i64> %a, i64 -1, i32 0
  ret <2 x i64> %1
}

; Overwrite element 2 of a <4 x i64> with all-ones (-1); elements 0, 1 and 3 are taken from %a.
define <4 x i64> @insert_v4i64_01x3(<4 x i64> %a) {
; SSE2-LABEL: insert_v4i64_01x3:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movlpd {{.*#+}} xmm1 = mem[0],xmm1[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v4i64_01x3:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movlpd {{.*#+}} xmm1 = mem[0],xmm1[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v4i64_01x3:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movlpd {{.*#+}} xmm1 = mem[0],xmm1[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v4i64_01x3:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: insert_v4i64_01x3:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_v4i64_01x3:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; AVX2-NEXT:    retq
;
; AVX512-LABEL: insert_v4i64_01x3:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; AVX512-NEXT:    retq
  %1 = insertelement <4 x i64> %a, i64 -1, i32 2
  ret <4 x i64> %1
}

; Overwrite element 2 of a <4 x i32> with all-ones (-1); elements 0, 1 and 3 are taken from %a.
define <4 x i32> @insert_v4i32_01x3(<4 x i32> %a) {
; SSE2-LABEL: insert_v4i32_01x3:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movl $-1, %eax
; SSE2-NEXT:    movd %eax, %xmm1
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v4i32_01x3:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movl $-1, %eax
; SSE3-NEXT:    movd %eax, %xmm1
; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v4i32_01x3:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movl $-1, %eax
; SSSE3-NEXT:    movd %eax, %xmm1
; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v4i32_01x3:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: insert_v4i32_01x3:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_v4i32_01x3:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; AVX2-NEXT:    retq
;
; AVX512-LABEL: insert_v4i32_01x3:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; AVX512-NEXT:    retq
  %1 = insertelement <4 x i32> %a, i32 -1, i32 2
  ret <4 x i32> %1
}

; Overwrite elements 0 and 6 of a <8 x i32> with all-ones (-1); the remaining elements are taken from %a.
define <8 x i32> @insert_v8i32_x12345x7(<8 x i32> %a) {
; SSE2-LABEL: insert_v8i32_x12345x7:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-NEXT:    movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSE2-NEXT:    movl $-1, %eax
; SSE2-NEXT:    movd %eax, %xmm2
; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v8i32_x12345x7:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE3-NEXT:    movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSE3-NEXT:    movl $-1, %eax
; SSE3-NEXT:    movd %eax, %xmm2
; SSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v8i32_x12345x7:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSSE3-NEXT:    movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSSE3-NEXT:    movl $-1, %eax
; SSSE3-NEXT:    movd %eax, %xmm2
; SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v8i32_x12345x7:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3,4,5,6,7]
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5],xmm1[6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: insert_v8i32_x12345x7:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5],ymm1[6],ymm0[7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_v8i32_x12345x7:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5],ymm1[6],ymm0[7]
; AVX2-NEXT:    retq
;
; AVX512-LABEL: insert_v8i32_x12345x7:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5],ymm1[6],ymm0[7]
; AVX512-NEXT:    retq
  %1 = insertelement <8 x i32> %a, i32 -1, i32 0
  %2 = insertelement <8 x i32> %1, i32 -1, i32 6
  ret <8 x i32> %2
}

; Overwrite elements 0 and 6 of a <8 x i16> with all-ones (-1); the remaining elements are taken from %a.
define <8 x i16> @insert_v8i16_x12345x7(<8 x i16> %a) {
; SSE2-LABEL: insert_v8i16_x12345x7:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movl $65535, %eax # imm = 0xFFFF
; SSE2-NEXT:    pinsrw $0, %eax, %xmm0
; SSE2-NEXT:    pinsrw $6, %eax, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v8i16_x12345x7:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movl $65535, %eax # imm = 0xFFFF
; SSE3-NEXT:    pinsrw $0, %eax, %xmm0
; SSE3-NEXT:    pinsrw $6, %eax, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v8i16_x12345x7:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movl $65535, %eax # imm = 0xFFFF
; SSSE3-NEXT:    pinsrw $0, %eax, %xmm0
; SSSE3-NEXT:    pinsrw $6, %eax, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v8i16_x12345x7:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5],xmm1[6],xmm0[7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v8i16_x12345x7:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5],xmm1[6],xmm0[7]
; AVX-NEXT:    retq
  %1 = insertelement <8 x i16> %a, i16 -1, i32 0
  %2 = insertelement <8 x i16> %1, i16 -1, i32 6
  ret <8 x i16> %2
}

; Overwrite elements 0, 6 and 15 of a <16 x i16> with all-ones (-1); the remaining elements are taken from %a.
define <16 x i16> @insert_v16i16_x12345x789ABCDEx(<16 x i16> %a) {
; SSE2-LABEL: insert_v16i16_x12345x789ABCDEx:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movl $65535, %eax # imm = 0xFFFF
; SSE2-NEXT:    pinsrw $0, %eax, %xmm0
; SSE2-NEXT:    pinsrw $6, %eax, %xmm0
; SSE2-NEXT:    pinsrw $7, %eax, %xmm1
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v16i16_x12345x789ABCDEx:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movl $65535, %eax # imm = 0xFFFF
; SSE3-NEXT:    pinsrw $0, %eax, %xmm0
; SSE3-NEXT:    pinsrw $6, %eax, %xmm0
; SSE3-NEXT:    pinsrw $7, %eax, %xmm1
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v16i16_x12345x789ABCDEx:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movl $65535, %eax # imm = 0xFFFF
; SSSE3-NEXT:    pinsrw $0, %eax, %xmm0
; SSSE3-NEXT:    pinsrw $6, %eax, %xmm0
; SSSE3-NEXT:    pinsrw $7, %eax, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v16i16_x12345x789ABCDEx:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3,4,5],xmm2[6],xmm0[7]
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6],xmm2[7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: insert_v16i16_x12345x789ABCDEx:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vorps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vorps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vorps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_v16i16_x12345x789ABCDEx:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX2-NEXT:    vpblendvb %ymm1, %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT:    vpblendvb %ymm1, %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0]
; AVX2-NEXT:    vpblendvb %ymm1, %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: insert_v16i16_x12345x789ABCDEx:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512F-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX512F-NEXT:    vpblendvb %ymm1, %ymm0, %ymm2, %ymm0
; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512F-NEXT:    vpblendvb %ymm1, %ymm0, %ymm2, %ymm0
; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0]
; AVX512F-NEXT:    vpblendvb %ymm1, %ymm0, %ymm2, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: insert_v16i16_x12345x789ABCDEx:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512VL-NEXT:    movw $1, %ax
; AVX512VL-NEXT:    kmovd %eax, %k1
; AVX512VL-NEXT:    vmovdqu16 %ymm1, %ymm0 {%k1}
; AVX512VL-NEXT:    movw $64, %ax
; AVX512VL-NEXT:    kmovd %eax, %k1
; AVX512VL-NEXT:    vmovdqu16 %ymm1, %ymm0 {%k1}
; AVX512VL-NEXT:    movw $-32768, %ax # imm = 0x8000
; AVX512VL-NEXT:    kmovd %eax, %k1
; AVX512VL-NEXT:    vmovdqu16 %ymm1, %ymm0 {%k1}
; AVX512VL-NEXT:    retq
  %1 = insertelement <16 x i16> %a, i16 -1, i32 0
  %2 = insertelement <16 x i16> %1, i16 -1, i32 6
  %3 = insertelement <16 x i16> %2, i16 -1, i32 15
  ret <16 x i16> %3
}

; Overwrite elements 0 and 15 of a <16 x i8> with all-ones (-1); the remaining elements are taken from %a.
define <16 x i8> @insert_v16i8_x123456789ABCDEx(<16 x i8> %a) {
; SSE2-LABEL: insert_v16i8_x123456789ABCDEx:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    movl $255, %eax
; SSE2-NEXT:    movd %eax, %xmm2
; SSE2-NEXT:    pandn %xmm2, %xmm1
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE2-NEXT:    pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0]
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v16i8_x123456789ABCDEx:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa {{.*#+}} xmm1 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; SSE3-NEXT:    pand %xmm1, %xmm0
; SSE3-NEXT:    movl $255, %eax
; SSE3-NEXT:    movd %eax, %xmm2
; SSE3-NEXT:    pandn %xmm2, %xmm1
; SSE3-NEXT:    por %xmm1, %xmm0
; SSE3-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE3-NEXT:    pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0]
; SSE3-NEXT:    por %xmm2, %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v16i8_x123456789ABCDEx:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = zero,xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; SSSE3-NEXT:    movl $255, %eax
; SSSE3-NEXT:    movd %eax, %xmm1
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    pshufb {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT:    por %xmm2, %xmm0
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zero
; SSSE3-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v16i8_x123456789ABCDEx:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movl $255, %eax
; SSE41-NEXT:    pinsrb $0, %eax, %xmm0
; SSE41-NEXT:    pinsrb $15, %eax, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v16i8_x123456789ABCDEx:
; AVX:       # %bb.0:
; AVX-NEXT:    movl $255, %eax
; AVX-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0
; AVX-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = insertelement <16 x i8> %a, i8 -1, i32 0
  %2 = insertelement <16 x i8> %1, i8 -1, i32 15
  ret <16 x i8> %2
}

; Overwrite elements 0, 15, 30 and 31 of a <32 x i8> with all-ones (-1); the remaining elements are taken from %a.
; NOTE(review): the function name marks lane 15 with 'z', but the body inserts -1 there exactly as it does
; for the lanes marked 'x'. The name is left unchanged because the generated label checks depend on it.
define <32 x i8> @insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx(<32 x i8> %a) {
; SSE2-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    movl $255, %eax
; SSE2-NEXT:    movd %eax, %xmm3
; SSE2-NEXT:    pandn %xmm3, %xmm2
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm3, %xmm4
; SSE2-NEXT:    pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0]
; SSE2-NEXT:    por %xmm4, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255]
; SSE2-NEXT:    pand %xmm5, %xmm1
; SSE2-NEXT:    pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1]
; SSE2-NEXT:    pandn %xmm3, %xmm5
; SSE2-NEXT:    por %xmm5, %xmm1
; SSE2-NEXT:    pand %xmm2, %xmm1
; SSE2-NEXT:    por %xmm4, %xmm1
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movdqa {{.*#+}} xmm2 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; SSE3-NEXT:    pand %xmm2, %xmm0
; SSE3-NEXT:    movl $255, %eax
; SSE3-NEXT:    movd %eax, %xmm3
; SSE3-NEXT:    pandn %xmm3, %xmm2
; SSE3-NEXT:    por %xmm2, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
; SSE3-NEXT:    pand %xmm2, %xmm0
; SSE3-NEXT:    movdqa %xmm3, %xmm4
; SSE3-NEXT:    pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0]
; SSE3-NEXT:    por %xmm4, %xmm0
; SSE3-NEXT:    movdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255]
; SSE3-NEXT:    pand %xmm5, %xmm1
; SSE3-NEXT:    pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1]
; SSE3-NEXT:    pandn %xmm3, %xmm5
; SSE3-NEXT:    por %xmm5, %xmm1
; SSE3-NEXT:    pand %xmm2, %xmm1
; SSE3-NEXT:    por %xmm4, %xmm1
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = zero,xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; SSSE3-NEXT:    movl $255, %eax
; SSSE3-NEXT:    movd %eax, %xmm2
; SSSE3-NEXT:    movdqa %xmm2, %xmm3
; SSSE3-NEXT:    pshufb {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT:    por %xmm3, %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,128]
; SSSE3-NEXT:    pshufb %xmm3, %xmm0
; SSSE3-NEXT:    movdqa %xmm2, %xmm4
; SSSE3-NEXT:    pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0]
; SSSE3-NEXT:    por %xmm4, %xmm0
; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13],zero,xmm1[15]
; SSSE3-NEXT:    pshufb {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0],zero
; SSSE3-NEXT:    por %xmm2, %xmm1
; SSSE3-NEXT:    pshufb %xmm3, %xmm1
; SSSE3-NEXT:    por %xmm4, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movl $255, %eax
; SSE41-NEXT:    pinsrb $0, %eax, %xmm0
; SSE41-NEXT:    pinsrb $15, %eax, %xmm0
; SSE41-NEXT:    pinsrb $14, %eax, %xmm1
; SSE41-NEXT:    pinsrb $15, %eax, %xmm1
; SSE41-NEXT:    retq
;
; AVX1-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
; AVX1:       # %bb.0:
; AVX1-NEXT:    movl $255, %eax
; AVX1-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm1
; AVX1-NEXT:    vpinsrb $15, %eax, %xmm1, %xmm1
; AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0
; AVX1-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
; AVX2:       # %bb.0:
; AVX2-NEXT:    movl $255, %eax
; AVX2-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm1
; AVX2-NEXT:    vpinsrb $15, %eax, %xmm1, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0
; AVX2-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
; AVX512:       # %bb.0:
; AVX512-NEXT:    movl $255, %eax
; AVX512-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm1
; AVX512-NEXT:    vpinsrb $15, %eax, %xmm1, %xmm1
; AVX512-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX512-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0
; AVX512-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
; AVX512-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX512-NEXT:    retq
  %1 = insertelement <32 x i8> %a, i8 -1, i32 0
  %2 = insertelement <32 x i8> %1, i8 -1, i32 15
  %3 = insertelement <32 x i8> %2, i8 -1, i32 30
  %4 = insertelement <32 x i8> %3, i8 -1, i32 31
  ret <32 x i8> %4
}