; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx,+aes,+pclmul < %s | FileCheck %s

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"

; Stack reload folding tests.
;
; By including a nop call with sideeffects we can force a partial register spill of the
; relevant registers and check that the reload is correctly folded into the instruction.
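;
; Each test clobbers (via the inline asm) every XMM or general purpose register except
; those holding the arguments, so the arguments must be spilled around the asm call; the
; CHECK lines then verify that the reload is folded into the memory operand of the
; instruction under test rather than emitted as a separate load.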

define <2 x i64> @stack_fold_aesdec(<2 x i64> %a0, <2 x i64> %a1) {
  ;CHECK-LABEL: stack_fold_aesdec
  ;CHECK: vaesdec {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64>, <2 x i64>) nounwind readnone

define <2 x i64> @stack_fold_aesdeclast(<2 x i64> %a0, <2 x i64> %a1) {
  ;CHECK-LABEL: stack_fold_aesdeclast
  ;CHECK: vaesdeclast {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64>, <2 x i64>) nounwind readnone

define <2 x i64> @stack_fold_aesenc(<2 x i64> %a0, <2 x i64> %a1) {
  ;CHECK-LABEL: stack_fold_aesenc
  ;CHECK: vaesenc {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64>, <2 x i64>) nounwind readnone

define <2 x i64> @stack_fold_aesenclast(<2 x i64> %a0, <2 x i64> %a1) {
  ;CHECK-LABEL: stack_fold_aesenclast
  ;CHECK: vaesenclast {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64>, <2 x i64>) nounwind readnone

define <2 x i64> @stack_fold_aesimc(<2 x i64> %a0) {
  ;CHECK-LABEL: stack_fold_aesimc
  ;CHECK: vaesimc {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64> %a0)
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64>) nounwind readnone

define <2 x i64> @stack_fold_aeskeygenassist(<2 x i64> %a0) {
  ;CHECK-LABEL: stack_fold_aeskeygenassist
  ;CHECK: vaeskeygenassist $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64> %a0, i8 7)
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64>, i8) nounwind readnone

define <4 x i32> @stack_fold_movd_load(i32 %a0) {
  ;CHECK-LABEL: stack_fold_movd_load
  ;CHECK: movd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = insertelement <4 x i32> zeroinitializer, i32 %a0, i32 0
  ; add forces execution domain
  %3 = add <4 x i32> %2, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %3
}

define i32 @stack_fold_movd_store(<4 x i32> %a0) {
  ;CHECK-LABEL: stack_fold_movd_store
  ;CHECK: movd {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 4-byte Folded Spill
  ; add forces execution domain
  %1 = add <4 x i32> %a0, <i32 1, i32 1, i32 1, i32 1>
  %2 = extractelement <4 x i32> %1, i32 0
  %3 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  ret i32 %2
}

define <2 x i64> @stack_fold_movq_load(<2 x i64> %a0) {
  ;CHECK-LABEL: stack_fold_movq_load
  ;CHECK: movq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <2 x i64> %a0, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2>
  ret <2 x i64> %2
}

define i64 @stack_fold_movq_store(<2 x i64> %a0) {
  ;CHECK-LABEL: stack_fold_movq_store
  ;CHECK: movq {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 8-byte Folded Spill
  %1 = extractelement <2 x i64> %a0, i32 0
  %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  ret i64 %1
}

define <8 x i16> @stack_fold_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
  ;CHECK-LABEL: stack_fold_mpsadbw
  ;CHECK: vmpsadbw $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i8 7)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i8) nounwind readnone

define <16 x i8> @stack_fold_pabsb(<16 x i8> %a0) {
  ;CHECK-LABEL: stack_fold_pabsb
  ;CHECK: vpabsb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %a0)
  ret <16 x i8> %2
}
declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone

define <4 x i32> @stack_fold_pabsd(<4 x i32> %a0) {
  ;CHECK-LABEL: stack_fold_pabsd
  ;CHECK: vpabsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %a0)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone

define <8 x i16> @stack_fold_pabsw(<8 x i16> %a0) {
  ;CHECK-LABEL: stack_fold_pabsw
  ;CHECK: vpabsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a0)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone

define <8 x i16> @stack_fold_packssdw(<4 x i32> %a0, <4 x i32> %a1) {
  ;CHECK-LABEL: stack_fold_packssdw
  ;CHECK: vpackssdw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone

define <16 x i8> @stack_fold_packsswb(<8 x i16> %a0, <8 x i16> %a1) {
  ;CHECK-LABEL: stack_fold_packsswb
  ;CHECK: vpacksswb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <16 x i8> %2
}
declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone

define <8 x i16> @stack_fold_packusdw(<4 x i32> %a0, <4 x i32> %a1) {
  ;CHECK-LABEL: stack_fold_packusdw
  ;CHECK: vpackusdw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone

define <16 x i8> @stack_fold_packuswb(<8 x i16> %a0, <8 x i16> %a1) {
  ;CHECK-LABEL: stack_fold_packuswb
  ;CHECK: vpackuswb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <16 x i8> %2
}
declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone

define <16 x i8> @stack_fold_paddb(<16 x i8> %a0, <16 x i8> %a1) {
  ;CHECK-LABEL: stack_fold_paddb
  ;CHECK: vpaddb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = add <16 x i8> %a0, %a1
  ret <16 x i8> %2
}

define <4 x i32> @stack_fold_paddd(<4 x i32> %a0, <4 x i32> %a1) {
  ;CHECK-LABEL: stack_fold_paddd
  ;CHECK: vpaddd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = add <4 x i32> %a0, %a1
  ret <4 x i32> %2
}

define <2 x i64> @stack_fold_paddq(<2 x i64> %a0, <2 x i64> %a1) {
  ;CHECK-LABEL: stack_fold_paddq
  ;CHECK: vpaddq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = add <2 x i64> %a0, %a1
  ret <2 x i64> %2
}

define <16 x i8> @stack_fold_paddsb(<16 x i8> %a0, <16 x i8> %a1) {
  ;CHECK-LABEL: stack_fold_paddsb
  ;CHECK: vpaddsb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %2
}
declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone

define <8 x i16> @stack_fold_paddsw(<8 x i16> %a0, <8 x i16> %a1) {
  ;CHECK-LABEL: stack_fold_paddsw
  ;CHECK: vpaddsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone

define <16 x i8> @stack_fold_paddusb(<16 x i8> %a0, <16 x i8> %a1) {
  ;CHECK-LABEL: stack_fold_paddusb
  ;CHECK: vpaddusb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %2
}
declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone

define <8 x i16> @stack_fold_paddusw(<8 x i16> %a0, <8 x i16> %a1) {
  ;CHECK-LABEL: stack_fold_paddusw
  ;CHECK: vpaddusw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone

define <8 x i16> @stack_fold_paddw(<8 x i16> %a0, <8 x i16> %a1) {
  ;CHECK-LABEL: stack_fold_paddw
  ;CHECK: vpaddw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = add <8 x i16> %a0, %a1
  ret <8 x i16> %2
}

define <16 x i8> @stack_fold_palignr(<16 x i8> %a0, <16 x i8> %a1) {
  ;CHECK-LABEL: stack_fold_palignr
  ;CHECK: vpalignr $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <16 x i8> %a1, <16 x i8> %a0, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
  ret <16 x i8> %2
}

define <16 x i8> @stack_fold_pand(<16 x i8> %a0, <16 x i8> %a1) {
  ;CHECK-LABEL: stack_fold_pand
  ;CHECK: vpand {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = and <16 x i8> %a0, %a1
  ; add forces execution domain
  %3 = add <16 x i8> %2, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %3
}

define <16 x i8> @stack_fold_pandn(<16 x i8> %a0, <16 x i8> %a1) {
  ;CHECK-LABEL: stack_fold_pandn
  ;CHECK: vpandn {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = xor <16 x i8> %a0, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %3 = and <16 x i8> %2, %a1
  ; add forces execution domain
  %4 = add <16 x i8> %3, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %4
}

define <16 x i8> @stack_fold_pavgb(<16 x i8> %a0, <16 x i8> %a1) {
  ;CHECK-LABEL: stack_fold_pavgb
  ;CHECK: vpavgb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %2
}
declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone

define <8 x i16> @stack_fold_pavgw(<8 x i16> %a0, <8 x i16> %a1) {
  ;CHECK-LABEL: stack_fold_pavgw
  ;CHECK: vpavgw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone

define <16 x i8> @stack_fold_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %c) {
  ;CHECK-LABEL: stack_fold_pblendvb
  ;CHECK: vpblendvb {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a1, <16 x i8> %c, <16 x i8> %a0)
  ret <16 x i8> %2
}
declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone

define <8 x i16> @stack_fold_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
  ;CHECK-LABEL: stack_fold_pblendw
  ;CHECK: vpblendw $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i8 7)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i8) nounwind readnone

define <2 x i64> @stack_fold_pclmulqdq(<2 x i64> %a0, <2 x i64> %a1) {
  ;CHECK-LABEL: stack_fold_pclmulqdq
  ;CHECK: vpclmulqdq $0, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %a0, <2 x i64> %a1, i8 0)
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.pclmulqdq(<2 x i64>, <2 x i64>, i8) nounwind readnone

define <16 x i8> @stack_fold_pcmpeqb(<16 x i8> %a0, <16 x i8> %a1) {
  ;CHECK-LABEL: stack_fold_pcmpeqb
  ;CHECK: vpcmpeqb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = icmp eq <16 x i8> %a0, %a1
  %3 = sext <16 x i1> %2 to <16 x i8>
  ret <16 x i8> %3
}

define <4 x i32> @stack_fold_pcmpeqd(<4 x i32> %a0, <4 x i32> %a1) {
  ;CHECK-LABEL: stack_fold_pcmpeqd
  ;CHECK: vpcmpeqd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = icmp eq <4 x i32> %a0, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <2 x i64> @stack_fold_pcmpeqq(<2 x i64> %a0, <2 x i64> %a1) {
  ;CHECK-LABEL: stack_fold_pcmpeqq
  ;CHECK: vpcmpeqq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = icmp eq <2 x i64> %a0, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <8 x i16> @stack_fold_pcmpeqw(<8 x i16> %a0, <8 x i16> %a1) {
  ;CHECK-LABEL: stack_fold_pcmpeqw
  ;CHECK: vpcmpeqw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = icmp eq <8 x i16> %a0, %a1
  %3 = sext <8 x i1> %2 to <8 x i16>
  ret <8 x i16> %3
}

define i32 @stack_fold_pcmpestri(<16 x i8> %a0, <16 x i8> %a1) {
  ;CHECK-LABEL: stack_fold_pcmpestri
  ;CHECK: vpcmpestri $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{rax},~{flags}"()
  %2 = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %a0, i32 7, <16 x i8> %a1, i32 7, i8 7)
  ret i32 %2
}
declare i32 @llvm.x86.sse42.pcmpestri128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone

define <16 x i8> @stack_fold_pcmpestrm(<16 x i8> %a0, <16 x i8> %a1) {
  ;CHECK-LABEL: stack_fold_pcmpestrm
  ;CHECK: vpcmpestrm $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{rax},~{flags}"()
  %2 = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %a1, i32 7, i8 7)
  ret <16 x i8> %2
}
declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone

define <16 x i8> @stack_fold_pcmpgtb(<16 x i8> %a0, <16 x i8> %a1) {
  ;CHECK-LABEL: stack_fold_pcmpgtb
  ;CHECK: vpcmpgtb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = icmp sgt <16 x i8> %a0, %a1
  %3 = sext <16 x i1> %2 to <16 x i8>
  ret <16 x i8> %3
}

define <4 x i32> @stack_fold_pcmpgtd(<4 x i32> %a0, <4 x i32> %a1) {
  ;CHECK-LABEL: stack_fold_pcmpgtd
  ;CHECK: vpcmpgtd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = icmp sgt <4 x i32> %a0, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <2 x i64> @stack_fold_pcmpgtq(<2 x i64> %a0, <2 x i64> %a1) {
  ;CHECK-LABEL: stack_fold_pcmpgtq
  ;CHECK: vpcmpgtq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = icmp sgt <2 x i64> %a0, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <8 x i16> @stack_fold_pcmpgtw(<8 x i16> %a0, <8 x i16> %a1) {
  ;CHECK-LABEL: stack_fold_pcmpgtw
  ;CHECK: vpcmpgtw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = icmp sgt <8 x i16> %a0, %a1
  %3 = sext <8 x i1> %2 to <8 x i16>
  ret <8 x i16> %3
}

define i32 @stack_fold_pcmpistri(<16 x i8> %a0, <16 x i8> %a1) {
  ;CHECK-LABEL: stack_fold_pcmpistri
  ;CHECK: vpcmpistri $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %a0, <16 x i8> %a1, i8 7)
  ret i32 %2
}
declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind readnone

define <16 x i8> @stack_fold_pcmpistrm(<16 x i8> %a0, <16 x i8> %a1) {
  ;CHECK-LABEL: stack_fold_pcmpistrm
  ;CHECK: vpcmpistrm $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1, i8 7)
  ret <16 x i8> %2
}
declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwind readnone

; TODO stack_fold_pextrb

define i32 @stack_fold_pextrd(<4 x i32> %a0) {
  ;CHECK-LABEL: stack_fold_pextrd
  ;CHECK: pextrd $1, {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 4-byte Folded Spill
  ;CHECK: movl {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Reload
  %1 = extractelement <4 x i32> %a0, i32 1
  %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  ret i32 %1
}

define i64 @stack_fold_pextrq(<2 x i64> %a0) {
  ;CHECK-LABEL: stack_fold_pextrq
  ;CHECK: pextrq $1, {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 8-byte Folded Spill
  ;CHECK: movq {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Reload
  %1 = extractelement <2 x i64> %a0, i32 1
  %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  ret i64 %1
}

; TODO stack_fold_pextrw

define <4 x i32> @stack_fold_phaddd(<4 x i32> %a0, <4 x i32> %a1) {
  ;CHECK-LABEL: stack_fold_phaddd
  ;CHECK: vphaddd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind readnone

define <8 x i16> @stack_fold_phaddsw(<8 x i16> %a0, <8 x i16> %a1) {
  ;CHECK-LABEL: stack_fold_phaddsw
  ;CHECK: vphaddsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16>, <8 x i16>) nounwind readnone

define <8 x i16> @stack_fold_phaddw(<8 x i16> %a0, <8 x i16> %a1) {
  ;CHECK-LABEL: stack_fold_phaddw
  ;CHECK: vphaddw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind readnone

define <8 x i16> @stack_fold_phminposuw(<8 x i16> %a0) {
  ;CHECK-LABEL: stack_fold_phminposuw
  ;CHECK: vphminposuw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %a0)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone

define <4 x i32> @stack_fold_phsubd(<4 x i32> %a0, <4 x i32> %a1) {
  ;CHECK-LABEL: stack_fold_phsubd
  ;CHECK: vphsubd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>) nounwind readnone

define <8 x i16> @stack_fold_phsubsw(<8 x i16> %a0, <8 x i16> %a1) {
  ;CHECK-LABEL: stack_fold_phsubsw
  ;CHECK: vphsubsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone

define <8 x i16> @stack_fold_phsubw(<8 x i16> %a0, <8 x i16> %a1) {
  ;CHECK-LABEL: stack_fold_phsubw
  ;CHECK: vphsubw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone

define <16 x i8> @stack_fold_pinsrb(<16 x i8> %a0, i8 %a1) {
  ;CHECK-LABEL: stack_fold_pinsrb
  ;CHECK: vpinsrb $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = insertelement <16 x i8> %a0, i8 %a1, i32 1
  ret <16 x i8> %2
}

define <4 x i32> @stack_fold_pinsrd(<4 x i32> %a0, i32 %a1) {
  ;CHECK-LABEL: stack_fold_pinsrd
  ;CHECK: vpinsrd $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = insertelement <4 x i32> %a0, i32 %a1, i32 1
  ret <4 x i32> %2
}

define <2 x i64> @stack_fold_pinsrq(<2 x i64> %a0, i64 %a1) {
  ;CHECK-LABEL: stack_fold_pinsrq
  ;CHECK: vpinsrq $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = insertelement <2 x i64> %a0, i64 %a1, i32 1
  ret <2 x i64> %2
}

define <8 x i16> @stack_fold_pinsrw(<8 x i16> %a0, i16 %a1) {
  ;CHECK-LABEL: stack_fold_pinsrw
  ;CHECK: vpinsrw $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = insertelement <8 x i16> %a0, i16 %a1, i32 1
  ret <8 x i16> %2
}

define <8 x i16> @stack_fold_pmaddubsw(<16 x i8> %a0, <16 x i8> %a1) {
  ;CHECK-LABEL: stack_fold_pmaddubsw
  ;CHECK: vpmaddubsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a0, <16 x i8> %a1)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) nounwind readnone

define <4 x i32> @stack_fold_pmaddwd(<8 x i16> %a0, <8 x i16> %a1) {
  ;CHECK-LABEL: stack_fold_pmaddwd
  ;CHECK: vpmaddwd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone

define <16 x i8> @stack_fold_pmaxsb(<16 x i8> %a0, <16 x i8> %a1) {
  ;CHECK-LABEL: stack_fold_pmaxsb
  ;CHECK: vpmaxsb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %2
}
declare <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8>, <16 x i8>) nounwind readnone

define <4 x i32> @stack_fold_pmaxsd(<4 x i32> %a0, <4 x i32> %a1) {
  ;CHECK-LABEL: stack_fold_pmaxsd
  ;CHECK: vpmaxsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone

define <8 x i16> @stack_fold_pmaxsw(<8 x i16> %a0, <8 x i16> %a1) {
  ;CHECK-LABEL: stack_fold_pmaxsw
  ;CHECK: vpmaxsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone

define <16 x i8> @stack_fold_pmaxub(<16 x i8> %a0, <16 x i8> %a1) {
  ;CHECK-LABEL: stack_fold_pmaxub
  ;CHECK: vpmaxub {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %2
}
declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone

define <4 x i32> @stack_fold_pmaxud(<4 x i32> %a0, <4 x i32> %a1) {
  ;CHECK-LABEL: stack_fold_pmaxud
  ;CHECK: vpmaxud {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone

define <8 x i16> @stack_fold_pmaxuw(<8 x i16> %a0, <8 x i16> %a1) {
  ;CHECK-LABEL: stack_fold_pmaxuw
  ;CHECK: vpmaxuw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16>, <8 x i16>) nounwind readnone

define <16 x i8> @stack_fold_pminsb(<16 x i8> %a0, <16 x i8> %a1) {
  ;CHECK-LABEL: stack_fold_pminsb
  ;CHECK: vpminsb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %2
}
declare <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8>, <16 x i8>) nounwind readnone

define <4 x i32> @stack_fold_pminsd(<4 x i32> %a0, <4 x i32> %a1) {
  ;CHECK-LABEL: stack_fold_pminsd
  ;CHECK: vpminsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone

define <8 x i16> @stack_fold_pminsw(<8 x i16> %a0, <8 x i16> %a1) {
  ;CHECK-LABEL: stack_fold_pminsw
  ;CHECK: vpminsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone

define <16 x i8> @stack_fold_pminub(<16 x i8> %a0, <16 x i8> %a1) {
  ;CHECK-LABEL: stack_fold_pminub
  ;CHECK: vpminub {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %2
}
declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone

define <4 x i32> @stack_fold_pminud(<4 x i32> %a0, <4 x i32> %a1) {
  ;CHECK-LABEL: stack_fold_pminud
  ;CHECK: vpminud {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone

define <8 x i16> @stack_fold_pminuw(<8 x i16> %a0, <8 x i16> %a1) {
  ;CHECK-LABEL: stack_fold_pminuw
  ;CHECK: vpminuw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone

define <4 x i32> @stack_fold_pmovsxbd(<16 x i8> %a0) {
  ;CHECK-LABEL: stack_fold_pmovsxbd
  ;CHECK: vpmovsxbd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %a0)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone

define <2 x i64> @stack_fold_pmovsxbq(<16 x i8> %a0) {
  ;CHECK-LABEL: stack_fold_pmovsxbq
  ;CHECK: pmovsxbq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %a0)
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>) nounwind readnone

define <8 x i16> @stack_fold_pmovsxbw(<16 x i8> %a0) {
  ;CHECK-LABEL: stack_fold_pmovsxbw
  ;CHECK: vpmovsxbw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %a0)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>) nounwind readnone

define <2 x i64> @stack_fold_pmovsxdq(<4 x i32> %a0) {
  ;CHECK-LABEL: stack_fold_pmovsxdq
  ;CHECK: vpmovsxdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %a0)
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>) nounwind readnone

define <4 x i32> @stack_fold_pmovsxwd(<8 x i16> %a0) {
  ;CHECK-LABEL: stack_fold_pmovsxwd
  ;CHECK: vpmovsxwd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %a0)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone

define <2 x i64> @stack_fold_pmovsxwq(<8 x i16> %a0) {
  ;CHECK-LABEL: stack_fold_pmovsxwq
  ;CHECK: vpmovsxwq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %a0)
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone

define <4 x i32> @stack_fold_pmovzxbd(<16 x i8> %a0) {
  ;CHECK-LABEL: stack_fold_pmovzxbd
  ;CHECK: vpmovzxbd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone

define <2 x i64> @stack_fold_pmovzxbq(<16 x i8> %a0) {
  ;CHECK-LABEL: stack_fold_pmovzxbq
  ;CHECK: vpmovzxbq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0)
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone

define <8 x i16> @stack_fold_pmovzxbw(<16 x i8> %a0) {
  ;CHECK-LABEL: stack_fold_pmovzxbw
  ;CHECK: vpmovzxbw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone

define <2 x i64> @stack_fold_pmovzxdq(<4 x i32> %a0) {
  ;CHECK-LABEL: stack_fold_pmovzxdq
  ;CHECK: vpmovzxdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0)
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone

define <4 x i32> @stack_fold_pmovzxwd(<8 x i16> %a0) {
  ;CHECK-LABEL: stack_fold_pmovzxwd
  ;CHECK: vpmovzxwd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %a0)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone

define <2 x i64> @stack_fold_pmovzxwq(<8 x i16> %a0) {
  ;CHECK-LABEL: stack_fold_pmovzxwq
  ;CHECK: vpmovzxwq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0)
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone

define <2 x i64> @stack_fold_pmuldq(<4 x i32> %a0, <4 x i32> %a1) {
  ;CHECK-LABEL: stack_fold_pmuldq
  ;CHECK: vpmuldq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a0, <4 x i32> %a1)
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone

define <8 x i16> @stack_fold_pmulhrsw(<8 x i16> %a0, <8 x i16> %a1) {
  ;CHECK-LABEL: stack_fold_pmulhrsw
  ;CHECK: vpmulhrsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>) nounwind readnone

define <8 x i16> @stack_fold_pmulhuw(<8 x i16> %a0, <8 x i16> %a1) {
  ;CHECK-LABEL: stack_fold_pmulhuw
  ;CHECK: vpmulhuw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone

define <8 x i16> @stack_fold_pmulhw(<8 x i16> %a0, <8 x i16> %a1) {
  ;CHECK-LABEL: stack_fold_pmulhw
  ;CHECK: vpmulhw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone

define <4 x i32> @stack_fold_pmulld(<4 x i32> %a0, <4 x i32> %a1) {
  ;CHECK-LABEL: stack_fold_pmulld
  ;CHECK: vpmulld {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = mul <4 x i32> %a0, %a1
  ret <4 x i32> %2
}

define <8 x i16> @stack_fold_pmullw(<8 x i16> %a0, <8 x i16> %a1) {
  ;CHECK-LABEL: stack_fold_pmullw
  ;CHECK: vpmullw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = mul <8 x i16> %a0, %a1
  ret <8 x i16> %2
}

define <2 x i64> @stack_fold_pmuludq(<4 x i32> %a0, <4 x i32> %a1) {
  ;CHECK-LABEL: stack_fold_pmuludq
  ;CHECK: vpmuludq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1)
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone

define <16 x i8> @stack_fold_por(<16 x i8> %a0, <16 x i8> %a1) {
  ;CHECK-LABEL: stack_fold_por
  ;CHECK: vpor {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = or <16 x i8> %a0, %a1
  ; add forces execution domain
  %3 = add <16 x i8> %2, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %3
}

define <2 x i64> @stack_fold_psadbw(<16 x i8> %a0, <16 x i8> %a1) {
  ;CHECK-LABEL: stack_fold_psadbw
  ;CHECK: vpsadbw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1)
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone

854define <16 x i8> @stack_fold_pshufb(<16 x i8> %a0, <16 x i8> %a1) {
855 ;CHECK-LABEL: stack_fold_pshufb
856 ;CHECK: vpshufb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
857 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
858 %2 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> %a1)
859 ret <16 x i8> %2
860}
861declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone
862
863define <4 x i32> @stack_fold_pshufd(<4 x i32> %a0) {
864 ;CHECK-LABEL: stack_fold_pshufd
865 ;CHECK: vpshufd $27, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
866 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i32> %2
}

define <8 x i16> @stack_fold_pshufhw(<8 x i16> %a0) {
  ;CHECK-LABEL: stack_fold_pshufhw
  ;CHECK: vpshufhw $11, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
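  ; upper lanes become 7,6,4,4 while the lower lanes pass through, matching the $11 (0b00001011) immediate checked above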
  %2 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 4, i32 4>
  ret <8 x i16> %2
}

define <8 x i16> @stack_fold_pshuflw(<8 x i16> %a0) {
  ;CHECK-LABEL: stack_fold_pshuflw
  ;CHECK: vpshuflw $27, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
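  ; lower lanes reversed, upper lanes pass through, matching the $27 immediate checked above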
  %2 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %2
}

define <16 x i8> @stack_fold_psignb(<16 x i8> %a0, <16 x i8> %a1) {
  ;CHECK-LABEL: stack_fold_psignb
  ;CHECK: vpsignb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
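  ; psign negates, zeroes or keeps each lane of %a0 according to the sign of the matching lane of %a1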
  %2 = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %2
}
declare <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8>, <16 x i8>) nounwind readnone

define <4 x i32> @stack_fold_psignd(<4 x i32> %a0, <4 x i32> %a1) {
  ;CHECK-LABEL: stack_fold_psignd
  ;CHECK: vpsignd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32>, <4 x i32>) nounwind readnone

define <8 x i16> @stack_fold_psignw(<8 x i16> %a0, <8 x i16> %a1) {
  ;CHECK-LABEL: stack_fold_psignw
  ;CHECK: vpsignw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16>, <8 x i16>) nounwind readnone

define <4 x i32> @stack_fold_pslld(<4 x i32> %a0, <4 x i32> %a1) {
  ;CHECK-LABEL: stack_fold_pslld
  ;CHECK: vpslld {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
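  ; the shift count comes from the low 64 bits of the (reloaded) second operand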
  %2 = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone

define <2 x i64> @stack_fold_psllq(<2 x i64> %a0, <2 x i64> %a1) {
  ;CHECK-LABEL: stack_fold_psllq
  ;CHECK: vpsllq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone

define <8 x i16> @stack_fold_psllw(<8 x i16> %a0, <8 x i16> %a1) {
  ;CHECK-LABEL: stack_fold_psllw
  ;CHECK: vpsllw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone

define <4 x i32> @stack_fold_psrad(<4 x i32> %a0, <4 x i32> %a1) {
  ;CHECK-LABEL: stack_fold_psrad
  ;CHECK: vpsrad {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
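  ; arithmetic right shift: sign bits are shifted in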
  %2 = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone

define <8 x i16> @stack_fold_psraw(<8 x i16> %a0, <8 x i16> %a1) {
  ;CHECK-LABEL: stack_fold_psraw
  ;CHECK: vpsraw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone

define <4 x i32> @stack_fold_psrld(<4 x i32> %a0, <4 x i32> %a1) {
  ;CHECK-LABEL: stack_fold_psrld
  ;CHECK: vpsrld {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone

define <2 x i64> @stack_fold_psrlq(<2 x i64> %a0, <2 x i64> %a1) {
  ;CHECK-LABEL: stack_fold_psrlq
  ;CHECK: vpsrlq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone

define <8 x i16> @stack_fold_psrlw(<8 x i16> %a0, <8 x i16> %a1) {
  ;CHECK-LABEL: stack_fold_psrlw
  ;CHECK: vpsrlw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone

define <16 x i8> @stack_fold_psubb(<16 x i8> %a0, <16 x i8> %a1) {
  ;CHECK-LABEL: stack_fold_psubb
  ;CHECK: vpsubb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = sub <16 x i8> %a0, %a1
  ret <16 x i8> %2
}

define <4 x i32> @stack_fold_psubd(<4 x i32> %a0, <4 x i32> %a1) {
  ;CHECK-LABEL: stack_fold_psubd
  ;CHECK: vpsubd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = sub <4 x i32> %a0, %a1
  ret <4 x i32> %2
}

define <2 x i64> @stack_fold_psubq(<2 x i64> %a0, <2 x i64> %a1) {
  ;CHECK-LABEL: stack_fold_psubq
  ;CHECK: vpsubq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = sub <2 x i64> %a0, %a1
  ret <2 x i64> %2
}

define <16 x i8> @stack_fold_psubsb(<16 x i8> %a0, <16 x i8> %a1) {
  ;CHECK-LABEL: stack_fold_psubsb
  ;CHECK: vpsubsb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
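  ; subtraction with signed saturation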
  %2 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %2
}
declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone

define <8 x i16> @stack_fold_psubsw(<8 x i16> %a0, <8 x i16> %a1) {
  ;CHECK-LABEL: stack_fold_psubsw
  ;CHECK: vpsubsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone

define <16 x i8> @stack_fold_psubusb(<16 x i8> %a0, <16 x i8> %a1) {
  ;CHECK-LABEL: stack_fold_psubusb
  ;CHECK: vpsubusb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
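  ; subtraction with unsigned saturation (clamps at zero)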
  %2 = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %2
}
declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone

define <8 x i16> @stack_fold_psubusw(<8 x i16> %a0, <8 x i16> %a1) {
  ;CHECK-LABEL: stack_fold_psubusw
  ;CHECK: vpsubusw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone

define <8 x i16> @stack_fold_psubw(<8 x i16> %a0, <8 x i16> %a1) {
  ;CHECK-LABEL: stack_fold_psubw
  ;CHECK: vpsubw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = sub <8 x i16> %a0, %a1
  ret <8 x i16> %2
}

define i32 @stack_fold_ptest(<2 x i64> %a0, <2 x i64> %a1) {
  ;CHECK-LABEL: stack_fold_ptest
  ;CHECK: vptest {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
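  ; ptestc returns the carry flag set by vptest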
  %2 = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %a1)
  ret i32 %2
}
declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone

define i32 @stack_fold_ptest_ymm(<4 x i64> %a0, <4 x i64> %a1) {
  ;CHECK-LABEL: stack_fold_ptest_ymm
  ;CHECK: vptest {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a0, <4 x i64> %a1)
  ret i32 %2
}
declare i32 @llvm.x86.avx.ptestc.256(<4 x i64>, <4 x i64>) nounwind readnone

define <16 x i8> @stack_fold_punpckhbw(<16 x i8> %a0, <16 x i8> %a1) {
  ;CHECK-LABEL: stack_fold_punpckhbw
  ;CHECK: vpunpckhbw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
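  ; interleave the high 8 bytes of %a0 and %a1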
  %2 = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
  ret <16 x i8> %2
}

define <4 x i32> @stack_fold_punpckhdq(<4 x i32> %a0, <4 x i32> %a1) {
  ;CHECK-LABEL: stack_fold_punpckhdq
  ;CHECK: vpunpckhdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  ; add forces execution domain
  %3 = add <4 x i32> %2, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %3
}

define <2 x i64> @stack_fold_punpckhqdq(<2 x i64> %a0, <2 x i64> %a1) {
  ;CHECK-LABEL: stack_fold_punpckhqdq
  ;CHECK: vpunpckhqdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 1, i32 3>
  ; add forces execution domain
  %3 = add <2 x i64> %2, <i64 1, i64 1>
  ret <2 x i64> %3
}

define <8 x i16> @stack_fold_punpckhwd(<8 x i16> %a0, <8 x i16> %a1) {
  ;CHECK-LABEL: stack_fold_punpckhwd
  ;CHECK: vpunpckhwd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  ret <8 x i16> %2
}

define <16 x i8> @stack_fold_punpcklbw(<16 x i8> %a0, <16 x i8> %a1) {
  ;CHECK-LABEL: stack_fold_punpcklbw
  ;CHECK: vpunpcklbw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
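  ; interleave the low 8 bytes of %a0 and %a1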
  %2 = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
  ret <16 x i8> %2
}

define <4 x i32> @stack_fold_punpckldq(<4 x i32> %a0, <4 x i32> %a1) {
  ;CHECK-LABEL: stack_fold_punpckldq
  ;CHECK: vpunpckldq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  ; add forces execution domain
  %3 = add <4 x i32> %2, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %3
}

define <2 x i64> @stack_fold_punpcklqdq(<2 x i64> %a0, <2 x i64> %a1) {
  ;CHECK-LABEL: stack_fold_punpcklqdq
  ;CHECK: vpunpcklqdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 0, i32 2>
  ; add forces execution domain
  %3 = add <2 x i64> %2, <i64 1, i64 1>
  ret <2 x i64> %3
}

define <8 x i16> @stack_fold_punpcklwd(<8 x i16> %a0, <8 x i16> %a1) {
  ;CHECK-LABEL: stack_fold_punpcklwd
  ;CHECK: vpunpcklwd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  ret <8 x i16> %2
}

define <16 x i8> @stack_fold_pxor(<16 x i8> %a0, <16 x i8> %a1) {
  ;CHECK-LABEL: stack_fold_pxor
  ;CHECK: vpxor {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = xor <16 x i8> %a0, %a1
  ; add forces execution domain
  %3 = add <16 x i8> %2, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %3
}