Simon Pilgrim | b9e1f9c | 2016-07-30 16:01:30 +0000 | [diff] [blame] | 1 | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| 2 | ; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X32 |
| 3 | ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=-sse2,-sse | FileCheck %s --check-prefix=X64 |
| 4 | |
; Floating-point add of two <2 x double> vectors loaded from %p0 and %p1; the
; result is stored to %p2.
; Both check prefixes expect x87 code that handles each lane on its own:
; fldl / faddl / fstpl at byte offsets 0 and 8, with an fxch between the adds.
| 5 | define void @fadd_2f64_mem(<2 x double>* %p0, <2 x double>* %p1, <2 x double>* %p2) nounwind { |
| 6 | ; X32-LABEL: fadd_2f64_mem: |
| 7 | ; X32: # BB#0: |
| 8 | ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| 9 | ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| 10 | ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx |
| 11 | ; X32-NEXT: fldl 8(%edx) |
| 12 | ; X32-NEXT: fldl (%edx) |
| 13 | ; X32-NEXT: faddl (%ecx) |
| 14 | ; X32-NEXT: fxch %st(1) |
| 15 | ; X32-NEXT: faddl 8(%ecx) |
| 16 | ; X32-NEXT: fstpl 8(%eax) |
| 17 | ; X32-NEXT: fstpl (%eax) |
| 18 | ; X32-NEXT: retl |
| 19 | ; |
| 20 | ; X64-LABEL: fadd_2f64_mem: |
| 21 | ; X64: # BB#0: |
| 22 | ; X64-NEXT: fldl 8(%rdi) |
| 23 | ; X64-NEXT: fldl (%rdi) |
| 24 | ; X64-NEXT: faddl (%rsi) |
| 25 | ; X64-NEXT: fxch %st(1) |
| 26 | ; X64-NEXT: faddl 8(%rsi) |
| 27 | ; X64-NEXT: fstpl 8(%rdx) |
| 28 | ; X64-NEXT: fstpl (%rdx) |
| 29 | ; X64-NEXT: retq |
| 30 | %1 = load <2 x double>, <2 x double>* %p0 |
| 31 | %2 = load <2 x double>, <2 x double>* %p1 |
| 32 | %3 = fadd <2 x double> %1, %2 |
| 33 | store <2 x double> %3, <2 x double>* %p2 |
| 34 | ret void |
| 35 | } |
| 36 | |
; Floating-point add of two <4 x float> vectors loaded from %p0 and %p1; the
; result is stored to %p2.
; Both check prefixes expect x87 code working one lane at a time: four flds,
; four fadds (offsets 0, 4, 8, 12) separated by fxch, then four fstps.
| 37 | define void @fadd_4f32_mem(<4 x float>* %p0, <4 x float>* %p1, <4 x float>* %p2) nounwind { |
| 38 | ; X32-LABEL: fadd_4f32_mem: |
| 39 | ; X32: # BB#0: |
| 40 | ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| 41 | ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| 42 | ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx |
| 43 | ; X32-NEXT: flds 12(%edx) |
| 44 | ; X32-NEXT: flds 8(%edx) |
| 45 | ; X32-NEXT: flds 4(%edx) |
| 46 | ; X32-NEXT: flds (%edx) |
| 47 | ; X32-NEXT: fadds (%ecx) |
| 48 | ; X32-NEXT: fxch %st(1) |
| 49 | ; X32-NEXT: fadds 4(%ecx) |
| 50 | ; X32-NEXT: fxch %st(2) |
| 51 | ; X32-NEXT: fadds 8(%ecx) |
| 52 | ; X32-NEXT: fxch %st(3) |
| 53 | ; X32-NEXT: fadds 12(%ecx) |
| 54 | ; X32-NEXT: fstps 12(%eax) |
| 55 | ; X32-NEXT: fxch %st(2) |
| 56 | ; X32-NEXT: fstps 8(%eax) |
| 57 | ; X32-NEXT: fstps 4(%eax) |
| 58 | ; X32-NEXT: fstps (%eax) |
| 59 | ; X32-NEXT: retl |
| 60 | ; |
| 61 | ; X64-LABEL: fadd_4f32_mem: |
| 62 | ; X64: # BB#0: |
| 63 | ; X64-NEXT: flds 12(%rdi) |
| 64 | ; X64-NEXT: flds 8(%rdi) |
| 65 | ; X64-NEXT: flds 4(%rdi) |
| 66 | ; X64-NEXT: flds (%rdi) |
| 67 | ; X64-NEXT: fadds (%rsi) |
| 68 | ; X64-NEXT: fxch %st(1) |
| 69 | ; X64-NEXT: fadds 4(%rsi) |
| 70 | ; X64-NEXT: fxch %st(2) |
| 71 | ; X64-NEXT: fadds 8(%rsi) |
| 72 | ; X64-NEXT: fxch %st(3) |
| 73 | ; X64-NEXT: fadds 12(%rsi) |
| 74 | ; X64-NEXT: fstps 12(%rdx) |
| 75 | ; X64-NEXT: fxch %st(2) |
| 76 | ; X64-NEXT: fstps 8(%rdx) |
| 77 | ; X64-NEXT: fstps 4(%rdx) |
| 78 | ; X64-NEXT: fstps (%rdx) |
| 79 | ; X64-NEXT: retq |
| 80 | %1 = load <4 x float>, <4 x float>* %p0 |
| 81 | %2 = load <4 x float>, <4 x float>* %p1 |
| 82 | %3 = fadd <4 x float> %1, %2 |
| 83 | store <4 x float> %3, <4 x float>* %p2 |
| 84 | ret void |
| 85 | } |
| 86 | |
; Floating-point divide of the <4 x float> loaded from %p0 by the one loaded
; from %p1; the quotient is stored to %p2.
; Both check prefixes expect x87 code working one lane at a time: four flds,
; four fdivs (offsets 0, 4, 8, 12) separated by fxch, then four fstps.
| 87 | define void @fdiv_4f32_mem(<4 x float>* %p0, <4 x float>* %p1, <4 x float>* %p2) nounwind { |
| 88 | ; X32-LABEL: fdiv_4f32_mem: |
| 89 | ; X32: # BB#0: |
| 90 | ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| 91 | ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| 92 | ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx |
| 93 | ; X32-NEXT: flds 12(%edx) |
| 94 | ; X32-NEXT: flds 8(%edx) |
| 95 | ; X32-NEXT: flds 4(%edx) |
| 96 | ; X32-NEXT: flds (%edx) |
| 97 | ; X32-NEXT: fdivs (%ecx) |
| 98 | ; X32-NEXT: fxch %st(1) |
| 99 | ; X32-NEXT: fdivs 4(%ecx) |
| 100 | ; X32-NEXT: fxch %st(2) |
| 101 | ; X32-NEXT: fdivs 8(%ecx) |
| 102 | ; X32-NEXT: fxch %st(3) |
| 103 | ; X32-NEXT: fdivs 12(%ecx) |
| 104 | ; X32-NEXT: fstps 12(%eax) |
| 105 | ; X32-NEXT: fxch %st(2) |
| 106 | ; X32-NEXT: fstps 8(%eax) |
| 107 | ; X32-NEXT: fstps 4(%eax) |
| 108 | ; X32-NEXT: fstps (%eax) |
| 109 | ; X32-NEXT: retl |
| 110 | ; |
| 111 | ; X64-LABEL: fdiv_4f32_mem: |
| 112 | ; X64: # BB#0: |
| 113 | ; X64-NEXT: flds 12(%rdi) |
| 114 | ; X64-NEXT: flds 8(%rdi) |
| 115 | ; X64-NEXT: flds 4(%rdi) |
| 116 | ; X64-NEXT: flds (%rdi) |
| 117 | ; X64-NEXT: fdivs (%rsi) |
| 118 | ; X64-NEXT: fxch %st(1) |
| 119 | ; X64-NEXT: fdivs 4(%rsi) |
| 120 | ; X64-NEXT: fxch %st(2) |
| 121 | ; X64-NEXT: fdivs 8(%rsi) |
| 122 | ; X64-NEXT: fxch %st(3) |
| 123 | ; X64-NEXT: fdivs 12(%rsi) |
| 124 | ; X64-NEXT: fstps 12(%rdx) |
| 125 | ; X64-NEXT: fxch %st(2) |
| 126 | ; X64-NEXT: fstps 8(%rdx) |
| 127 | ; X64-NEXT: fstps 4(%rdx) |
| 128 | ; X64-NEXT: fstps (%rdx) |
| 129 | ; X64-NEXT: retq |
| 130 | %1 = load <4 x float>, <4 x float>* %p0 |
| 131 | %2 = load <4 x float>, <4 x float>* %p1 |
| 132 | %3 = fdiv <4 x float> %1, %2 |
| 133 | store <4 x float> %3, <4 x float>* %p2 |
| 134 | ret void |
| 135 | } |
| 136 | |
; Signed integer to float conversion: loads a <4 x i64> from %p0, applies
; sitofp to <4 x float>, and stores the result to %p1.
; Both check prefixes expect every 64-bit lane to be copied into a stack slot,
; loaded with fildll, and written out with fstps at offsets 12, 8, 4 and 0.
; The i686 expectations additionally set up %ebp, align %esp with andl $-8,
; and spill/reload two 32-bit words of the input.
| 137 | define void @sitofp_4i64_4f32_mem(<4 x i64>* %p0, <4 x float>* %p1) nounwind { |
| 138 | ; X32-LABEL: sitofp_4i64_4f32_mem: |
| 139 | ; X32: # BB#0: |
| 140 | ; X32-NEXT: pushl %ebp |
| 141 | ; X32-NEXT: movl %esp, %ebp |
| 142 | ; X32-NEXT: pushl %ebx |
| 143 | ; X32-NEXT: pushl %edi |
| 144 | ; X32-NEXT: pushl %esi |
| 145 | ; X32-NEXT: andl $-8, %esp |
| 146 | ; X32-NEXT: subl $48, %esp |
| 147 | ; X32-NEXT: movl 8(%ebp), %eax |
| 148 | ; X32-NEXT: movl 24(%eax), %ecx |
| 149 | ; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill |
| 150 | ; X32-NEXT: movl 28(%eax), %ecx |
| 151 | ; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill |
| 152 | ; X32-NEXT: movl 16(%eax), %esi |
| 153 | ; X32-NEXT: movl 20(%eax), %edi |
| 154 | ; X32-NEXT: movl 8(%eax), %ebx |
| 155 | ; X32-NEXT: movl 12(%eax), %edx |
| 156 | ; X32-NEXT: movl (%eax), %ecx |
| 157 | ; X32-NEXT: movl 4(%eax), %eax |
| 158 | ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| 159 | ; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) |
| 160 | ; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) |
| 161 | ; X32-NEXT: movl %ebx, {{[0-9]+}}(%esp) |
| 162 | ; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) |
| 163 | ; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) |
| 164 | ; X32-NEXT: movl (%esp), %eax # 4-byte Reload |
| 165 | ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| 166 | ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload |
| 167 | ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| 168 | ; X32-NEXT: movl 12(%ebp), %eax |
| 169 | ; X32-NEXT: fildll {{[0-9]+}}(%esp) |
| 170 | ; X32-NEXT: fildll {{[0-9]+}}(%esp) |
| 171 | ; X32-NEXT: fildll {{[0-9]+}}(%esp) |
| 172 | ; X32-NEXT: fildll {{[0-9]+}}(%esp) |
| 173 | ; X32-NEXT: fstps 12(%eax) |
| 174 | ; X32-NEXT: fstps 8(%eax) |
| 175 | ; X32-NEXT: fstps 4(%eax) |
| 176 | ; X32-NEXT: fstps (%eax) |
| 177 | ; X32-NEXT: leal -12(%ebp), %esp |
| 178 | ; X32-NEXT: popl %esi |
| 179 | ; X32-NEXT: popl %edi |
| 180 | ; X32-NEXT: popl %ebx |
| 181 | ; X32-NEXT: popl %ebp |
| 182 | ; X32-NEXT: retl |
| 183 | ; |
| 184 | ; X64-LABEL: sitofp_4i64_4f32_mem: |
| 185 | ; X64: # BB#0: |
| 186 | ; X64-NEXT: movq 24(%rdi), %rax |
| 187 | ; X64-NEXT: movq 16(%rdi), %rcx |
| 188 | ; X64-NEXT: movq (%rdi), %rdx |
| 189 | ; X64-NEXT: movq 8(%rdi), %rdi |
| 190 | ; X64-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| 191 | ; X64-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) |
| 192 | ; X64-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| 193 | ; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) |
| 194 | ; X64-NEXT: fildll -{{[0-9]+}}(%rsp) |
| 195 | ; X64-NEXT: fildll -{{[0-9]+}}(%rsp) |
| 196 | ; X64-NEXT: fildll -{{[0-9]+}}(%rsp) |
| 197 | ; X64-NEXT: fildll -{{[0-9]+}}(%rsp) |
| 198 | ; X64-NEXT: fstps 12(%rsi) |
| 199 | ; X64-NEXT: fstps 8(%rsi) |
| 200 | ; X64-NEXT: fstps 4(%rsi) |
| 201 | ; X64-NEXT: fstps (%rsi) |
| 202 | ; X64-NEXT: retq |
| 203 | %1 = load <4 x i64>, <4 x i64>* %p0 |
| 204 | %2 = sitofp <4 x i64> %1 to <4 x float> |
| 205 | store <4 x float> %2, <4 x float>* %p1 |
| 206 | ret void |
| 207 | } |
| 208 | |
; Signed integer to float conversion: loads a <4 x i32> from %p0, applies
; sitofp to <4 x float>, and stores the result to %p1.
; Both check prefixes expect each 32-bit lane to be copied into a stack slot,
; loaded with fildl, and written out with fstps at offsets 12, 8, 4 and 0.
| 209 | define void @sitofp_4i32_4f32_mem(<4 x i32>* %p0, <4 x float>* %p1) nounwind { |
| 210 | ; X32-LABEL: sitofp_4i32_4f32_mem: |
| 211 | ; X32: # BB#0: |
| 212 | ; X32-NEXT: pushl %edi |
| 213 | ; X32-NEXT: pushl %esi |
| 214 | ; X32-NEXT: subl $16, %esp |
| 215 | ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| 216 | ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| 217 | ; X32-NEXT: movl 12(%ecx), %edx |
| 218 | ; X32-NEXT: movl 8(%ecx), %esi |
| 219 | ; X32-NEXT: movl (%ecx), %edi |
| 220 | ; X32-NEXT: movl 4(%ecx), %ecx |
| 221 | ; X32-NEXT: movl %edi, (%esp) |
| 222 | ; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) |
| 223 | ; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) |
| 224 | ; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) |
| 225 | ; X32-NEXT: fildl (%esp) |
| 226 | ; X32-NEXT: fildl {{[0-9]+}}(%esp) |
| 227 | ; X32-NEXT: fildl {{[0-9]+}}(%esp) |
| 228 | ; X32-NEXT: fildl {{[0-9]+}}(%esp) |
| 229 | ; X32-NEXT: fstps 12(%eax) |
| 230 | ; X32-NEXT: fstps 8(%eax) |
| 231 | ; X32-NEXT: fstps 4(%eax) |
| 232 | ; X32-NEXT: fstps (%eax) |
| 233 | ; X32-NEXT: addl $16, %esp |
| 234 | ; X32-NEXT: popl %esi |
| 235 | ; X32-NEXT: popl %edi |
| 236 | ; X32-NEXT: retl |
| 237 | ; |
| 238 | ; X64-LABEL: sitofp_4i32_4f32_mem: |
| 239 | ; X64: # BB#0: |
| 240 | ; X64-NEXT: movl 12(%rdi), %eax |
| 241 | ; X64-NEXT: movl 8(%rdi), %ecx |
| 242 | ; X64-NEXT: movl (%rdi), %edx |
| 243 | ; X64-NEXT: movl 4(%rdi), %edi |
| 244 | ; X64-NEXT: movl %edx, -{{[0-9]+}}(%rsp) |
| 245 | ; X64-NEXT: movl %edi, -{{[0-9]+}}(%rsp) |
| 246 | ; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) |
| 247 | ; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp) |
| 248 | ; X64-NEXT: fildl -{{[0-9]+}}(%rsp) |
| 249 | ; X64-NEXT: fildl -{{[0-9]+}}(%rsp) |
| 250 | ; X64-NEXT: fildl -{{[0-9]+}}(%rsp) |
| 251 | ; X64-NEXT: fildl -{{[0-9]+}}(%rsp) |
| 252 | ; X64-NEXT: fstps 12(%rsi) |
| 253 | ; X64-NEXT: fstps 8(%rsi) |
| 254 | ; X64-NEXT: fstps 4(%rsi) |
| 255 | ; X64-NEXT: fstps (%rsi) |
| 256 | ; X64-NEXT: retq |
| 257 | %1 = load <4 x i32>, <4 x i32>* %p0 |
| 258 | %2 = sitofp <4 x i32> %1 to <4 x float> |
| 259 | store <4 x float> %2, <4 x float>* %p1 |
| 260 | ret void |
| 261 | } |
| 262 | |
; Integer add of two <2 x i64> vectors loaded from %p0 and %p1; the sum is
; stored to %p2.
; The i686 expectations form each 64-bit lane from an addl/adcl pair on its
; 32-bit halves; the x86_64 expectations use one addq per lane.
| 263 | define void @add_2i64_mem(<2 x i64>* %p0, <2 x i64>* %p1, <2 x i64>* %p2) nounwind { |
| 264 | ; X32-LABEL: add_2i64_mem: |
| 265 | ; X32: # BB#0: |
| 266 | ; X32-NEXT: pushl %ebx |
| 267 | ; X32-NEXT: pushl %edi |
| 268 | ; X32-NEXT: pushl %esi |
| 269 | ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| 270 | ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| 271 | ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx |
| 272 | ; X32-NEXT: movl 12(%edx), %esi |
| 273 | ; X32-NEXT: movl 8(%edx), %edi |
| 274 | ; X32-NEXT: movl (%edx), %ebx |
| 275 | ; X32-NEXT: movl 4(%edx), %edx |
| 276 | ; X32-NEXT: addl (%ecx), %ebx |
| 277 | ; X32-NEXT: adcl 4(%ecx), %edx |
| 278 | ; X32-NEXT: addl 8(%ecx), %edi |
| 279 | ; X32-NEXT: adcl 12(%ecx), %esi |
| 280 | ; X32-NEXT: movl %esi, 12(%eax) |
| 281 | ; X32-NEXT: movl %edi, 8(%eax) |
| 282 | ; X32-NEXT: movl %edx, 4(%eax) |
| 283 | ; X32-NEXT: movl %ebx, (%eax) |
| 284 | ; X32-NEXT: popl %esi |
| 285 | ; X32-NEXT: popl %edi |
| 286 | ; X32-NEXT: popl %ebx |
| 287 | ; X32-NEXT: retl |
| 288 | ; |
| 289 | ; X64-LABEL: add_2i64_mem: |
| 290 | ; X64: # BB#0: |
| 291 | ; X64-NEXT: movq (%rdi), %rax |
| 292 | ; X64-NEXT: movq 8(%rdi), %rcx |
| 293 | ; X64-NEXT: addq (%rsi), %rax |
| 294 | ; X64-NEXT: addq 8(%rsi), %rcx |
| 295 | ; X64-NEXT: movq %rcx, 8(%rdx) |
| 296 | ; X64-NEXT: movq %rax, (%rdx) |
| 297 | ; X64-NEXT: retq |
| 298 | %1 = load <2 x i64>, <2 x i64>* %p0 |
| 299 | %2 = load <2 x i64>, <2 x i64>* %p1 |
| 300 | %3 = add <2 x i64> %1, %2 |
| 301 | store <2 x i64> %3, <2 x i64>* %p2 |
| 302 | ret void |
| 303 | } |
| 304 | |
; Integer add of two <4 x i32> vectors loaded from %p0 and %p1; the sum is
; stored to %p2.
; Both check prefixes expect four separate addl instructions, one per lane
; (offsets 0, 4, 8, 12), operating on general-purpose registers.
| 305 | define void @add_4i32_mem(<4 x i32>* %p0, <4 x i32>* %p1, <4 x i32>* %p2) nounwind { |
| 306 | ; X32-LABEL: add_4i32_mem: |
| 307 | ; X32: # BB#0: |
| 308 | ; X32-NEXT: pushl %ebx |
| 309 | ; X32-NEXT: pushl %edi |
| 310 | ; X32-NEXT: pushl %esi |
| 311 | ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| 312 | ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| 313 | ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx |
| 314 | ; X32-NEXT: movl 12(%edx), %esi |
| 315 | ; X32-NEXT: movl 8(%edx), %edi |
| 316 | ; X32-NEXT: movl (%edx), %ebx |
| 317 | ; X32-NEXT: movl 4(%edx), %edx |
| 318 | ; X32-NEXT: addl (%ecx), %ebx |
| 319 | ; X32-NEXT: addl 4(%ecx), %edx |
| 320 | ; X32-NEXT: addl 8(%ecx), %edi |
| 321 | ; X32-NEXT: addl 12(%ecx), %esi |
| 322 | ; X32-NEXT: movl %esi, 12(%eax) |
| 323 | ; X32-NEXT: movl %edi, 8(%eax) |
| 324 | ; X32-NEXT: movl %edx, 4(%eax) |
| 325 | ; X32-NEXT: movl %ebx, (%eax) |
| 326 | ; X32-NEXT: popl %esi |
| 327 | ; X32-NEXT: popl %edi |
| 328 | ; X32-NEXT: popl %ebx |
| 329 | ; X32-NEXT: retl |
| 330 | ; |
| 331 | ; X64-LABEL: add_4i32_mem: |
| 332 | ; X64: # BB#0: |
| 333 | ; X64-NEXT: movl 12(%rdi), %eax |
| 334 | ; X64-NEXT: movl 8(%rdi), %ecx |
| 335 | ; X64-NEXT: movl (%rdi), %r8d |
| 336 | ; X64-NEXT: movl 4(%rdi), %edi |
| 337 | ; X64-NEXT: addl (%rsi), %r8d |
| 338 | ; X64-NEXT: addl 4(%rsi), %edi |
| 339 | ; X64-NEXT: addl 8(%rsi), %ecx |
| 340 | ; X64-NEXT: addl 12(%rsi), %eax |
| 341 | ; X64-NEXT: movl %eax, 12(%rdx) |
| 342 | ; X64-NEXT: movl %ecx, 8(%rdx) |
| 343 | ; X64-NEXT: movl %edi, 4(%rdx) |
| 344 | ; X64-NEXT: movl %r8d, (%rdx) |
| 345 | ; X64-NEXT: retq |
| 346 | %1 = load <4 x i32>, <4 x i32>* %p0 |
| 347 | %2 = load <4 x i32>, <4 x i32>* %p1 |
| 348 | %3 = add <4 x i32> %1, %2 |
| 349 | store <4 x i32> %3, <4 x i32>* %p2 |
| 350 | ret void |
| 351 | } |