| Simon Pilgrim | c6b5572 | 2017-03-10 16:59:43 +0000 | [diff] [blame] | 1 | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py | 
|  | 2 | ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+mmx,+sse2 | FileCheck %s --check-prefix=X86 | 
|  | 3 | ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+sse2 | FileCheck %s --check-prefix=X64 | 
|  | 4 |  | 
| Simon Pilgrim | c7c5aa4 | 2017-03-28 21:32:11 +0000 | [diff] [blame] | 5 | ; If we are transferring XMM conversion results to MMX registers we could use the MMX equivalents | 
| Simon Pilgrim | ed655f0 | 2017-03-10 17:23:55 +0000 | [diff] [blame] | 6 | ; (CVTPD2PI/CVTTPD2PI + CVTPS2PI/CVTTPS2PI) without affecting rounding/exceptions etc. | 
| Simon Pilgrim | c6b5572 | 2017-03-10 16:59:43 +0000 | [diff] [blame] | 7 |  | 
; cvt_v2f64_v2i32: converts the <2 x double> argument with the SSE2 cvtpd2dq
; intrinsic, takes the low 64 bits of the <4 x i32> result (element 0 of the
; <2 x i64> view), reinterprets them as x86_mmx, adds the value to itself with
; the MMX padd.d intrinsic and stores the result through the pointer argument.
; The expected output on both targets performs the conversion with a single
; cvtpd2pi straight into %mm0 instead of converting in XMM and moving across.
|  | 8 | define void @cvt_v2f64_v2i32(<2 x double>, <1 x i64>*) nounwind { | 
|  | 9 | ; X86-LABEL: cvt_v2f64_v2i32: | 
|  | 10 | ; X86:       # BB#0: | 
|  | 11 | ; X86-NEXT:    pushl %ebp | 
|  | 12 | ; X86-NEXT:    movl %esp, %ebp | 
|  | 13 | ; X86-NEXT:    andl $-8, %esp | 
| Simon Pilgrim | c7c5aa4 | 2017-03-28 21:32:11 +0000 | [diff] [blame] | 14 | ; X86-NEXT:    subl $8, %esp | 
| Simon Pilgrim | c6b5572 | 2017-03-10 16:59:43 +0000 | [diff] [blame] | 15 | ; X86-NEXT:    movl 8(%ebp), %eax | 
| Simon Pilgrim | c7c5aa4 | 2017-03-28 21:32:11 +0000 | [diff] [blame] | 16 | ; X86-NEXT:    cvtpd2pi %xmm0, %mm0 | 
| Simon Pilgrim | c6b5572 | 2017-03-10 16:59:43 +0000 | [diff] [blame] | 17 | ; X86-NEXT:    paddd %mm0, %mm0 | 
|  | 18 | ; X86-NEXT:    movq %mm0, (%esp) | 
|  | 19 | ; X86-NEXT:    movl (%esp), %ecx | 
|  | 20 | ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx | 
|  | 21 | ; X86-NEXT:    movl %edx, 4(%eax) | 
|  | 22 | ; X86-NEXT:    movl %ecx, (%eax) | 
|  | 23 | ; X86-NEXT:    movl %ebp, %esp | 
|  | 24 | ; X86-NEXT:    popl %ebp | 
|  | 25 | ; X86-NEXT:    retl | 
|  | 26 | ; | 
|  | 27 | ; X64-LABEL: cvt_v2f64_v2i32: | 
|  | 28 | ; X64:       # BB#0: | 
| Simon Pilgrim | c7c5aa4 | 2017-03-28 21:32:11 +0000 | [diff] [blame] | 29 | ; X64-NEXT:    cvtpd2pi %xmm0, %mm0 | 
| Simon Pilgrim | c6b5572 | 2017-03-10 16:59:43 +0000 | [diff] [blame] | 30 | ; X64-NEXT:    paddd %mm0, %mm0 | 
|  | 31 | ; X64-NEXT:    movq %mm0, (%rdi) | 
|  | 32 | ; X64-NEXT:    retq | 
|  | 33 | %3 = tail call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %0) | 
|  | 34 | %4 = bitcast <4 x i32> %3 to <2 x i64> | 
|  | 35 | %5 = extractelement <2 x i64> %4, i32 0 | 
|  | 36 | %6 = bitcast i64 %5 to x86_mmx | 
|  | 37 | %7 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %6, x86_mmx %6) | 
|  | 38 | %8 = bitcast x86_mmx %7 to i64 | 
|  | 39 | %9 = insertelement <1 x i64> undef, i64 %8, i32 0 | 
|  | 40 | store <1 x i64> %9, <1 x i64>* %1 | 
|  | 41 | ret void | 
|  | 42 | } | 
|  | 43 |  | 
; cvtt_v2f64_v2i32: truncating counterpart of the cvtpd2dq test. The IR calls
; the SSE2 cvttpd2dq intrinsic, keeps the low 64 bits of its result as an
; x86_mmx value, doubles it with padd.d and stores it through the pointer
; argument. Both targets are expected to emit cvttpd2pi directly into %mm0.
|  | 44 | define void @cvtt_v2f64_v2i32(<2 x double>, <1 x i64>*) nounwind { | 
|  | 45 | ; X86-LABEL: cvtt_v2f64_v2i32: | 
|  | 46 | ; X86:       # BB#0: | 
|  | 47 | ; X86-NEXT:    pushl %ebp | 
|  | 48 | ; X86-NEXT:    movl %esp, %ebp | 
|  | 49 | ; X86-NEXT:    andl $-8, %esp | 
| Simon Pilgrim | c7c5aa4 | 2017-03-28 21:32:11 +0000 | [diff] [blame] | 50 | ; X86-NEXT:    subl $8, %esp | 
| Simon Pilgrim | c6b5572 | 2017-03-10 16:59:43 +0000 | [diff] [blame] | 51 | ; X86-NEXT:    movl 8(%ebp), %eax | 
| Simon Pilgrim | c7c5aa4 | 2017-03-28 21:32:11 +0000 | [diff] [blame] | 52 | ; X86-NEXT:    cvttpd2pi %xmm0, %mm0 | 
| Simon Pilgrim | c6b5572 | 2017-03-10 16:59:43 +0000 | [diff] [blame] | 53 | ; X86-NEXT:    paddd %mm0, %mm0 | 
|  | 54 | ; X86-NEXT:    movq %mm0, (%esp) | 
|  | 55 | ; X86-NEXT:    movl (%esp), %ecx | 
|  | 56 | ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx | 
|  | 57 | ; X86-NEXT:    movl %edx, 4(%eax) | 
|  | 58 | ; X86-NEXT:    movl %ecx, (%eax) | 
|  | 59 | ; X86-NEXT:    movl %ebp, %esp | 
|  | 60 | ; X86-NEXT:    popl %ebp | 
|  | 61 | ; X86-NEXT:    retl | 
|  | 62 | ; | 
|  | 63 | ; X64-LABEL: cvtt_v2f64_v2i32: | 
|  | 64 | ; X64:       # BB#0: | 
| Simon Pilgrim | c7c5aa4 | 2017-03-28 21:32:11 +0000 | [diff] [blame] | 65 | ; X64-NEXT:    cvttpd2pi %xmm0, %mm0 | 
| Simon Pilgrim | c6b5572 | 2017-03-10 16:59:43 +0000 | [diff] [blame] | 66 | ; X64-NEXT:    paddd %mm0, %mm0 | 
|  | 67 | ; X64-NEXT:    movq %mm0, (%rdi) | 
|  | 68 | ; X64-NEXT:    retq | 
|  | 69 | %3 = tail call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %0) | 
|  | 70 | %4 = bitcast <4 x i32> %3 to <2 x i64> | 
|  | 71 | %5 = extractelement <2 x i64> %4, i32 0 | 
|  | 72 | %6 = bitcast i64 %5 to x86_mmx | 
|  | 73 | %7 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %6, x86_mmx %6) | 
|  | 74 | %8 = bitcast x86_mmx %7 to i64 | 
|  | 75 | %9 = insertelement <1 x i64> undef, i64 %8, i32 0 | 
|  | 76 | store <1 x i64> %9, <1 x i64>* %1 | 
|  | 77 | ret void | 
|  | 78 | } | 
|  | 79 |  | 
; fptosi_v2f64_v2i32: uses the generic fptosi instruction (<2 x double> to
; <2 x i32>) rather than an SSE2 intrinsic, bitcasts the result straight to
; x86_mmx, doubles it with padd.d and stores it through the pointer argument.
; NOTE(review): unlike the intrinsic-based tests, the expected output recorded
; below still converts in XMM (cvttpd2dq) and then round-trips the value
; through a stack slot (movlpd store + movq load) to reach %mm0; cvttpd2pi is
; not used for this pattern. The i686 frame is 16 bytes here because of the
; extra spill slot.
|  | 80 | define void @fptosi_v2f64_v2i32(<2 x double>, <1 x i64>*) nounwind { | 
|  | 81 | ; X86-LABEL: fptosi_v2f64_v2i32: | 
|  | 82 | ; X86:       # BB#0: | 
|  | 83 | ; X86-NEXT:    pushl %ebp | 
|  | 84 | ; X86-NEXT:    movl %esp, %ebp | 
|  | 85 | ; X86-NEXT:    andl $-8, %esp | 
|  | 86 | ; X86-NEXT:    subl $16, %esp | 
|  | 87 | ; X86-NEXT:    movl 8(%ebp), %eax | 
|  | 88 | ; X86-NEXT:    cvttpd2dq %xmm0, %xmm0 | 
|  | 89 | ; X86-NEXT:    movlpd %xmm0, {{[0-9]+}}(%esp) | 
|  | 90 | ; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0 | 
|  | 91 | ; X86-NEXT:    paddd %mm0, %mm0 | 
|  | 92 | ; X86-NEXT:    movq %mm0, (%esp) | 
|  | 93 | ; X86-NEXT:    movl (%esp), %ecx | 
|  | 94 | ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx | 
|  | 95 | ; X86-NEXT:    movl %edx, 4(%eax) | 
|  | 96 | ; X86-NEXT:    movl %ecx, (%eax) | 
|  | 97 | ; X86-NEXT:    movl %ebp, %esp | 
|  | 98 | ; X86-NEXT:    popl %ebp | 
|  | 99 | ; X86-NEXT:    retl | 
|  | 100 | ; | 
|  | 101 | ; X64-LABEL: fptosi_v2f64_v2i32: | 
|  | 102 | ; X64:       # BB#0: | 
|  | 103 | ; X64-NEXT:    cvttpd2dq %xmm0, %xmm0 | 
|  | 104 | ; X64-NEXT:    movlpd %xmm0, -{{[0-9]+}}(%rsp) | 
|  | 105 | ; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %mm0 | 
|  | 106 | ; X64-NEXT:    paddd %mm0, %mm0 | 
|  | 107 | ; X64-NEXT:    movq %mm0, (%rdi) | 
|  | 108 | ; X64-NEXT:    retq | 
|  | 109 | %3 = fptosi <2 x double> %0 to <2 x i32> | 
|  | 110 | %4 = bitcast <2 x i32> %3 to x86_mmx | 
|  | 111 | %5 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %4, x86_mmx %4) | 
|  | 112 | %6 = bitcast x86_mmx %5 to i64 | 
|  | 113 | %7 = insertelement <1 x i64> undef, i64 %6, i32 0 | 
|  | 114 | store <1 x i64> %7, <1 x i64>* %1 | 
|  | 115 | ret void | 
|  | 116 | } | 
|  | 117 |  | 
; cvt_v2f32_v2i32: converts the <4 x float> argument with the SSE2 cvtps2dq
; intrinsic, keeps only the low 64 bits of the <4 x i32> result (two i32
; lanes) as an x86_mmx value, doubles it with padd.d and stores it through the
; pointer argument. Both targets are expected to emit cvtps2pi directly into
; %mm0.
|  | 118 | define void @cvt_v2f32_v2i32(<4 x float>, <1 x i64>*) nounwind { | 
|  | 119 | ; X86-LABEL: cvt_v2f32_v2i32: | 
|  | 120 | ; X86:       # BB#0: | 
|  | 121 | ; X86-NEXT:    pushl %ebp | 
|  | 122 | ; X86-NEXT:    movl %esp, %ebp | 
|  | 123 | ; X86-NEXT:    andl $-8, %esp | 
| Simon Pilgrim | c7c5aa4 | 2017-03-28 21:32:11 +0000 | [diff] [blame] | 124 | ; X86-NEXT:    subl $8, %esp | 
| Simon Pilgrim | c6b5572 | 2017-03-10 16:59:43 +0000 | [diff] [blame] | 125 | ; X86-NEXT:    movl 8(%ebp), %eax | 
| Simon Pilgrim | c7c5aa4 | 2017-03-28 21:32:11 +0000 | [diff] [blame] | 126 | ; X86-NEXT:    cvtps2pi %xmm0, %mm0 | 
| Simon Pilgrim | c6b5572 | 2017-03-10 16:59:43 +0000 | [diff] [blame] | 127 | ; X86-NEXT:    paddd %mm0, %mm0 | 
|  | 128 | ; X86-NEXT:    movq %mm0, (%esp) | 
|  | 129 | ; X86-NEXT:    movl (%esp), %ecx | 
|  | 130 | ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx | 
|  | 131 | ; X86-NEXT:    movl %edx, 4(%eax) | 
|  | 132 | ; X86-NEXT:    movl %ecx, (%eax) | 
|  | 133 | ; X86-NEXT:    movl %ebp, %esp | 
|  | 134 | ; X86-NEXT:    popl %ebp | 
|  | 135 | ; X86-NEXT:    retl | 
|  | 136 | ; | 
|  | 137 | ; X64-LABEL: cvt_v2f32_v2i32: | 
|  | 138 | ; X64:       # BB#0: | 
| Simon Pilgrim | c7c5aa4 | 2017-03-28 21:32:11 +0000 | [diff] [blame] | 139 | ; X64-NEXT:    cvtps2pi %xmm0, %mm0 | 
| Simon Pilgrim | c6b5572 | 2017-03-10 16:59:43 +0000 | [diff] [blame] | 140 | ; X64-NEXT:    paddd %mm0, %mm0 | 
|  | 141 | ; X64-NEXT:    movq %mm0, (%rdi) | 
|  | 142 | ; X64-NEXT:    retq | 
|  | 143 | %3 = tail call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %0) | 
|  | 144 | %4 = bitcast <4 x i32> %3 to <2 x i64> | 
|  | 145 | %5 = extractelement <2 x i64> %4, i32 0 | 
|  | 146 | %6 = bitcast i64 %5 to x86_mmx | 
|  | 147 | %7 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %6, x86_mmx %6) | 
|  | 148 | %8 = bitcast x86_mmx %7 to i64 | 
|  | 149 | %9 = insertelement <1 x i64> undef, i64 %8, i32 0 | 
|  | 150 | store <1 x i64> %9, <1 x i64>* %1 | 
|  | 151 | ret void | 
|  | 152 | } | 
|  | 153 |  | 
; cvtt_v2f32_v2i32: truncating counterpart of the cvtps2dq test. The IR calls
; the SSE2 cvttps2dq intrinsic, keeps the low 64 bits of its result as an
; x86_mmx value, doubles it with padd.d and stores it through the pointer
; argument. Both targets are expected to emit cvttps2pi directly into %mm0.
|  | 154 | define void @cvtt_v2f32_v2i32(<4 x float>, <1 x i64>*) nounwind { | 
|  | 155 | ; X86-LABEL: cvtt_v2f32_v2i32: | 
|  | 156 | ; X86:       # BB#0: | 
|  | 157 | ; X86-NEXT:    pushl %ebp | 
|  | 158 | ; X86-NEXT:    movl %esp, %ebp | 
|  | 159 | ; X86-NEXT:    andl $-8, %esp | 
| Simon Pilgrim | c7c5aa4 | 2017-03-28 21:32:11 +0000 | [diff] [blame] | 160 | ; X86-NEXT:    subl $8, %esp | 
| Simon Pilgrim | c6b5572 | 2017-03-10 16:59:43 +0000 | [diff] [blame] | 161 | ; X86-NEXT:    movl 8(%ebp), %eax | 
| Simon Pilgrim | c7c5aa4 | 2017-03-28 21:32:11 +0000 | [diff] [blame] | 162 | ; X86-NEXT:    cvttps2pi %xmm0, %mm0 | 
| Simon Pilgrim | c6b5572 | 2017-03-10 16:59:43 +0000 | [diff] [blame] | 163 | ; X86-NEXT:    paddd %mm0, %mm0 | 
|  | 164 | ; X86-NEXT:    movq %mm0, (%esp) | 
|  | 165 | ; X86-NEXT:    movl (%esp), %ecx | 
|  | 166 | ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx | 
|  | 167 | ; X86-NEXT:    movl %edx, 4(%eax) | 
|  | 168 | ; X86-NEXT:    movl %ecx, (%eax) | 
|  | 169 | ; X86-NEXT:    movl %ebp, %esp | 
|  | 170 | ; X86-NEXT:    popl %ebp | 
|  | 171 | ; X86-NEXT:    retl | 
|  | 172 | ; | 
|  | 173 | ; X64-LABEL: cvtt_v2f32_v2i32: | 
|  | 174 | ; X64:       # BB#0: | 
| Simon Pilgrim | c7c5aa4 | 2017-03-28 21:32:11 +0000 | [diff] [blame] | 175 | ; X64-NEXT:    cvttps2pi %xmm0, %mm0 | 
| Simon Pilgrim | c6b5572 | 2017-03-10 16:59:43 +0000 | [diff] [blame] | 176 | ; X64-NEXT:    paddd %mm0, %mm0 | 
|  | 177 | ; X64-NEXT:    movq %mm0, (%rdi) | 
|  | 178 | ; X64-NEXT:    retq | 
|  | 179 | %3 = tail call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %0) | 
|  | 180 | %4 = bitcast <4 x i32> %3 to <2 x i64> | 
|  | 181 | %5 = extractelement <2 x i64> %4, i32 0 | 
|  | 182 | %6 = bitcast i64 %5 to x86_mmx | 
|  | 183 | %7 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %6, x86_mmx %6) | 
|  | 184 | %8 = bitcast x86_mmx %7 to i64 | 
|  | 185 | %9 = insertelement <1 x i64> undef, i64 %8, i32 0 | 
|  | 186 | store <1 x i64> %9, <1 x i64>* %1 | 
|  | 187 | ret void | 
|  | 188 | } | 
|  | 189 |  | 
; fptosi_v4f32_v4i32: converts all four float lanes with the generic fptosi
; instruction, then narrows to the low two i32 lanes with a shufflevector
; (mask <0, 1>) before bitcasting to x86_mmx. The value is doubled with padd.d
; and stored through the pointer argument. Both targets are expected to emit
; cvttps2pi directly into %mm0.
| Simon Pilgrim | dddce31 | 2017-04-02 13:10:20 +0000 | [diff] [blame] | 190 | define void @fptosi_v4f32_v4i32(<4 x float>, <1 x i64>*) nounwind { | 
|  | 191 | ; X86-LABEL: fptosi_v4f32_v4i32: | 
|  | 192 | ; X86:       # BB#0: | 
|  | 193 | ; X86-NEXT:    pushl %ebp | 
|  | 194 | ; X86-NEXT:    movl %esp, %ebp | 
|  | 195 | ; X86-NEXT:    andl $-8, %esp | 
| Simon Pilgrim | e56a2d7 | 2017-04-02 15:52:28 +0000 | [diff] [blame] | 196 | ; X86-NEXT:    subl $8, %esp | 
| Simon Pilgrim | dddce31 | 2017-04-02 13:10:20 +0000 | [diff] [blame] | 197 | ; X86-NEXT:    movl 8(%ebp), %eax | 
| Simon Pilgrim | ba28263 | 2017-04-02 16:20:34 +0000 | [diff] [blame] | 198 | ; X86-NEXT:    cvttps2pi %xmm0, %mm0 | 
| Simon Pilgrim | dddce31 | 2017-04-02 13:10:20 +0000 | [diff] [blame] | 199 | ; X86-NEXT:    paddd %mm0, %mm0 | 
|  | 200 | ; X86-NEXT:    movq %mm0, (%esp) | 
|  | 201 | ; X86-NEXT:    movl (%esp), %ecx | 
|  | 202 | ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx | 
|  | 203 | ; X86-NEXT:    movl %edx, 4(%eax) | 
|  | 204 | ; X86-NEXT:    movl %ecx, (%eax) | 
|  | 205 | ; X86-NEXT:    movl %ebp, %esp | 
|  | 206 | ; X86-NEXT:    popl %ebp | 
|  | 207 | ; X86-NEXT:    retl | 
|  | 208 | ; | 
|  | 209 | ; X64-LABEL: fptosi_v4f32_v4i32: | 
|  | 210 | ; X64:       # BB#0: | 
| Simon Pilgrim | ba28263 | 2017-04-02 16:20:34 +0000 | [diff] [blame] | 211 | ; X64-NEXT:    cvttps2pi %xmm0, %mm0 | 
| Simon Pilgrim | dddce31 | 2017-04-02 13:10:20 +0000 | [diff] [blame] | 212 | ; X64-NEXT:    paddd %mm0, %mm0 | 
|  | 213 | ; X64-NEXT:    movq %mm0, (%rdi) | 
|  | 214 | ; X64-NEXT:    retq | 
|  | 215 | %3 = fptosi <4 x float> %0 to <4 x i32> | 
|  | 216 | %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <2 x i32> <i32 0, i32 1> | 
|  | 217 | %5 = bitcast <2 x i32> %4 to x86_mmx | 
|  | 218 | %6 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %5, x86_mmx %5) | 
|  | 219 | %7 = bitcast x86_mmx %6 to i64 | 
|  | 220 | %8 = insertelement <1 x i64> undef, i64 %7, i32 0 | 
|  | 221 | store <1 x i64> %8, <1 x i64>* %1 | 
|  | 222 | ret void | 
|  | 223 | } | 
|  | 224 |  | 
; fptosi_v2f32_v2i32: converts all four float lanes with the generic fptosi
; instruction, then selects the low 64 bits by viewing the result as
; <2 x i64> and extracting element 0 (instead of using a shufflevector).
; That i64 is bitcast to x86_mmx, doubled with padd.d and stored through the
; pointer argument. Both targets are expected to emit cvttps2pi directly into
; %mm0.
| Simon Pilgrim | c6b5572 | 2017-03-10 16:59:43 +0000 | [diff] [blame] | 225 | define void @fptosi_v2f32_v2i32(<4 x float>, <1 x i64>*) nounwind { | 
|  | 226 | ; X86-LABEL: fptosi_v2f32_v2i32: | 
|  | 227 | ; X86:       # BB#0: | 
|  | 228 | ; X86-NEXT:    pushl %ebp | 
|  | 229 | ; X86-NEXT:    movl %esp, %ebp | 
|  | 230 | ; X86-NEXT:    andl $-8, %esp | 
| Simon Pilgrim | c7c5aa4 | 2017-03-28 21:32:11 +0000 | [diff] [blame] | 231 | ; X86-NEXT:    subl $8, %esp | 
| Simon Pilgrim | c6b5572 | 2017-03-10 16:59:43 +0000 | [diff] [blame] | 232 | ; X86-NEXT:    movl 8(%ebp), %eax | 
| Simon Pilgrim | c7c5aa4 | 2017-03-28 21:32:11 +0000 | [diff] [blame] | 233 | ; X86-NEXT:    cvttps2pi %xmm0, %mm0 | 
| Simon Pilgrim | c6b5572 | 2017-03-10 16:59:43 +0000 | [diff] [blame] | 234 | ; X86-NEXT:    paddd %mm0, %mm0 | 
|  | 235 | ; X86-NEXT:    movq %mm0, (%esp) | 
|  | 236 | ; X86-NEXT:    movl (%esp), %ecx | 
|  | 237 | ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx | 
|  | 238 | ; X86-NEXT:    movl %edx, 4(%eax) | 
|  | 239 | ; X86-NEXT:    movl %ecx, (%eax) | 
|  | 240 | ; X86-NEXT:    movl %ebp, %esp | 
|  | 241 | ; X86-NEXT:    popl %ebp | 
|  | 242 | ; X86-NEXT:    retl | 
|  | 243 | ; | 
|  | 244 | ; X64-LABEL: fptosi_v2f32_v2i32: | 
|  | 245 | ; X64:       # BB#0: | 
| Simon Pilgrim | c7c5aa4 | 2017-03-28 21:32:11 +0000 | [diff] [blame] | 246 | ; X64-NEXT:    cvttps2pi %xmm0, %mm0 | 
| Simon Pilgrim | c6b5572 | 2017-03-10 16:59:43 +0000 | [diff] [blame] | 247 | ; X64-NEXT:    paddd %mm0, %mm0 | 
|  | 248 | ; X64-NEXT:    movq %mm0, (%rdi) | 
|  | 249 | ; X64-NEXT:    retq | 
|  | 250 | %3 = fptosi <4 x float> %0 to <4 x i32> | 
|  | 251 | %4 = bitcast <4 x i32> %3 to <2 x i64> | 
|  | 252 | %5 = extractelement <2 x i64> %4, i32 0 | 
|  | 253 | %6 = bitcast i64 %5 to x86_mmx | 
|  | 254 | %7 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %6, x86_mmx %6) | 
|  | 255 | %8 = bitcast x86_mmx %7 to i64 | 
|  | 256 | %9 = insertelement <1 x i64> undef, i64 %8, i32 0 | 
|  | 257 | store <1 x i64> %9, <1 x i64>* %1 | 
|  | 258 | ret void | 
|  | 259 | } | 
|  | 260 |  | 
| Simon Pilgrim | e1a72a9 | 2017-03-14 12:13:41 +0000 | [diff] [blame] | 261 | ; FIXME: If we are transferring MMX registers to XMM for conversion we could use the MMX equivalents | 
| Simon Pilgrim | ed655f0 | 2017-03-10 17:23:55 +0000 | [diff] [blame] | 262 | ; (CVTPI2PD + CVTPI2PS) without affecting rounding/exceptions etc. | 
|  | 263 |  | 
; sitofp_v2i32_v2f64: loads an x86_mmx value through the pointer argument,
; doubles it with padd.d, then moves it into the vector domain: the i64 is
; inserted into lane 0 of a <2 x i64>, viewed as <4 x i32>, narrowed to lanes
; 0 and 1 and converted to <2 x double> with the generic sitofp instruction.
; The expected output recorded below does not use an MMX conversion: on i686
; %mm0 is spilled to the stack and cvtdq2pd reads it from memory, and on
; x86-64 the value is moved with movq2dq before cvtdq2pd.
|  | 264 | define <2 x double> @sitofp_v2i32_v2f64(<1 x i64>*) nounwind { | 
|  | 265 | ; X86-LABEL: sitofp_v2i32_v2f64: | 
|  | 266 | ; X86:       # BB#0: | 
|  | 267 | ; X86-NEXT:    pushl %ebp | 
|  | 268 | ; X86-NEXT:    movl %esp, %ebp | 
|  | 269 | ; X86-NEXT:    andl $-8, %esp | 
|  | 270 | ; X86-NEXT:    subl $8, %esp | 
|  | 271 | ; X86-NEXT:    movl 8(%ebp), %eax | 
|  | 272 | ; X86-NEXT:    movq (%eax), %mm0 | 
|  | 273 | ; X86-NEXT:    paddd %mm0, %mm0 | 
|  | 274 | ; X86-NEXT:    movq %mm0, (%esp) | 
| Simon Pilgrim | 128a10a | 2017-03-10 22:35:07 +0000 | [diff] [blame] | 275 | ; X86-NEXT:    cvtdq2pd (%esp), %xmm0 | 
| Simon Pilgrim | ed655f0 | 2017-03-10 17:23:55 +0000 | [diff] [blame] | 276 | ; X86-NEXT:    movl %ebp, %esp | 
|  | 277 | ; X86-NEXT:    popl %ebp | 
|  | 278 | ; X86-NEXT:    retl | 
|  | 279 | ; | 
|  | 280 | ; X64-LABEL: sitofp_v2i32_v2f64: | 
|  | 281 | ; X64:       # BB#0: | 
|  | 282 | ; X64-NEXT:    movq (%rdi), %mm0 | 
|  | 283 | ; X64-NEXT:    paddd %mm0, %mm0 | 
|  | 284 | ; X64-NEXT:    movq2dq %mm0, %xmm0 | 
|  | 285 | ; X64-NEXT:    cvtdq2pd %xmm0, %xmm0 | 
|  | 286 | ; X64-NEXT:    retq | 
|  | 287 | %2 = bitcast <1 x i64>* %0 to x86_mmx* | 
|  | 288 | %3 = load x86_mmx, x86_mmx* %2, align 8 | 
|  | 289 | %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3) | 
|  | 290 | %5 = bitcast x86_mmx %4 to i64 | 
|  | 291 | %6 = insertelement <2 x i64> undef, i64 %5, i32 0 | 
|  | 292 | %7 = bitcast <2 x i64> %6 to <4 x i32> | 
|  | 293 | %8 = shufflevector <4 x i32> %7, <4 x i32> undef, <2 x i32> <i32 0, i32 1> | 
|  | 294 | %9 = sitofp <2 x i32> %8 to <2 x double> | 
|  | 295 | ret <2 x double> %9 | 
|  | 296 | } | 
|  | 297 |  | 
; sitofp_v2i32_v2f32: loads an x86_mmx value through the pointer argument,
; doubles it with padd.d and bitcasts it to <2 x i32>. A shufflevector against
; zeroinitializer widens it to <4 x i32> (mask <0, 1, 2, 3>, so the upper two
; lanes come from the zero vector), and the generic sitofp instruction
; converts that to <4 x float>. The expected output on both targets stores
; %mm0 to a stack slot, reloads it with a zero-extending movsd and converts
; with cvtdq2ps; no MMX conversion instruction is used.
|  | 298 | define <4 x float> @sitofp_v2i32_v2f32(<1 x i64>*) nounwind { | 
|  | 299 | ; X86-LABEL: sitofp_v2i32_v2f32: | 
|  | 300 | ; X86:       # BB#0: | 
|  | 301 | ; X86-NEXT:    pushl %ebp | 
|  | 302 | ; X86-NEXT:    movl %esp, %ebp | 
|  | 303 | ; X86-NEXT:    andl $-8, %esp | 
|  | 304 | ; X86-NEXT:    subl $8, %esp | 
|  | 305 | ; X86-NEXT:    movl 8(%ebp), %eax | 
|  | 306 | ; X86-NEXT:    movq (%eax), %mm0 | 
|  | 307 | ; X86-NEXT:    paddd %mm0, %mm0 | 
|  | 308 | ; X86-NEXT:    movq %mm0, (%esp) | 
|  | 309 | ; X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero | 
|  | 310 | ; X86-NEXT:    cvtdq2ps %xmm0, %xmm0 | 
|  | 311 | ; X86-NEXT:    movl %ebp, %esp | 
|  | 312 | ; X86-NEXT:    popl %ebp | 
|  | 313 | ; X86-NEXT:    retl | 
|  | 314 | ; | 
|  | 315 | ; X64-LABEL: sitofp_v2i32_v2f32: | 
|  | 316 | ; X64:       # BB#0: | 
|  | 317 | ; X64-NEXT:    movq (%rdi), %mm0 | 
|  | 318 | ; X64-NEXT:    paddd %mm0, %mm0 | 
| Simon Pilgrim | be22cff | 2017-03-29 10:47:18 +0000 | [diff] [blame] | 319 | ; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp) | 
|  | 320 | ; X64-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero | 
|  | 321 | ; X64-NEXT:    cvtdq2ps %xmm0, %xmm0 | 
|  | 322 | ; X64-NEXT:    retq | 
|  | 323 | %2 = bitcast <1 x i64>* %0 to x86_mmx* | 
|  | 324 | %3 = load x86_mmx, x86_mmx* %2, align 8 | 
|  | 325 | %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3) | 
|  | 326 | %5 = bitcast x86_mmx %4 to <2 x i32> | 
|  | 327 | %6 = shufflevector <2 x i32> %5, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> | 
|  | 328 | %7 = sitofp <4 x i32> %6 to <4 x float> | 
|  | 329 | ret <4 x float> %7 | 
|  | 330 | } | 
|  | 331 |  | 
; cvt_v2i32_v2f32: loads an x86_mmx value through the pointer argument,
; doubles it with padd.d and bitcasts it to i64. The i64 becomes lane 0 of a
; <2 x i64> whose lane 1 is explicitly set to 0; that vector is viewed as
; <4 x i32> and converted with the SSE2 cvtdq2ps intrinsic. The expected
; output does not use an MMX conversion: i686 spills %mm0 to the stack and
; reloads it with movsd, while x86-64 transfers it through %rax with two movd
; instructions, before cvtdq2ps in both cases.
|  | 332 | define <4 x float> @cvt_v2i32_v2f32(<1 x i64>*) nounwind { | 
|  | 333 | ; X86-LABEL: cvt_v2i32_v2f32: | 
|  | 334 | ; X86:       # BB#0: | 
|  | 335 | ; X86-NEXT:    pushl %ebp | 
|  | 336 | ; X86-NEXT:    movl %esp, %ebp | 
|  | 337 | ; X86-NEXT:    andl $-8, %esp | 
|  | 338 | ; X86-NEXT:    subl $8, %esp | 
|  | 339 | ; X86-NEXT:    movl 8(%ebp), %eax | 
|  | 340 | ; X86-NEXT:    movq (%eax), %mm0 | 
|  | 341 | ; X86-NEXT:    paddd %mm0, %mm0 | 
|  | 342 | ; X86-NEXT:    movq %mm0, (%esp) | 
|  | 343 | ; X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero | 
|  | 344 | ; X86-NEXT:    cvtdq2ps %xmm0, %xmm0 | 
|  | 345 | ; X86-NEXT:    movl %ebp, %esp | 
|  | 346 | ; X86-NEXT:    popl %ebp | 
|  | 347 | ; X86-NEXT:    retl | 
|  | 348 | ; | 
|  | 349 | ; X64-LABEL: cvt_v2i32_v2f32: | 
|  | 350 | ; X64:       # BB#0: | 
|  | 351 | ; X64-NEXT:    movq (%rdi), %mm0 | 
|  | 352 | ; X64-NEXT:    paddd %mm0, %mm0 | 
| Simon Pilgrim | ed655f0 | 2017-03-10 17:23:55 +0000 | [diff] [blame] | 353 | ; X64-NEXT:    movd %mm0, %rax | 
|  | 354 | ; X64-NEXT:    movd %rax, %xmm0 | 
|  | 355 | ; X64-NEXT:    cvtdq2ps %xmm0, %xmm0 | 
|  | 356 | ; X64-NEXT:    retq | 
|  | 357 | %2 = bitcast <1 x i64>* %0 to x86_mmx* | 
|  | 358 | %3 = load x86_mmx, x86_mmx* %2, align 8 | 
|  | 359 | %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3) | 
|  | 360 | %5 = bitcast x86_mmx %4 to i64 | 
|  | 361 | %6 = insertelement <2 x i64> undef, i64 %5, i32 0 | 
|  | 362 | %7 = insertelement <2 x i64> %6, i64 0, i32 1 | 
|  | 363 | %8 = bitcast <2 x i64> %7 to <4 x i32> | 
|  | 364 | %9 = tail call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %8) | 
|  | 365 | ret <4 x float> %9 | 
|  | 366 | } | 
|  | 367 |  | 
| Simon Pilgrim | c6b5572 | 2017-03-10 16:59:43 +0000 | [diff] [blame] | 368 | declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx) | 
|  | 369 | declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) | 
|  | 370 | declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) | 
|  | 371 | declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) | 
|  | 372 | declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) | 
| Simon Pilgrim | ed655f0 | 2017-03-10 17:23:55 +0000 | [diff] [blame] | 373 | declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) |