; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+mmx,+sse2 | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+sse2 | FileCheck %s --check-prefix=X64

; If we are transferring XMM conversion results to MMX registers we could use the MMX equivalents
; (CVTPD2PI/CVTTPD2PI + CVTPS2PI/CVTTPS2PI) without affecting rounding/exceptions etc.

; Converts a <2 x double> with the llvm.x86.sse2.cvtpd2dq intrinsic, reinterprets
; the low 64 bits of the <4 x i32> result as an x86_mmx value, adds that value to
; itself with llvm.x86.mmx.padd.d, and stores the sum through the <1 x i64>* argument.
; The expected code for both targets converts straight into an MMX register with
; cvtpd2pi rather than producing the result in an XMM register first.
define void @cvt_v2f64_v2i32(<2 x double>, <1 x i64>*) nounwind {
; X86-LABEL: cvt_v2f64_v2i32:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    cvtpd2pi %xmm0, %mm0
; X86-NEXT:    paddd %mm0, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %edx, 4(%eax)
; X86-NEXT:    movl %ecx, (%eax)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: cvt_v2f64_v2i32:
; X64:       # %bb.0:
; X64-NEXT:    cvtpd2pi %xmm0, %mm0
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %3 = tail call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %0)
  %4 = bitcast <4 x i32> %3 to <2 x i64>
  %5 = extractelement <2 x i64> %4, i32 0
  %6 = bitcast i64 %5 to x86_mmx
  %7 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %6, x86_mmx %6)
  %8 = bitcast x86_mmx %7 to i64
  %9 = insertelement <1 x i64> undef, i64 %8, i32 0
  store <1 x i64> %9, <1 x i64>* %1
  ret void
}

; Truncating variant of the <2 x double> conversion: calls the
; llvm.x86.sse2.cvttpd2dq intrinsic, reinterprets the low 64 bits of the result as
; x86_mmx, adds it to itself with llvm.x86.mmx.padd.d, and stores the sum through
; the <1 x i64>* argument.
; The expected code for both targets uses cvttpd2pi directly into an MMX register.
define void @cvtt_v2f64_v2i32(<2 x double>, <1 x i64>*) nounwind {
; X86-LABEL: cvtt_v2f64_v2i32:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    cvttpd2pi %xmm0, %mm0
; X86-NEXT:    paddd %mm0, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %edx, 4(%eax)
; X86-NEXT:    movl %ecx, (%eax)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: cvtt_v2f64_v2i32:
; X64:       # %bb.0:
; X64-NEXT:    cvttpd2pi %xmm0, %mm0
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %3 = tail call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %0)
  %4 = bitcast <4 x i32> %3 to <2 x i64>
  %5 = extractelement <2 x i64> %4, i32 0
  %6 = bitcast i64 %5 to x86_mmx
  %7 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %6, x86_mmx %6)
  %8 = bitcast x86_mmx %7 to i64
  %9 = insertelement <1 x i64> undef, i64 %8, i32 0
  store <1 x i64> %9, <1 x i64>* %1
  ret void
}

; Same shape as the intrinsic-based <2 x double> tests, but the conversion is
; written as a generic `fptosi <2 x double> to <2 x i32>` whose result is bitcast
; directly to x86_mmx, added to itself with llvm.x86.mmx.padd.d, and stored through
; the <1 x i64>* argument.
; The expected code for both targets selects cvttpd2pi for the generic fptosi.
define void @fptosi_v2f64_v2i32(<2 x double>, <1 x i64>*) nounwind {
; X86-LABEL: fptosi_v2f64_v2i32:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    cvttpd2pi %xmm0, %mm0
; X86-NEXT:    paddd %mm0, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %edx, 4(%eax)
; X86-NEXT:    movl %ecx, (%eax)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: fptosi_v2f64_v2i32:
; X64:       # %bb.0:
; X64-NEXT:    cvttpd2pi %xmm0, %mm0
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %3 = fptosi <2 x double> %0 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to x86_mmx
  %5 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %4, x86_mmx %4)
  %6 = bitcast x86_mmx %5 to i64
  %7 = insertelement <1 x i64> undef, i64 %6, i32 0
  store <1 x i64> %7, <1 x i64>* %1
  ret void
}

; Converts a <4 x float> with the llvm.x86.sse2.cvtps2dq intrinsic, reinterprets
; the low 64 bits of the <4 x i32> result as an x86_mmx value, adds that value to
; itself with llvm.x86.mmx.padd.d, and stores the sum through the <1 x i64>* argument.
; The expected code for both targets converts straight into an MMX register with
; cvtps2pi.
define void @cvt_v2f32_v2i32(<4 x float>, <1 x i64>*) nounwind {
; X86-LABEL: cvt_v2f32_v2i32:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    cvtps2pi %xmm0, %mm0
; X86-NEXT:    paddd %mm0, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %edx, 4(%eax)
; X86-NEXT:    movl %ecx, (%eax)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: cvt_v2f32_v2i32:
; X64:       # %bb.0:
; X64-NEXT:    cvtps2pi %xmm0, %mm0
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %3 = tail call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %0)
  %4 = bitcast <4 x i32> %3 to <2 x i64>
  %5 = extractelement <2 x i64> %4, i32 0
  %6 = bitcast i64 %5 to x86_mmx
  %7 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %6, x86_mmx %6)
  %8 = bitcast x86_mmx %7 to i64
  %9 = insertelement <1 x i64> undef, i64 %8, i32 0
  store <1 x i64> %9, <1 x i64>* %1
  ret void
}

; Truncating variant of the <4 x float> conversion: calls the
; llvm.x86.sse2.cvttps2dq intrinsic, reinterprets the low 64 bits of the result as
; x86_mmx, adds it to itself with llvm.x86.mmx.padd.d, and stores the sum through
; the <1 x i64>* argument.
; The expected code for both targets uses cvttps2pi directly into an MMX register.
define void @cvtt_v2f32_v2i32(<4 x float>, <1 x i64>*) nounwind {
; X86-LABEL: cvtt_v2f32_v2i32:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    cvttps2pi %xmm0, %mm0
; X86-NEXT:    paddd %mm0, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %edx, 4(%eax)
; X86-NEXT:    movl %ecx, (%eax)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: cvtt_v2f32_v2i32:
; X64:       # %bb.0:
; X64-NEXT:    cvttps2pi %xmm0, %mm0
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %3 = tail call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %0)
  %4 = bitcast <4 x i32> %3 to <2 x i64>
  %5 = extractelement <2 x i64> %4, i32 0
  %6 = bitcast i64 %5 to x86_mmx
  %7 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %6, x86_mmx %6)
  %8 = bitcast x86_mmx %7 to i64
  %9 = insertelement <1 x i64> undef, i64 %8, i32 0
  store <1 x i64> %9, <1 x i64>* %1
  ret void
}

; Generic `fptosi <4 x float> to <4 x i32>` where only lanes 0 and 1 are kept: a
; shufflevector extracts them as <2 x i32>, which is bitcast to x86_mmx, added to
; itself with llvm.x86.mmx.padd.d, and stored through the <1 x i64>* argument.
; The expected code for both targets selects cvttps2pi for the conversion.
define void @fptosi_v4f32_v4i32(<4 x float>, <1 x i64>*) nounwind {
; X86-LABEL: fptosi_v4f32_v4i32:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    cvttps2pi %xmm0, %mm0
; X86-NEXT:    paddd %mm0, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %edx, 4(%eax)
; X86-NEXT:    movl %ecx, (%eax)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: fptosi_v4f32_v4i32:
; X64:       # %bb.0:
; X64-NEXT:    cvttps2pi %xmm0, %mm0
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %3 = fptosi <4 x float> %0 to <4 x i32>
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %5 = bitcast <2 x i32> %4 to x86_mmx
  %6 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %5, x86_mmx %5)
  %7 = bitcast x86_mmx %6 to i64
  %8 = insertelement <1 x i64> undef, i64 %7, i32 0
  store <1 x i64> %8, <1 x i64>* %1
  ret void
}

; Generic `fptosi <4 x float> to <4 x i32>` where the low half is taken by
; bitcasting to <2 x i64> and extracting element 0 (instead of a shufflevector).
; That i64 is bitcast to x86_mmx, added to itself with llvm.x86.mmx.padd.d, and
; stored through the <1 x i64>* argument.
; The expected code for both targets selects cvttps2pi for the conversion.
define void @fptosi_v2f32_v2i32(<4 x float>, <1 x i64>*) nounwind {
; X86-LABEL: fptosi_v2f32_v2i32:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    cvttps2pi %xmm0, %mm0
; X86-NEXT:    paddd %mm0, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %edx, 4(%eax)
; X86-NEXT:    movl %ecx, (%eax)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: fptosi_v2f32_v2i32:
; X64:       # %bb.0:
; X64-NEXT:    cvttps2pi %xmm0, %mm0
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %3 = fptosi <4 x float> %0 to <4 x i32>
  %4 = bitcast <4 x i32> %3 to <2 x i64>
  %5 = extractelement <2 x i64> %4, i32 0
  %6 = bitcast i64 %5 to x86_mmx
  %7 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %6, x86_mmx %6)
  %8 = bitcast x86_mmx %7 to i64
  %9 = insertelement <1 x i64> undef, i64 %8, i32 0
  store <1 x i64> %9, <1 x i64>* %1
  ret void
}

; FIXME: If we are transferring MMX registers to XMM for conversion we could use the MMX equivalents
; (CVTPI2PD + CVTPI2PS) without affecting rounding/exceptions etc.

; Loads an x86_mmx value through the pointer argument, adds it to itself with
; llvm.x86.mmx.padd.d, moves the 64-bit result into lane 0 of a <2 x i64>, views
; it as <4 x i32>, keeps lanes 0 and 1, and converts them with a generic
; `sitofp <2 x i32> to <2 x double>`.
; The expected code still performs the conversion with cvtdq2pd: the i686 run
; spills the MMX register to the stack and converts from memory, while the x86-64
; run transfers it with movq2dq first.
define <2 x double> @sitofp_v2i32_v2f64(<1 x i64>*) nounwind {
; X86-LABEL: sitofp_v2i32_v2f64:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movq (%eax), %mm0
; X86-NEXT:    paddd %mm0, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    cvtdq2pd (%esp), %xmm0
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: sitofp_v2i32_v2f64:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq2dq %mm0, %xmm0
; X64-NEXT:    cvtdq2pd %xmm0, %xmm0
; X64-NEXT:    retq
  %2 = bitcast <1 x i64>* %0 to x86_mmx*
  %3 = load x86_mmx, x86_mmx* %2, align 8
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  %5 = bitcast x86_mmx %4 to i64
  %6 = insertelement <2 x i64> undef, i64 %5, i32 0
  %7 = bitcast <2 x i64> %6 to <4 x i32>
  %8 = shufflevector <4 x i32> %7, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %9 = sitofp <2 x i32> %8 to <2 x double>
  ret <2 x double> %9
}

; Loads an x86_mmx value through the pointer argument, adds it to itself with
; llvm.x86.mmx.padd.d, bitcasts the result to <2 x i32>, widens it to <4 x i32>
; with a shufflevector whose upper two lanes come from a zeroinitializer operand,
; and converts with a generic `sitofp <4 x i32> to <4 x float>`.
; The expected code for both targets stores the MMX register to the stack,
; reloads it with movsd (upper lanes zeroed), and converts with cvtdq2ps.
define <4 x float> @sitofp_v2i32_v2f32(<1 x i64>*) nounwind {
; X86-LABEL: sitofp_v2i32_v2f32:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movq (%eax), %mm0
; X86-NEXT:    paddd %mm0, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    cvtdq2ps %xmm0, %xmm0
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: sitofp_v2i32_v2f32:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT:    cvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %2 = bitcast <1 x i64>* %0 to x86_mmx*
  %3 = load x86_mmx, x86_mmx* %2, align 8
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  %5 = bitcast x86_mmx %4 to <2 x i32>
  %6 = shufflevector <2 x i32> %5, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %7 = sitofp <4 x i32> %6 to <4 x float>
  ret <4 x float> %7
}

; Loads an x86_mmx value through the pointer argument, adds it to itself with
; llvm.x86.mmx.padd.d, places the 64-bit result in lane 0 of a <2 x i64> whose
; lane 1 is set to 0, views that as <4 x i32>, and converts it with the
; llvm.x86.sse2.cvtdq2ps intrinsic.
; The expected code converts with cvtdq2ps on both targets: the i686 run goes
; through a stack slot reloaded with movsd, while the x86-64 run moves the value
; from the MMX register through %rax into %xmm0.
define <4 x float> @cvt_v2i32_v2f32(<1 x i64>*) nounwind {
; X86-LABEL: cvt_v2i32_v2f32:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movq (%eax), %mm0
; X86-NEXT:    paddd %mm0, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    cvtdq2ps %xmm0, %xmm0
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: cvt_v2i32_v2f32:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    movq %rax, %xmm0
; X64-NEXT:    cvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %2 = bitcast <1 x i64>* %0 to x86_mmx*
  %3 = load x86_mmx, x86_mmx* %2, align 8
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  %5 = bitcast x86_mmx %4 to i64
  %6 = insertelement <2 x i64> undef, i64 %5, i32 0
  %7 = insertelement <2 x i64> %6, i64 0, i32 1
  %8 = bitcast <2 x i64> %7 to <4 x i32>
  %9 = tail call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %8)
  ret <4 x float> %9
}

; Declarations of the MMX/SSE2 intrinsics called by the test functions in this file.
declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx)
declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>)
declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>)
declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>)
declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>)
declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>)