; RUN: llc < %s -march=x86 -mattr=+mmx

;; A basic sanity check to make sure that MMX arithmetic actually compiles.
;; First is a straight translation of the original with bitcasts as needed.

define void @foo(x86_mmx* %A, x86_mmx* %B) {
entry:
  %tmp1 = load x86_mmx* %A ; <x86_mmx> [#uses=1]
  %tmp3 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp1a = bitcast x86_mmx %tmp1 to <8 x i8>
  %tmp3a = bitcast x86_mmx %tmp3 to <8 x i8>
  %tmp4 = add <8 x i8> %tmp1a, %tmp3a ; <<8 x i8>> [#uses=2]
  %tmp4a = bitcast <8 x i8> %tmp4 to x86_mmx
  store x86_mmx %tmp4a, x86_mmx* %A
  %tmp7 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp12 = tail call x86_mmx @llvm.x86.mmx.padds.b( x86_mmx %tmp4a, x86_mmx %tmp7 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp12, x86_mmx* %A
  %tmp16 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp21 = tail call x86_mmx @llvm.x86.mmx.paddus.b( x86_mmx %tmp12, x86_mmx %tmp16 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp21, x86_mmx* %A
  %tmp27 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp21a = bitcast x86_mmx %tmp21 to <8 x i8>
  %tmp27a = bitcast x86_mmx %tmp27 to <8 x i8>
  %tmp28 = sub <8 x i8> %tmp21a, %tmp27a ; <<8 x i8>> [#uses=2]
  %tmp28a = bitcast <8 x i8> %tmp28 to x86_mmx
  store x86_mmx %tmp28a, x86_mmx* %A
  %tmp31 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp36 = tail call x86_mmx @llvm.x86.mmx.psubs.b( x86_mmx %tmp28a, x86_mmx %tmp31 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp36, x86_mmx* %A
  %tmp40 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp45 = tail call x86_mmx @llvm.x86.mmx.psubus.b( x86_mmx %tmp36, x86_mmx %tmp40 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp45, x86_mmx* %A
  %tmp51 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp45a = bitcast x86_mmx %tmp45 to <8 x i8>
  %tmp51a = bitcast x86_mmx %tmp51 to <8 x i8>
  %tmp52 = mul <8 x i8> %tmp45a, %tmp51a ; <<8 x i8>> [#uses=2]
  %tmp52a = bitcast <8 x i8> %tmp52 to x86_mmx
  store x86_mmx %tmp52a, x86_mmx* %A
  %tmp57 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp57a = bitcast x86_mmx %tmp57 to <8 x i8>
  %tmp58 = and <8 x i8> %tmp52, %tmp57a ; <<8 x i8>> [#uses=2]
  %tmp58a = bitcast <8 x i8> %tmp58 to x86_mmx
  store x86_mmx %tmp58a, x86_mmx* %A
  %tmp63 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp63a = bitcast x86_mmx %tmp63 to <8 x i8>
  %tmp64 = or <8 x i8> %tmp58, %tmp63a ; <<8 x i8>> [#uses=2]
  %tmp64a = bitcast <8 x i8> %tmp64 to x86_mmx
  store x86_mmx %tmp64a, x86_mmx* %A
  %tmp69 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp69a = bitcast x86_mmx %tmp69 to <8 x i8>
  %tmp64b = bitcast x86_mmx %tmp64a to <8 x i8>
  %tmp70 = xor <8 x i8> %tmp64b, %tmp69a ; <<8 x i8>> [#uses=1]
  %tmp70a = bitcast <8 x i8> %tmp70 to x86_mmx
  store x86_mmx %tmp70a, x86_mmx* %A
  tail call void @llvm.x86.mmx.emms( )
  ret void
}

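;; @baz repeats the check with <2 x i32> elements: vector add, sub, and mul
;; plus the bitwise ops, again round-tripping through x86_mmx bitcasts.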
define void @baz(x86_mmx* %A, x86_mmx* %B) {
entry:
  %tmp1 = load x86_mmx* %A ; <x86_mmx> [#uses=1]
  %tmp3 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp1a = bitcast x86_mmx %tmp1 to <2 x i32>
  %tmp3a = bitcast x86_mmx %tmp3 to <2 x i32>
  %tmp4 = add <2 x i32> %tmp1a, %tmp3a ; <<2 x i32>> [#uses=2]
  %tmp4a = bitcast <2 x i32> %tmp4 to x86_mmx
  store x86_mmx %tmp4a, x86_mmx* %A
  %tmp9 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp9a = bitcast x86_mmx %tmp9 to <2 x i32>
  %tmp10 = sub <2 x i32> %tmp4, %tmp9a ; <<2 x i32>> [#uses=2]
  %tmp10a = bitcast <2 x i32> %tmp10 to x86_mmx
  store x86_mmx %tmp10a, x86_mmx* %A
  %tmp15 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp10b = bitcast x86_mmx %tmp10a to <2 x i32>
  %tmp15a = bitcast x86_mmx %tmp15 to <2 x i32>
  %tmp16 = mul <2 x i32> %tmp10b, %tmp15a ; <<2 x i32>> [#uses=2]
  %tmp16a = bitcast <2 x i32> %tmp16 to x86_mmx
  store x86_mmx %tmp16a, x86_mmx* %A
  %tmp21 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp16b = bitcast x86_mmx %tmp16a to <2 x i32>
  %tmp21a = bitcast x86_mmx %tmp21 to <2 x i32>
  %tmp22 = and <2 x i32> %tmp16b, %tmp21a ; <<2 x i32>> [#uses=2]
  %tmp22a = bitcast <2 x i32> %tmp22 to x86_mmx
  store x86_mmx %tmp22a, x86_mmx* %A
  %tmp27 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp22b = bitcast x86_mmx %tmp22a to <2 x i32>
  %tmp27a = bitcast x86_mmx %tmp27 to <2 x i32>
  %tmp28 = or <2 x i32> %tmp22b, %tmp27a ; <<2 x i32>> [#uses=2]
  %tmp28a = bitcast <2 x i32> %tmp28 to x86_mmx
  store x86_mmx %tmp28a, x86_mmx* %A
  %tmp33 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp28b = bitcast x86_mmx %tmp28a to <2 x i32>
  %tmp33a = bitcast x86_mmx %tmp33 to <2 x i32>
  %tmp34 = xor <2 x i32> %tmp28b, %tmp33a ; <<2 x i32>> [#uses=1]
  %tmp34a = bitcast <2 x i32> %tmp34 to x86_mmx
  store x86_mmx %tmp34a, x86_mmx* %A
  tail call void @llvm.x86.mmx.emms( )
  ret void
}

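;; @bar covers the <4 x i16> element type, adding the saturating and
;; multiply intrinsics (padds.w, paddus.w, pmulh.w, pmadd.wd) on top of
;; the plain vector ops.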
define void @bar(x86_mmx* %A, x86_mmx* %B) {
entry:
  %tmp1 = load x86_mmx* %A ; <x86_mmx> [#uses=1]
  %tmp3 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp1a = bitcast x86_mmx %tmp1 to <4 x i16>
  %tmp3a = bitcast x86_mmx %tmp3 to <4 x i16>
  %tmp4 = add <4 x i16> %tmp1a, %tmp3a ; <<4 x i16>> [#uses=2]
  %tmp4a = bitcast <4 x i16> %tmp4 to x86_mmx
  store x86_mmx %tmp4a, x86_mmx* %A
  %tmp7 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp12 = tail call x86_mmx @llvm.x86.mmx.padds.w( x86_mmx %tmp4a, x86_mmx %tmp7 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp12, x86_mmx* %A
  %tmp16 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp21 = tail call x86_mmx @llvm.x86.mmx.paddus.w( x86_mmx %tmp12, x86_mmx %tmp16 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp21, x86_mmx* %A
  %tmp27 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp21a = bitcast x86_mmx %tmp21 to <4 x i16>
  %tmp27a = bitcast x86_mmx %tmp27 to <4 x i16>
  %tmp28 = sub <4 x i16> %tmp21a, %tmp27a ; <<4 x i16>> [#uses=2]
  %tmp28a = bitcast <4 x i16> %tmp28 to x86_mmx
  store x86_mmx %tmp28a, x86_mmx* %A
  %tmp31 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp36 = tail call x86_mmx @llvm.x86.mmx.psubs.w( x86_mmx %tmp28a, x86_mmx %tmp31 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp36, x86_mmx* %A
  %tmp40 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp45 = tail call x86_mmx @llvm.x86.mmx.psubus.w( x86_mmx %tmp36, x86_mmx %tmp40 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp45, x86_mmx* %A
  %tmp51 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp45a = bitcast x86_mmx %tmp45 to <4 x i16>
  %tmp51a = bitcast x86_mmx %tmp51 to <4 x i16>
  %tmp52 = mul <4 x i16> %tmp45a, %tmp51a ; <<4 x i16>> [#uses=2]
  %tmp52a = bitcast <4 x i16> %tmp52 to x86_mmx
  store x86_mmx %tmp52a, x86_mmx* %A
  %tmp55 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp60 = tail call x86_mmx @llvm.x86.mmx.pmulh.w( x86_mmx %tmp52a, x86_mmx %tmp55 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp60, x86_mmx* %A
  %tmp64 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp69 = tail call x86_mmx @llvm.x86.mmx.pmadd.wd( x86_mmx %tmp60, x86_mmx %tmp64 ) ; <x86_mmx> [#uses=1]
  %tmp70 = bitcast x86_mmx %tmp69 to x86_mmx ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp70, x86_mmx* %A
  %tmp75 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp70a = bitcast x86_mmx %tmp70 to <4 x i16>
  %tmp75a = bitcast x86_mmx %tmp75 to <4 x i16>
  %tmp76 = and <4 x i16> %tmp70a, %tmp75a ; <<4 x i16>> [#uses=2]
  %tmp76a = bitcast <4 x i16> %tmp76 to x86_mmx
  store x86_mmx %tmp76a, x86_mmx* %A
  %tmp81 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp76b = bitcast x86_mmx %tmp76a to <4 x i16>
  %tmp81a = bitcast x86_mmx %tmp81 to <4 x i16>
  %tmp82 = or <4 x i16> %tmp76b, %tmp81a ; <<4 x i16>> [#uses=2]
  %tmp82a = bitcast <4 x i16> %tmp82 to x86_mmx
  store x86_mmx %tmp82a, x86_mmx* %A
  %tmp87 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp82b = bitcast x86_mmx %tmp82a to <4 x i16>
  %tmp87a = bitcast x86_mmx %tmp87 to <4 x i16>
  %tmp88 = xor <4 x i16> %tmp82b, %tmp87a ; <<4 x i16>> [#uses=1]
  %tmp88a = bitcast <4 x i16> %tmp88 to x86_mmx
  store x86_mmx %tmp88a, x86_mmx* %A
  tail call void @llvm.x86.mmx.emms( )
  ret void
}

;; The following is modified to use MMX intrinsics everywhere they work.

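;; @fooa is the <8 x i8> variant: every op with an MMX intrinsic form
;; (padd.b, padds.b, paddus.b, psub.b, psubs.b, psubus.b, pand, por, pxor)
;; uses it; only the i8 multiply stays a vector mul, since MMX has no
;; byte multiply instruction.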
define void @fooa(x86_mmx* %A, x86_mmx* %B) {
entry:
  %tmp1 = load x86_mmx* %A ; <x86_mmx> [#uses=1]
  %tmp3 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp4 = tail call x86_mmx @llvm.x86.mmx.padd.b( x86_mmx %tmp1, x86_mmx %tmp3 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp4, x86_mmx* %A
  %tmp7 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp12 = tail call x86_mmx @llvm.x86.mmx.padds.b( x86_mmx %tmp4, x86_mmx %tmp7 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp12, x86_mmx* %A
  %tmp16 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp21 = tail call x86_mmx @llvm.x86.mmx.paddus.b( x86_mmx %tmp12, x86_mmx %tmp16 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp21, x86_mmx* %A
  %tmp27 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp28 = tail call x86_mmx @llvm.x86.mmx.psub.b( x86_mmx %tmp21, x86_mmx %tmp27 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp28, x86_mmx* %A
  %tmp31 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp36 = tail call x86_mmx @llvm.x86.mmx.psubs.b( x86_mmx %tmp28, x86_mmx %tmp31 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp36, x86_mmx* %A
  %tmp40 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp45 = tail call x86_mmx @llvm.x86.mmx.psubus.b( x86_mmx %tmp36, x86_mmx %tmp40 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp45, x86_mmx* %A
  %tmp51 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp51a = bitcast x86_mmx %tmp51 to i64
  %tmp51aa = bitcast i64 %tmp51a to <8 x i8>
  %tmp51b = bitcast x86_mmx %tmp45 to <8 x i8>
  %tmp52 = mul <8 x i8> %tmp51b, %tmp51aa ; <<8 x i8>> [#uses=2]
  %tmp52a = bitcast <8 x i8> %tmp52 to i64
  %tmp52aa = bitcast i64 %tmp52a to x86_mmx
  store x86_mmx %tmp52aa, x86_mmx* %A
  %tmp57 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp58 = tail call x86_mmx @llvm.x86.mmx.pand( x86_mmx %tmp52aa, x86_mmx %tmp57 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp58, x86_mmx* %A
  %tmp63 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp64 = tail call x86_mmx @llvm.x86.mmx.por( x86_mmx %tmp58, x86_mmx %tmp63 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp64, x86_mmx* %A
  %tmp69 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp70 = tail call x86_mmx @llvm.x86.mmx.pxor( x86_mmx %tmp64, x86_mmx %tmp69 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp70, x86_mmx* %A
  tail call void @llvm.x86.mmx.emms( )
  ret void
}

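;; @baza is the <2 x i32> variant: padd.d, psub.d, and the bitwise
;; intrinsics, with the multiply kept as a vector mul (MMX has no dword
;; multiply that produces a dword result).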
define void @baza(x86_mmx* %A, x86_mmx* %B) {
entry:
  %tmp1 = load x86_mmx* %A ; <x86_mmx> [#uses=1]
  %tmp3 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp4 = tail call x86_mmx @llvm.x86.mmx.padd.d( x86_mmx %tmp1, x86_mmx %tmp3 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp4, x86_mmx* %A
  %tmp9 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp10 = tail call x86_mmx @llvm.x86.mmx.psub.d( x86_mmx %tmp4, x86_mmx %tmp9 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp10, x86_mmx* %A
  %tmp15 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp10a = bitcast x86_mmx %tmp10 to <2 x i32>
  %tmp15a = bitcast x86_mmx %tmp15 to <2 x i32>
  %tmp16 = mul <2 x i32> %tmp10a, %tmp15a ; <<2 x i32>> [#uses=2]
  %tmp16a = bitcast <2 x i32> %tmp16 to x86_mmx
  store x86_mmx %tmp16a, x86_mmx* %A
  %tmp21 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp22 = tail call x86_mmx @llvm.x86.mmx.pand( x86_mmx %tmp16a, x86_mmx %tmp21 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp22, x86_mmx* %A
  %tmp27 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp28 = tail call x86_mmx @llvm.x86.mmx.por( x86_mmx %tmp22, x86_mmx %tmp27 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp28, x86_mmx* %A
  %tmp33 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp34 = tail call x86_mmx @llvm.x86.mmx.pxor( x86_mmx %tmp28, x86_mmx %tmp33 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp34, x86_mmx* %A
  tail call void @llvm.x86.mmx.emms( )
  ret void
}

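;; @bara is the <4 x i16> variant using intrinsics throughout, including
;; pmull.w, pmulh.w, and pmadd.wd.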
define void @bara(x86_mmx* %A, x86_mmx* %B) {
entry:
  %tmp1 = load x86_mmx* %A ; <x86_mmx> [#uses=1]
  %tmp3 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp4 = tail call x86_mmx @llvm.x86.mmx.padd.w( x86_mmx %tmp1, x86_mmx %tmp3 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp4, x86_mmx* %A
  %tmp7 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp12 = tail call x86_mmx @llvm.x86.mmx.padds.w( x86_mmx %tmp4, x86_mmx %tmp7 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp12, x86_mmx* %A
  %tmp16 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp21 = tail call x86_mmx @llvm.x86.mmx.paddus.w( x86_mmx %tmp12, x86_mmx %tmp16 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp21, x86_mmx* %A
  %tmp27 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp28 = tail call x86_mmx @llvm.x86.mmx.psub.w( x86_mmx %tmp21, x86_mmx %tmp27 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp28, x86_mmx* %A
  %tmp31 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp36 = tail call x86_mmx @llvm.x86.mmx.psubs.w( x86_mmx %tmp28, x86_mmx %tmp31 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp36, x86_mmx* %A
  %tmp40 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp45 = tail call x86_mmx @llvm.x86.mmx.psubus.w( x86_mmx %tmp36, x86_mmx %tmp40 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp45, x86_mmx* %A
  %tmp51 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp52 = tail call x86_mmx @llvm.x86.mmx.pmull.w( x86_mmx %tmp45, x86_mmx %tmp51 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp52, x86_mmx* %A
  %tmp55 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp60 = tail call x86_mmx @llvm.x86.mmx.pmulh.w( x86_mmx %tmp52, x86_mmx %tmp55 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp60, x86_mmx* %A
  %tmp64 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp69 = tail call x86_mmx @llvm.x86.mmx.pmadd.wd( x86_mmx %tmp60, x86_mmx %tmp64 ) ; <x86_mmx> [#uses=1]
  %tmp70 = bitcast x86_mmx %tmp69 to x86_mmx ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp70, x86_mmx* %A
  %tmp75 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp76 = tail call x86_mmx @llvm.x86.mmx.pand( x86_mmx %tmp70, x86_mmx %tmp75 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp76, x86_mmx* %A
  %tmp81 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp82 = tail call x86_mmx @llvm.x86.mmx.por( x86_mmx %tmp76, x86_mmx %tmp81 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp82, x86_mmx* %A
  %tmp87 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp88 = tail call x86_mmx @llvm.x86.mmx.pxor( x86_mmx %tmp82, x86_mmx %tmp87 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp88, x86_mmx* %A
  tail call void @llvm.x86.mmx.emms( )
  ret void
}

declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx)

declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx)

declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx)

declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx)

declare x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx, x86_mmx)

declare x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx, x86_mmx)

declare void @llvm.x86.mmx.emms()

declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.padds.w(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.pand(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.por(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.pxor(x86_mmx, x86_mmx)