Chih-Hung Hsieh | 7993e18 | 2015-12-14 22:08:36 +0000 | [diff] [blame] | 1 | ; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+mmx | FileCheck %s |
| 2 | ; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+mmx | FileCheck %s |
| 3 | |
| 4 | ; These tests were generated from simplified libm C code. |
| 5 | ; When compiled for the x86_64-linux-android target, |
| 6 | ; long double is mapped to f128 type that should be passed |
| 7 | ; in SSE registers. When the f128 type calling convention |
| 8 | ; problem was fixed, old llvm code failed to handle f128 values |
| 9 | ; in several f128/i128 type operations. These unit tests hopefully |
| 10 | ; will catch regression in any future change in this area. |
| 11 | ; To modify or enhance these test cases, please consult the libm |
| 12 | ; code patterns and compile with -target x86_64-linux-android |
| 13 | ; to generate IL. If the __float128 keyword is not accepted by |
| 14 | ; clang, just define it as "long double". |
| 15 | ; |
| 16 | |
| 17 | ; typedef long double __float128; |
| 18 | ; union IEEEl2bits { |
| 19 | ; __float128 e; |
| 20 | ; struct { |
| 21 | ; unsigned long manl :64; |
| 22 | ; unsigned long manh :48; |
| 23 | ; unsigned int exp :15; |
| 24 | ; unsigned int sign :1; |
| 25 | ; } bits; |
| 26 | ; struct { |
| 27 | ; unsigned long manl :64; |
| 28 | ; unsigned long manh :48; |
| 29 | ; unsigned int expsign :16; |
| 30 | ; } xbits; |
| 31 | ; }; |
| 32 | |
| 33 | ; C code (overwrites the 48-bit manh bit-field of an fp128 with n, then passes the value to foo): |
| 34 | ; void foo(__float128 x); |
| 35 | ; void TestUnionLD1(__float128 s, unsigned long n) { |
| 36 | ; union IEEEl2bits u; |
| 37 | ; __float128 w; |
| 38 | ; u.e = s; |
| 39 | ; u.bits.manh = n; |
| 40 | ; w = u.e; |
| 41 | ; foo(w); |
| 42 | ; } |
| 43 | define void @TestUnionLD1(fp128 %s, i64 %n) #0 { |
| 44 | entry: |
| 45 | %0 = bitcast fp128 %s to i128 |
| 46 | %1 = zext i64 %n to i128 |
| 47 | %bf.value = shl nuw i128 %1, 64 ; move n into the upper 64 bits |
| 48 | %bf.shl = and i128 %bf.value, 5192296858534809181786422619668480 ; keep bits 64..111 only: (2^48 - 1) << 64 |
| 49 | %bf.clear = and i128 %0, -5192296858534809181786422619668481 ; inverse mask: clear bits 64..111 of s |
| 50 | %bf.set = or i128 %bf.shl, %bf.clear |
| 51 | %2 = bitcast i128 %bf.set to fp128 |
| 52 | tail call void @foo(fp128 %2) #2 |
| 53 | ret void |
| 54 | ; CHECK-LABEL: TestUnionLD1: |
| 55 | ; CHECK: movaps %xmm0, -24(%rsp) |
| 56 | ; CHECK-NEXT: movq -24(%rsp), %rax |
| 57 | ; CHECK-NEXT: movabsq $281474976710655, %rcx |
| 58 | ; CHECK-NEXT: andq %rdi, %rcx |
| 59 | ; CHECK-NEXT: movabsq $-281474976710656, %rdx |
| 60 | ; CHECK-NEXT: andq -16(%rsp), %rdx |
| 61 | ; CHECK-NEXT: movq %rax, -40(%rsp) |
| 62 | ; CHECK-NEXT: orq %rcx, %rdx |
| 63 | ; CHECK-NEXT: movq %rdx, -32(%rsp) |
| 64 | ; CHECK-NEXT: movaps -40(%rsp), %xmm0 |
| 65 | ; CHECK-NEXT: jmp foo |
| 66 | } |
| 67 | |
| 68 | ; C code (zeroes the 64-bit manl bit-field of an fp128 and returns the result): |
| 69 | ; __float128 TestUnionLD2(__float128 s) { |
| 70 | ; union IEEEl2bits u; |
| 71 | ; __float128 w; |
| 72 | ; u.e = s; |
| 73 | ; u.bits.manl = 0; |
| 74 | ; w = u.e; |
| 75 | ; return w; |
| 76 | ; } |
| 77 | define fp128 @TestUnionLD2(fp128 %s) #0 { |
| 78 | entry: |
| 79 | %0 = bitcast fp128 %s to i128 |
| 80 | %bf.clear = and i128 %0, -18446744073709551616 ; -(2^64): clears the low 64 bits |
| 81 | %1 = bitcast i128 %bf.clear to fp128 |
| 82 | ret fp128 %1 |
| 83 | ; CHECK-LABEL: TestUnionLD2: |
| 84 | ; CHECK: movaps %xmm0, -24(%rsp) |
| 85 | ; CHECK-NEXT: movq -16(%rsp), %rax |
| 86 | ; CHECK-NEXT: movq %rax, -32(%rsp) |
| 87 | ; CHECK-NEXT: movq $0, -40(%rsp) |
| 88 | ; CHECK-NEXT: movaps -40(%rsp), %xmm0 |
| 89 | ; CHECK-NEXT: retq |
| 90 | } |
| 91 | |
| 92 | ; C code (clears the sign bit, then selects 1.0L or 2.0L by comparing the result against 0.1L): |
| 93 | ; __float128 TestI128_1(__float128 x) |
| 94 | ; { |
| 95 | ; union IEEEl2bits z; |
| 96 | ; z.e = x; |
| 97 | ; z.bits.sign = 0; |
| 98 | ; return (z.e < 0.1L) ? 1.0L : 2.0L; |
| 99 | ; } |
| 100 | define fp128 @TestI128_1(fp128 %x) #0 { |
| 101 | entry: |
| 102 | %0 = bitcast fp128 %x to i128 |
| 103 | %bf.clear = and i128 %0, 170141183460469231731687303715884105727 ; 2^127 - 1: clears bit 127 (the sign bit) |
| 104 | %1 = bitcast i128 %bf.clear to fp128 |
| 105 | %cmp = fcmp olt fp128 %1, 0xL999999999999999A3FFB999999999999 ; fp128 encoding of 0.1L |
| 106 | %cond = select i1 %cmp, fp128 0xL00000000000000003FFF000000000000, fp128 0xL00000000000000004000000000000000 ; 1.0L if less, else 2.0L |
| 107 | ret fp128 %cond |
| 108 | ; CHECK-LABEL: TestI128_1: |
| 109 | ; CHECK: movaps %xmm0, |
| 110 | ; CHECK: movabsq $9223372036854775807, |
| 111 | ; CHECK: callq __lttf2 |
| 112 | ; CHECK: testl %eax, %eax |
| 113 | ; CHECK: movaps {{.*}}, %xmm0 |
| 114 | ; CHECK: retq |
| 115 | } |
| 116 | |
| 117 | ; C code (returns x when its sign bit is clear, otherwise y): |
| 118 | ; __float128 TestI128_2(__float128 x, __float128 y) |
| 119 | ; { |
| 120 | ; unsigned short hx; |
| 121 | ; union IEEEl2bits ge_u; |
| 122 | ; ge_u.e = x; |
| 123 | ; hx = ge_u.xbits.expsign; |
| 124 | ; return (hx & 0x8000) == 0 ? x : y; |
| 125 | ; } |
| 126 | define fp128 @TestI128_2(fp128 %x, fp128 %y) #0 { |
| 127 | entry: |
| 128 | %0 = bitcast fp128 %x to i128 |
| 129 | %cmp = icmp sgt i128 %0, -1 ; signed > -1 holds exactly when bit 127 is clear |
| 130 | %cond = select i1 %cmp, fp128 %x, fp128 %y |
| 131 | ret fp128 %cond |
| 132 | ; CHECK-LABEL: TestI128_2: |
| 133 | ; CHECK: movaps %xmm0, -24(%rsp) |
| 134 | ; CHECK-NEXT: cmpq $0, -16(%rsp) |
| 135 | ; CHECK-NEXT: jns |
| 136 | ; CHECK: movaps %xmm1, %xmm0 |
| 137 | ; CHECK: retq |
| 138 | } |
| 139 | |
| 140 | ; C code (when the exponent field is zero, scales by 2^514 and forces the exponent field to 0x3ffe): |
| 141 | ; __float128 TestI128_3(__float128 x, int *ex) |
| 142 | ; { |
| 143 | ; union IEEEl2bits u; |
| 144 | ; u.e = x; |
| 145 | ; if (u.bits.exp == 0) { |
| 146 | ; u.e *= 0x1.0p514; |
| 147 | ; u.bits.exp = 0x3ffe; |
| 148 | ; } |
| 149 | ; return (u.e); |
| 150 | ; } |
| 151 | define fp128 @TestI128_3(fp128 %x, i32* nocapture readnone %ex) #0 { |
| 152 | entry: |
| 153 | %0 = bitcast fp128 %x to i128 |
| 154 | %bf.cast = and i128 %0, 170135991163610696904058773219554885632 ; 0x7fff << 112: isolates the 15-bit exp field |
| 155 | %cmp = icmp eq i128 %bf.cast, 0 |
| 156 | br i1 %cmp, label %if.then, label %if.end |
| 157 | |
| 158 | if.then: ; preds = %entry |
| 159 | %mul = fmul fp128 %x, 0xL00000000000000004201000000000000 ; fp128 encoding of 2^514 (0x1.0p514 in the C code) |
| 160 | %1 = bitcast fp128 %mul to i128 |
| 161 | %bf.clear4 = and i128 %1, -170135991163610696904058773219554885633 ; inverse of the exp mask: clears the exp field |
| 162 | %bf.set = or i128 %bf.clear4, 85060207136517546210586590865283612672 ; 0x3ffe << 112: sets exp to 0x3ffe |
| 163 | br label %if.end |
| 164 | |
| 165 | if.end: ; preds = %if.then, %entry |
| 166 | %u.sroa.0.0 = phi i128 [ %bf.set, %if.then ], [ %0, %entry ] |
| 167 | %2 = bitcast i128 %u.sroa.0.0 to fp128 |
| 168 | ret fp128 %2 |
| 169 | ; CHECK-LABEL: TestI128_3: |
| 170 | ; CHECK: movaps %xmm0, |
| 171 | ; CHECK: movabsq $9223090561878065152, |
| 172 | ; CHECK: testq |
| 173 | ; CHECK: callq __multf3 |
| 174 | ; CHECK-NEXT: movaps %xmm0 |
| 175 | ; CHECK: movabsq $-9223090561878065153, |
| 176 | ; CHECK: movabsq $4611123068473966592, |
| 177 | ; CHECK: retq |
| 178 | } |
| 179 | |
| 180 | ; C code (adds x to a copy of x whose low 64 mantissa bits have been zeroed): |
| 181 | ; __float128 TestI128_4(__float128 x) |
| 182 | ; { |
| 183 | ; union IEEEl2bits u; |
| 184 | ; __float128 df; |
| 185 | ; u.e = x; |
| 186 | ; u.xbits.manl = 0; |
| 187 | ; df = u.e; |
| 188 | ; return x + df; |
| 189 | ; } |
| 190 | define fp128 @TestI128_4(fp128 %x) #0 { |
| 191 | entry: |
| 192 | %0 = bitcast fp128 %x to i128 |
| 193 | %bf.clear = and i128 %0, -18446744073709551616 ; -(2^64): clears the low 64 bits |
| 194 | %1 = bitcast i128 %bf.clear to fp128 |
| 195 | %add = fadd fp128 %1, %x |
| 196 | ret fp128 %add |
| 197 | ; CHECK-LABEL: TestI128_4: |
| 198 | ; CHECK: movaps %xmm0, %xmm1 |
| 199 | ; CHECK-NEXT: movaps %xmm1, 16(%rsp) |
| 200 | ; CHECK-NEXT: movq 24(%rsp), %rax |
| 201 | ; CHECK-NEXT: movq %rax, 8(%rsp) |
| 202 | ; CHECK-NEXT: movq $0, (%rsp) |
| 203 | ; CHECK-NEXT: movaps (%rsp), %xmm0 |
| 204 | ; CHECK-NEXT: callq __addtf3 |
| 205 | ; CHECK: retq |
| 206 | } |
| 207 | |
| 208 | @v128 = common global i128 0, align 16 |
| 209 | @v128_2 = common global i128 0, align 16 |
| 210 | |
| 211 | ; C code (stores (v128 << 96) | v128_2 back into v128): |
| 212 | ; unsigned __int128 v128, v128_2; |
| 213 | ; void TestShift128_2() { |
| 214 | ; v128 = ((v128 << 96) | v128_2); |
| 215 | ; } |
| 216 | define void @TestShift128_2() #2 { ; NOTE(review): attribute group #2 is "nounwind readnone", yet this body loads and stores globals - confirm this is intended |
| 217 | entry: |
| 218 | %0 = load i128, i128* @v128, align 16 |
| 219 | %shl = shl i128 %0, 96 |
| 220 | %1 = load i128, i128* @v128_2, align 16 |
| 221 | %or = or i128 %shl, %1 |
| 222 | store i128 %or, i128* @v128, align 16 |
| 223 | ret void |
| 224 | ; CHECK-LABEL: TestShift128_2: |
| 225 | ; CHECK: movq v128(%rip), %rax |
| 226 | ; CHECK-NEXT: shlq $32, %rax |
| 227 | ; CHECK-NEXT: movq v128_2(%rip), %rcx |
| 228 | ; CHECK-NEXT: orq v128_2+8(%rip), %rax |
| 229 | ; CHECK-NEXT: movq %rcx, v128(%rip) |
| 230 | ; CHECK-NEXT: movq %rax, v128+8(%rip) |
| 231 | ; CHECK-NEXT: retq |
| 232 | } |
| 233 | |
| 234 | define fp128 @acosl(fp128 %x) #0 { ; NOTE(review): IR and FileCheck expectations mirror @TestI128_4; presumably exercises the same lowering under a libm function name - confirm |
| 235 | entry: |
| 236 | %0 = bitcast fp128 %x to i128 |
| 237 | %bf.clear = and i128 %0, -18446744073709551616 ; -(2^64): clears the low 64 bits |
| 238 | %1 = bitcast i128 %bf.clear to fp128 |
| 239 | %add = fadd fp128 %1, %x |
| 240 | ret fp128 %add |
| 241 | ; CHECK-LABEL: acosl: |
| 242 | ; CHECK: movaps %xmm0, %xmm1 |
| 243 | ; CHECK-NEXT: movaps %xmm1, 16(%rsp) |
| 244 | ; CHECK-NEXT: movq 24(%rsp), %rax |
| 245 | ; CHECK-NEXT: movq %rax, 8(%rsp) |
| 246 | ; CHECK-NEXT: movq $0, (%rsp) |
| 247 | ; CHECK-NEXT: movaps (%rsp), %xmm0 |
| 248 | ; CHECK-NEXT: callq __addtf3 |
| 249 | ; CHECK: retq |
| 250 | } |
| 251 | |
| 252 | ; Compare i128 values and check i128 constants: returns %x when its sign bit is clear, otherwise %y (same IR and expectations as @TestI128_2). |
| 253 | define fp128 @TestComp(fp128 %x, fp128 %y) #0 { |
| 254 | entry: |
| 255 | %0 = bitcast fp128 %x to i128 |
| 256 | %cmp = icmp sgt i128 %0, -1 ; signed > -1 holds exactly when bit 127 is clear |
| 257 | %cond = select i1 %cmp, fp128 %x, fp128 %y |
| 258 | ret fp128 %cond |
| 259 | ; CHECK-LABEL: TestComp: |
| 260 | ; CHECK: movaps %xmm0, -24(%rsp) |
| 261 | ; CHECK-NEXT: cmpq $0, -16(%rsp) |
| 262 | ; CHECK-NEXT: jns |
| 263 | ; CHECK: movaps %xmm1, %xmm0 |
| 264 | ; CHECK: retq |
| 265 | } |
| 266 | |
| 267 | declare void @foo(fp128) #1 ; external callee tail-called by @TestUnionLD1 |
| 268 | |
| 269 | ; Test logical operations on fp128 values: the fabsl call is expected to lower to an andps followed directly by retq. |
| 270 | define fp128 @TestFABS_LD(fp128 %x) #0 { |
| 271 | entry: |
| 272 | %call = tail call fp128 @fabsl(fp128 %x) #2 |
| 273 | ret fp128 %call |
| 274 | ; CHECK-LABEL: TestFABS_LD: |
| 275 | ; CHECK: andps {{.*}}, %xmm0 |
| 276 | ; CHECK-NEXT: retq |
| 277 | } |
| 278 | |
| 279 | declare fp128 @fabsl(fp128) #1 ; called by @TestFABS_LD and @TestCopySign |
| 280 | |
| 281 | declare fp128 @copysignl(fp128, fp128) #1 ; called by @TestCopySign |
| 282 | |
| 283 | ; Test more complicated logical operations generated from copysignl: only the __subtf3 and __gttf2 libcalls are expected before the andps. |
| 284 | define void @TestCopySign({ fp128, fp128 }* noalias nocapture sret %agg.result, { fp128, fp128 }* byval nocapture readonly align 16 %z) #0 { |
| 285 | entry: |
| 286 | %z.realp = getelementptr inbounds { fp128, fp128 }, { fp128, fp128 }* %z, i64 0, i32 0 |
| 287 | %z.real = load fp128, fp128* %z.realp, align 16 |
| 288 | %z.imagp = getelementptr inbounds { fp128, fp128 }, { fp128, fp128 }* %z, i64 0, i32 1 |
| 289 | %z.imag4 = load fp128, fp128* %z.imagp, align 16 |
| 290 | %cmp = fcmp ogt fp128 %z.real, %z.imag4 |
| 291 | %sub = fsub fp128 %z.imag4, %z.imag4 |
| 292 | br i1 %cmp, label %if.then, label %cleanup |
| 293 | |
| 294 | if.then: ; preds = %entry |
| 295 | %call = tail call fp128 @fabsl(fp128 %sub) #2 |
| 296 | br label %cleanup |
| 297 | |
| 298 | cleanup: ; preds = %entry, %if.then |
| 299 | %z.real.sink = phi fp128 [ %z.real, %if.then ], [ %sub, %entry ] |
| 300 | %call.sink = phi fp128 [ %call, %if.then ], [ %z.real, %entry ] |
| 301 | %call5 = tail call fp128 @copysignl(fp128 %z.real.sink, fp128 %z.imag4) #2 |
| 302 | %0 = getelementptr inbounds { fp128, fp128 }, { fp128, fp128 }* %agg.result, i64 0, i32 0 |
| 303 | %1 = getelementptr inbounds { fp128, fp128 }, { fp128, fp128 }* %agg.result, i64 0, i32 1 |
| 304 | store fp128 %call.sink, fp128* %0, align 16 |
| 305 | store fp128 %call5, fp128* %1, align 16 |
| 306 | ret void |
| 307 | ; CHECK-LABEL: TestCopySign: |
| 308 | ; CHECK-NOT: call |
| 309 | ; CHECK: callq __subtf3 |
| 310 | ; CHECK-NOT: call |
| 311 | ; CHECK: callq __gttf2 |
| 312 | ; CHECK-NOT: call |
| 313 | ; CHECK: andps {{.*}}, %xmm0 |
| 314 | ; CHECK: retq |
| 315 | } |
| 316 | |
| 317 | |
| 318 | attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+ssse3,+sse3,+popcnt,+sse,+sse2,+sse4.1,+sse4.2" "unsafe-fp-math"="false" "use-soft-float"="false" } ; used by every function definition in this file except @TestShift128_2 |
| 319 | attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+ssse3,+sse3,+popcnt,+sse,+sse2,+sse4.1,+sse4.2" "unsafe-fp-math"="false" "use-soft-float"="false" } ; used by the external declarations @foo, @fabsl and @copysignl |
| 320 | attributes #2 = { nounwind readnone } ; used at the call sites of @foo, @fabsl and @copysignl, and on the definition of @TestShift128_2 |