; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s -check-prefix=X64
; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s -check-prefix=X86

declare float @llvm.sqrt.f32(float %x);

; Reciprocal square root where both the sqrt call and the fdiv carry the
; 'fast' flag. The assertions below record the current output on each target:
; a real square root (sqrtss / fsqrt) followed by a divide (divss / fdivp).
define float @fast_recip_sqrt(float %x) {
; X64-LABEL: fast_recip_sqrt:
; X64: # BB#0:
; X64-NEXT: sqrtss %xmm0, %xmm1
; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT: divss %xmm1, %xmm0
; X64-NEXT: retq
;
; X86-LABEL: fast_recip_sqrt:
; X86: # BB#0:
; X86-NEXT: flds {{[0-9]+}}(%esp)
; X86-NEXT: fsqrt
; X86-NEXT: fld1
; X86-NEXT: fdivp %st(1)
; X86-NEXT: retl
  %y = call fast float @llvm.sqrt.f32(float %x)
  %z = fdiv fast float 1.0, %y
  ret float %z
}

declare float @llvm.fmuladd.f32(float %a, float %b, float %c);

; A 'fast' call computing fmuladd(%a, 2.0, %a). The assertions below show the
; result is formed with two additions and no multiply on either target.
; Only %a is used; %b and %c are accepted but never read.
define float @fast_fmuladd_opts(float %a , float %b , float %c) {
; X64-LABEL: fast_fmuladd_opts:
; X64: # BB#0:
; X64-NEXT: movaps %xmm0, %xmm1
; X64-NEXT: addss %xmm1, %xmm1
; X64-NEXT: addss %xmm0, %xmm1
; X64-NEXT: movaps %xmm1, %xmm0
; X64-NEXT: retq
;
; X86-LABEL: fast_fmuladd_opts:
; X86: # BB#0:
; X86-NEXT: flds {{[0-9]+}}(%esp)
; X86-NEXT: fld %st(0)
; X86-NEXT: fadd %st(1)
; X86-NEXT: faddp %st(1)
; X86-NEXT: retl
  %res = call fast float @llvm.fmuladd.f32(float %a, float 2.0, float %a)
  ret float %res
}

; The multiply is strict.
; Only the fadd carries the 'fast' flag; the fmul by 4.2 has no fast-math
; flags. The assertions below show a separate multiply followed by an add
; on both targets.

define double @not_so_fast_mul_add(double %x) {
; X64-LABEL: not_so_fast_mul_add:
; X64: # BB#0:
; X64-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; X64-NEXT: mulsd %xmm0, %xmm1
; X64-NEXT: addsd %xmm1, %xmm0
; X64-NEXT: retq
;
; X86-LABEL: not_so_fast_mul_add:
; X86: # BB#0:
; X86-NEXT: fldl {{[0-9]+}}(%esp)
; X86-NEXT: fld %st(0)
; X86-NEXT: fmull {{\.LCPI.*}}
; X86-NEXT: faddp %st(1)
; X86-NEXT: retl
  %m = fmul double %x, 4.2
  %a = fadd fast double %m, %x
  ret double %a
}

; The sqrt is strict.
; Only the fdiv carries the 'fast' flag; the sqrt call has no fast-math
; flags. The assertions below show a real square root followed by a divide
; on both targets.

define float @not_so_fast_recip_sqrt(float %x) {
; X64-LABEL: not_so_fast_recip_sqrt:
; X64: # BB#0:
; X64-NEXT: sqrtss %xmm0, %xmm1
; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT: divss %xmm1, %xmm0
; X64-NEXT: retq
;
; X86-LABEL: not_so_fast_recip_sqrt:
; X86: # BB#0:
; X86-NEXT: flds {{[0-9]+}}(%esp)
; X86-NEXT: fsqrt
; X86-NEXT: fld1
; X86-NEXT: fdivp %st(1)
; X86-NEXT: retl
  %y = call float @llvm.sqrt.f32(float %x)
  %z = fdiv fast float 1.0, %y
  ret float %z
}
