Jatin Bhateja | 1a86c38 | 2017-09-21 09:53:21 +0000 | [diff] [blame] | 1 | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| 2 | ; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s -check-prefix=X64 |
| 3 | ; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s -check-prefix=X86 |
| 4 | |
; Declaration of the f32 square-root intrinsic called by the tests in this file.
| 5 | declare float @llvm.sqrt.f32(float %x); |
| 6 | |
; fast_recip_sqrt: returns 1.0 / sqrt(%x), where both the sqrt call and the
; fdiv carry the 'fast' flag.
; The assertions below expect a square-root instruction followed by a divide
; on both triples: sqrtss + divss for x86_64, fsqrt + fdivp for i686.
| 7 | define float @fast_recip_sqrt(float %x) { |
| 8 | ; X64-LABEL: fast_recip_sqrt: |
| 9 | ; X64: # BB#0: |
| 10 | ; X64-NEXT: sqrtss %xmm0, %xmm1 |
| 11 | ; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero |
| 12 | ; X64-NEXT: divss %xmm1, %xmm0 |
| 13 | ; X64-NEXT: retq |
| 14 | ; |
| 15 | ; X86-LABEL: fast_recip_sqrt: |
| 16 | ; X86: # BB#0: |
| 17 | ; X86-NEXT: flds {{[0-9]+}}(%esp) |
| 18 | ; X86-NEXT: fsqrt |
| 19 | ; X86-NEXT: fld1 |
| 20 | ; X86-NEXT: fdivp %st(1) |
| 21 | ; X86-NEXT: retl |
; IR under test: %y = sqrt(%x), %z = 1.0 / %y, both marked 'fast'.
| 22 | %y = call fast float @llvm.sqrt.f32(float %x) |
| 23 | %z = fdiv fast float 1.0, %y |
| 24 | ret float %z |
| 25 | } |
| 26 | |
; Declaration of the f32 llvm.fmuladd intrinsic called by the test below.
| 27 | declare float @llvm.fmuladd.f32(float %a, float %b, float %c); |
| 28 | |
; fast_fmuladd_opts: calls llvm.fmuladd with the 'fast' flag on the operands
; (%a, 2.0, %a), i.e. %a * 2.0 + %a. The parameters %b and %c are not used.
; The assertions below expect only additions of the input (two addss on
; x86_64; fadd + faddp on i686) and no multiply instruction.
| 29 | define float @fast_fmuladd_opts(float %a , float %b , float %c) { |
| 30 | ; X64-LABEL: fast_fmuladd_opts: |
| 31 | ; X64: # BB#0: |
| 32 | ; X64-NEXT: movaps %xmm0, %xmm1 |
Geoff Berry | bfc5fb4 | 2017-10-02 22:01:37 +0000 | [diff] [blame] | 33 | ; X64-NEXT: addss %xmm0, %xmm1 |
Jatin Bhateja | 1a86c38 | 2017-09-21 09:53:21 +0000 | [diff] [blame] | 34 | ; X64-NEXT: addss %xmm0, %xmm1 |
| 35 | ; X64-NEXT: movaps %xmm1, %xmm0 |
| 36 | ; X64-NEXT: retq |
| 37 | ; |
| 38 | ; X86-LABEL: fast_fmuladd_opts: |
| 39 | ; X86: # BB#0: |
| 40 | ; X86-NEXT: flds {{[0-9]+}}(%esp) |
| 41 | ; X86-NEXT: fld %st(0) |
| 42 | ; X86-NEXT: fadd %st(1) |
| 43 | ; X86-NEXT: faddp %st(1) |
| 44 | ; X86-NEXT: retl |
; IR under test: a single 'fast' fmuladd whose multiplicand and addend are both %a.
| 45 | %res = call fast float @llvm.fmuladd.f32(float %a, float 2.0, float %a) |
| 46 | ret float %res |
| 47 | } |
Sanjay Patel | 58f02af | 2017-09-21 17:40:58 +0000 | [diff] [blame] | 48 | |
| 49 | ; The multiply is strict (it has no fast-math flags); only the add that uses it is 'fast'. |
| 50 | |
; Global double that not_so_fast_mul_add stores its product into.
Jatin Bhateja | c034d36 | 2017-09-22 05:48:20 +0000 | [diff] [blame] | 51 | @mul1 = common global double 0.000000e+00, align 4 |
| 52 | |
; not_so_fast_mul_add: multiplies %x by 4.2 with no fast-math flags on the
; fmul, adds %x to the product with a 'fast' fadd, stores the product to
; @mul1, and returns the sum.
; The assertions below expect a real multiply by a constant loaded from
; memory, followed by an add, with the product then written out
; (mulsd/addsd/movsd on x86_64; fmull/fadd/fstpl on i686).
Sanjay Patel | 58f02af | 2017-09-21 17:40:58 +0000 | [diff] [blame] | 53 | define double @not_so_fast_mul_add(double %x) { |
| 54 | ; X64-LABEL: not_so_fast_mul_add: |
| 55 | ; X64: # BB#0: |
| 56 | ; X64-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero |
| 57 | ; X64-NEXT: mulsd %xmm0, %xmm1 |
| 58 | ; X64-NEXT: addsd %xmm1, %xmm0 |
Jatin Bhateja | c034d36 | 2017-09-22 05:48:20 +0000 | [diff] [blame] | 59 | ; X64-NEXT: movsd %xmm1, {{.*}}(%rip) |
Sanjay Patel | 58f02af | 2017-09-21 17:40:58 +0000 | [diff] [blame] | 60 | ; X64-NEXT: retq |
| 61 | ; |
| 62 | ; X86-LABEL: not_so_fast_mul_add: |
| 63 | ; X86: # BB#0: |
| 64 | ; X86-NEXT: fldl {{[0-9]+}}(%esp) |
| 65 | ; X86-NEXT: fld %st(0) |
| 66 | ; X86-NEXT: fmull {{\.LCPI.*}} |
Jatin Bhateja | c034d36 | 2017-09-22 05:48:20 +0000 | [diff] [blame] | 67 | ; X86-NEXT: fadd %st(0), %st(1) |
| 68 | ; X86-NEXT: fstpl mul1 |
Sanjay Patel | 58f02af | 2017-09-21 17:40:58 +0000 | [diff] [blame] | 69 | ; X86-NEXT: retl |
; IR under test: %m (no flags) has two users, the 'fast' fadd and the store.
| 70 | %m = fmul double %x, 4.2 |
| 71 | %a = fadd fast double %m, %x |
Jatin Bhateja | c034d36 | 2017-09-22 05:48:20 +0000 | [diff] [blame] | 72 | store double %m, double* @mul1, align 4 |
Sanjay Patel | 58f02af | 2017-09-21 17:40:58 +0000 | [diff] [blame] | 73 | ret double %a |
| 74 | } |
| 75 | |
| 76 | ; The sqrt is strict (it has no fast-math flags); only the divide that uses it is 'fast'. |
| 77 | |
; Global float that not_so_fast_recip_sqrt stores its square root into.
Jatin Bhateja | c034d36 | 2017-09-22 05:48:20 +0000 | [diff] [blame] | 78 | @sqrt1 = common global float 0.000000e+00, align 4 |
| 79 | |
; not_so_fast_recip_sqrt: calls llvm.sqrt.f32 with no fast-math flags, divides
; 1.0 by the result with a 'fast' fdiv, stores the square root to @sqrt1, and
; returns the quotient.
; The assertions below expect a square-root instruction followed by a divide,
; with the square root then written out (sqrtss/divss/movss on x86_64;
; fsqrt/fdiv/fstps on i686).
Sanjay Patel | 58f02af | 2017-09-21 17:40:58 +0000 | [diff] [blame] | 80 | define float @not_so_fast_recip_sqrt(float %x) { |
| 81 | ; X64-LABEL: not_so_fast_recip_sqrt: |
| 82 | ; X64: # BB#0: |
| 83 | ; X64-NEXT: sqrtss %xmm0, %xmm1 |
| 84 | ; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero |
| 85 | ; X64-NEXT: divss %xmm1, %xmm0 |
Jatin Bhateja | c034d36 | 2017-09-22 05:48:20 +0000 | [diff] [blame] | 86 | ; X64-NEXT: movss %xmm1, {{.*}}(%rip) |
Sanjay Patel | 58f02af | 2017-09-21 17:40:58 +0000 | [diff] [blame] | 87 | ; X64-NEXT: retq |
| 88 | ; |
| 89 | ; X86-LABEL: not_so_fast_recip_sqrt: |
| 90 | ; X86: # BB#0: |
| 91 | ; X86-NEXT: flds {{[0-9]+}}(%esp) |
| 92 | ; X86-NEXT: fsqrt |
| 93 | ; X86-NEXT: fld1 |
Jatin Bhateja | c034d36 | 2017-09-22 05:48:20 +0000 | [diff] [blame] | 94 | ; X86-NEXT: fdiv %st(1) |
| 95 | ; X86-NEXT: fxch %st(1) |
| 96 | ; X86-NEXT: fstps sqrt1 |
Sanjay Patel | 58f02af | 2017-09-21 17:40:58 +0000 | [diff] [blame] | 97 | ; X86-NEXT: retl |
; IR under test: %y (no flags) has two users, the 'fast' fdiv and the store.
| 98 | %y = call float @llvm.sqrt.f32(float %x) |
| 99 | %z = fdiv fast float 1.0, %y |
Jatin Bhateja | c034d36 | 2017-09-22 05:48:20 +0000 | [diff] [blame] | 100 | store float %y, float* @sqrt1, align 4 |
; NOTE(review): %ret is never used -- the function returns %z, and the
; assertions above contain no add. Confirm whether this fadd is intentional.
| 101 | %ret = fadd float %z , 14.5 |
Sanjay Patel | 58f02af | 2017-09-21 17:40:58 +0000 | [diff] [blame] | 102 | ret float %z |
| 103 | } |
| 104 | |