; RUN: llc < %s -O3 -mtriple=arm64-apple-ios -enable-unsafe-fp-math | FileCheck %s
; RUN: llc < %s -O3 -mtriple=arm64-apple-ios -fp-contract=fast | FileCheck %s

; foo_2d: loop over double elements of %src whose body contains fast-math
; multiply-then-subtract patterns on <2 x double> and on scalar double.
; The FileCheck directives ahead of the loop block expect three fmls
; instructions: vector, vector-by-element and scalar-by-element.
;
; %src: base pointer. The vector result is stored at element 5 and the
;       scalar result at element 11 on every iteration.
define void @foo_2d(double* %src) {
entry:
  %arrayidx1 = getelementptr inbounds double, double* %src, i64 5
  %arrayidx2 = getelementptr inbounds double, double* %src, i64 11
  %tmp = bitcast double* %arrayidx1 to <2 x double>*
  br label %for.body

; CHECK-LABEL: %for.body
; CHECK: fmls.2d {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
; CHECK: fmls.2d {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}[0]
; CHECK: fmls.d {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}[0]
for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  ; Count upwards from 0 so the exit compare against 25 below can be reached.
  ; A 'sub nuw' of 1 from the initial 0 would wrap and yield poison.
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %arrayidx3 = getelementptr inbounds double, double* %src, i64 %indvars.iv.next
  %tmp1 = load double, double* %arrayidx3, align 8
  %add = fadd fast double %tmp1, %tmp1
  %mul = fmul fast double %add, %add
  ; %e2 = <%add, %add>
  %e1 = insertelement <2 x double> undef, double %add, i32 0
  %e2 = insertelement <2 x double> %e1, double %add, i32 1
  %sub2 = fsub fast <2 x double> %e2, <double 3.000000e+00, double -3.000000e+00>
  ; %e4 = <%mul, %mul>
  %e3 = insertelement <2 x double> undef, double %mul, i32 0
  %e4 = insertelement <2 x double> %e3, double %mul, i32 1
  %mul2 = fmul fast <2 x double> %sub2, <double 3.000000e+00, double -3.000000e+00>
  ; %e5/%e6 are not used by any later instruction in this function.
  %e5 = insertelement <2 x double> undef, double %add, i32 0
  %e6 = insertelement <2 x double> %e5, double %add, i32 1
  ; constant - (vector * constant)
  %sub3 = fsub fast <2 x double> <double 3.000000e+00, double -3.000000e+00>, %mul2
  %mulx = fmul fast <2 x double> %sub2, %e2
  ; vector - (vector * vector)
  %subx = fsub fast <2 x double> %e4, %mulx
  ; %e7/%e8 are not used by any later instruction in this function.
  %e7 = insertelement <2 x double> undef, double %mul, i32 0
  %e8 = insertelement <2 x double> %e7, double %mul, i32 1
  %e9 = fmul fast <2 x double> %subx, %sub3
  store <2 x double> %e9, <2 x double>* %tmp, align 8
  ; scalar - (scalar * lane 0 of a vector)
  %e10 = extractelement <2 x double> %sub3, i32 0
  %mul3 = fmul fast double %mul, %e10
  %sub4 = fsub fast double %mul, %mul3
  store double %sub4, double* %arrayidx2, align 8
  %exitcond = icmp eq i64 %indvars.iv.next, 25
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}
; foo_2s: loop over float elements of %src whose body contains fast-math
; multiply-then-subtract patterns on <2 x float> and on scalar float.
; The FileCheck directives ahead of the loop block expect three fmls
; instructions: vector, vector-by-element and scalar-by-element.
;
; %src: base pointer. The vector result is stored at element 5 and the
;       scalar result at element 11 on every iteration.
define void @foo_2s(float* %src) {
entry:
  %vec.slot = getelementptr inbounds float, float* %src, i64 5
  %scalar.slot = getelementptr inbounds float, float* %src, i64 11
  %vec.ptr = bitcast float* %vec.slot to <2 x float>*
  br label %for.body

; CHECK-LABEL: %for.body
; CHECK: fmls.2s {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
; CHECK: fmls.2s {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}[0]
; CHECK: fmls.s {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}[0]
for.body:                                         ; preds = %for.body, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
  %i.next = add nuw nsw i64 %i, 1
  %elt.ptr = getelementptr inbounds float, float* %src, i64 %i.next
  %x = load float, float* %elt.ptr, align 8
  %dbl = fadd fast float %x, %x
  %sq = fmul fast float %dbl, %dbl
  ; %dbl.splat = <%dbl, %dbl>
  %dbl.lane0 = insertelement <2 x float> undef, float %dbl, i32 0
  %dbl.splat = insertelement <2 x float> %dbl.lane0, float %dbl, i32 1
  %diff = fsub fast <2 x float> %dbl.splat, <float 3.000000e+00, float -3.000000e+00>
  ; %sq.splat = <%sq, %sq>
  %sq.lane0 = insertelement <2 x float> undef, float %sq, i32 0
  %sq.splat = insertelement <2 x float> %sq.lane0, float %sq, i32 1
  %scaled = fmul fast <2 x float> %diff, <float 3.000000e+00, float -3.000000e+00>
  ; The next two values are not used by any later instruction.
  %unused.dbl.lane0 = insertelement <2 x float> undef, float %dbl, i32 0
  %unused.dbl.splat = insertelement <2 x float> %unused.dbl.lane0, float %dbl, i32 1
  ; constant - (vector * constant)
  %vfactor = fsub fast <2 x float> <float 3.000000e+00, float -3.000000e+00>, %scaled
  %vprod = fmul fast <2 x float> %diff, %dbl.splat
  ; vector - (vector * vector)
  %vacc = fsub fast <2 x float> %sq.splat, %vprod
  ; The next two values are not used by any later instruction.
  %unused.sq.lane0 = insertelement <2 x float> undef, float %sq, i32 0
  %unused.sq.splat = insertelement <2 x float> %unused.sq.lane0, float %sq, i32 1
  %vres = fmul fast <2 x float> %vacc, %vfactor
  store <2 x float> %vres, <2 x float>* %vec.ptr, align 8
  ; scalar - (scalar * lane 0 of a vector)
  %factor0 = extractelement <2 x float> %vfactor, i32 0
  %sprod = fmul fast float %sq, %factor0
  %sres = fsub fast float %sq, %sprod
  store float %sres, float* %scalar.slot, align 8
  %done = icmp eq i64 %i.next, 25
  br i1 %done, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}
; foo_4s: loop over float elements of %src whose body contains fast-math
; multiply-then-subtract patterns on <4 x float>. The FileCheck directives
; ahead of the loop block expect two fmls.4s instructions: a plain vector
; form and a by-element form.
;
; %src: base pointer. The vector result is stored starting at element 5
;       and a scalar product at element 11 on every iteration.
define void @foo_4s(float* %src) {
entry:
  %vec.slot = getelementptr inbounds float, float* %src, i64 5
  %scalar.slot = getelementptr inbounds float, float* %src, i64 11
  %vec.ptr = bitcast float* %vec.slot to <4 x float>*
  br label %for.body

; CHECK-LABEL: %for.body
; CHECK: fmls.4s {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
; CHECK: fmls.4s {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}[0]
for.body:                                         ; preds = %for.body, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
  %i.next = add nuw nsw i64 %i, 1
  %elt.ptr = getelementptr inbounds float, float* %src, i64 %i.next
  %x = load float, float* %elt.ptr, align 8
  %dbl = fadd fast float %x, %x
  %sq = fmul fast float %dbl, %dbl
  ; Only lanes 0 and 1 are populated; lanes 2 and 3 stay undef.
  %dbl.lane0 = insertelement <4 x float> undef, float %dbl, i32 0
  %dbl.lanes01 = insertelement <4 x float> %dbl.lane0, float %dbl, i32 1
  %sum = fadd fast <4 x float> %dbl.lanes01, <float 3.000000e+00, float -3.000000e+00, float 5.000000e+00, float 7.000000e+00>
  ; Only lanes 0 and 1 are populated; lanes 2 and 3 stay undef.
  %sq.lane0 = insertelement <4 x float> undef, float %sq, i32 0
  %sq.lanes01 = insertelement <4 x float> %sq.lane0, float %sq, i32 1
  %scaled = fmul fast <4 x float> %sum, <float 3.000000e+00, float -3.000000e+00, float 5.000000e+00, float 7.000000e+00>
  ; The next two values are not used by any later instruction.
  %unused.dbl.lane0 = insertelement <4 x float> undef, float %dbl, i32 0
  %unused.dbl.lanes01 = insertelement <4 x float> %unused.dbl.lane0, float %dbl, i32 1
  ; constant - (vector * constant)
  %vfactor = fsub fast <4 x float> <float 3.000000e+00, float -3.000000e+00, float 5.000000e+00, float 7.000000e+00>, %scaled
  %vprod = fmul fast <4 x float> %sum, %dbl.lanes01
  ; vector - (vector * vector)
  %vacc = fsub fast <4 x float> %sq.lanes01, %vprod
  ; The next two values are not used by any later instruction.
  %unused.sq.lane0 = insertelement <4 x float> undef, float %sq, i32 0
  %unused.sq.lanes01 = insertelement <4 x float> %unused.sq.lane0, float %sq, i32 1
  %vres = fmul fast <4 x float> %vacc, %vfactor
  store <4 x float> %vres, <4 x float>* %vec.ptr, align 8
  ; Scalar product using lane 0 of the vector factor; stored without a subtract.
  %factor0 = extractelement <4 x float> %vfactor, i32 0
  %sprod = fmul fast float %sq, %factor0
  store float %sprod, float* %scalar.slot, align 8
  %done = icmp eq i64 %i.next, 25
  br i1 %done, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; test1: returns (-(a * b)) - c for float operands. The negation is written
; as a subtraction from -0.0. The directives below expect the whole
; expression to be selected as a single fnmadd on s-registers.
; CHECK-LABEL: test1:
; CHECK: fnmadd s0, s0, s1, s2
define float @test1(float %a, float %b, float %c) {
entry:
  %prod = fmul float %a, %b
  %neg.prod = fsub float -0.000000e+00, %prod
  %result = fsub float %neg.prod, %c
  ret float %result
}
| 141 | |
; test2: returns (-(a * b)) - c for double operands. The negation is written
; as a subtraction from -0.0. The directives below expect the whole
; expression to be selected as a single fnmadd on d-registers.
; CHECK-LABEL: test2:
; CHECK: fnmadd d0, d0, d1, d2
define double @test2(double %a, double %b, double %c) {
entry:
  %prod = fmul double %a, %b
  %neg.prod = fsub double -0.000000e+00, %prod
  %result = fsub double %neg.prod, %c
  ret double %result
}