; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-neon-syntax=apple -verify-machineinstrs -o - %s | FileCheck %s
| 2 | |
; A splat multiply by 16.0 (2^4) feeding fptosi folds into a signed
; fixed-point convert with 4 fractional bits; no separate fmul may remain.
; The label is anchored with ':' so it cannot match the text of test10..test14.
; CHECK-LABEL: test1:
; CHECK-NOT: fmul.2s
; CHECK: fcvtzs.2s v0, v0, #4
; CHECK: ret
define <2 x i32> @test1(<2 x float> %f) {
  %mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 16.000000e+00>
  %vcvt.i = fptosi <2 x float> %mul.i to <2 x i32>
  ret <2 x i32> %vcvt.i
}
| 12 | |
; Four-lane variant: a splat multiply by 8.0 (2^3) is absorbed into the
; signed convert as 3 fractional bits.
; CHECK-LABEL: test2
; CHECK-NOT: fmul.4s
; CHECK: fcvtzs.4s v0, v0, #3
; CHECK: ret
define <4 x i32> @test2(<4 x float> %f) {
  %scaled = fmul <4 x float> %f, <float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00>
  %conv = fptosi <4 x float> %scaled to <4 x i32>
  ret <4 x i32> %conv
}
| 22 | |
; Double-precision variant: a splat multiply by 32.0 (2^5) is absorbed into
; the signed 64-bit convert as 5 fractional bits.
; CHECK-LABEL: test3
; CHECK-NOT: fmul.2d
; CHECK: fcvtzs.2d v0, v0, #5
; CHECK: ret
define <2 x i64> @test3(<2 x double> %d) {
  %scaled = fmul <2 x double> %d, <double 3.200000e+01, double 3.200000e+01>
  %conv = fptosi <2 x double> %scaled to <2 x i64>
  ret <2 x i64> %conv
}
| 32 | |
; Truncate double to i32: the splat multiply by 16.0 (2^4) folds into a
; 64-bit-lane fixed-point convert with 4 fractional bits, and the result is
; then narrowed to 32-bit lanes with xtn.
; The negative pattern names only the mnemonic: fmul never takes an
; immediate, so a pattern that includes "#4" could not match anything.
; CHECK-LABEL: test4:
; CHECK-NOT: fmul.2d
; CHECK: fcvtzs.2d v0, v0, #4
; CHECK: xtn.2s
; CHECK: ret
define <2 x i32> @test4(<2 x double> %d) {
  %mul.i = fmul <2 x double> %d, <double 16.000000e+00, double 16.000000e+00>
  %vcvt.i = fptosi <2 x double> %mul.i to <2 x i32>
  ret <2 x i32> %vcvt.i
}
| 44 | |
; Integer result narrower than the source (float -> i16): the multiply by
; 16.0 still folds into a 32-bit-lane fixed-point convert.
; CHECK-LABEL: test5
; CHECK-NOT: fmul.2s
; CHECK: fcvtzs.2s v0, v0, #4
; CHECK: ret
define <2 x i16> @test5(<2 x float> %f) {
  %scaled = fmul <2 x float> %f, <float 1.600000e+01, float 1.600000e+01>
  %conv = fptosi <2 x float> %scaled to <2 x i16>
  ret <2 x i16> %conv
}
| 55 | |
; No fold when the integer result (i64) is wider than the float source:
; the multiply stays, the value is widened with fcvtl, then converted.
; CHECK-LABEL: test6
; CHECK: fmov.2s v1, #16.00000000
; CHECK: fmul.2s v0, v0, v1
; CHECK: fcvtl v0.2d, v0.2s
; CHECK: fcvtzs.2d v0, v0
; CHECK: ret
define <2 x i64> @test6(<2 x float> %f) {
  %scaled = fmul <2 x float> %f, <float 1.600000e+01, float 1.600000e+01>
  %conv = fptosi <2 x float> %scaled to <2 x i64>
  ret <2 x i64> %conv
}
| 68 | |
; Unsigned flavour: fptoui of a splat multiply by 16.0 becomes fcvtzu with
; 4 fractional bits.
; CHECK-LABEL: test7
; CHECK-NOT: fmul.2s
; CHECK: fcvtzu.2s v0, v0, #4
; CHECK: ret
define <2 x i32> @test7(<2 x float> %f) {
  %scaled = fmul <2 x float> %f, <float 1.600000e+01, float 1.600000e+01>
  %conv = fptoui <2 x float> %scaled to <2 x i32>
  ret <2 x i32> %conv
}
| 79 | |
; Negative test: 17.0 is not a power of two, so the multiply and the plain
; convert must both be emitted.
; CHECK-LABEL: test8
; CHECK: fmov.2s v1, #17.00000000
; CHECK: fmul.2s v0, v0, v1
; CHECK: fcvtzu.2s v0, v0
; CHECK: ret
define <2 x i32> @test8(<2 x float> %f) {
  %scaled = fmul <2 x float> %f, <float 1.700000e+01, float 1.700000e+01>
  %conv = fptoui <2 x float> %scaled to <2 x i32>
  ret <2 x i32> %conv
}
| 91 | |
; Negative test: the lanes use different powers of two (16.0 and 8.0), so
; there is no single scale to fold and the multiply is kept.
; CHECK-LABEL: test9
; CHECK: fmul.2s v0, v0, v1
; CHECK: fcvtzu.2s v0, v0
; CHECK: ret
define <2 x i32> @test9(<2 x float> %f) {
  %scaled = fmul <2 x float> %f, <float 1.600000e+01, float 8.000000e+00>
  %conv = fptoui <2 x float> %scaled to <2 x i32>
  ret <2 x i32> %conv
}
| 102 | |
; Negative test: a multiplier whose lanes are all undef must not be
; combined; the multiply and the plain convert are both expected.
; CHECK-LABEL: test10
; CHECK: fmul.2s v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; CHECK: fcvtzu.2s v{{[0-9]+}}, v{{[0-9]+}}
; CHECK: ret
define <2 x i32> @test10(<2 x float> %f) {
  %scaled = fmul <2 x float> %f, <float undef, float undef>
  %conv = fptoui <2 x float> %scaled to <2 x i32>
  ret <2 x i32> %conv
}
| 113 | |
; Combine if the multiplier mixes undef with a power of two: the defined
; lane (8.0 = 2^3) supplies the scale, giving 3 fractional bits.
; As in the other positive tests, also require that no separate fmul is left.
; CHECK-LABEL: test11:
; CHECK-NOT: fmul.2s
; CHECK: fcvtzu.2s v0, v0, #3
; CHECK: ret
define <2 x i32> @test11(<2 x float> %f) {
  %mul.i = fmul <2 x float> %f, <float undef, float 8.000000e+00>
  %vcvt.i = fptoui <2 x float> %mul.i to <2 x i32>
  ret <2 x i32> %vcvt.i
}
| 123 | |
; Negative test: multiplying by 0.0 must not be combined; the multiply and
; the plain signed convert are both expected.
; CHECK-LABEL: test12
; CHECK: fmul.2s v0, v0, v1
; CHECK: fcvtzs.2s v0, v0
; CHECK: ret
define <2 x i32> @test12(<2 x float> %f) {
  %scaled = fmul <2 x float> %f, zeroinitializer
  %conv = fptosi <2 x float> %scaled to <2 x i32>
  ret <2 x i32> %conv
}
| 134 | |
; Negative test: the multiplier is 2^33, a power of two that is out of
; range for 32-bit lanes, so the multiply is kept.
; CHECK-LABEL: test13
; CHECK: fmul.2s v0, v0, v1
; CHECK: fcvtzs.2s v0, v0
; CHECK: ret
define <2 x i32> @test13(<2 x float> %f) {
  %scaled = fmul <2 x float> %f, <float 0x4200000000000000, float 0x4200000000000000>
  %conv = fptosi <2 x float> %scaled to <2 x i32>
  ret <2 x i32> %conv
}
| 145 | |
; Boundary case: the multiplier is 2^32, the largest power of two that still
; folds for 32-bit lanes (32 fractional bits).
; As in the other positive tests, also require that no separate fmul is left.
; CHECK-LABEL: test14:
; CHECK-NOT: fmul.2s
; CHECK: fcvtzs.2s v0, v0, #32
; CHECK: ret
define <2 x i32> @test14(<2 x float> %f) {
  %mul.i = fmul <2 x float> %f, <float 0x41F0000000000000, float 0x41F0000000000000>
  %vcvt.i = fptosi <2 x float> %mul.i to <2 x i32>
  ret <2 x i32> %vcvt.i
}