; RUN: llc < %s -mcpu=cortex-a9 -verify-coalescing -verify-machineinstrs | FileCheck %s
; Module header: llc output for this file is piped into FileCheck, which matches
; it against the directives embedded in the comments below.
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
target triple = "thumbv7-apple-ios0.0.0"

; CHECK: f
; The vld2 and vst2 are not aligned wrt each other, the second Q loaded is the
; first one stored.
; The coalescer must find a super-register larger than QQ to eliminate the copy
; setting up the vst2 data.
; CHECK: vld2
; CHECK-NOT: vorr
; CHECK-NOT: vmov
; CHECK: vst2
;
; Note: the %c argument is not used by this function.
define void @f(float* %p, i32 %c) nounwind ssp {
entry:
  %0 = bitcast float* %p to i8*
  ; Load a pair of <4 x float> vectors from %p; only element 1 of the pair is used.
  %vld2 = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32.p0i8(i8* %0, i32 4)
  %vld221 = extractvalue { <4 x float>, <4 x float> } %vld2, 1
  ; Destination is 8 floats past %p.
  %add.ptr = getelementptr inbounds float, float* %p, i32 8
  %1 = bitcast float* %add.ptr to i8*
  ; The second loaded vector becomes the first stored vector; the second stored
  ; vector is undef.
  tail call void @llvm.arm.neon.vst2.p0i8.v4f32(i8* %1, <4 x float> %vld221, <4 x float> undef, i32 4)
  ret void
}

; CHECK: f1
; FIXME: This function still has copies.
;
; Like @f, but the second vst2 operand is element 0 of a second vld2 taken from
; the store address (%p + 8 floats) instead of undef. The %c argument is unused.
define void @f1(float* %p, i32 %c) nounwind ssp {
entry:
  %0 = bitcast float* %p to i8*
  ; First load: only element 1 of the pair is used.
  %vld2 = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32.p0i8(i8* %0, i32 4)
  %vld221 = extractvalue { <4 x float>, <4 x float> } %vld2, 1
  %add.ptr = getelementptr inbounds float, float* %p, i32 8
  %1 = bitcast float* %add.ptr to i8*
  ; Second load, from %p + 8 floats: only element 0 of the pair is used.
  %vld22 = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32.p0i8(i8* %1, i32 4)
  %vld2215 = extractvalue { <4 x float>, <4 x float> } %vld22, 0
  ; Store (element 1 of first load, element 0 of second load) back to %p + 8 floats.
  tail call void @llvm.arm.neon.vst2.p0i8.v4f32(i8* %1, <4 x float> %vld221, <4 x float> %vld2215, i32 4)
  ret void
}

; CHECK: f2
; FIXME: This function still has copies.
;
; Loop form of @f1: element 1 of each vld2 is carried through a phi and becomes
; the first vst2 operand of the following iteration. The body runs at least once
; and repeats until the decremented counter reaches zero.
define void @f2(float* %p, i32 %c) nounwind ssp {
entry:
  %0 = bitcast float* %p to i8*
  %vld2 = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32.p0i8(i8* %0, i32 4)
  ; Seed value for the loop-carried vector.
  %vld224 = extractvalue { <4 x float>, <4 x float> } %vld2, 1
  br label %do.body

do.body:                                          ; preds = %do.body, %entry
  ; Loop-carried state: previous element-1 vector, remaining count, current pointer.
  %qq0.0.1.0 = phi <4 x float> [ %vld224, %entry ], [ %vld2216, %do.body ]
  %c.addr.0 = phi i32 [ %c, %entry ], [ %dec, %do.body ]
  %p.addr.0 = phi float* [ %p, %entry ], [ %add.ptr, %do.body ]
  ; Advance by 8 floats; the load and the store below both use this address.
  %add.ptr = getelementptr inbounds float, float* %p.addr.0, i32 8
  %1 = bitcast float* %add.ptr to i8*
  %vld22 = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32.p0i8(i8* %1, i32 4)
  %vld2215 = extractvalue { <4 x float>, <4 x float> } %vld22, 0
  %vld2216 = extractvalue { <4 x float>, <4 x float> } %vld22, 1
  ; Store (carried vector, element 0 of this iteration's load).
  tail call void @llvm.arm.neon.vst2.p0i8.v4f32(i8* %1, <4 x float> %qq0.0.1.0, <4 x float> %vld2215, i32 4)
  %dec = add nsw i32 %c.addr.0, -1
  %tobool = icmp eq i32 %dec, 0
  br i1 %tobool, label %do.end, label %do.body

do.end:                                           ; preds = %do.body
  ret void
}

; NEON two-vector load/store intrinsics called by @f, @f1 and @f2.
declare { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32.p0i8(i8*, i32) nounwind readonly
declare void @llvm.arm.neon.vst2.p0i8.v4f32(i8*, <4 x float>, <4 x float>, i32) nounwind

; CHECK: f3
; This function has lane insertions that span basic blocks.
; The trivial REG_SEQUENCE lowering can't handle that, but the coalescer can.
;
; void f3(float *p, float *q) {
;   float32x2_t x;
;   x[1] = p[3];
;   if (q)
;     x[0] = q[0] + q[1];
;   else
;     x[0] = p[2];
;   vst1_f32(p+4, x);
; }
;
; CHECK-NOT: vmov
; CHECK-NOT: vorr
define void @f3(float* %p, float* %q) nounwind ssp {
entry:
  ; x[1] = p[3]; lane 0 is filled in by one of the two successor blocks.
  %arrayidx = getelementptr inbounds float, float* %p, i32 3
  %0 = load float, float* %arrayidx, align 4
  %vecins = insertelement <2 x float> undef, float %0, i32 1
  %tobool = icmp eq float* %q, null
  br i1 %tobool, label %if.else, label %if.then

if.then:                                          ; preds = %entry
  ; q != null: x[0] = q[0] + q[1]
  %1 = load float, float* %q, align 4
  %arrayidx2 = getelementptr inbounds float, float* %q, i32 1
  %2 = load float, float* %arrayidx2, align 4
  %add = fadd float %1, %2
  %vecins3 = insertelement <2 x float> %vecins, float %add, i32 0
  br label %if.end

if.else:                                          ; preds = %entry
  ; q == null: x[0] = p[2]
  %arrayidx4 = getelementptr inbounds float, float* %p, i32 2
  %3 = load float, float* %arrayidx4, align 4
  %vecins5 = insertelement <2 x float> %vecins, float %3, i32 0
  br label %if.end

if.end:                                           ; preds = %if.else, %if.then
  ; Merge the two fully built vectors and store the result at p + 4.
  %x.0 = phi <2 x float> [ %vecins3, %if.then ], [ %vecins5, %if.else ]
  %add.ptr = getelementptr inbounds float, float* %p, i32 4
  %4 = bitcast float* %add.ptr to i8*
  tail call void @llvm.arm.neon.vst1.p0i8.v2f32(i8* %4, <2 x float> %x.0, i32 4)
  ret void
}

; NEON single-vector <2 x float> store/load intrinsics (the store is called by
; @f3 and @f4, the load by @f4).
declare void @llvm.arm.neon.vst1.p0i8.v2f32(i8*, <2 x float>, i32) nounwind
declare <2 x float> @llvm.arm.neon.vld1.v2f32.p0i8(i8*, i32) nounwind readonly

; CHECK: f4
; This function inserts a lane into a fully defined vector.
; The destination lane isn't read, so the subregs can coalesce.
; CHECK-NOT: vmov
; CHECK-NOT: vorr
;
; Loads a <2 x float> from %p, replaces lane 1 with q[0] + q[1] when %q is
; non-null, and stores the vector back to %p.
define void @f4(float* %p, float* %q) nounwind ssp {
entry:
  %0 = bitcast float* %p to i8*
  %vld1 = tail call <2 x float> @llvm.arm.neon.vld1.v2f32.p0i8(i8* %0, i32 4)
  %tobool = icmp eq float* %q, null
  br i1 %tobool, label %if.end, label %if.then

if.then:                                          ; preds = %entry
  ; Overwrite lane 1 of the loaded vector; the old lane-1 value is never read.
  %1 = load float, float* %q, align 4
  %arrayidx1 = getelementptr inbounds float, float* %q, i32 1
  %2 = load float, float* %arrayidx1, align 4
  %add = fadd float %1, %2
  %vecins = insertelement <2 x float> %vld1, float %add, i32 1
  br label %if.end

if.end:                                           ; preds = %entry, %if.then
  %x.0 = phi <2 x float> [ %vecins, %if.then ], [ %vld1, %entry ]
  tail call void @llvm.arm.neon.vst1.p0i8.v2f32(i8* %0, <2 x float> %x.0, i32 4)
  ret void
}

; CHECK: f5
; Coalesce vector lanes through phis.
; CHECK: vmov.f32 {{.*}}, #1.0
; CHECK-NOT: vmov
; CHECK-NOT: vorr
; CHECK: bx
; We may leave the last insertelement in the if.end block.
; It is inserting the %add value into a dead lane, but %add causes interference
; in the entry block, and we don't do dead lane checks across basic blocks.
define void @f5(float* %p, float* %q) nounwind ssp {
entry:
  %0 = bitcast float* %p to i8*
  %vld1 = tail call <4 x float> @llvm.arm.neon.vld1.v4f32.p0i8(i8* %0, i32 4)
  ; Split the loaded vector into its four scalar lanes.
  %vecext = extractelement <4 x float> %vld1, i32 0
  %vecext1 = extractelement <4 x float> %vld1, i32 1
  %vecext2 = extractelement <4 x float> %vld1, i32 2
  %vecext3 = extractelement <4 x float> %vld1, i32 3
  ; Lane 3 is always replaced by lane3 + 1.0, computed here in the entry block.
  %add = fadd float %vecext3, 1.000000e+00
  %tobool = icmp eq float* %q, null
  br i1 %tobool, label %if.end, label %if.then

if.then:                                          ; preds = %entry
  ; q != null: lane0 += q[1], lane1 += q[0], lane2 += q[2]
  %arrayidx = getelementptr inbounds float, float* %q, i32 1
  %1 = load float, float* %arrayidx, align 4
  %add4 = fadd float %vecext, %1
  %2 = load float, float* %q, align 4
  %add6 = fadd float %vecext1, %2
  %arrayidx7 = getelementptr inbounds float, float* %q, i32 2
  %3 = load float, float* %arrayidx7, align 4
  %add8 = fadd float %vecext2, %3
  br label %if.end

if.end:                                           ; preds = %entry, %if.then
  ; Each lane arrives through its own scalar phi and is re-inserted into a
  ; fresh vector, which is stored back to %p.
  %a.0 = phi float [ %add4, %if.then ], [ %vecext, %entry ]
  %b.0 = phi float [ %add6, %if.then ], [ %vecext1, %entry ]
  %c.0 = phi float [ %add8, %if.then ], [ %vecext2, %entry ]
  %vecinit = insertelement <4 x float> undef, float %a.0, i32 0
  %vecinit9 = insertelement <4 x float> %vecinit, float %b.0, i32 1
  %vecinit10 = insertelement <4 x float> %vecinit9, float %c.0, i32 2
  %vecinit11 = insertelement <4 x float> %vecinit10, float %add, i32 3
  tail call void @llvm.arm.neon.vst1.p0i8.v4f32(i8* %0, <4 x float> %vecinit11, i32 4)
  ret void
}

; NEON single-vector <4 x float> load/store intrinsics called by @f5.
declare <4 x float> @llvm.arm.neon.vld1.v4f32.p0i8(i8*, i32) nounwind readonly

declare void @llvm.arm.neon.vst1.p0i8.v4f32(i8*, <4 x float>, i32) nounwind

; CHECK: pr13999
; NOTE(review): presumably a reduced reproducer for PR13999 - confirm against
; the bug tracker. Only the function name is matched in the output; no
; instruction patterns are verified. All branch conditions are constants or undef.
define void @pr13999() nounwind readonly {
entry:
  br i1 true, label %outer_loop, label %loop.end

outer_loop:
  ; Insert the loop-carried double into lane 0 of a zero vector.
  %d = phi double [ 0.0, %entry ], [ %add, %after_inner_loop ]
  %0 = insertelement <2 x double> <double 0.0, double 0.0>, double %d, i32 0
  br i1 undef, label %after_inner_loop, label %inner_loop

inner_loop:
  br i1 true, label %after_inner_loop, label %inner_loop

after_inner_loop:
  ; Only lane 1 of the merged vector is read.
  %1 = phi <2 x double> [ %0, %outer_loop ], [ <double 0.0, double 0.0>,
                          %inner_loop ]
  %2 = extractelement <2 x double> %1, i32 1
  %add = fadd double 1.0, %2
  br i1 false, label %loop.end, label %outer_loop

loop.end:
  %d.end = phi double [ 0.0, %entry ], [ %add, %after_inner_loop ]
  ret void
}

; CHECK: pr14078
; NOTE(review): presumably a reduced reproducer for PR14078 - confirm against
; the bug tracker. Only the function name is matched in the output; no
; instruction patterns are verified. %arg and %arg1 are unused; many operands
; are undef.
define arm_aapcs_vfpcc i32 @pr14078(i8* nocapture %arg, i8* nocapture %arg1, i32 %arg2) nounwind uwtable readonly {
bb:
  br i1 undef, label %bb31, label %bb3

bb3:                                              ; preds = %bb12, %bb
  ; Chain of 64-bit half extractions, bitcasts and splats feeding @baz.
  %tmp = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer
  %tmp4 = bitcast <1 x i64> %tmp to <2 x float>
  %tmp5 = shufflevector <2 x float> %tmp4, <2 x float> undef, <4 x i32> zeroinitializer
  %tmp6 = bitcast <4 x float> %tmp5 to <2 x i64>
  %tmp7 = shufflevector <2 x i64> %tmp6, <2 x i64> undef, <1 x i32> zeroinitializer
  %tmp8 = bitcast <1 x i64> %tmp7 to <2 x float>
  %tmp9 = tail call <2 x float> @baz(<2 x float> <float 0xFFFFFFFFE0000000, float 0.000000e+00>, <2 x float> %tmp8, <2 x float> zeroinitializer) nounwind
  br i1 undef, label %bb10, label %bb12

bb10:                                             ; preds = %bb3
  ; The loaded value is not used afterwards.
  %tmp11 = load <4 x float>, <4 x float>* undef, align 8
  br label %bb12

bb12:                                             ; preds = %bb10, %bb3
  ; Widen the @baz result to 128 bits, multiply, then narrow again for @baz67.
  %tmp13 = shufflevector <2 x float> %tmp9, <2 x float> zeroinitializer, <2 x i32> <i32 0, i32 2>
  %tmp14 = bitcast <2 x float> %tmp13 to <1 x i64>
  %tmp15 = shufflevector <1 x i64> %tmp14, <1 x i64> zeroinitializer, <2 x i32> <i32 0, i32 1>
  %tmp16 = bitcast <2 x i64> %tmp15 to <4 x float>
  %tmp17 = fmul <4 x float> zeroinitializer, %tmp16
  %tmp18 = bitcast <4 x float> %tmp17 to <2 x i64>
  %tmp19 = shufflevector <2 x i64> %tmp18, <2 x i64> undef, <1 x i32> zeroinitializer
  %tmp20 = bitcast <1 x i64> %tmp19 to <2 x float>
  %tmp21 = tail call <2 x float> @baz67(<2 x float> %tmp20, <2 x float> undef) nounwind
  %tmp22 = tail call <2 x float> @baz67(<2 x float> %tmp21, <2 x float> %tmp21) nounwind
  %tmp23 = shufflevector <2 x float> %tmp22, <2 x float> undef, <4 x i32> zeroinitializer
  %tmp24 = bitcast <4 x float> %tmp23 to <2 x i64>
  %tmp25 = shufflevector <2 x i64> %tmp24, <2 x i64> undef, <1 x i32> zeroinitializer
  %tmp26 = bitcast <1 x i64> %tmp25 to <2 x float>
  %tmp27 = extractelement <2 x float> %tmp26, i32 0
  %tmp28 = fcmp olt float %tmp27, 0.000000e+00
  %tmp29 = select i1 %tmp28, i32 0, i32 undef
  %tmp30 = icmp ult i32 undef, %arg2
  br i1 %tmp30, label %bb3, label %bb31

bb31:                                             ; preds = %bb12, %bb
  %tmp32 = phi i32 [ 1, %bb ], [ %tmp29, %bb12 ]
  ret i32 %tmp32
}

; External helpers called by @pr14078; their bodies are not part of this file.
declare <2 x float> @baz(<2 x float>, <2 x float>, <2 x float>) nounwind readnone

declare <2 x float> @baz67(<2 x float>, <2 x float>) nounwind readnone

; Aggregate of four <4 x float> wrappers; used as the return type of @pr14079.
%struct.wombat.5 = type { %struct.quux, %struct.quux, %struct.quux, %struct.quux }
%struct.quux = type { <4 x float> }

; CHECK: pr14079
; NOTE(review): presumably a reduced reproducer for PR14079 - confirm against
; the bug tracker. Only the function name is matched in the output; no
; instruction patterns are verified. None of the three pointer arguments is used.
define linkonce_odr arm_aapcs_vfpcc %struct.wombat.5 @pr14079(i8* nocapture %arg, i8* nocapture %arg1, i8* nocapture %arg2) nounwind uwtable inlinehint {
bb:
  ; Build %tmp8 from the low 64-bit half of a zero vector; it ends up in the
  ; high half of a 128-bit value whose low half is undef.
  %tmp = shufflevector <2 x i64> zeroinitializer, <2 x i64> undef, <1 x i32> zeroinitializer
  %tmp3 = bitcast <1 x i64> %tmp to <2 x float>
  %tmp4 = shufflevector <2 x float> %tmp3, <2 x float> zeroinitializer, <2 x i32> <i32 1, i32 3>
  %tmp5 = shufflevector <2 x float> %tmp4, <2 x float> undef, <2 x i32> <i32 1, i32 3>
  %tmp6 = bitcast <2 x float> %tmp5 to <1 x i64>
  %tmp7 = shufflevector <1 x i64> undef, <1 x i64> %tmp6, <2 x i32> <i32 0, i32 1>
  %tmp8 = bitcast <2 x i64> %tmp7 to <4 x float>
  ; Build %tmp15 from the high 64-bit half of a zero vector; it ends up in the
  ; low half of a 128-bit value whose high half is undef.
  %tmp9 = shufflevector <2 x i64> zeroinitializer, <2 x i64> undef, <1 x i32> <i32 1>
  %tmp10 = bitcast <1 x i64> %tmp9 to <2 x float>
  %tmp11 = shufflevector <2 x float> %tmp10, <2 x float> undef, <2 x i32> <i32 0, i32 2>
  %tmp12 = shufflevector <2 x float> %tmp11, <2 x float> undef, <2 x i32> <i32 0, i32 2>
  %tmp13 = bitcast <2 x float> %tmp12 to <1 x i64>
  %tmp14 = shufflevector <1 x i64> %tmp13, <1 x i64> undef, <2 x i32> <i32 0, i32 1>
  %tmp15 = bitcast <2 x i64> %tmp14 to <4 x float>
  ; Fields 1, 2 and 3 of the result are set; field 0 stays undef.
  %tmp16 = insertvalue %struct.wombat.5 undef, <4 x float> %tmp8, 1, 0
  %tmp17 = insertvalue %struct.wombat.5 %tmp16, <4 x float> %tmp15, 2, 0
  %tmp18 = insertvalue %struct.wombat.5 %tmp17, <4 x float> undef, 3, 0
  ret %struct.wombat.5 %tmp18
}

; CHECK: adjustCopiesBackFrom
; The shuffle in if.else3 must be preserved even though adjustCopiesBackFrom
; is tempted to remove it.
; CHECK: vorr d
define internal void @adjustCopiesBackFrom(<2 x i64>* noalias nocapture sret %agg.result, <2 x i64> %in) {
entry:
  ; %.in is <0, undef> when lane 0 of %in is below 1, otherwise %in itself.
  %0 = extractelement <2 x i64> %in, i32 0
  %cmp = icmp slt i64 %0, 1
  %.in = select i1 %cmp, <2 x i64> <i64 0, i64 undef>, <2 x i64> %in
  %1 = extractelement <2 x i64> %in, i32 1
  %cmp1 = icmp slt i64 %1, 1
  br i1 %cmp1, label %if.then2, label %if.else3

if.then2:                                         ; preds = %entry
  ; Lane 1 of %in is below 1: force lane 1 of the result to 0.
  %2 = insertelement <2 x i64> %.in, i64 0, i32 1
  br label %if.end4

if.else3:                                         ; preds = %entry
  ; Otherwise take lane 0 from %.in and lane 1 from the original %in.
  %3 = shufflevector <2 x i64> %.in, <2 x i64> %in, <2 x i32> <i32 0, i32 3>
  br label %if.end4

if.end4:                                          ; preds = %if.else3, %if.then2
  %result.2 = phi <2 x i64> [ %2, %if.then2 ], [ %3, %if.else3 ]
  store <2 x i64> %result.2, <2 x i64>* %agg.result, align 128
  ret void
}

; <rdar://problem/12758887>
; RegisterCoalescer::updateRegDefsUses() could visit an instruction more than
; once under rare circumstances. When widening a register from QPR to DTriple
; with the original virtual register in dsub_1_dsub_2, the double rewrite would
; produce an invalid sub-register.
;
; This is because dsub_1_dsub_2 is not an idempotent sub-register index.
; It will translate %vr:dsub_0 -> %vr:dsub_1.
;
; No output patterns are matched for this function.
; NOTE(review): presumably it only has to compile cleanly under the verifier
; options given on the llc command line - confirm.
define hidden fastcc void @radar12758887() nounwind optsize ssp {
entry:
  br i1 undef, label %for.body, label %for.end70

for.body:                                         ; preds = %for.end, %entry
  br i1 undef, label %for.body29, label %for.end

for.body29:                                       ; preds = %for.body29, %for.body
  ; Splat lane 0 and lane 1 of the loaded vector into two separate products.
  %0 = load <2 x double>, <2 x double>* null, align 1
  %splat40 = shufflevector <2 x double> %0, <2 x double> undef, <2 x i32> zeroinitializer
  %mul41 = fmul <2 x double> undef, %splat40
  %add42 = fadd <2 x double> undef, %mul41
  %splat44 = shufflevector <2 x double> %0, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  %mul45 = fmul <2 x double> undef, %splat44
  %add46 = fadd <2 x double> undef, %mul45
  br i1 undef, label %for.end, label %for.body29

for.end:                                          ; preds = %for.body29, %for.body
  ; %accumR2.0.lcssa is not used below; only %accumI2.0.lcssa, with its lanes
  ; swapped, feeds the stored value.
  %accumR2.0.lcssa = phi <2 x double> [ zeroinitializer, %for.body ], [ %add42, %for.body29 ]
  %accumI2.0.lcssa = phi <2 x double> [ zeroinitializer, %for.body ], [ %add46, %for.body29 ]
  %1 = shufflevector <2 x double> %accumI2.0.lcssa, <2 x double> undef, <2 x i32> <i32 1, i32 0>
  %add58 = fadd <2 x double> undef, %1
  %mul61 = fmul <2 x double> %add58, undef
  %add63 = fadd <2 x double> undef, %mul61
  %add64 = fadd <2 x double> undef, %add63
  %add67 = fadd <2 x double> undef, %add64
  store <2 x double> %add67, <2 x double>* undef, align 1
  br i1 undef, label %for.end70, label %for.body

for.end70:                                        ; preds = %for.end, %entry
  ret void
}