; RUN: opt %s -scalarizer -scalarize-load-store -dce -S | FileCheck %s
;
; Regression test for the scalarizer pass with load/store scalarization
; enabled.  Dead code elimination runs afterwards so that only the scalar
; pieces that are actually used remain in the checked output.
;
; The data layout gives 128-bit vectors a 128-bit (16-byte) alignment, which
; is where the "align 16" on lane 0 of the default-aligned <4 x float> and
; <4 x i32> accesses below comes from.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"

; Opaque vector-to-vector function; calls to it must keep their vector
; operand and result.
declare <4 x float> @ext(<4 x float>)
@g = global <4 x float> zeroinitializer

; Floating-point loop with a <4 x float> accumulator PHI.  Each iteration
; loads a vector from %base[%i], mixes it with the accumulator through
; <2 x double> bitcasts and shuffles, adds the two halves, passes the sum
; through the opaque @ext call, then compares, selects and stores.
;
; Expected output: every vector operation except the call is split into
; per-lane scalar operations.  The call operand is rebuilt with
; insertelement and the call result is split again with extractelement.
define void @f1(<4 x float> %init, <4 x float> *%base, i32 %count) {
; CHECK-LABEL: @f1(
; CHECK: entry:
; CHECK: %init.i0 = extractelement <4 x float> %init, i32 0
; CHECK: %init.i1 = extractelement <4 x float> %init, i32 1
; CHECK: %init.i2 = extractelement <4 x float> %init, i32 2
; CHECK: %init.i3 = extractelement <4 x float> %init, i32 3
; CHECK: br label %loop
; CHECK: loop:
; CHECK: %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
; CHECK: %acc.i0 = phi float [ %init.i0, %entry ], [ %sel.i0, %loop ]
; CHECK: %acc.i1 = phi float [ %init.i1, %entry ], [ %sel.i1, %loop ]
; CHECK: %acc.i2 = phi float [ %init.i2, %entry ], [ %sel.i2, %loop ]
; CHECK: %acc.i3 = phi float [ %init.i3, %entry ], [ %sel.i3, %loop ]
; CHECK: %nexti = sub i32 %i, 1
; CHECK: %ptr = getelementptr <4 x float>* %base, i32 %i
; CHECK: %ptr.i0 = bitcast <4 x float>* %ptr to float*
; CHECK: %val.i0 = load float* %ptr.i0, align 16
; CHECK: %ptr.i1 = getelementptr float* %ptr.i0, i32 1
; CHECK: %val.i1 = load float* %ptr.i1, align 4
; CHECK: %ptr.i2 = getelementptr float* %ptr.i0, i32 2
; CHECK: %val.i2 = load float* %ptr.i2, align 8
; CHECK: %ptr.i3 = getelementptr float* %ptr.i0, i32 3
; CHECK: %val.i3 = load float* %ptr.i3, align 4
; CHECK: %add.i0 = fadd float %val.i0, %val.i2
; CHECK: %add.i1 = fadd float %val.i1, %val.i3
; CHECK: %add.i2 = fadd float %acc.i0, %acc.i2
; CHECK: %add.i3 = fadd float %acc.i1, %acc.i3
; CHECK: %add.upto0 = insertelement <4 x float> undef, float %add.i0, i32 0
; CHECK: %add.upto1 = insertelement <4 x float> %add.upto0, float %add.i1, i32 1
; CHECK: %add.upto2 = insertelement <4 x float> %add.upto1, float %add.i2, i32 2
; CHECK: %add = insertelement <4 x float> %add.upto2, float %add.i3, i32 3
; CHECK: %call = call <4 x float> @ext(<4 x float> %add)
; CHECK: %call.i0 = extractelement <4 x float> %call, i32 0
; CHECK: %cmp.i0 = fcmp ogt float %call.i0, 1.0
; CHECK: %call.i1 = extractelement <4 x float> %call, i32 1
; CHECK: %cmp.i1 = fcmp ogt float %call.i1, 2.0
; CHECK: %call.i2 = extractelement <4 x float> %call, i32 2
; CHECK: %cmp.i2 = fcmp ogt float %call.i2, 3.0
; CHECK: %call.i3 = extractelement <4 x float> %call, i32 3
; CHECK: %cmp.i3 = fcmp ogt float %call.i3, 4.0
; CHECK: %sel.i0 = select i1 %cmp.i0, float %call.i0, float 5.0
; CHECK: %sel.i1 = select i1 %cmp.i1, float %call.i1, float 6.0
; CHECK: %sel.i2 = select i1 %cmp.i2, float %call.i2, float 7.0
; CHECK: %sel.i3 = select i1 %cmp.i3, float %call.i3, float 8.0
; CHECK: store float %sel.i0, float* %ptr.i0
; CHECK: store float %sel.i1, float* %ptr.i1
; CHECK: store float %sel.i2, float* %ptr.i2
; CHECK: store float %sel.i3, float* %ptr.i3
; CHECK: %test = icmp eq i32 %nexti, 0
; CHECK: br i1 %test, label %loop, label %exit
; CHECK: exit:
; CHECK: ret void
entry:
  br label %loop

loop:
  %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
  %acc = phi <4 x float> [ %init, %entry ], [ %sel, %loop ]
  %nexti = sub i32 %i, 1

  %ptr = getelementptr <4 x float> *%base, i32 %i
  %val = load <4 x float> *%ptr
  ; Viewed as float lanes, %shuffle1 is <val0, val1, acc0, acc1> and
  ; %shuffle2 is <val2, val3, acc2, acc3>, so the fadd below pairs
  ; val0+val2, val1+val3, acc0+acc2 and acc1+acc3.
  %dval = bitcast <4 x float> %val to <2 x double>
  %dacc = bitcast <4 x float> %acc to <2 x double>
  %shuffle1 = shufflevector <2 x double> %dval, <2 x double> %dacc,
                            <2 x i32> <i32 0, i32 2>
  %shuffle2 = shufflevector <2 x double> %dval, <2 x double> %dacc,
                            <2 x i32> <i32 1, i32 3>
  %f1 = bitcast <2 x double> %shuffle1 to <4 x float>
  %f2 = bitcast <2 x double> %shuffle2 to <4 x float>
  %add = fadd <4 x float> %f1, %f2
  %call = call <4 x float> @ext(<4 x float> %add)
  %cmp = fcmp ogt <4 x float> %call,
                  <float 1.0, float 2.0, float 3.0, float 4.0>
  %sel = select <4 x i1> %cmp, <4 x float> %call,
                <4 x float> <float 5.0, float 6.0, float 7.0, float 8.0>
  store <4 x float> %sel, <4 x float> *%ptr

  %test = icmp eq i32 %nexti, 0
  br i1 %test, label %loop, label %exit

exit:
  ret void
}

; Integer counterpart of the loop above.  Each iteration loads a <4 x i8>,
; sign-extends it, adds it to a <4 x i32> accumulator, compares against
; per-lane constants, selects against a splat of %i, truncates and stores.
;
; Expected output: all operations become per-lane scalar operations, and the
; splat (insertelement followed by a zero-mask shufflevector) disappears in
; favour of direct uses of %i in each scalar select.
define void @f2(<4 x i32> %init, <4 x i8> *%base, i32 %count) {
; CHECK-LABEL: define void @f2(<4 x i32> %init, <4 x i8>* %base, i32 %count) {
; CHECK: entry:
; CHECK: %init.i0 = extractelement <4 x i32> %init, i32 0
; CHECK: %init.i1 = extractelement <4 x i32> %init, i32 1
; CHECK: %init.i2 = extractelement <4 x i32> %init, i32 2
; CHECK: %init.i3 = extractelement <4 x i32> %init, i32 3
; CHECK: br label %loop
; CHECK: loop:
; CHECK: %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
; CHECK: %acc.i0 = phi i32 [ %init.i0, %entry ], [ %sel.i0, %loop ]
; CHECK: %acc.i1 = phi i32 [ %init.i1, %entry ], [ %sel.i1, %loop ]
; CHECK: %acc.i2 = phi i32 [ %init.i2, %entry ], [ %sel.i2, %loop ]
; CHECK: %acc.i3 = phi i32 [ %init.i3, %entry ], [ %sel.i3, %loop ]
; CHECK: %nexti = sub i32 %i, 1
; CHECK: %ptr = getelementptr <4 x i8>* %base, i32 %i
; CHECK: %ptr.i0 = bitcast <4 x i8>* %ptr to i8*
; CHECK: %val.i0 = load i8* %ptr.i0, align 4
; CHECK: %ptr.i1 = getelementptr i8* %ptr.i0, i32 1
; CHECK: %val.i1 = load i8* %ptr.i1, align 1
; CHECK: %ptr.i2 = getelementptr i8* %ptr.i0, i32 2
; CHECK: %val.i2 = load i8* %ptr.i2, align 2
; CHECK: %ptr.i3 = getelementptr i8* %ptr.i0, i32 3
; CHECK: %val.i3 = load i8* %ptr.i3, align 1
; CHECK: %ext.i0 = sext i8 %val.i0 to i32
; CHECK: %ext.i1 = sext i8 %val.i1 to i32
; CHECK: %ext.i2 = sext i8 %val.i2 to i32
; CHECK: %ext.i3 = sext i8 %val.i3 to i32
; CHECK: %add.i0 = add i32 %ext.i0, %acc.i0
; CHECK: %add.i1 = add i32 %ext.i1, %acc.i1
; CHECK: %add.i2 = add i32 %ext.i2, %acc.i2
; CHECK: %add.i3 = add i32 %ext.i3, %acc.i3
; CHECK: %cmp.i0 = icmp slt i32 %add.i0, -10
; CHECK: %cmp.i1 = icmp slt i32 %add.i1, -11
; CHECK: %cmp.i2 = icmp slt i32 %add.i2, -12
; CHECK: %cmp.i3 = icmp slt i32 %add.i3, -13
; CHECK: %sel.i0 = select i1 %cmp.i0, i32 %add.i0, i32 %i
; CHECK: %sel.i1 = select i1 %cmp.i1, i32 %add.i1, i32 %i
; CHECK: %sel.i2 = select i1 %cmp.i2, i32 %add.i2, i32 %i
; CHECK: %sel.i3 = select i1 %cmp.i3, i32 %add.i3, i32 %i
; CHECK: %trunc.i0 = trunc i32 %sel.i0 to i8
; CHECK: %trunc.i1 = trunc i32 %sel.i1 to i8
; CHECK: %trunc.i2 = trunc i32 %sel.i2 to i8
; CHECK: %trunc.i3 = trunc i32 %sel.i3 to i8
; CHECK: store i8 %trunc.i0, i8* %ptr.i0, align 4
; CHECK: store i8 %trunc.i1, i8* %ptr.i1, align 1
; CHECK: store i8 %trunc.i2, i8* %ptr.i2, align 2
; CHECK: store i8 %trunc.i3, i8* %ptr.i3, align 1
; CHECK: %test = icmp eq i32 %nexti, 0
; CHECK: br i1 %test, label %loop, label %exit
; CHECK: exit:
; CHECK: ret void
entry:
  br label %loop

loop:
  %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
  %acc = phi <4 x i32> [ %init, %entry ], [ %sel, %loop ]
  %nexti = sub i32 %i, 1

  %ptr = getelementptr <4 x i8> *%base, i32 %i
  %val = load <4 x i8> *%ptr
  %ext = sext <4 x i8> %val to <4 x i32>
  %add = add <4 x i32> %ext, %acc
  %cmp = icmp slt <4 x i32> %add, <i32 -10, i32 -11, i32 -12, i32 -13>
  ; Splat %i across all four lanes: insert into lane 0, then broadcast
  ; lane 0 with an all-zero shuffle mask.
  %single = insertelement <4 x i32> undef, i32 %i, i32 0
  %limit = shufflevector <4 x i32> %single, <4 x i32> undef,
                         <4 x i32> zeroinitializer
  %sel = select <4 x i1> %cmp, <4 x i32> %add, <4 x i32> %limit
  %trunc = trunc <4 x i32> %sel to <4 x i8>
  store <4 x i8> %trunc, <4 x i8> *%ptr

  %test = icmp eq i32 %nexti, 0
  br i1 %test, label %loop, label %exit

exit:
  ret void
}

; Check that !tbaa information is preserved.
; The load and the store carry different tags (!1 and !2), so each gets its
; own pattern variable: all four scalar loads must share one tag and all
; four scalar stores must share one tag.
define void @f3(<4 x i32> *%src, <4 x i32> *%dst) {
; CHECK-LABEL: @f3(
; CHECK: %val.i0 = load i32* %src.i0, align 16, !tbaa ![[LOAD_TAG:[0-9]*]]
; CHECK: %val.i1 = load i32* %src.i1, align 4, !tbaa ![[LOAD_TAG]]
; CHECK: %val.i2 = load i32* %src.i2, align 8, !tbaa ![[LOAD_TAG]]
; CHECK: %val.i3 = load i32* %src.i3, align 4, !tbaa ![[LOAD_TAG]]
; CHECK: store i32 %add.i0, i32* %dst.i0, align 16, !tbaa ![[STORE_TAG:[0-9]*]]
; CHECK: store i32 %add.i1, i32* %dst.i1, align 4, !tbaa ![[STORE_TAG]]
; CHECK: store i32 %add.i2, i32* %dst.i2, align 8, !tbaa ![[STORE_TAG]]
; CHECK: store i32 %add.i3, i32* %dst.i3, align 4, !tbaa ![[STORE_TAG]]
; CHECK: ret void
  %val = load <4 x i32> *%src, !tbaa !1
  %add = add <4 x i32> %val, %val
  store <4 x i32> %add, <4 x i32> *%dst, !tbaa !2
  ret void
}

; Check that !tbaa.struct information is preserved.
; The load and the store both use node !5, so a single pattern variable
; covers all eight scalar accesses.
define void @f4(<4 x i32> *%src, <4 x i32> *%dst) {
; CHECK-LABEL: @f4(
; CHECK: %val.i0 = load i32* %src.i0, align 16, !tbaa.struct ![[TAG:[0-9]*]]
; CHECK: %val.i1 = load i32* %src.i1, align 4, !tbaa.struct ![[TAG]]
; CHECK: %val.i2 = load i32* %src.i2, align 8, !tbaa.struct ![[TAG]]
; CHECK: %val.i3 = load i32* %src.i3, align 4, !tbaa.struct ![[TAG]]
; CHECK: store i32 %add.i0, i32* %dst.i0, align 16, !tbaa.struct ![[TAG]]
; CHECK: store i32 %add.i1, i32* %dst.i1, align 4, !tbaa.struct ![[TAG]]
; CHECK: store i32 %add.i2, i32* %dst.i2, align 8, !tbaa.struct ![[TAG]]
; CHECK: store i32 %add.i3, i32* %dst.i3, align 4, !tbaa.struct ![[TAG]]
; CHECK: ret void
  %val = load <4 x i32> *%src, !tbaa.struct !5
  %add = add <4 x i32> %val, %val
  store <4 x i32> %add, <4 x i32> *%dst, !tbaa.struct !5
  ret void
}

; Check that llvm.mem.parallel_loop_access information is preserved.
; The vector load and store inside the loop are tagged with node !3, which
; is also attached to the back-edge branch as !llvm.loop.  Every scalar
; load and store in the output must carry the same tag.
define void @f5(i32 %count, <4 x i32> *%src, <4 x i32> *%dst) {
; CHECK-LABEL: @f5(
; CHECK: %val.i0 = load i32* %this_src.i0, align 16, !llvm.mem.parallel_loop_access ![[TAG:[0-9]*]]
; CHECK: %val.i1 = load i32* %this_src.i1, align 4, !llvm.mem.parallel_loop_access ![[TAG]]
; CHECK: %val.i2 = load i32* %this_src.i2, align 8, !llvm.mem.parallel_loop_access ![[TAG]]
; CHECK: %val.i3 = load i32* %this_src.i3, align 4, !llvm.mem.parallel_loop_access ![[TAG]]
; CHECK: store i32 %add.i0, i32* %this_dst.i0, align 16, !llvm.mem.parallel_loop_access ![[TAG]]
; CHECK: store i32 %add.i1, i32* %this_dst.i1, align 4, !llvm.mem.parallel_loop_access ![[TAG]]
; CHECK: store i32 %add.i2, i32* %this_dst.i2, align 8, !llvm.mem.parallel_loop_access ![[TAG]]
; CHECK: store i32 %add.i3, i32* %this_dst.i3, align 4, !llvm.mem.parallel_loop_access ![[TAG]]
; CHECK: ret void
entry:
  br label %loop

loop:
  %index = phi i32 [ 0, %entry ], [ %next_index, %loop ]
  %this_src = getelementptr <4 x i32> *%src, i32 %index
  %this_dst = getelementptr <4 x i32> *%dst, i32 %index
  %val = load <4 x i32> *%this_src, !llvm.mem.parallel_loop_access !3
  %add = add <4 x i32> %val, %val
  store <4 x i32> %add, <4 x i32> *%this_dst, !llvm.mem.parallel_loop_access !3
  %next_index = add i32 %index, -1
  %continue = icmp ne i32 %next_index, %count
  br i1 %continue, label %loop, label %end, !llvm.loop !3

end:
  ret void
}

; Check that fpmath information is preserved.
; A single vector fadd with a constant-vector operand must become four
; scalar fadds, each with the matching lane constant and the same !fpmath
; tag.  The result is rebuilt with insertelement for the vector return.
; The regex after each constant tolerates an exponent suffix in the printed
; floating-point literal.
define <4 x float> @f6(<4 x float> %x) {
; CHECK-LABEL: @f6(
; CHECK: %x.i0 = extractelement <4 x float> %x, i32 0
; CHECK: %res.i0 = fadd float %x.i0, 1.0{{[e+0]*}}, !fpmath ![[TAG:[0-9]*]]
; CHECK: %x.i1 = extractelement <4 x float> %x, i32 1
; CHECK: %res.i1 = fadd float %x.i1, 2.0{{[e+0]*}}, !fpmath ![[TAG]]
; CHECK: %x.i2 = extractelement <4 x float> %x, i32 2
; CHECK: %res.i2 = fadd float %x.i2, 3.0{{[e+0]*}}, !fpmath ![[TAG]]
; CHECK: %x.i3 = extractelement <4 x float> %x, i32 3
; CHECK: %res.i3 = fadd float %x.i3, 4.0{{[e+0]*}}, !fpmath ![[TAG]]
; CHECK: %res.upto0 = insertelement <4 x float> undef, float %res.i0, i32 0
; CHECK: %res.upto1 = insertelement <4 x float> %res.upto0, float %res.i1, i32 1
; CHECK: %res.upto2 = insertelement <4 x float> %res.upto1, float %res.i2, i32 2
; CHECK: %res = insertelement <4 x float> %res.upto2, float %res.i3, i32 3
; CHECK: ret <4 x float> %res
  %res = fadd <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>,
    !fpmath !4
  ret <4 x float> %res
}

; Check that random metadata isn't kept.
; The load and store carry a made-up "foo" metadata kind; nothing between
; the function label and the return in the output may mention it.
define void @f7(<4 x i32> *%src, <4 x i32> *%dst) {
; CHECK-LABEL: @f7(
; CHECK-NOT: !foo
; CHECK: ret void
  %val = load <4 x i32> *%src, !foo !5
  %add = add <4 x i32> %val, %val
  store <4 x i32> %add, <4 x i32> *%dst, !foo !5
  ret void
}

; Test GEP with vectors.
; Lanes 0 and 2 of the index vector are overwritten with the constant 100
; and lane 1 of the pointer vector is overwritten with %other before the
; vector GEP.  The expected scalar GEPs use those replacement values
; directly, so only index lanes 1 and 3 and pointer lanes 0, 2 and 3 are
; extracted from the incoming vectors.
define void @f8(<4 x float *> *%dest, <4 x float *> %ptr0, <4 x i32> %i0,
                float *%other) {
; CHECK-LABEL: @f8(
; CHECK: %dest.i0 = bitcast <4 x float*>* %dest to float**
; CHECK: %dest.i1 = getelementptr float** %dest.i0, i32 1
; CHECK: %dest.i2 = getelementptr float** %dest.i0, i32 2
; CHECK: %dest.i3 = getelementptr float** %dest.i0, i32 3
; CHECK: %i0.i1 = extractelement <4 x i32> %i0, i32 1
; CHECK: %i0.i3 = extractelement <4 x i32> %i0, i32 3
; CHECK: %ptr0.i0 = extractelement <4 x float*> %ptr0, i32 0
; CHECK: %val.i0 = getelementptr float* %ptr0.i0, i32 100
; CHECK: %val.i1 = getelementptr float* %other, i32 %i0.i1
; CHECK: %ptr0.i2 = extractelement <4 x float*> %ptr0, i32 2
; CHECK: %val.i2 = getelementptr float* %ptr0.i2, i32 100
; CHECK: %ptr0.i3 = extractelement <4 x float*> %ptr0, i32 3
; CHECK: %val.i3 = getelementptr float* %ptr0.i3, i32 %i0.i3
; CHECK: store float* %val.i0, float** %dest.i0, align 32
; CHECK: store float* %val.i1, float** %dest.i1, align 8
; CHECK: store float* %val.i2, float** %dest.i2, align 16
; CHECK: store float* %val.i3, float** %dest.i3, align 8
; CHECK: ret void
  %i1 = insertelement <4 x i32> %i0, i32 100, i32 0
  %i2 = insertelement <4 x i32> %i1, i32 100, i32 2
  %ptr1 = insertelement <4 x float *> %ptr0, float *%other, i32 1
  %val = getelementptr <4 x float *> %ptr1, <4 x i32> %i2
  store <4 x float *> %val, <4 x float *> *%dest
  ret void
}

; Test the handling of unaligned loads.
; The vector load is only 4-byte aligned and the vector store only 8-byte
; aligned.  The scalar loads are all expected at align 4, while the scalar
; stores alternate between align 8 (lanes 0 and 2) and align 4 (lanes 1
; and 3).
define void @f9(<4 x float> *%dest, <4 x float> *%src) {
; CHECK-LABEL: @f9(
; CHECK: %dest.i0 = bitcast <4 x float>* %dest to float*
; CHECK: %dest.i1 = getelementptr float* %dest.i0, i32 1
; CHECK: %dest.i2 = getelementptr float* %dest.i0, i32 2
; CHECK: %dest.i3 = getelementptr float* %dest.i0, i32 3
; CHECK: %src.i0 = bitcast <4 x float>* %src to float*
; CHECK: %val.i0 = load float* %src.i0, align 4
; CHECK: %src.i1 = getelementptr float* %src.i0, i32 1
; CHECK: %val.i1 = load float* %src.i1, align 4
; CHECK: %src.i2 = getelementptr float* %src.i0, i32 2
; CHECK: %val.i2 = load float* %src.i2, align 4
; CHECK: %src.i3 = getelementptr float* %src.i0, i32 3
; CHECK: %val.i3 = load float* %src.i3, align 4
; CHECK: store float %val.i0, float* %dest.i0, align 8
; CHECK: store float %val.i1, float* %dest.i1, align 4
; CHECK: store float %val.i2, float* %dest.i2, align 8
; CHECK: store float %val.i3, float* %dest.i3, align 4
; CHECK: ret void
  %val = load <4 x float> *%src, align 4
  store <4 x float> %val, <4 x float> *%dest, align 8
  ret void
}

; Test unaligned loads and stores again, this time with subelement
; alignment: the vector accesses are aligned to less than the 4-byte
; element size (1 for the load, 2 for the store).  Every scalar access is
; expected to keep exactly the alignment of the original vector access.
define void @f10(<4 x float> *%dest, <4 x float> *%src) {
; CHECK-LABEL: @f10(
; CHECK: %dest.i0 = bitcast <4 x float>* %dest to float*
; CHECK: %dest.i1 = getelementptr float* %dest.i0, i32 1
; CHECK: %dest.i2 = getelementptr float* %dest.i0, i32 2
; CHECK: %dest.i3 = getelementptr float* %dest.i0, i32 3
; CHECK: %src.i0 = bitcast <4 x float>* %src to float*
; CHECK: %val.i0 = load float* %src.i0, align 1
; CHECK: %src.i1 = getelementptr float* %src.i0, i32 1
; CHECK: %val.i1 = load float* %src.i1, align 1
; CHECK: %src.i2 = getelementptr float* %src.i0, i32 2
; CHECK: %val.i2 = load float* %src.i2, align 1
; CHECK: %src.i3 = getelementptr float* %src.i0, i32 3
; CHECK: %val.i3 = load float* %src.i3, align 1
; CHECK: store float %val.i0, float* %dest.i0, align 2
; CHECK: store float %val.i1, float* %dest.i1, align 2
; CHECK: store float %val.i2, float* %dest.i2, align 2
; CHECK: store float %val.i3, float* %dest.i3, align 2
; CHECK: ret void
  %val = load <4 x float> *%src, align 1
  store <4 x float> %val, <4 x float> *%dest, align 2
  ret void
}

; Test that sub-byte loads aren't scalarized.
; The elements are i1, so both <32 x i1> loads and the <32 x i1> store are
; expected to remain whole-vector accesses in the output.
define void @f11(<32 x i1> *%dest, <32 x i1> *%src0) {
; CHECK-LABEL: @f11(
; CHECK: %val0 = load <32 x i1>* %src0
; CHECK: %val1 = load <32 x i1>* %src1
; CHECK: store <32 x i1> %and, <32 x i1>* %dest
; CHECK: ret void
  %src1 = getelementptr <32 x i1> *%src0, i32 1
  %val0 = load <32 x i1> *%src0
  %val1 = load <32 x i1> *%src1
  %and = and <32 x i1> %val0, %val1
  store <32 x i1> %and, <32 x i1> *%dest
  ret void
}

; Test that variable inserts aren't scalarized.
; The insertelement uses the run-time lane number %index, so it is expected
; to stay a vector operation.  Its result is then split with extractelement
; to feed the scalarized shl.  The extracts and shifts are matched in any
; order.
define void @f12(<4 x i32> *%dest, <4 x i32> *%src, i32 %index) {
; CHECK-LABEL: @f12(
; CHECK: %val1 = insertelement <4 x i32> %val0, i32 1, i32 %index
; CHECK-DAG: %val1.i0 = extractelement <4 x i32> %val1, i32 0
; CHECK-DAG: %val1.i1 = extractelement <4 x i32> %val1, i32 1
; CHECK-DAG: %val1.i2 = extractelement <4 x i32> %val1, i32 2
; CHECK-DAG: %val1.i3 = extractelement <4 x i32> %val1, i32 3
; CHECK-DAG: %val2.i0 = shl i32 1, %val1.i0
; CHECK-DAG: %val2.i1 = shl i32 2, %val1.i1
; CHECK-DAG: %val2.i2 = shl i32 3, %val1.i2
; CHECK-DAG: %val2.i3 = shl i32 4, %val1.i3
; CHECK: ret void
  %val0 = load <4 x i32> *%src
  %val1 = insertelement <4 x i32> %val0, i32 1, i32 %index
  %val2 = shl <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %val1
  store <4 x i32> %val2, <4 x i32> *%dest
  ret void
}

; Test vector GEPs with more than one index.
; The first index is the constant vector <0, 1, 2, 3> and the second is the
; variable vector %i.  Each scalar GEP is expected to keep "inbounds", use
; the matching lane constant as its first index and the extracted lane of
; %i as its second.
define void @f13(<4 x float *> *%dest, <4 x [4 x float] *> %ptr, <4 x i32> %i,
                 float *%other) {
; CHECK-LABEL: @f13(
; CHECK: %dest.i0 = bitcast <4 x float*>* %dest to float**
; CHECK: %dest.i1 = getelementptr float** %dest.i0, i32 1
; CHECK: %dest.i2 = getelementptr float** %dest.i0, i32 2
; CHECK: %dest.i3 = getelementptr float** %dest.i0, i32 3
; CHECK: %i.i0 = extractelement <4 x i32> %i, i32 0
; CHECK: %ptr.i0 = extractelement <4 x [4 x float]*> %ptr, i32 0
; CHECK: %val.i0 = getelementptr inbounds [4 x float]* %ptr.i0, i32 0, i32 %i.i0
; CHECK: %i.i1 = extractelement <4 x i32> %i, i32 1
; CHECK: %ptr.i1 = extractelement <4 x [4 x float]*> %ptr, i32 1
; CHECK: %val.i1 = getelementptr inbounds [4 x float]* %ptr.i1, i32 1, i32 %i.i1
; CHECK: %i.i2 = extractelement <4 x i32> %i, i32 2
; CHECK: %ptr.i2 = extractelement <4 x [4 x float]*> %ptr, i32 2
; CHECK: %val.i2 = getelementptr inbounds [4 x float]* %ptr.i2, i32 2, i32 %i.i2
; CHECK: %i.i3 = extractelement <4 x i32> %i, i32 3
; CHECK: %ptr.i3 = extractelement <4 x [4 x float]*> %ptr, i32 3
; CHECK: %val.i3 = getelementptr inbounds [4 x float]* %ptr.i3, i32 3, i32 %i.i3
; CHECK: store float* %val.i0, float** %dest.i0, align 32
; CHECK: store float* %val.i1, float** %dest.i1, align 8
; CHECK: store float* %val.i2, float** %dest.i2, align 16
; CHECK: store float* %val.i3, float** %dest.i3, align 8
; CHECK: ret void
  %val = getelementptr inbounds <4 x [4 x float] *> %ptr,
                                <4 x i32> <i32 0, i32 1, i32 2, i32 3>,
                                <4 x i32> %i
  store <4 x float *> %val, <4 x float *> *%dest
  ret void
}

; Test combinations of vector and non-vector PHIs.
; The loop header has a vector PHI followed by a scalar PHI, and the vector
; value is also needed whole as the operand of the @ext call.  In the
; expected output the four per-lane PHIs and the untouched scalar PHI come
; first, and the insertelement chain that rebuilds %this_acc follows them.
define <4 x float> @f14(<4 x float> %acc, i32 %count) {
; CHECK-LABEL: @f14(
; CHECK: %this_acc.i0 = phi float [ %acc.i0, %entry ], [ %next_acc.i0, %loop ]
; CHECK: %this_acc.i1 = phi float [ %acc.i1, %entry ], [ %next_acc.i1, %loop ]
; CHECK: %this_acc.i2 = phi float [ %acc.i2, %entry ], [ %next_acc.i2, %loop ]
; CHECK: %this_acc.i3 = phi float [ %acc.i3, %entry ], [ %next_acc.i3, %loop ]
; CHECK: %this_count = phi i32 [ %count, %entry ], [ %next_count, %loop ]
; CHECK: %this_acc.upto0 = insertelement <4 x float> undef, float %this_acc.i0, i32 0
; CHECK: %this_acc.upto1 = insertelement <4 x float> %this_acc.upto0, float %this_acc.i1, i32 1
; CHECK: %this_acc.upto2 = insertelement <4 x float> %this_acc.upto1, float %this_acc.i2, i32 2
; CHECK: %this_acc = insertelement <4 x float> %this_acc.upto2, float %this_acc.i3, i32 3
; CHECK: ret <4 x float> %next_acc
entry:
  br label %loop

loop:
  %this_acc = phi <4 x float> [ %acc, %entry ], [ %next_acc, %loop ]
  %this_count = phi i32 [ %count, %entry ], [ %next_count, %loop ]
  %foo = call <4 x float> @ext(<4 x float> %this_acc)
  %next_acc = fadd <4 x float> %this_acc, %foo
  %next_count = sub i32 %this_count, 1
  %cmp = icmp eq i32 %next_count, 0
  br i1 %cmp, label %loop, label %exit

exit:
  ret <4 x float> %next_acc
}

; Metadata nodes referenced by the tests above.
; !0 is the parent node of the two tags !1 and !2, which @f3 attaches as
; !tbaa to its load and store respectively.
!0 = metadata !{ metadata !"root" }
!1 = metadata !{ metadata !"set1", metadata !0 }
!2 = metadata !{ metadata !"set2", metadata !0 }
; Self-referential node used by @f5 both as !llvm.loop and as
; !llvm.mem.parallel_loop_access.
!3 = metadata !{ metadata !3 }
; !fpmath operand used by @f6.
!4 = metadata !{ float 4.0 }
; Used as !tbaa.struct by @f4 and as the operand of the made-up !foo kind
; in @f7.
!5 = metadata !{ i64 0, i64 8, null }