; RUN: llc < %s -march=amdgcn -verify-machineinstrs | FileCheck -check-prefix=GCN %s

; GCN-LABEL: {{^}}checkTwoBlocksWithUniformBranch
; GCN: BB0_2
; GCN: v_add
; Kernel under test. Each work-item turns its workitem id into a starting
; point (%x, %y), then repeats the recurrence
;   x' = x_val*x_val - y_val*y_val + %x
;   y' = 2*x_val*y_val + %y
; while x'*x' + y'*y' compares ordered-less-or-equal to 4.0 and fewer than
; %maxIter iterations have run. The resulting iteration count (0 when the loop
; is skipped) is stored through %out.
;
; Parameters:
;   %out             global (addrspace 1) i32 buffer receiving the count
;   %width           divisor used to split the workitem id into remainder/quotient
;   %xPos, %yPos     additive offsets for the starting point
;   %xStep, %yStep   multiplicative steps for the starting point
;   %maxIter         upper bound on loop iterations
define amdgpu_kernel void @checkTwoBlocksWithUniformBranch(i32 addrspace(1)* nocapture %out, i32 %width, float %xPos, float %yPos, float %xStep, float %yStep, i32 %maxIter) {
entry:
  %conv = call i32 @llvm.amdgcn.workitem.id.x() #1
  ; Remainder and quotient of the workitem id by %width.
  %rem = urem i32 %conv, %width
  %div = udiv i32 %conv, %width
  ; %x = %xStep * rem + %xPos
  %conv1 = sitofp i32 %rem to float
  %x = tail call float @llvm.fmuladd.f32(float %xStep, float %conv1, float %xPos)
  ; %y = %yStep * div + %yPos
  %conv2 = sitofp i32 %div to float
  %y = tail call float @llvm.fmuladd.f32(float %yStep, float %conv2, float %yPos)
  ; %xy = %x*%x + %y*%y
  %yy = fmul float %y, %y
  %xy = tail call float @llvm.fmuladd.f32(float %x, float %x, float %yy)
  ; Enter the loop only if the starting point passes the 4.0 test and
  ; %maxIter is non-zero.
  %cmp01 = fcmp ole float %xy, 4.000000e+00
  %cmp02 = icmp ne i32 %maxIter, 0
  %cond01 = and i1 %cmp02, %cmp01
  br i1 %cond01, label %for.body.preheader, label %for.end

for.body.preheader:                               ; preds = %entry
  br label %for.body

for.body:                                         ; preds = %for.body.preheader, %for.body
  %x_val = phi float [ %call8, %for.body ], [ %x, %for.body.preheader ]
  %iter_val = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
  %y_val = phi float [ %call9, %for.body ], [ %y, %for.body.preheader ]
  ; %sub is the negation of %y_val, so %call8 = x_val*x_val - y_val*y_val + %x.
  %sub = fsub float -0.000000e+00, %y_val
  %call7 = tail call float @llvm.fmuladd.f32(float %x_val, float %x_val, float %x) #1
  %call8 = tail call float @llvm.fmuladd.f32(float %sub, float %y_val, float %call7) #1
  ; %call9 = 2*x_val*y_val + %y
  %mul = fmul float %x_val, 2.000000e+00
  %call9 = tail call float @llvm.fmuladd.f32(float %mul, float %y_val, float %y) #1
  ; Iteration counter. NOTE(review): presumably this is the add that the v_add
  ; directive above the function is meant to match - confirm against the
  ; original bug report.
  %inc = add nuw i32 %iter_val, 1
  ; %0 = call8*call8 + call9*call9, the squared magnitude of the new point.
  %mul3 = fmul float %call9, %call9
  %0 = tail call float @llvm.fmuladd.f32(float %call8, float %call8, float %mul3)
  ; Keep looping while the magnitude test holds and the counter is below %maxIter.
  %cmp = fcmp ole float %0, 4.000000e+00
  %cmp5 = icmp ult i32 %inc, %maxIter
  %or.cond = and i1 %cmp5, %cmp
  br i1 %or.cond, label %for.body, label %for.end.loopexit

for.end.loopexit:                                 ; preds = %for.body
  br label %for.end

for.end:                                          ; preds = %for.end.loopexit, %entry
  ; 0 when the loop was skipped, otherwise the final counter value.
  %iter.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %for.end.loopexit ]
  ; NOTE(review): the shift amount equals the bit width of i32, which the LLVM
  ; language reference defines as producing poison. Left as-is on purpose:
  ; a real address computation here could introduce extra vector adds after
  ; the loop and let the v_add directive match something other than the loop
  ; counter. Confirm with the test owner before changing it.
  %idxprom = ashr exact i32 %conv, 32
  %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %idxprom
  store i32 %iter.0.lcssa, i32 addrspace(1)* %arrayidx, align 4
  ret void
}

; Intrinsics called by the kernel above.
; Function Attrs: nounwind readnone
declare i32 @llvm.amdgcn.workitem.id.x() #0
declare float @llvm.fmuladd.f32(float, float, float) #1

; #0 is used only on the workitem-id declaration; #1 is used on the fmuladd
; declaration and on several call sites inside the kernel.
attributes #0 = { nounwind readnone }
attributes #1 = { readnone }