Yaxun Liu | c41e2f6 | 2017-12-15 03:56:57 +0000 | [diff] [blame] | 1 | ; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgizcl -verify-machineinstrs < %s | FileCheck %s |
| 2 | |
; Aggregate of three [4 x i32] arrays. In @wobble it is only reached through
; getelementptr on an addrspace(1) pointer, always selecting field index 2.
| 3 | %struct.wombat = type { [4 x i32], [4 x i32], [4 x i32] } |
| 4 | |
; Kernel under test. It takes one addrspace(1) byte pointer, uses attribute
; group #0 and is attached to DISubprogram !4. The only output this test
; verifies is the DEBUG_VALUE comment that llc prints for the llvm.dbg.value
; call in %bb28.
; NOTE(review): the many undef operands and unused values (e.g. %tmp4, %tmp13)
; suggest this body is an automatically reduced reproducer - confirm before
; simplifying or reordering anything here.
| 5 | define amdgpu_kernel void @wobble(i8 addrspace(1)* nocapture readonly %arg) #0 !dbg !4 { |
; Entry block: loads through undef pointers, derives several addresses from
; %arg, then branches on an ordered-equal compare of
; (element 1 of (%tmp17 - %tmp17)) + 0.0 against 0.0.
| 6 | bb: |
| 7 | %tmp = load i32, i32 addrspace(1)* undef, align 4 |
| 8 | %tmp1 = load <4 x float>, <4 x float> addrspace(1)* undef, align 16 |
| 9 | %tmp2 = sext i32 %tmp to i64 |
| 10 | %tmp3 = shufflevector <4 x float> undef, <4 x float> %tmp1, <2 x i32> <i32 3, i32 7> |
| 11 | %tmp4 = call float @barney() #2 |
| 12 | %tmp5 = getelementptr inbounds i8, i8 addrspace(1)* %arg, i64 0 |
| 13 | %tmp6 = bitcast i8 addrspace(1)* %tmp5 to <2 x float> addrspace(1)* |
| 14 | %tmp7 = getelementptr inbounds i8, i8 addrspace(1)* %arg, i64 0 |
| 15 | %tmp8 = bitcast i8 addrspace(1)* %tmp7 to %struct.wombat addrspace(1)* |
| 16 | %tmp9 = getelementptr inbounds %struct.wombat, %struct.wombat addrspace(1)* %tmp8, i64 %tmp2, i32 2, i64 0 |
| 17 | %tmp10 = load i32, i32 addrspace(1)* %tmp9, align 4 |
| 18 | %tmp11 = sext i32 %tmp10 to i64 |
| 19 | %tmp12 = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %tmp6, i64 %tmp11 |
| 20 | %tmp13 = bitcast <2 x float> addrspace(1)* %tmp12 to i64 addrspace(1)* |
| 21 | %tmp14 = getelementptr inbounds i8, i8 addrspace(1)* %arg, i64 undef |
| 22 | %tmp15 = bitcast i8 addrspace(1)* %tmp14 to <4 x float> addrspace(1)* |
| 23 | %tmp16 = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %tmp15, i64 undef |
| 24 | %tmp17 = load <4 x float>, <4 x float> addrspace(1)* %tmp16, align 16 |
Sanjay Patel | 672ad32 | 2018-03-08 21:30:56 +0000 | [diff] [blame] | 25 | %tmp18 = fsub <4 x float> %tmp17, %tmp17 |
| 26 | %ext = extractelement <4 x float> %tmp18, i32 1 |
| 27 | %tmp19 = fadd float %ext, 0.000000e+00 |
Yaxun Liu | c41e2f6 | 2017-12-15 03:56:57 +0000 | [diff] [blame] | 28 | %tmp20 = fcmp oeq float %tmp19, 0.000000e+00 |
| 29 | br i1 %tmp20, label %bb21, label %bb25 |
| 30 | |
; Taken when the compare is true: derives %tmp24 from %tmp18 with an
; fmul / fadd / fmul chain.
| 31 | bb21: ; preds = %bb |
Sanjay Patel | 672ad32 | 2018-03-08 21:30:56 +0000 | [diff] [blame] | 32 | %tmp22 = fmul <4 x float> %tmp18, %tmp18 |
| 33 | %tmp23 = fadd <4 x float> %tmp22, %tmp22 |
| 34 | %tmp24 = fmul <4 x float> %tmp23, %tmp23 |
Yaxun Liu | c41e2f6 | 2017-12-15 03:56:57 +0000 | [diff] [blame] | 35 | br label %bb28 |
| 36 | |
; Taken when the compare is false: builds a vector with 0.0 in element 1 and
; an undef float in element 2; the remaining elements stay undef.
| 37 | bb25: ; preds = %bb |
| 38 | %tmp26 = insertelement <4 x float> undef, float 0.000000e+00, i32 1 |
| 39 | %tmp27 = insertelement <4 x float> %tmp26, float undef, i32 2 |
| 40 | br label %bb28 |
| 41 | |
; Join block. %tmp29 picks %tmp27 or %tmp24 depending on the predecessor, is
; stored through an undef addrspace(5) pointer, and is the value later
; described by the llvm.dbg.value call.
| 42 | bb28: ; preds = %bb25, %bb21 |
| 43 | %tmp29 = phi <4 x float> [ %tmp27, %bb25 ], [ %tmp24, %bb21 ] |
| 44 | store <4 x float> %tmp29, <4 x float> addrspace(5)* undef, align 16 |
| 45 | %tmp30 = getelementptr inbounds %struct.wombat, %struct.wombat addrspace(1)* %tmp8, i64 %tmp2, i32 2, i64 2 |
| 46 | %tmp31 = load i32, i32 addrspace(1)* %tmp30, align 4 |
| 47 | %tmp32 = sext i32 %tmp31 to i64 |
| 48 | %tmp33 = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %tmp6, i64 %tmp32 |
| 49 | %tmp34 = bitcast <2 x float> addrspace(1)* %tmp33 to i64 addrspace(1)* |
| 50 | %tmp35 = load i64, i64 addrspace(1)* %tmp34, align 8 |
| 51 | %tmp36 = load i32, i32 addrspace(1)* undef, align 4 |
| 52 | %tmp37 = sext i32 %tmp36 to i64 |
| 53 | %tmp38 = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* null, i64 %tmp37 |
| 54 | %tmp39 = load <4 x float>, <4 x float> addrspace(1)* %tmp38, align 16 |
| 55 | %tmp40 = load <4 x float>, <4 x float> addrspace(1)* undef, align 16 |
| 56 | %tmp41 = fsub <4 x float> zeroinitializer, %tmp40 |
| 57 | %tmp42 = fsub <4 x float> %tmp39, %tmp40 |
| 58 | %tmp43 = extractelement <4 x float> %tmp40, i32 1 |
| 59 | %tmp44 = fsub float %tmp43, undef |
| 60 | %tmp45 = fadd float undef, undef |
| 61 | %tmp46 = fdiv float %tmp44, %tmp45 |
| 62 | %tmp47 = insertelement <4 x float> undef, float %tmp46, i32 0 |
| 63 | %tmp48 = shufflevector <4 x float> %tmp47, <4 x float> undef, <4 x i32> zeroinitializer |
| 64 | %tmp49 = fsub <4 x float> %tmp48, %tmp40 |
| 65 | %tmp50 = extractelement <4 x float> %tmp41, i32 1 |
| 66 | %tmp51 = extractelement <4 x float> %tmp42, i32 2 |
| 67 | %tmp52 = fmul float undef, undef |
| 68 | %tmp53 = fadd float %tmp52, undef |
| 69 | %tmp54 = fadd float %tmp51, %tmp53 |
| 70 | %tmp55 = extractelement <4 x float> %tmp49, i32 1 |
| 71 | %tmp56 = fmul float %tmp55, %tmp50 |
| 72 | %tmp57 = fmul float %tmp54, %tmp56 |
| 73 | %tmp58 = fdiv float %tmp57, 0.000000e+00 |
; Point of the test: the intrinsic below describes %tmp29 as variable !3 at
; location !5 with the expression (DW_OP_constu 1, DW_OP_swap, DW_OP_xderef).
; The FileCheck directive on the next line expects llc to print that same
; expression, prefixed by "foo:var", in its DEBUG_VALUE comment.
| 74 | ; CHECK: ;DEBUG_VALUE: foo:var <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef] |
| 75 | call void @llvm.dbg.value(metadata <4 x float> %tmp29, metadata !3, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)) #2, !dbg !5 |
; Remaining work: combine %tmp58 with the bitcast of %tmp35, pass element 0 of
; the result to @eggs, then store %tmp3 and 0.0 through undef pointers.
| 76 | %tmp59 = bitcast i64 %tmp35 to <2 x float> |
| 77 | %tmp60 = insertelement <2 x float> undef, float %tmp58, i32 0 |
| 78 | %tmp61 = shufflevector <2 x float> %tmp60, <2 x float> undef, <2 x i32> zeroinitializer |
| 79 | %tmp62 = fmul <2 x float> %tmp61, undef |
| 80 | %tmp63 = fsub <2 x float> %tmp62, %tmp59 |
| 81 | %tmp64 = extractelement <2 x float> %tmp63, i64 0 |
| 82 | call void @eggs(float %tmp64) #2 |
| 83 | store <2 x float> %tmp3, <2 x float> addrspace(1)* undef, align 8 |
| 84 | store float 0.000000e+00, float addrspace(1)* undef, align 4 |
| 85 | ret void |
| 86 | } |
| 87 | |
; External functions called from @wobble. Only declarations are provided, so
; both calls remain real calls in the generated code.
| 88 | declare float @barney() #2 |
| 89 | declare void @eggs(float) #2 |
; Debug intrinsic whose printed DEBUG_VALUE comment is what the test verifies.
| 90 | declare void @llvm.dbg.value(metadata, metadata, metadata) #1 |
| 91 | |
; #0: attributes of the kernel @wobble (gfx900 CPU, fp32 denormals enabled).
; #1: attributes of the llvm.dbg.value declaration.
; #2: attributes of @barney / @eggs and of the call sites inside @wobble.
| 92 | attributes #0 = { convergent nounwind "target-cpu"="gfx900" "target-features"="+fp32-denormals" } |
| 93 | attributes #1 = { nounwind readnone speculatable } |
| 94 | attributes #2 = { nounwind } |
| 95 | |
; Module-level debug info roots: one compile unit (!0) and one module flag (!2).
| 96 | !llvm.dbg.cu = !{!0} |
| 97 | !llvm.module.flags = !{!2} |
| 98 | |
; !0: C99 compile unit with full debug emission, for file !1.
| 99 | !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug) |
| 100 | !1 = !DIFile(filename: "foo.cl", directory: "/tmp") |
; !2: declares debug info version 3 for this module.
| 101 | !2 = !{i32 2, !"Debug Info Version", i32 3} |
; !3: the variable described by llvm.dbg.value; its name supplies the "var"
; half of the expected "foo:var" text.
; NOTE(review): arg: 8 does not match the single IR parameter of @wobble or
; the one-parameter subroutine type !12 - presumably a leftover from test
; reduction; confirm before changing.
| 102 | !3 = !DILocalVariable(name: "var", arg: 8, scope: !4) |
; !4: subprogram attached to @wobble through !dbg. Its name is "foo", not
; "wobble", and supplies the "foo" half of the expected "foo:var" text.
| 103 | !4 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, type: !12, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, unit: !0) |
; !5: source location (line 69, scope !4) carried by the llvm.dbg.value call.
| 104 | !5 = !DILocation(line: 69, scope: !4) |
; !12-!14: subroutine type with a null first entry followed by one 32-bit
; signed "int".
| 105 | !12 = !DISubroutineType(types: !13) |
| 106 | !13 = !{null, !14} |
| 107 | !14 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) |