Matt Arsenault | ad55ee5 | 2016-12-06 01:02:51 +0000 | [diff] [blame] | 1 | ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s |
| 2 | ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s |
Tom Stellard | 2f7cdda | 2013-08-06 23:08:28 +0000 | [diff] [blame] | 3 | |
| 4 | ; This test checks that no VGPR to SGPR copies are created by the register |
| 5 | ; allocator. |
Matt Arsenault | 325cca3 | 2016-01-23 05:42:43 +0000 | [diff] [blame] | 6 | |
Matt Arsenault | 018179f | 2016-01-26 04:38:08 +0000 | [diff] [blame] | 7 | |
| 8 | declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 |
| 9 | |
| 10 | |
; phi1: a constant-buffer value and its negation are merged by a phi.
; The checks expect the value to be loaded with s_buffer_load_dword into an
; SGPR and then moved into a VGPR with v_mov_b32.
; CHECK-LABEL: {{^}}phi1:
; CHECK: s_buffer_load_dword [[DST:s[0-9]]], {{s\[[0-9]+:[0-9]+\]}}, 0x0
; CHECK: v_mov_b32_e32 v{{[0-9]}}, [[DST]]
define amdgpu_ps void @phi1(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
main_body:
  ; Fetch the <16 x i8> descriptor behind %arg and read three constants
  ; through it (offset operands 0, 16 and 32).
  %desc.ptr = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
  %desc = load <16 x i8>, <16 x i8> addrspace(2)* %desc.ptr, !tbaa !0
  %c0 = call float @llvm.SI.load.const(<16 x i8> %desc, i32 0)
  %c16 = call float @llvm.SI.load.const(<16 x i8> %desc, i32 16)
  %c32 = call float @llvm.SI.load.const(<16 x i8> %desc, i32 32)
  ; The second constant, converted to an integer, selects the path.
  %sel.i = fptosi float %c16 to i32
  %skip.neg = icmp ne i32 %sel.i, 0
  br i1 %skip.neg, label %ENDIF, label %ELSE

ELSE:                                             ; preds = %main_body
  ; -0.0 - x is the negation of the first constant.
  %neg.c0 = fsub float -0.000000e+00, %c0
  br label %ENDIF

ENDIF:                                            ; preds = %ELSE, %main_body
  ; Merge the negated and the plain constant, add the third one and export.
  %merged = phi float [ %neg.c0, %ELSE ], [ %c0, %main_body ]
  %sum = fadd float %merged, %c32
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %sum, float %sum, float 0.000000e+00, float 1.000000e+00)
  ret void
}
| 35 | |
; phi2: a larger shader with two conditionally updated values (two phis).
; There are no instruction checks; the test only requires that llc gets
; through this function without crashing.
; CHECK-LABEL: {{^}}phi2:
define amdgpu_ps void @phi2(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
main_body:
  ; Constants read through the descriptor behind %arg; each %cN is named
  ; after the offset operand passed to llvm.SI.load.const.
  %cb.ptr = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
  %cb = load <16 x i8>, <16 x i8> addrspace(2)* %cb.ptr, !tbaa !0
  %c16 = call float @llvm.SI.load.const(<16 x i8> %cb, i32 16)
  %c32 = call float @llvm.SI.load.const(<16 x i8> %cb, i32 32)
  %c36 = call float @llvm.SI.load.const(<16 x i8> %cb, i32 36)
  %c40 = call float @llvm.SI.load.const(<16 x i8> %cb, i32 40)
  %c48 = call float @llvm.SI.load.const(<16 x i8> %cb, i32 48)
  %c52 = call float @llvm.SI.load.const(<16 x i8> %cb, i32 52)
  %c56 = call float @llvm.SI.load.const(<16 x i8> %cb, i32 56)
  %c64 = call float @llvm.SI.load.const(<16 x i8> %cb, i32 64)
  %c68 = call float @llvm.SI.load.const(<16 x i8> %cb, i32 68)
  %c72 = call float @llvm.SI.load.const(<16 x i8> %cb, i32 72)
  %c76 = call float @llvm.SI.load.const(<16 x i8> %cb, i32 76)
  %c80 = call float @llvm.SI.load.const(<16 x i8> %cb, i32 80)
  %c84 = call float @llvm.SI.load.const(<16 x i8> %cb, i32 84)
  %c88 = call float @llvm.SI.load.const(<16 x i8> %cb, i32 88)
  %c92 = call float @llvm.SI.load.const(<16 x i8> %cb, i32 92)
  ; The <8 x i32> and <4 x i32> operands of the image sample come from
  ; %arg2 and %arg1 respectively.
  %rsrc.ptr = getelementptr <8 x i32>, <8 x i32> addrspace(2)* %arg2, i32 0
  %rsrc = load <8 x i32>, <8 x i32> addrspace(2)* %rsrc.ptr, !tbaa !0
  %samp.ptr = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg1, i32 0
  %samp = load <16 x i8>, <16 x i8> addrspace(2)* %samp.ptr, !tbaa !0
  ; Five interpolated inputs; %iAB is the llvm.SI.fs.interp call whose first
  ; two operands are A and B.
  %i00 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %arg3, <2 x i32> %arg5)
  %i10 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %arg3, <2 x i32> %arg5)
  %i01 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %arg3, <2 x i32> %arg5)
  %i11 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %arg3, <2 x i32> %arg5)
  %i21 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %arg3, <2 x i32> %arg5)
  ; Build the <2 x i32> sample address from the first two interpolants.
  %i00.i = bitcast float %i00 to i32
  %i10.i = bitcast float %i10 to i32
  %coord.lo = insertelement <2 x i32> undef, i32 %i00.i, i32 0
  %coord = insertelement <2 x i32> %coord.lo, i32 %i10.i, i32 1
  %samp.bc = bitcast <16 x i8> %samp to <4 x i32>
  %texel = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %coord, <8 x i32> %rsrc, <4 x i32> %samp.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
  %texel.z = extractelement <4 x float> %texel, i32 2
  %abs.z = call float @fabs(float %texel.z)
  ; Scale the remaining three interpolants by rsq of their sum of squares.
  %sq01 = fmul float %i01, %i01
  %sq11 = fmul float %i11, %i11
  %sq.sum2 = fadd float %sq11, %sq01
  %sq21 = fmul float %i21, %i21
  %sq.sum3 = fadd float %sq.sum2, %sq21
  %rlen = call float @llvm.amdgcn.rsq.f32(float %sq.sum3)
  %n0 = fmul float %i01, %rlen
  %n1 = fmul float %i11, %rlen
  %n2 = fmul float %i21, %rlen
  ; Three-term sum of products with the constants at 32/36/40.
  %d0 = fmul float %n0, %c32
  %d1 = fmul float %n1, %c36
  %d01 = fadd float %d1, %d0
  %d2 = fmul float %n2, %c40
  %dot = fadd float %d01, %d2
  %neg.c48 = fsub float -0.000000e+00, %c48
  %dot.abs = fmul float %dot, %abs.z
  %base = fadd float %dot.abs, %neg.c48
  %x = fmul float %c52, %base
  %y = fmul float %c56, %base
  ; First condition: threshold constant < |x| (the i1 takes a round trip
  ; through sext and two bitcasts before being tested again).
  %abs.x = call float @fabs(float %x)
  %x.big = fcmp olt float 0x3EE4F8B580000000, %abs.x
  %x.big.sext = sext i1 %x.big to i32
  %x.big.f = bitcast i32 %x.big.sext to float
  %x.big.i = bitcast float %x.big.f to i32
  %x.cond = icmp ne i32 %x.big.i, 0
  br i1 %x.cond, label %IF, label %ENDIF

IF:                                               ; preds = %main_body
  ; c76 * (1 - exp2(-x)) / x
  %neg.x = fsub float -0.000000e+00, %x
  %exp.x = call float @llvm.exp2.f32(float %neg.x)
  %neg.exp.x = fsub float -0.000000e+00, %exp.x
  %one.m.exp.x = fadd float 1.000000e+00, %neg.exp.x
  %rcp.x = fdiv float 1.000000e+00, %x
  %fx = fmul float %one.m.exp.x, %rcp.x
  %c76.fx = fmul float %c76, %fx
  br label %ENDIF

ENDIF:                                            ; preds = %IF, %main_body
  %wx = phi float [ %c76.fx, %IF ], [ %c76, %main_body ]
  ; Second condition: the same test applied to y.
  %abs.y = call float @fabs(float %y)
  %y.big = fcmp olt float 0x3EE4F8B580000000, %abs.y
  %y.big.sext = sext i1 %y.big to i32
  %y.big.f = bitcast i32 %y.big.sext to float
  %y.big.i = bitcast float %y.big.f to i32
  %y.cond = icmp ne i32 %y.big.i, 0
  br i1 %y.cond, label %IF25, label %ENDIF24

IF25:                                             ; preds = %ENDIF
  ; c92 * (1 - exp2(-y)) / y
  %neg.y = fsub float -0.000000e+00, %y
  %exp.y = call float @llvm.exp2.f32(float %neg.y)
  %neg.exp.y = fsub float -0.000000e+00, %exp.y
  %one.m.exp.y = fadd float 1.000000e+00, %neg.exp.y
  %rcp.y = fdiv float 1.000000e+00, %y
  %fy = fmul float %one.m.exp.y, %rcp.y
  %c92.fy = fmul float %c92, %fy
  br label %ENDIF24

ENDIF24:                                          ; preds = %IF25, %ENDIF
  %wy = phi float [ %c92.fy, %IF25 ], [ %c92, %ENDIF ]
  ; Combine both phi results with the constants at 64..72 and 80..88.
  %a0 = fmul float %c64, %wx
  %a1 = fmul float %c68, %wx
  %a2 = fmul float %c72, %wx
  %b0 = fmul float %c80, %wy
  %s0 = fadd float %b0, %a0
  %b1 = fmul float %c84, %wy
  %s1 = fadd float %b1, %a1
  %b2 = fmul float %c88, %wy
  %s2 = fadd float %b2, %a2
  ; Scale the negated sums by pow(|texel.z|, c16), pack to f16 pairs, export.
  %pw = call float @llvm.pow.f32(float %abs.z, float %c16)
  %neg.s0 = fsub float -0.000000e+00, %s0
  %r0 = fmul float %neg.s0, %pw
  %neg.s1 = fsub float -0.000000e+00, %s1
  %r1 = fmul float %neg.s1, %pw
  %neg.s2 = fsub float -0.000000e+00, %s2
  %r2 = fmul float %neg.s2, %pw
  %pk01 = call i32 @llvm.SI.packf16(float %r0, float %r1)
  %pk01.f = bitcast i32 %pk01 to float
  %pk2 = call i32 @llvm.SI.packf16(float %r2, float 1.000000e+00)
  %pk2.f = bitcast i32 %pk2 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %pk01.f, float %pk2.f, float %pk01.f, float %pk2.f)
  ret void
}
| 156 | |
; loop: three values are rotated through phis on every iteration while a
; counter (kept as a float bit pattern) is incremented. There are no
; instruction checks; we only want to make sure llc does not crash.
; CHECK-LABEL: {{^}}loop:
define amdgpu_ps void @loop(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
main_body:
  ; Four constants read through the descriptor behind %arg (offset operands
  ; 0, 4, 8 and 12); the last one becomes the integer loop bound.
  %cb.ptr = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
  %cb = load <16 x i8>, <16 x i8> addrspace(2)* %cb.ptr, !tbaa !0
  %c0 = call float @llvm.SI.load.const(<16 x i8> %cb, i32 0)
  %c4 = call float @llvm.SI.load.const(<16 x i8> %cb, i32 4)
  %c8 = call float @llvm.SI.load.const(<16 x i8> %cb, i32 8)
  %c12 = call float @llvm.SI.load.const(<16 x i8> %cb, i32 12)
  %bound = fptosi float %c12 to i32
  %bound.f = bitcast i32 %bound to float
  %bound.i = bitcast float %bound.f to i32
  br label %LOOP

LOOP:                                             ; preds = %ENDIF, %main_body
  ; Each back edge rotates a <- b, b <- c, c <- a.
  %rot.a = phi float [ %c0, %main_body ], [ %rot.b, %ENDIF ]
  %rot.b = phi float [ %c4, %main_body ], [ %rot.c, %ENDIF ]
  %rot.c = phi float [ %c8, %main_body ], [ %rot.a, %ENDIF ]
  %iv.f = phi float [ 0.000000e+00, %main_body ], [ %iv.next.f, %ENDIF ]
  ; Leave the loop once the counter bits, compared as a signed integer,
  ; reach the bound.
  %iv = bitcast float %iv.f to i32
  %done = icmp sge i32 %iv, %bound.i
  %done.sext = sext i1 %done to i32
  %done.f = bitcast i32 %done.sext to float
  %done.i = bitcast float %done.f to i32
  %exit.cond = icmp ne i32 %done.i, 0
  br i1 %exit.cond, label %IF, label %ENDIF

IF:                                               ; preds = %LOOP
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %rot.a, float %rot.b, float %rot.c, float 1.000000e+00)
  ret void

ENDIF:                                            ; preds = %LOOP
  ; Integer increment of the counter, carried back as a float bit pattern.
  %iv.cur = bitcast float %iv.f to i32
  %iv.inc = add i32 %iv.cur, 1
  %iv.next.f = bitcast i32 %iv.inc to float
  br label %LOOP
}
| 195 | |
; External functions and intrinsics called by the tests in this file.
; The "Function Attrs" comments mirror the attribute group referenced by each
; declaration (#1 = nounwind readnone, #2 = nounwind readonly).
;
; The stale declaration of @llvm.SI.sample.v2i32 was dropped: nothing in this
; file calls it; every sample call uses @llvm.SI.image.sample.v2i32.

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readonly
declare float @fabs(float) #2

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare float @llvm.amdgcn.rsq.f32(float) #1

; Function Attrs: nounwind readnone
declare float @llvm.exp2.f32(float) #1

; Function Attrs: nounwind readnone
declare float @llvm.pow.f32(float, float) #1

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
Tom Stellard | 519ae39 | 2013-11-15 18:26:45 +0000 | [diff] [blame] | 220 | |
; Regression test for a bug in the FixSGPRCopies pass: VReg96 registers were
; being identified as an SGPR register class, which caused an assertion
; failure.
;
; Each branch samples with a different constant address pair (11,13 or 5,7).
; The checks expect both pairs to be materialised with v_mov_b32 and a single
; image_sample to consume the captured lo:hi register pair.

; CHECK-LABEL: {{^}}sample_v3:
; CHECK: v_mov_b32_e32 v[[SAMPLE_LO:[0-9]+]], 11
; CHECK: v_mov_b32_e32 v[[SAMPLE_HI:[0-9]+]], 13
; CHECK: s_branch

; CHECK-DAG: v_mov_b32_e32 v[[SAMPLE_LO:[0-9]+]], 5
; CHECK-DAG: v_mov_b32_e32 v[[SAMPLE_HI:[0-9]+]], 7

; CHECK: BB{{[0-9]+_[0-9]+}}:
; CHECK: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[SAMPLE_LO]]:[[SAMPLE_HI]]{{\]}}
; CHECK: exp
; CHECK: s_endpgm
define amdgpu_ps void @sample_v3([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
entry:
  ; One constant (offset operand 16) decides which branch is taken.
  %cb.ptr = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg, i64 0, i32 0
  %cb = load <16 x i8>, <16 x i8> addrspace(2)* %cb.ptr, !tbaa !0
  %c16 = call float @llvm.SI.load.const(<16 x i8> %cb, i32 16)
  ; First elements of the %arg2 and %arg1 arrays feed the <8 x i32> and
  ; <4 x i32> operands of both sample calls.
  %rsrc.ptr = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 0
  %rsrc = load <8 x i32>, <8 x i32> addrspace(2)* %rsrc.ptr, !tbaa !0
  %samp.ptr = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 0
  %samp = load <16 x i8>, <16 x i8> addrspace(2)* %samp.ptr, !tbaa !0
  %is.zero = fcmp oeq float %c16, 0.000000e+00
  %samp.bc = bitcast <16 x i8> %samp to <4 x i32>
  br i1 %is.zero, label %if, label %else

if:                                               ; preds = %entry
  %texel.if = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> <i32 11, i32 13>, <8 x i32> %rsrc, <4 x i32> %samp.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
  %if.x = extractelement <4 x float> %texel.if, i32 0
  %if.y = extractelement <4 x float> %texel.if, i32 1
  %if.z = extractelement <4 x float> %texel.if, i32 2
  br label %endif

else:                                             ; preds = %entry
  %texel.else = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> <i32 5, i32 7>, <8 x i32> %rsrc, <4 x i32> %samp.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
  %else.x = extractelement <4 x float> %texel.else, i32 0
  %else.y = extractelement <4 x float> %texel.else, i32 1
  %else.z = extractelement <4 x float> %texel.else, i32 2
  br label %endif

endif:                                            ; preds = %else, %if
  ; Merge the first three components of whichever sample was executed.
  %out.x = phi float [ %if.x, %if ], [ %else.x, %else ]
  %out.y = phi float [ %if.y, %if ], [ %else.y, %else ]
  %out.z = phi float [ %if.z, %if ], [ %else.z, %else ]
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %out.x, float %out.y, float %out.z, float 0.000000e+00)
  ret void
}
| 271 | |
; copy1: a three-way phi over a constant, the loaded bits and the loaded bits
; plus one. The checks expect a buffer_load_dword, a v_add and a normal
; program end.
; CHECK-LABEL: {{^}}copy1:
; CHECK: buffer_load_dword
; CHECK: v_add
; CHECK: s_endpgm
define void @copy1(float addrspace(1)* %out, float addrspace(1)* %in0) {
entry:
  %val = load float, float addrspace(1)* %in0
  %is.zero = fcmp oeq float %val, 0.000000e+00
  br i1 %is.zero, label %if0, label %endif

if0:                                              ; preds = %entry
  ; Reinterpret the loaded float as an integer for the arithmetic below.
  %bits = bitcast float %val to i32
  %is.neg = fcmp olt float %val, 0.000000e+00
  br i1 %is.neg, label %if1, label %endif

if1:                                              ; preds = %if0
  %bits.inc = add i32 %bits, 1
  br label %endif

endif:                                            ; preds = %if1, %if0, %entry
  ; 0 from %entry, the raw bits from %if0, the incremented bits from %if1.
  %merged = phi i32 [ 0, %entry ], [ %bits, %if0 ], [ %bits.inc, %if1 ]
  %result = bitcast i32 %merged to float
  store float %result, float addrspace(1)* %out
  ret void
}
Tom Stellard | f340787 | 2013-11-18 18:50:20 +0000 | [diff] [blame] | 297 | |
; copy2: a counted loop whose accumulator lives in a float phi but is updated
; with integer adds. This test only checks that llc neither crashes nor hits
; an assertion.
; CHECK-LABEL: {{^}}copy2:
; CHECK: s_endpgm
define amdgpu_ps void @copy2([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
entry:
  br label %LOOP68

LOOP68:                                           ; preds = %ENDIF69, %entry
  ; The counter starts at 20 and counts down; the accumulator starts at 0.0.
  %acc.f = phi float [ 0.000000e+00, %entry ], [ %acc.next.f, %ENDIF69 ]
  %count = phi i32 [ 20, %entry ], [ %count.next, %ENDIF69 ]
  %count.zero = icmp eq i32 0, %count
  %acc = bitcast float %acc.f to i32
  br i1 %count.zero, label %IF70, label %ENDIF69

IF70:                                             ; preds = %LOOP68
  ; Export 1.0 when the accumulator bits differ from 13, otherwise 0.0.
  %acc.ne13 = icmp ne i32 %acc, 13
  %out = select i1 %acc.ne13, float 1.000000e+00, float 0.000000e+00
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %out, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  ret void

ENDIF69:                                          ; preds = %LOOP68
  ; Add the current counter to the accumulator bits, then decrement.
  %acc.next = add i32 %acc, %count
  %acc.next.f = bitcast i32 %acc.next to float
  %count.next = add i32 %count, -1
  br label %LOOP68
}
| 324 | |
; This test checks that image_sample resource descriptors aren't loaded into
; vgprs. The verifier will fail if this happens.
;
; Both branches sample with the same descriptors; only the second address
; component differs (bb80 adds 1 to it). The checks expect a scalar compare
; and branch, the v_add on the fall-through path, and a single image_sample
; at the branch target whose address pair ends in the v_add result register.
;
; Changes to the checks:
;  * The label check now ends in ':' like every other CHECK-LABEL in this
;    file, so it can only match the function's own label line.
;  * The branch-target line had no "CHECK:" prefix, so FileCheck ignored it
;    and the [[END]] capture was never used. It is now a real check.
; CHECK-LABEL: {{^}}sample_rsrc:

; CHECK: s_cmp_eq_u32
; CHECK: s_cbranch_scc0 [[END:BB[0-9]+_[0-9]+]]

; CHECK: v_add_i32_e32 v[[ADD:[0-9]+]], vcc, 1, v{{[0-9]+}}

; CHECK: [[END]]:
; CHECK: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[ADD]]{{\]}}
; CHECK: s_endpgm
define amdgpu_ps void @sample_rsrc([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <4 x i32>] addrspace(2)* byval %arg2, [32 x <8 x i32>] addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #0 {
bb:
  ; The constant at offset operand 16 selects the branch below.
  %tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg1, i32 0, i32 0
  %tmp22 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !2
  %tmp23 = call float @llvm.SI.load.const(<16 x i8> %tmp22, i32 16)
  ; <8 x i32> and <4 x i32> operands shared by both sample calls.
  %tmp25 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %arg3, i32 0, i32 0
  %tmp26 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp25, !tbaa !2
  %tmp27 = getelementptr [16 x <4 x i32>], [16 x <4 x i32>] addrspace(2)* %arg2, i32 0, i32 0
  %tmp28 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp27, !tbaa !2
  ; Two interpolated values form the sample address.
  %tmp29 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %arg5, <2 x i32> %arg7)
  %tmp30 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %arg5, <2 x i32> %arg7)
  %tmp31 = bitcast float %tmp23 to i32
  %tmp36 = icmp ne i32 %tmp31, 0
  br i1 %tmp36, label %bb38, label %bb80

bb38:                                             ; preds = %bb
  ; Sample at the unmodified address.
  %tmp52 = bitcast float %tmp29 to i32
  %tmp53 = bitcast float %tmp30 to i32
  %tmp54 = insertelement <2 x i32> undef, i32 %tmp52, i32 0
  %tmp55 = insertelement <2 x i32> %tmp54, i32 %tmp53, i32 1
  %tmp56 = bitcast <8 x i32> %tmp26 to <8 x i32>
  %tmp58 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %tmp55, <8 x i32> %tmp56, <4 x i32> %tmp28, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
  br label %bb71

bb80:                                             ; preds = %bb
  ; Same sample, but with 1 added to the second address component; this add
  ; is the v_add the checks look for.
  %tmp81 = bitcast float %tmp29 to i32
  %tmp82 = bitcast float %tmp30 to i32
  %tmp82.2 = add i32 %tmp82, 1
  %tmp83 = insertelement <2 x i32> undef, i32 %tmp81, i32 0
  %tmp84 = insertelement <2 x i32> %tmp83, i32 %tmp82.2, i32 1
  %tmp85 = bitcast <8 x i32> %tmp26 to <8 x i32>
  %tmp87 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %tmp84, <8 x i32> %tmp85, <4 x i32> %tmp28, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
  br label %bb71

bb71:                                             ; preds = %bb80, %bb38
  ; Export the first component of whichever sample result was produced.
  %tmp72 = phi <4 x float> [ %tmp58, %bb38 ], [ %tmp87, %bb80 ]
  %tmp88 = extractelement <4 x float> %tmp72, i32 0
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tmp88, float %tmp88, float %tmp88, float %tmp88)
  ret void
}
| 377 | |
; Check that the resource descriptor is stored in an sgpr.
; The <8 x i32> descriptor is loaded from an array element indexed by the
; result of llvm.amdgcn.mbcnt.lo; the check requires both descriptor operands
; of image_sample to be s[...] register ranges.
; CHECK-LABEL: {{^}}mimg_srsrc_sgpr:
; CHECK: image_sample v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x1
define amdgpu_ps void @mimg_srsrc_sgpr([34 x <8 x i32>] addrspace(2)* byval %arg) #0 {
  %lane = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %rsrc.ptr = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %arg, i32 0, i32 %lane
  %rsrc = load <8 x i32>, <8 x i32> addrspace(2)* %rsrc.ptr, align 32, !tbaa !0
  ; Constant address, loaded <8 x i32> operand, undef <4 x i32> operand.
  %texel = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> <i32 1061158912, i32 1048576000>, <8 x i32> %rsrc, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
  ; Only component 0 of the result is used.
  %texel.x = extractelement <4 x float> %texel, i32 0
  %packed = call i32 @llvm.SI.packf16(float undef, float %texel.x)
  %packed.f = bitcast i32 %packed to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %packed.f, float undef, float undef, float undef)
  ret void
}
| 392 | |
; Check that the sampler is stored in an sgpr.
; The <4 x i32> sampler is loaded from an array element indexed by the result
; of llvm.amdgcn.mbcnt.lo; the check requires both descriptor operands of
; image_sample to be s[...] register ranges.
; CHECK-LABEL: {{^}}mimg_ssamp_sgpr:
; CHECK: image_sample v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x1
define amdgpu_ps void @mimg_ssamp_sgpr([17 x <4 x i32>] addrspace(2)* byval %arg) #0 {
  %lane = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %samp.ptr = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %arg, i32 0, i32 %lane
  %samp = load <4 x i32>, <4 x i32> addrspace(2)* %samp.ptr, align 16, !tbaa !0
  ; Constant address, undef <8 x i32> operand, loaded <4 x i32> operand.
  %texel = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> <i32 1061158912, i32 1048576000>, <8 x i32> undef, <4 x i32> %samp, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
  ; Only component 0 of the result is used.
  %texel.x = extractelement <4 x float> %texel, i32 0
  %packed = call i32 @llvm.SI.packf16(float %texel.x, float undef)
  %packed.f = bitcast i32 %packed to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %packed.f, float undef, float undef, float undef)
  ret void
}
| 407 | |
| 408 | declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1 |
| 409 | |
Matt Arsenault | 45f8216 | 2016-07-11 23:35:48 +0000 | [diff] [blame] | 410 | attributes #0 = { nounwind } |
Tom Stellard | f0a2107 | 2014-11-18 20:39:39 +0000 | [diff] [blame] | 411 | attributes #1 = { nounwind readnone } |
Matt Arsenault | 45f8216 | 2016-07-11 23:35:48 +0000 | [diff] [blame] | 412 | attributes #2 = { nounwind readonly } |
Matt Arsenault | 325cca3 | 2016-01-23 05:42:43 +0000 | [diff] [blame] | 413 | |
| 414 | !0 = !{!1, !1, i64 0, i32 1} |
Sanjoy Das | 3336f68 | 2016-12-11 20:07:15 +0000 | [diff] [blame] | 415 | !1 = !{!"const", !3} |
Matt Arsenault | 325cca3 | 2016-01-23 05:42:43 +0000 | [diff] [blame] | 416 | !2 = !{!1, !1, i64 0} |
Sanjoy Das | 3336f68 | 2016-12-11 20:07:15 +0000 | [diff] [blame] | 417 | !3 = !{!"tbaa root"} |