; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s

; Code generation tests for the llvm.amdgcn.ps.live intrinsic in amdgpu_ps
; (pixel shader) functions. Each function below is paired with the
; instruction patterns expected in the llc output for it.

; test1: the i1 result of llvm.amdgcn.ps.live is zero-extended to i32 and
; returned as the bit pattern of a float. The expected code copies exec into
; an SGPR pair and selects 0 or 1 per lane from that copy.
;
; CHECK-LABEL: {{^}}test1:
; CHECK: s_mov_b64 s[0:1], exec
; CHECK: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
;
; Note: The hardware doesn't implement EXEC as src2 for v_cndmask.
;
; Note: We could generate better code here if we recognized earlier that
; there is no WQM use and therefore llvm.amdgcn.ps.live is constant. However,
; the expectation is that the intrinsic will be used in non-trivial shaders,
; so such an optimization doesn't seem worth the effort.
define amdgpu_ps float @test1() #0 {
  %live = call i1 @llvm.amdgcn.ps.live()
  %live.32 = zext i1 %live to i32
  %r = bitcast i32 %live.32 to float
  ret float %r
}

; test2: the zero-extended live bit is bitcast to float and passed as the
; float operand of an image sample call. The expected code saves exec before
; the s_wqm_b64 and derives the selected value from that saved copy; the
; s_wqm_b64 and the v_cndmask may appear in either order.
;
; CHECK-LABEL: {{^}}test2:
; CHECK: s_mov_b64 [[LIVE:s\[[0-9]+:[0-9]+\]]], exec
; CHECK-DAG: s_wqm_b64 exec, exec
; CHECK-DAG: v_cndmask_b32_e64 [[VAR:v[0-9]+]], 0, 1, [[LIVE]]
; CHECK: image_sample v0, [[VAR]],
define amdgpu_ps float @test2() #0 {
  %live = call i1 @llvm.amdgcn.ps.live()
  %live.32 = zext i1 %live to i32
  %live.32.bc = bitcast i32 %live.32 to float
  %t = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %live.32.bc, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
  %r = extractelement <4 x float> %t, i32 0
  ret float %r
}

; test3: the live bit is used as a branch condition. When it is false,
; control goes through %dead, which doubles %in before joining %end; the
; merged value is then used as the float operand of an image sample call.
; The expected code inverts the saved exec copy and uses the inverted mask
; for the s_and_saveexec_b64 that guards the %dead block.
;
; CHECK-LABEL: {{^}}test3:
; CHECK: s_mov_b64 [[LIVE:s\[[0-9]+:[0-9]+\]]], exec
; CHECK-DAG: s_wqm_b64 exec, exec
; CHECK-DAG: s_xor_b64 [[HELPER:s\[[0-9]+:[0-9]+\]]], [[LIVE]], -1
; CHECK-DAG: s_and_saveexec_b64 [[SAVED:s\[[0-9]+:[0-9]+\]]], [[HELPER]]
; CHECK: ; %dead
define amdgpu_ps float @test3(i32 %in) #0 {
entry:
  %live = call i1 @llvm.amdgcn.ps.live()
  br i1 %live, label %end, label %dead

dead:
  %tc.dead = mul i32 %in, 2
  br label %end

end:
  %tc = phi i32 [ %in, %entry ], [ %tc.dead, %dead ]
  %tc.bc = bitcast i32 %tc to float
  %t = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tc.bc, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) #0
  %r = extractelement <4 x float> %t, i32 0
  ret float %r
}

; Intrinsics used by the tests above.
declare i1 @llvm.amdgcn.ps.live() #1
declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2

; #0: test functions and the image sample call site in test3.
; #1: llvm.amdgcn.ps.live declaration.
; #2: image sample declaration.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind readonly }