;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s

; Scalar buffer load of a single i32 at the constant offset 4. The expected
; instruction is s_buffer_load_dword with the immediate operand 0x4, and no
; s_waitcnt may appear between the function label and that load. The negative
; pattern previously ended in a stray ';', so it could never match any
; emitted instruction.
;CHECK-LABEL: {{^}}s_buffer_load_imm:
;CHECK-NOT: s_waitcnt
;CHECK: s_buffer_load_dword s{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0x4
define amdgpu_ps void @s_buffer_load_imm(<4 x i32> inreg %desc) {
main_body:
  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 4, i32 0)
  %bitcast = bitcast i32 %load to float
  ; The loaded value is consumed by an export call.
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %bitcast, float undef, float undef, float undef, i1 true, i1 true)
  ret void
}

; Scalar buffer load of a single i32 whose offset is the inreg argument
; %index. The expected instruction is s_buffer_load_dword with an SGPR as the
; offset operand, and no s_waitcnt may appear between the function label and
; that load. The negative pattern previously ended in a stray ';', so it could
; never match any emitted instruction.
;CHECK-LABEL: {{^}}s_buffer_load_index:
;CHECK-NOT: s_waitcnt
;CHECK: s_buffer_load_dword s{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}}
define amdgpu_ps void @s_buffer_load_index(<4 x i32> inreg %desc, i32 inreg %index) {
main_body:
  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 %index, i32 0)
  %bitcast = bitcast i32 %load to float
  ; The loaded value is consumed by an export call.
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %bitcast, float undef, float undef, float undef, i1 true, i1 true)
  ret void
}

; Scalar buffer load of <2 x i32> at the constant offset 64. The expected
; instruction is s_buffer_load_dwordx2 with the immediate operand 0x40, and no
; s_waitcnt may appear between the function label and that load. The negative
; pattern previously ended in a stray ';', so it could never match any
; emitted instruction.
;CHECK-LABEL: {{^}}s_buffer_loadx2_imm:
;CHECK-NOT: s_waitcnt
;CHECK: s_buffer_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x40
define amdgpu_ps void @s_buffer_loadx2_imm(<4 x i32> inreg %desc) {
main_body:
  %load = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %desc, i32 64, i32 0)
  %bitcast = bitcast <2 x i32> %load to <2 x float>
  %x = extractelement <2 x float> %bitcast, i32 0
  %y = extractelement <2 x float> %bitcast, i32 1
  ; Both loaded elements are consumed by an export call.
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float undef, float undef, i1 true, i1 true)
  ret void
}

; Scalar buffer load of <2 x i32> whose offset is the inreg argument %index.
; The expected instruction is s_buffer_load_dwordx2 with an SGPR as the offset
; operand, and no s_waitcnt may appear between the function label and that
; load. The negative pattern previously ended in a stray ';', so it could
; never match any emitted instruction.
;CHECK-LABEL: {{^}}s_buffer_loadx2_index:
;CHECK-NOT: s_waitcnt
;CHECK: s_buffer_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}}
define amdgpu_ps void @s_buffer_loadx2_index(<4 x i32> inreg %desc, i32 inreg %index) {
main_body:
  %load = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %desc, i32 %index, i32 0)
  %bitcast = bitcast <2 x i32> %load to <2 x float>
  %x = extractelement <2 x float> %bitcast, i32 0
  %y = extractelement <2 x float> %bitcast, i32 1
  ; Both loaded elements are consumed by an export call.
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float undef, float undef, i1 true, i1 true)
  ret void
}

; Scalar buffer load of <4 x i32> at the constant offset 200. The expected
; instruction is s_buffer_load_dwordx4 with the immediate operand 0xc8, and no
; s_waitcnt may appear between the function label and that load. The negative
; pattern previously ended in a stray ';', so it could never match any
; emitted instruction.
;CHECK-LABEL: {{^}}s_buffer_loadx4_imm:
;CHECK-NOT: s_waitcnt
;CHECK: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0xc8
define amdgpu_ps void @s_buffer_loadx4_imm(<4 x i32> inreg %desc) {
main_body:
  %load = call <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32> %desc, i32 200, i32 0)
  %bitcast = bitcast <4 x i32> %load to <4 x float>
  %x = extractelement <4 x float> %bitcast, i32 0
  %y = extractelement <4 x float> %bitcast, i32 1
  %z = extractelement <4 x float> %bitcast, i32 2
  %w = extractelement <4 x float> %bitcast, i32 3
  ; All four loaded elements are consumed by an export call.
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 true)
  ret void
}

; Scalar buffer load of <4 x i32> whose offset is the inreg argument %index.
; The expected instruction is s_buffer_load_dwordx4 with an SGPR as the offset
; operand, and no s_waitcnt may appear between the function label and that
; load. The negative pattern previously ended in a stray ';', so it could
; never match any emitted instruction.
;CHECK-LABEL: {{^}}s_buffer_loadx4_index:
;CHECK-NOT: s_waitcnt
;CHECK: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}}
define amdgpu_ps void @s_buffer_loadx4_index(<4 x i32> inreg %desc, i32 inreg %index) {
main_body:
  %load = call <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32> %desc, i32 %index, i32 0)
  %bitcast = bitcast <4 x i32> %load to <4 x float>
  %x = extractelement <4 x float> %bitcast, i32 0
  %y = extractelement <4 x float> %bitcast, i32 1
  %z = extractelement <4 x float> %bitcast, i32 2
  %w = extractelement <4 x float> %bitcast, i32 3
  ; All four loaded elements are consumed by an export call.
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 true)
  ret void
}

; Two single-i32 scalar buffer loads from the same descriptor at the constant
; offsets 4 and 8. The expected output is one s_buffer_load_dwordx2 with the
; immediate operand 0x4, and no s_waitcnt may appear between the function
; label and that load. The negative pattern previously ended in a stray ';',
; so it could never match any emitted instruction.
;CHECK-LABEL: {{^}}s_buffer_load_imm_mergex2:
;CHECK-NOT: s_waitcnt
;CHECK: s_buffer_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x4
define amdgpu_ps void @s_buffer_load_imm_mergex2(<4 x i32> inreg %desc) {
main_body:
  %load0 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 4, i32 0)
  %load1 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 8, i32 0)
  %x = bitcast i32 %load0 to float
  %y = bitcast i32 %load1 to float
  ; Both loaded values are consumed by a single export call.
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float undef, float undef, i1 true, i1 true)
  ret void
}

; Four single-i32 scalar buffer loads from the same descriptor at the constant
; offsets 8, 12, 16 and 20. The expected output is one s_buffer_load_dwordx4
; with the immediate operand 0x8, and no s_waitcnt may appear between the
; function label and that load. The negative pattern previously ended in a
; stray ';', so it could never match any emitted instruction.
;CHECK-LABEL: {{^}}s_buffer_load_imm_mergex4:
;CHECK-NOT: s_waitcnt
;CHECK: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x8
define amdgpu_ps void @s_buffer_load_imm_mergex4(<4 x i32> inreg %desc) {
main_body:
  %load0 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 8, i32 0)
  %load1 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 12, i32 0)
  %load2 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 16, i32 0)
  %load3 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 20, i32 0)
  %x = bitcast i32 %load0 to float
  %y = bitcast i32 %load1 to float
  %z = bitcast i32 %load2 to float
  %w = bitcast i32 %load3 to float
  ; All four loaded values are consumed by a single export call.
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 true)
  ret void
}

; Declarations of the intrinsics called by the test functions in this file:
; the f32 export intrinsic and the i32 / <2 x i32> / <4 x i32> overloads of
; the scalar buffer load intrinsic.
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1)
declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32)
declare <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32>, i32, i32)
declare <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32>, i32, i32)