; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=-code-object-v3 -amdgpu-ir-lower-kernel-arguments=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI,GCN,HSA-VI,FUNC %s

; Repeat of some problematic tests in kernel-args.ll, with the IR
; argument lowering pass disabled. Struct padding needs to be
; accounted for, as does the way type legalization changes offsets.
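;
; As a rough guide to the numbers checked below: the s_load offsets are
; byte offsets from the kernarg segment base held in s[4:5], each
; argument is placed at the next offset that satisfies its type's ABI
; alignment, and kernarg_segment_byte_size is that total rounded up to
; a 4-byte multiple.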

; FUNC-LABEL: {{^}}i1_arg:
; HSA-VI: kernarg_segment_byte_size = 12
; HSA-VI: kernarg_segment_alignment = 4

; GCN: s_load_dword s
; GCN: s_and_b32
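; Roughly: the out pointer occupies bytes 0-7 and the i1 takes the next
; 4-byte kernarg slot at offset 8, giving the 12 bytes checked above;
; the s_and_b32 masks the loaded dword down to the single live bit.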
define amdgpu_kernel void @i1_arg(i1 addrspace(1)* %out, i1 %x) nounwind {
  store i1 %x, i1 addrspace(1)* %out, align 1
  ret void
}

; FUNC-LABEL: {{^}}v3i8_arg:
; HSA-VI: kernarg_segment_byte_size = 12
; HSA-VI: kernarg_segment_alignment = 4
; HSA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x0
; HSA-VI: s_load_dword s{{[0-9]+}}, s[4:5], 0x8
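; Roughly: out pointer at 0x0 (the dwordx2 load), the <3 x i8> packed
; into the 4-byte slot at 0x8 (the dword load), 8 + 4 = 12 bytes total.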
define amdgpu_kernel void @v3i8_arg(<3 x i8> addrspace(1)* nocapture %out, <3 x i8> %in) nounwind {
entry:
  store <3 x i8> %in, <3 x i8> addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}i65_arg:
; HSA-VI: kernarg_segment_byte_size = 24
; HSA-VI: kernarg_segment_alignment = 4
; HSA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x0
; HSA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x8
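; Roughly: out pointer at 0x0; i65 has a 16-byte in-memory size once
; legalized, so %in starts at 0x8 and the total is 8 + 16 = 24.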
define amdgpu_kernel void @i65_arg(i65 addrspace(1)* nocapture %out, i65 %in) nounwind {
entry:
  store i65 %in, i65 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}empty_struct_arg:
; HSA-VI: kernarg_segment_byte_size = 0
define amdgpu_kernel void @empty_struct_arg({} %in) nounwind {
  ret void
}

; The correct load offsets for these are:
; load 4 from 0
; load 8 from 8
; load 4 from 24
; load 8 from 32

; With the SelectionDAG argument lowering, the alignment of the
; struct members is not properly considered, making these offsets wrong.
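; For reference, the natural (non-packed) layout works out as follows:
; {i32, i64} has size 16 and alignment 8, so %arg0 spans bytes 0-15
; (i32 at 0, i64 at 8), the unnamed i8 sits at 16, and %arg1 is
; realigned to 8, landing at 24 (i32 at 24, i64 at 32) for a 40-byte
; total.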

; FIXME: Total argument size is computed wrong
; FUNC-LABEL: {{^}}struct_argument_alignment:
; HSA-VI: kernarg_segment_byte_size = 40
; HSA-VI: s_load_dword s{{[0-9]+}}, s[4:5], 0x0
; HSA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x8
; HSA-VI: s_load_dword s{{[0-9]+}}, s[4:5], 0x18
; HSA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x20
define amdgpu_kernel void @struct_argument_alignment({i32, i64} %arg0, i8, {i32, i64} %arg1) {
  %val0 = extractvalue {i32, i64} %arg0, 0
  %val1 = extractvalue {i32, i64} %arg0, 1
  %val2 = extractvalue {i32, i64} %arg1, 0
  %val3 = extractvalue {i32, i64} %arg1, 1
  store volatile i32 %val0, i32 addrspace(1)* null
  store volatile i64 %val1, i64 addrspace(1)* null
  store volatile i32 %val2, i32 addrspace(1)* null
  store volatile i64 %val3, i64 addrspace(1)* null
  ret void
}

; No padding between the i8 and the next struct, but the total is
; rounded up at the end to a 4-byte multiple.
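; Roughly: the packed <{i32, i64}> is 12 bytes with no internal padding,
; so %arg0 covers bytes 0-11 (i32 at 0, i64 at 4), the i8 sits at 12,
; and %arg1 starts at 13 (i32 at 13, i64 at 17), ending at 25 and
; rounding up to the 28 bytes checked below. The members at offsets 13
; and 17 are not dword aligned, so they cannot use the scalar s_load
; path and are fetched with global_load instead.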
; FUNC-LABEL: {{^}}packed_struct_argument_alignment:
; HSA-VI: kernarg_segment_byte_size = 28
; HSA-VI: global_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, off offset:17
; HSA-VI: global_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, off offset:13
; HSA-VI: s_load_dword s{{[0-9]+}}, s[4:5], 0x0
; HSA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x4
define amdgpu_kernel void @packed_struct_argument_alignment(<{i32, i64}> %arg0, i8, <{i32, i64}> %arg1) {
  %val0 = extractvalue <{i32, i64}> %arg0, 0
  %val1 = extractvalue <{i32, i64}> %arg0, 1
  %val2 = extractvalue <{i32, i64}> %arg1, 0
  %val3 = extractvalue <{i32, i64}> %arg1, 1
  store volatile i32 %val0, i32 addrspace(1)* null
  store volatile i64 %val1, i64 addrspace(1)* null
  store volatile i32 %val2, i32 addrspace(1)* null
  store volatile i64 %val3, i64 addrspace(1)* null
  ret void
}

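; Roughly the same layout as struct_argument_alignment for the first
; three arguments (offsets 0, 8, 16 and 24/32), followed by the second
; unnamed i8 at 40 and <4 x i32> %arg4 realigned to 16 at 48, ending
; at 64.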
; GCN-LABEL: {{^}}struct_argument_alignment_after:
; HSA-VI: kernarg_segment_byte_size = 64
; HSA-VI: s_load_dword s{{[0-9]+}}, s[4:5], 0x0
; HSA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x8
; HSA-VI: s_load_dword s{{[0-9]+}}, s[4:5], 0x18
; HSA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x20
; HSA-VI: s_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x30
define amdgpu_kernel void @struct_argument_alignment_after({i32, i64} %arg0, i8, {i32, i64} %arg2, i8, <4 x i32> %arg4) {
  %val0 = extractvalue {i32, i64} %arg0, 0
  %val1 = extractvalue {i32, i64} %arg0, 1
  %val2 = extractvalue {i32, i64} %arg2, 0
  %val3 = extractvalue {i32, i64} %arg2, 1
  store volatile i32 %val0, i32 addrspace(1)* null
  store volatile i64 %val1, i64 addrspace(1)* null
  store volatile i32 %val2, i32 addrspace(1)* null
  store volatile i64 %val3, i64 addrspace(1)* null
  store volatile <4 x i32> %arg4, <4 x i32> addrspace(1)* null
  ret void
}

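; Roughly: %arg0 occupies offset 0 and the [3 x i32], aligned to 4,
; starts at offset 4 with elements at 4, 8 and 12, matching the four
; dword loads below.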
; GCN-LABEL: {{^}}array_3xi32:
; HSA-VI: s_load_dword s{{[0-9]+}}, s[4:5], 0x0
; HSA-VI: s_load_dword s{{[0-9]+}}, s[4:5], 0x4
; HSA-VI: s_load_dword s{{[0-9]+}}, s[4:5], 0x8
; HSA-VI: s_load_dword s{{[0-9]+}}, s[4:5], 0xc
define amdgpu_kernel void @array_3xi32(i16 %arg0, [3 x i32] %arg1) {
  store volatile i16 %arg0, i16 addrspace(1)* undef
  store volatile [3 x i32] %arg1, [3 x i32] addrspace(1)* undef
  ret void
}

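; Roughly: the i8 sits at offset 0 and the [3 x i16] presumably starts
; at its 2-byte alignment, so all of the arguments fit in the first
; 8 bytes, which the two dword loads at 0x0 and 0x4 cover.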
; GCN-LABEL: {{^}}array_3xi16:
; HSA-VI: s_load_dword s{{[0-9]+}}, s[4:5], 0x0
; HSA-VI: s_load_dword s{{[0-9]+}}, s[4:5], 0x4
define amdgpu_kernel void @array_3xi16(i8 %arg0, [3 x i16] %arg1) {
  store volatile i8 %arg0, i8 addrspace(1)* undef
  store volatile [3 x i16] %arg1, [3 x i16] addrspace(1)* undef
  ret void
}