; blob: 6fa26cb3879357bdd09e25e2c45d6b129fb3b0aa
; RUN: llc < %s -march=amdgcn -verify-machineinstrs | FileCheck %s --check-prefixes=SI,GCN,MESA-GCN,FUNC
; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefixes=VI,GCN,MESA-VI,MESA-GCN,FUNC
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs | FileCheck %s --check-prefixes=VI,GCN,HSA-VI,FUNC
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC
; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG --check-prefix=FUNC

; Plain i8 kernel argument, zero-extended in IR and stored as an i32.
; The Mesa runs expect a dword load followed by a mask of the low byte; the
; HSA run expects a flat pointer built from s[4:5] + 8 and an unsigned byte load.
; FUNC-LABEL: {{^}}i8_arg:
; HSA-VI: kernarg_segment_alignment = 4
; EG: AND_INT {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
; MESA-VI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c
; MESA-GCN: s_and_b32 s{{[0-9]+}}, [[VAL]], 0xff
; HSA-VI: s_add_u32 [[SPTR_LO:s[0-9]+]], s4, 8
; HSA-VI: s_addc_u32 [[SPTR_HI:s[0-9]+]], s5, 0
; HSA-VI: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], [[SPTR_LO]]
; HSA-VI: v_mov_b32_e32 v[[VPTR_HI:[0-9]+]], [[SPTR_HI]]
; FIXME: Should be using s_load_dword
; HSA-VI: flat_load_ubyte v{{[0-9]+}}, v{{\[}}[[VPTR_LO]]:[[VPTR_HI]]]

define amdgpu_kernel void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind {
entry:
  %in.zext = zext i8 %in to i32
  store i32 %in.zext, i32 addrspace(1)* %out, align 4
  ret void
}

; i8 kernel argument carrying the zeroext attribute, widened to i32 and stored.
; Unlike the unattributed variant, no masking instruction is checked on the
; Mesa runs; the HSA run still expects a flat unsigned byte load.
; FUNC-LABEL: {{^}}i8_zext_arg:
; HSA-VI: kernarg_segment_alignment = 4
; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
; MESA-VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
; HSA-VI: s_add_u32 [[SPTR_LO:s[0-9]+]], s4, 8
; HSA-VI: s_addc_u32 [[SPTR_HI:s[0-9]+]], s5, 0
; HSA-VI: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], [[SPTR_LO]]
; HSA-VI: v_mov_b32_e32 v[[VPTR_HI:[0-9]+]], [[SPTR_HI]]
; FIXME: Should be using s_load_dword
; HSA-VI: flat_load_ubyte v{{[0-9]+}}, v{{\[}}[[VPTR_LO]]:[[VPTR_HI]]]

define amdgpu_kernel void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind {
entry:
  %in.zext = zext i8 %in to i32
  store i32 %in.zext, i32 addrspace(1)* %out, align 4
  ret void
}

; i8 kernel argument carrying the signext attribute, sign-extended to i32 and
; stored. The HSA run expects a signed byte load through a flat pointer.
; FUNC-LABEL: {{^}}i8_sext_arg:
; HSA-VI: kernarg_segment_alignment = 4
; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
; MESA-VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
; HSA-VI: s_add_u32 [[SPTR_LO:s[0-9]+]], s4, 8
; HSA-VI: s_addc_u32 [[SPTR_HI:s[0-9]+]], s5, 0
; HSA-VI: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], [[SPTR_LO]]
; HSA-VI: v_mov_b32_e32 v[[VPTR_HI:[0-9]+]], [[SPTR_HI]]
; FIXME: Should be using s_load_dword
; HSA-VI: flat_load_sbyte v{{[0-9]+}}, v{{\[}}[[VPTR_LO]]:[[VPTR_HI]]]

define amdgpu_kernel void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 signext %in) nounwind {
entry:
  %in.sext = sext i8 %in to i32
  store i32 %in.sext, i32 addrspace(1)* %out, align 4
  ret void
}

; Plain i16 kernel argument, zero-extended in IR and stored as an i32.
; The Mesa runs expect a dword load followed by a mask of the low 16 bits.
; The mask literal is spelled out in full (0xffff): FileCheck matches
; substrings, so the shorter 0xff literal used by the i8 test would also
; accept the 16-bit mask and would not catch a wrong mask width.
; The HSA run expects a flat pointer built from s[4:5] + 8 and an unsigned
; short load.
; FUNC-LABEL: {{^}}i16_arg:
; HSA-VI: kernarg_segment_alignment = 4
; EG: AND_INT {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
; MESA-VI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c
; MESA-GCN: s_and_b32 s{{[0-9]+}}, [[VAL]], 0xffff
; HSA-VI: s_add_u32 [[SPTR_LO:s[0-9]+]], s4, 8
; HSA-VI: s_addc_u32 [[SPTR_HI:s[0-9]+]], s5, 0
; HSA-VI: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], [[SPTR_LO]]
; HSA-VI: v_mov_b32_e32 v[[VPTR_HI:[0-9]+]], [[SPTR_HI]]
; FIXME: Should be using s_load_dword
; HSA-VI: flat_load_ushort v{{[0-9]+}}, v{{\[}}[[VPTR_LO]]:[[VPTR_HI]]]

define amdgpu_kernel void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind {
entry:
  %0 = zext i16 %in to i32
  store i32 %0, i32 addrspace(1)* %out, align 4
  ret void
}

; i16 kernel argument carrying the zeroext attribute, widened to i32 and
; stored. No masking instruction is checked on the Mesa runs; the HSA run
; expects a flat unsigned short load.
; FUNC-LABEL: {{^}}i16_zext_arg:
; HSA-VI: kernarg_segment_alignment = 4
; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
; MESA-VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
; HSA-VI: s_add_u32 [[SPTR_LO:s[0-9]+]], s4, 8
; HSA-VI: s_addc_u32 [[SPTR_HI:s[0-9]+]], s5, 0
; HSA-VI: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], [[SPTR_LO]]
; HSA-VI: v_mov_b32_e32 v[[VPTR_HI:[0-9]+]], [[SPTR_HI]]
; FIXME: Should be using s_load_dword
; HSA-VI: flat_load_ushort v{{[0-9]+}}, v{{\[}}[[VPTR_LO]]:[[VPTR_HI]]]

define amdgpu_kernel void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind {
entry:
  %in.zext = zext i16 %in to i32
  store i32 %in.zext, i32 addrspace(1)* %out, align 4
  ret void
}

; i16 kernel argument carrying the signext attribute, sign-extended to i32
; and stored. The HSA run expects a signed short load through a flat pointer.
; FUNC-LABEL: {{^}}i16_sext_arg:
; HSA-VI: kernarg_segment_alignment = 4
; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
; MESA-VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
; HSA-VI: s_add_u32 [[SPTR_LO:s[0-9]+]], s4, 8
; HSA-VI: s_addc_u32 [[SPTR_HI:s[0-9]+]], s5, 0
; HSA-VI: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], [[SPTR_LO]]
; HSA-VI: v_mov_b32_e32 v[[VPTR_HI:[0-9]+]], [[SPTR_HI]]
; FIXME: Should be using s_load_dword
; HSA-VI: flat_load_sshort v{{[0-9]+}}, v{{\[}}[[VPTR_LO]]:[[VPTR_HI]]]

define amdgpu_kernel void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 signext %in) nounwind {
entry:
  %in.sext = sext i16 %in to i32
  store i32 %in.sext, i32 addrspace(1)* %out, align 4
  ret void
}

; i32 kernel argument stored unchanged to %out. Every GCN run expects a
; single scalar dword load of the argument.
; FUNC-LABEL: {{^}}i32_arg:
; HSA-VI: kernarg_segment_alignment = 4
; EG: T{{[0-9]\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
; MESA-VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
; HSA-VI: s_load_dword s{{[0-9]}}, s[4:5], 0x8
define amdgpu_kernel void @i32_arg(i32 addrspace(1)* nocapture %out, i32 %in) nounwind {
entry:
  store i32 %in, i32 addrspace(1)* %out, align 4
  ret void
}

; float kernel argument stored unchanged to %out. The expected loads are the
; same as for the i32 argument test.
; FUNC-LABEL: {{^}}f32_arg:
; HSA-VI: kernarg_segment_alignment = 4
; EG: T{{[0-9]\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
; MESA-VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
; HSA-VI: s_load_dword s{{[0-9]+}}, s[4:5], 0x8
define amdgpu_kernel void @f32_arg(float addrspace(1)* nocapture %out, float %in) nounwind {
entry:
  store float %in, float addrspace(1)* %out, align 4
  ret void
}

; <2 x i8> kernel argument stored unchanged to %out. Two separate byte loads
; are expected on every target.
; FUNC-LABEL: {{^}}v2i8_arg:
; HSA-VI: kernarg_segment_alignment = 4
; EG: VTX_READ_8
; EG: VTX_READ_8
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
define amdgpu_kernel void @v2i8_arg(<2 x i8> addrspace(1)* %out, <2 x i8> %in) {
entry:
  store <2 x i8> %in, <2 x i8> addrspace(1)* %out
  ret void
}

; <2 x i16> kernel argument stored unchanged to %out. Two separate short
; loads are expected on every target.
; FUNC-LABEL: {{^}}v2i16_arg:
; HSA-VI: kernarg_segment_alignment = 4
; EG: VTX_READ_16
; EG: VTX_READ_16
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
define amdgpu_kernel void @v2i16_arg(<2 x i16> addrspace(1)* %out, <2 x i16> %in) {
entry:
  store <2 x i16> %in, <2 x i16> addrspace(1)* %out
  ret void
}

; <2 x i32> kernel argument stored unchanged to %out. The GCN runs expect
; one two-dword scalar load covering both elements.
; FUNC-LABEL: {{^}}v2i32_arg:
; HSA-VI: kernarg_segment_alignment = 4
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
; SI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
; MESA-VI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x2c
; HSA-VI: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x8
define amdgpu_kernel void @v2i32_arg(<2 x i32> addrspace(1)* nocapture %out, <2 x i32> %in) nounwind {
entry:
  store <2 x i32> %in, <2 x i32> addrspace(1)* %out, align 4
  ret void
}

; <2 x float> kernel argument stored unchanged to %out. The expected loads
; use the same offsets as the <2 x i32> test.
; FUNC-LABEL: {{^}}v2f32_arg:
; HSA-VI: kernarg_segment_alignment = 4
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
; SI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
; MESA-VI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x2c
; HSA-VI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[4:5], 0x8
define amdgpu_kernel void @v2f32_arg(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) nounwind {
entry:
  store <2 x float> %in, <2 x float> addrspace(1)* %out, align 4
  ret void
}

; <3 x i8> kernel argument stored unchanged to %out. Three byte loads are
; expected on every target; the R600 run also pins the byte offsets 40..42.
; FUNC-LABEL: {{^}}v3i8_arg:
; HSA-VI: kernarg_segment_alignment = 4
; EG-DAG: VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 40
; EG-DAG: VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 41
; EG-DAG: VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 42
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
define amdgpu_kernel void @v3i8_arg(<3 x i8> addrspace(1)* nocapture %out, <3 x i8> %in) nounwind {
entry:
  store <3 x i8> %in, <3 x i8> addrspace(1)* %out, align 4
  ret void
}

; <3 x i16> kernel argument stored unchanged to %out. Three short loads are
; expected on every target; the R600 run also pins the byte offsets 44, 46, 48.
; FUNC-LABEL: {{^}}v3i16_arg:
; HSA-VI: kernarg_segment_alignment = 4
; EG-DAG: VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 44
; EG-DAG: VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 46
; EG-DAG: VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 48
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
define amdgpu_kernel void @v3i16_arg(<3 x i16> addrspace(1)* nocapture %out, <3 x i16> %in) nounwind {
entry:
  store <3 x i16> %in, <3 x i16> addrspace(1)* %out, align 4
  ret void
}

; <3 x i32> kernel argument stored unchanged to %out. The GCN runs expect a
; four-dword scalar load even though only three elements are used.
; FUNC-LABEL: {{^}}v3i32_arg:
; HSA-VI: kernarg_segment_alignment = 4
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
; MESA-VI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x34
; HSA-VI: s_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x10
define amdgpu_kernel void @v3i32_arg(<3 x i32> addrspace(1)* nocapture %out, <3 x i32> %in) nounwind {
entry:
  store <3 x i32> %in, <3 x i32> addrspace(1)* %out, align 4
  ret void
}

; <3 x float> kernel argument stored unchanged to %out. The expected loads
; use the same offsets as the <3 x i32> test.
; FUNC-LABEL: {{^}}v3f32_arg:
; HSA-VI: kernarg_segment_alignment = 4
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
; MESA-VI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x34
; HSA-VI: s_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x10
define amdgpu_kernel void @v3f32_arg(<3 x float> addrspace(1)* nocapture %out, <3 x float> %in) nounwind {
entry:
  store <3 x float> %in, <3 x float> addrspace(1)* %out, align 4
  ret void
}

; <4 x i8> kernel argument stored unchanged to %out. Four separate byte
; loads are expected on every target.
; FUNC-LABEL: {{^}}v4i8_arg:
; HSA-VI: kernarg_segment_alignment = 4
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
define amdgpu_kernel void @v4i8_arg(<4 x i8> addrspace(1)* %out, <4 x i8> %in) {
entry:
  store <4 x i8> %in, <4 x i8> addrspace(1)* %out
  ret void
}

; <4 x i16> kernel argument stored unchanged to %out. Four separate short
; loads are expected on every target.
; The flat-load checks use the HSA-VI prefix, which is the one the amdhsa
; invocation at the top of the file enables; a prefix that no invocation
; enables is silently ignored by FileCheck and verifies nothing.
; FUNC-LABEL: {{^}}v4i16_arg:
; HSA-VI: kernarg_segment_alignment = 4
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
define amdgpu_kernel void @v4i16_arg(<4 x i16> addrspace(1)* %out, <4 x i16> %in) {
entry:
  store <4 x i16> %in, <4 x i16> addrspace(1)* %out
  ret void
}

; <4 x i32> kernel argument stored unchanged to %out. The GCN runs expect
; one four-dword scalar load covering all elements.
; FUNC-LABEL: {{^}}v4i32_arg:
; HSA-VI: kernarg_segment_alignment = 4
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X
; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
; MESA-VI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x34
; HSA-VI: s_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x10
define amdgpu_kernel void @v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 x i32> %in) nounwind {
entry:
  store <4 x i32> %in, <4 x i32> addrspace(1)* %out, align 4
  ret void
}

; <4 x float> kernel argument stored unchanged to %out. The expected loads
; use the same offsets as the <4 x i32> test.
; FUNC-LABEL: {{^}}v4f32_arg:
; HSA-VI: kernarg_segment_alignment = 4
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X
; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
; MESA-VI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x34
; HSA-VI: s_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x10
define amdgpu_kernel void @v4f32_arg(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) nounwind {
entry:
  store <4 x float> %in, <4 x float> addrspace(1)* %out, align 4
  ret void
}

; <8 x i8> kernel argument stored unchanged to %out. Eight separate byte
; loads are expected on every target, one per element.
; The HSA checks use the HSA-VI prefix enabled by the amdhsa invocation and
; the flat_load_ubyte mnemonic used by the other byte-vector tests; the
; Mesa list has one check per element, matching the 4- and 16-element tests.
; FUNC-LABEL: {{^}}v8i8_arg:
; HSA-VI: kernarg_segment_alignment = 4
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
define amdgpu_kernel void @v8i8_arg(<8 x i8> addrspace(1)* %out, <8 x i8> %in) {
entry:
  store <8 x i8> %in, <8 x i8> addrspace(1)* %out
  ret void
}

; <8 x i16> kernel argument stored unchanged to %out. Eight separate short
; loads are expected on every target.
; FUNC-LABEL: {{^}}v8i16_arg:
; HSA-VI: kernarg_segment_alignment = 4
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
define amdgpu_kernel void @v8i16_arg(<8 x i16> addrspace(1)* %out, <8 x i16> %in) {
entry:
  store <8 x i16> %in, <8 x i16> addrspace(1)* %out
  ret void
}

; <8 x i32> kernel argument stored unchanged to %out. The GCN runs expect
; one eight-dword scalar load covering all elements.
; FUNC-LABEL: {{^}}v8i32_arg:
; HSA-VI: kernarg_segment_alignment = 5
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
; SI: s_load_dwordx8 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x11
; MESA-VI: s_load_dwordx8 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x44
; HSA-VI: s_load_dwordx8 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x20
define amdgpu_kernel void @v8i32_arg(<8 x i32> addrspace(1)* nocapture %out, <8 x i32> %in) nounwind {
entry:
  store <8 x i32> %in, <8 x i32> addrspace(1)* %out, align 4
  ret void
}

; <8 x float> kernel argument stored unchanged to %out. The GCN runs expect
; one eight-dword scalar load covering all elements.
; The VI Mesa and HSA load checks mirror the <8 x i32> test, which has the
; same argument layout (a pointer followed by a 32-byte vector); every other
; dword-vector test in this file checks all three GCN configurations.
; FUNC-LABEL: {{^}}v8f32_arg:
; HSA-VI: kernarg_segment_alignment = 5
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
; SI: s_load_dwordx8 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x11
; MESA-VI: s_load_dwordx8 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x44
; HSA-VI: s_load_dwordx8 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x20
define amdgpu_kernel void @v8f32_arg(<8 x float> addrspace(1)* nocapture %out, <8 x float> %in) nounwind {
entry:
  store <8 x float> %in, <8 x float> addrspace(1)* %out, align 4
  ret void
}

; <16 x i8> kernel argument stored unchanged to %out. Sixteen separate byte
; loads are expected on every target, one per element.
; FUNC-LABEL: {{^}}v16i8_arg:
; HSA-VI: kernarg_segment_alignment = 4
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
define amdgpu_kernel void @v16i8_arg(<16 x i8> addrspace(1)* %out, <16 x i8> %in) {
entry:
  store <16 x i8> %in, <16 x i8> addrspace(1)* %out
  ret void
}

; <16 x i16> kernel argument stored unchanged to %out. Sixteen separate
; short loads are expected on every target, one per element.
; FUNC-LABEL: {{^}}v16i16_arg:
; HSA-VI: kernarg_segment_alignment = 5
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
define amdgpu_kernel void @v16i16_arg(<16 x i16> addrspace(1)* %out, <16 x i16> %in) {
entry:
  store <16 x i16> %in, <16 x i16> addrspace(1)* %out
  ret void
}

; <16 x i32> kernel argument stored unchanged to %out. The GCN runs expect
; one sixteen-dword scalar load covering all elements.
; FUNC-LABEL: {{^}}v16i32_arg:
; HSA-VI: kernarg_segment_alignment = 6
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
; SI: s_load_dwordx16 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x19
; MESA-VI: s_load_dwordx16 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x64
; HSA-VI: s_load_dwordx16 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x40
define amdgpu_kernel void @v16i32_arg(<16 x i32> addrspace(1)* nocapture %out, <16 x i32> %in) nounwind {
entry:
  store <16 x i32> %in, <16 x i32> addrspace(1)* %out, align 4
  ret void
}

; <16 x float> kernel argument stored unchanged to %out. The expected loads
; use the same offsets as the <16 x i32> test.
; FUNC-LABEL: {{^}}v16f32_arg:
; HSA-VI: kernarg_segment_alignment = 6
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
; SI: s_load_dwordx16 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x19
; MESA-VI: s_load_dwordx16 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x64
; HSA-VI: s_load_dwordx16 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x40
define amdgpu_kernel void @v16f32_arg(<16 x float> addrspace(1)* nocapture %out, <16 x float> %in) nounwind {
entry:
  store <16 x float> %in, <16 x float> addrspace(1)* %out, align 4
  ret void
}

; i64 kernel argument stored unchanged to %out with 8-byte alignment. The
; Mesa runs expect two two-dword scalar loads followed by a two-dword store.
; FUNC-LABEL: {{^}}kernel_arg_i64:
; MESA-GCN: s_load_dwordx2
; MESA-GCN: s_load_dwordx2
; MESA-GCN: buffer_store_dwordx2
; HSA-VI: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x8
define amdgpu_kernel void @kernel_arg_i64(i64 addrspace(1)* %out, i64 %a) nounwind {
  store i64 %a, i64 addrspace(1)* %out, align 8
  ret void
}

; double kernel argument stored unchanged to %out. The Mesa runs expect two
; two-dword scalar loads, in either order, followed by a two-dword store.
; FUNC-LABEL: {{^}}f64_kernel_arg:
; SI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0x9
; SI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0xb
; MESA-VI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0x24
; MESA-VI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0x2c
; MESA-GCN: buffer_store_dwordx2
; HSA-VI: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x8
define amdgpu_kernel void @f64_kernel_arg(double addrspace(1)* %out, double %in) {
entry:
  store double %in, double addrspace(1)* %out
  ret void
}

; XFUNC-LABEL: {{^}}kernel_arg_v1i64:
; XGCN: s_load_dwordx2
; XGCN: s_load_dwordx2
; XGCN: buffer_store_dwordx2
; define amdgpu_kernel void @kernel_arg_v1i64(<1 x i64> addrspace(1)* %out, <1 x i64> %a) nounwind {
;   store <1 x i64> %a, <1 x i64> addrspace(1)* %out, align 8
;   ret void
; }

; i1 kernel argument stored unchanged to %out as a single byte. Only the
; default amdgcn run is checked, and it expects a byte load, a mask, and a
; byte store.
; FUNC-LABEL: {{^}}i1_arg:
; SI: buffer_load_ubyte
; SI: v_and_b32_e32
; SI: buffer_store_byte
; SI: s_endpgm
define amdgpu_kernel void @i1_arg(i1 addrspace(1)* %out, i1 %x) nounwind {
  store i1 %x, i1 addrspace(1)* %out, align 1
  ret void
}

; i1 kernel argument zero-extended to i32 and stored. Only the default
; amdgcn run is checked, and it expects a byte load and a dword store.
; FUNC-LABEL: {{^}}i1_arg_zext_i32:
; SI: buffer_load_ubyte
; SI: buffer_store_dword
; SI: s_endpgm
define amdgpu_kernel void @i1_arg_zext_i32(i32 addrspace(1)* %out, i1 %x) nounwind {
  %x.zext = zext i1 %x to i32
  store i32 %x.zext, i32 addrspace(1)* %out, align 4
  ret void
}

; i1 kernel argument zero-extended to i64 and stored. Only the default
; amdgcn run is checked, and it expects a byte load and a two-dword store.
; FUNC-LABEL: {{^}}i1_arg_zext_i64:
; SI: buffer_load_ubyte
; SI: buffer_store_dwordx2
; SI: s_endpgm
define amdgpu_kernel void @i1_arg_zext_i64(i64 addrspace(1)* %out, i1 %x) nounwind {
  %x.zext = zext i1 %x to i64
  store i64 %x.zext, i64 addrspace(1)* %out, align 8
  ret void
}

; i1 kernel argument sign-extended to i32 and stored. Only the default
; amdgcn run is checked, and it expects a byte load and a dword store.
; The pointer type in the store is written with a space between the element
; type and the address-space qualifier, as in every other store in this file.
; FUNC-LABEL: {{^}}i1_arg_sext_i32:
; SI: buffer_load_ubyte
; SI: buffer_store_dword
; SI: s_endpgm
define amdgpu_kernel void @i1_arg_sext_i32(i32 addrspace(1)* %out, i1 %x) nounwind {
  %ext = sext i1 %x to i32
  store i32 %ext, i32 addrspace(1)* %out, align 4
  ret void
}

; i1 kernel argument sign-extended to i64 and stored. Only the default
; amdgcn run is checked; it expects a byte load, a bitfield extract and an
; arithmetic shift, then a two-dword store.
; FUNC-LABEL: {{^}}i1_arg_sext_i64:
; SI: buffer_load_ubyte
; SI: v_bfe_i32
; SI: v_ashrrev_i32
; SI: buffer_store_dwordx2
; SI: s_endpgm
define amdgpu_kernel void @i1_arg_sext_i64(i64 addrspace(1)* %out, i1 %x) nounwind {
  %x.sext = sext i1 %x to i64
  store i64 %x.sext, i64 addrspace(1)* %out, align 8
  ret void
}