; RUN: llc < %s -march=amdgcn -verify-machineinstrs | FileCheck -allow-deprecated-dag-overlap -enable-var-scope --check-prefixes=SI,GCN,MESA-GCN,FUNC %s
; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck -allow-deprecated-dag-overlap -enable-var-scope -check-prefixes=VI,GCN,MESA-VI,MESA-GCN,FUNC %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=-code-object-v3 -verify-machineinstrs | FileCheck -allow-deprecated-dag-overlap -enable-var-scope -check-prefixes=VI,GCN,HSA-GFX9,FUNC %s
; RUN: llc < %s -march=r600 -mcpu=redwood -verify-machineinstrs | FileCheck -allow-deprecated-dag-overlap -enable-var-scope -check-prefixes=EG,EGCM,FUNC %s
; RUN: llc < %s -march=r600 -mcpu=cayman -verify-machineinstrs | FileCheck -allow-deprecated-dag-overlap -enable-var-scope --check-prefixes=CM,EGCM,FUNC %s

; A plain i8 kernel argument that the kernel body zero-extends to i32 and
; stores. The GCN runs expect a scalar dword load of the argument followed by
; a 0xff mask, and the r600 runs expect a byte-sized vertex read.
; FUNC-LABEL: {{^}}i8_arg:
; HSA-GFX9: kernarg_segment_byte_size = 12
; HSA-GFX9: kernarg_segment_alignment = 4

; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
; MESA-VI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c
; MESA-GCN: s_and_b32 s{{[0-9]+}}, [[VAL]], 0xff

; HSA-GFX9: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x8
; HSA-GFX9: s_and_b32 s{{[0-9]+}}, [[VAL]], 0xff

; EGCM: VTX_READ_8{{.*}} #3
; EGCM: KC0[2].Y
define amdgpu_kernel void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind {
  %ext = zext i8 %in to i32
  store i32 %ext, i32 addrspace(1)* %out, align 4
  ret void
}

; An i8 kernel argument carrying the zeroext attribute, zero-extended to i32
; and stored. The amdhsa run expects a dword load at 0x8 plus a 0xff mask, the
; other GCN runs only pin the dword load offset, and Redwood/Cayman expect a
; BFE_INT using the literal 8.
; FUNC-LABEL: {{^}}i8_zext_arg:
; HSA-GFX9: kernarg_segment_byte_size = 12
; HSA-GFX9: kernarg_segment_alignment = 4
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
; MESA-VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c

; HSA-GFX9: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x8
; HSA-GFX9: s_and_b32 s{{[0-9]+}}, [[VAL]], 0xff

; EG: BFE_INT T0.X, T0.X, 0.0, literal.x,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
; EG-NEXT: 8(1.121039e-44), 2(2.802597e-45)

; CM: BFE_INT * T0.X, T0.X, 0.0, literal.x,
; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
define amdgpu_kernel void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind {
  %ext = zext i8 %in to i32
  store i32 %ext, i32 addrspace(1)* %out, align 4
  ret void
}

; An i8 kernel argument carrying the signext attribute, sign-extended to i32
; and stored. The amdhsa run expects a dword load at 0x8, an s_sext_i32_i8 and
; a global store, the other GCN runs only pin the dword load offset, and
; Redwood/Cayman expect a BFE_INT using the literal 8.
; FUNC-LABEL: {{^}}i8_sext_arg:
; HSA-GFX9: kernarg_segment_byte_size = 12
; HSA-GFX9: kernarg_segment_alignment = 4
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb

; MESA-VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c

; HSA-GFX9: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x8
; HSA-GFX9: s_sext_i32_i8 s{{[0-9]+}}, [[VAL]]
; HSA-GFX9: global_store_dword

; EG: BFE_INT T0.X, T0.X, 0.0, literal.x,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
; EG-NEXT: 8(1.121039e-44), 2(2.802597e-45)

; CM: BFE_INT * T0.X, T0.X, 0.0, literal.x,
; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
define amdgpu_kernel void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 signext %in) nounwind {
  %ext = sext i8 %in to i32
  store i32 %ext, i32 addrspace(1)* %out, align 4
  ret void
}

; A plain i16 kernel argument that the kernel body zero-extends to i32 and
; stores. The GCN runs expect a scalar dword load of the argument followed by
; a 0xffff mask, and the r600 runs expect a 16-bit vertex read.
; FUNC-LABEL: {{^}}i16_arg:
; HSA-GFX9: kernarg_segment_byte_size = 12
; HSA-GFX9: kernarg_segment_alignment = 4

; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb

; MESA-VI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c
; The mask is anchored at end of line so that a narrower 0xff mask cannot
; satisfy the check by substring match.
; MESA-GCN: s_and_b32 s{{[0-9]+}}, [[VAL]], 0xffff{{$}}

; HSA-GFX9: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x8
; HSA-GFX9: s_and_b32 s{{[0-9]+}}, [[VAL]], 0xffff{{$}}
; HSA-GFX9: global_store_dword

; EGCM: VTX_READ_16
; EGCM: KC0[2].Y
define amdgpu_kernel void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind {
  %ext = zext i16 %in to i32
  store i32 %ext, i32 addrspace(1)* %out, align 4
  ret void
}

; An i16 kernel argument carrying the zeroext attribute, zero-extended to i32
; and stored. The amdhsa run expects a dword load at 0x8, a 0xffff mask and a
; global store, the other GCN runs only pin the dword load offset, and
; Redwood/Cayman expect a BFE_INT using the literal 16.
; FUNC-LABEL: {{^}}i16_zext_arg:
; HSA-GFX9: kernarg_segment_byte_size = 12
; HSA-GFX9: kernarg_segment_alignment = 4

; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
; MESA-VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c

; HSA-GFX9: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x8
; HSA-GFX9: s_and_b32 s{{[0-9]+}}, [[VAL]], 0xffff{{$}}
; HSA-GFX9: global_store_dword

; EG: BFE_INT T0.X, T0.X, 0.0, literal.x,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45)

; CM: BFE_INT * T0.X, T0.X, 0.0, literal.x,
; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
define amdgpu_kernel void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind {
  %ext = zext i16 %in to i32
  store i32 %ext, i32 addrspace(1)* %out, align 4
  ret void
}

; An i16 kernel argument carrying the signext attribute, sign-extended to i32
; and stored. The amdhsa run expects a dword load at 0x8, an s_sext_i32_i16
; and a global store, the other GCN runs only pin the dword load offset, and
; Redwood/Cayman expect a BFE_INT using the literal 16.
; FUNC-LABEL: {{^}}i16_sext_arg:
; HSA-GFX9: kernarg_segment_byte_size = 12
; HSA-GFX9: kernarg_segment_alignment = 4

; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
; MESA-VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c

; HSA-GFX9: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x8
; HSA-GFX9: s_sext_i32_i16 s{{[0-9]+}}, [[VAL]]
; HSA-GFX9: global_store_dword

; EG: BFE_INT T0.X, T0.X, 0.0, literal.x,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45)

; CM: BFE_INT * T0.X, T0.X, 0.0, literal.x,
; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
define amdgpu_kernel void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 signext %in) nounwind {
  %ext = sext i16 %in to i32
  store i32 %ext, i32 addrspace(1)* %out, align 4
  ret void
}

; An i32 kernel argument stored unchanged. Each GCN run expects one scalar
; dword load at its argument offset (0xb, 0x2c or 0x8) and the r600 runs
; expect a read of KC0[2].Z.
; FUNC-LABEL: {{^}}i32_arg:
; HSA-GFX9: kernarg_segment_byte_size = 12
; HSA-GFX9: kernarg_segment_alignment = 4

; EGCM: T{{[0-9]\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
; MESA-VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
; HSA-GFX9: s_load_dword s{{[0-9]}}, s[4:5], 0x8
define amdgpu_kernel void @i32_arg(i32 addrspace(1)* nocapture %out, i32 %in) nounwind {
entry:
  store i32 %in, i32 addrspace(1)* %out, align 4
  ret void
}

; A float kernel argument stored unchanged. The expectations match the i32
; case, with one scalar dword load per GCN run and a read of KC0[2].Z on r600.
; FUNC-LABEL: {{^}}f32_arg:
; HSA-GFX9: kernarg_segment_byte_size = 12
; HSA-GFX9: kernarg_segment_alignment = 4
; EGCM: T{{[0-9]\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
; MESA-VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
; HSA-GFX9: s_load_dword s{{[0-9]+}}, s[4:5], 0x8
define amdgpu_kernel void @f32_arg(float addrspace(1)* nocapture %out, float %in) nounwind {
entry:
  store float %in, float addrspace(1)* %out, align 4
  ret void
}

; A <2 x i8> kernel argument stored whole. The GCN runs expect a scalar dword
; load and no buffer, flat or global load afterwards, and the r600 runs
; expect two byte-sized vertex reads.
; FUNC-LABEL: {{^}}v2i8_arg:
; HSA-GFX9: kernarg_segment_byte_size = 12
; HSA-GFX9: kernarg_segment_alignment = 4

; EGCM: VTX_READ_8
; EGCM: VTX_READ_8

; GCN: s_load_dword s
; GCN-NOT: {{buffer|flat|global}}_load_
define amdgpu_kernel void @v2i8_arg(<2 x i8> addrspace(1)* %out, <2 x i8> %in) {
entry:
  store <2 x i8> %in, <2 x i8> addrspace(1)* %out
  ret void
}

; A <2 x i16> kernel argument stored whole. Each GCN run expects a single
; scalar dword load at its argument offset and the r600 runs expect two
; 16-bit vertex reads.
; FUNC-LABEL: {{^}}v2i16_arg:
; HSA-GFX9: kernarg_segment_byte_size = 12
; HSA-GFX9: kernarg_segment_alignment = 4

; EGCM: VTX_READ_16
; EGCM: VTX_READ_16

; SI: s_load_dword s{{[0-9]+}}, s[0:1], 0xb
; MESA-VI: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; HSA-GFX9: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0x8
define amdgpu_kernel void @v2i16_arg(<2 x i16> addrspace(1)* %out, <2 x i16> %in) {
entry:
  store <2 x i16> %in, <2 x i16> addrspace(1)* %out
  ret void
}

; A <2 x i32> kernel argument stored whole. Each GCN run expects one
; s_load_dwordx2 at its argument offset and the r600 runs expect reads of
; KC0[2].W and KC0[3].X in either order.
; FUNC-LABEL: {{^}}v2i32_arg:
; HSA-GFX9: kernarg_segment_byte_size = 16
; HSA-GFX9: kernarg_segment_alignment = 4

; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
; SI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
; MESA-VI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x2c
; HSA-GFX9: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x8
define amdgpu_kernel void @v2i32_arg(<2 x i32> addrspace(1)* nocapture %out, <2 x i32> %in) nounwind {
entry:
  store <2 x i32> %in, <2 x i32> addrspace(1)* %out, align 4
  ret void
}

; A <2 x float> kernel argument stored whole. The expectations match the
; <2 x i32> case, with one s_load_dwordx2 per GCN run and reads of KC0[2].W
; and KC0[3].X on r600.
; FUNC-LABEL: {{^}}v2f32_arg:
; HSA-GFX9: kernarg_segment_byte_size = 16
; HSA-GFX9: kernarg_segment_alignment = 4

; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
; SI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
; MESA-VI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x2c
; HSA-GFX9: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[4:5], 0x8
define amdgpu_kernel void @v2f32_arg(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) nounwind {
entry:
  store <2 x float> %in, <2 x float> addrspace(1)* %out, align 4
  ret void
}

; A <3 x i8> kernel argument stored whole. Each GCN run expects a single
; scalar dword load at its argument offset and the r600 runs expect three
; byte-sized vertex reads at offsets 40, 41 and 42.
; FUNC-LABEL: {{^}}v3i8_arg:
; HSA-GFX9: kernarg_segment_byte_size = 12
; HSA-GFX9: kernarg_segment_alignment = 4

; EGCM-DAG: VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 40
; EGCM-DAG: VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 41
; EGCM-DAG: VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 42

; SI: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb

; MESA-VI: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; HSA-GFX9: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0x8
define amdgpu_kernel void @v3i8_arg(<3 x i8> addrspace(1)* nocapture %out, <3 x i8> %in) nounwind {
entry:
  store <3 x i8> %in, <3 x i8> addrspace(1)* %out, align 4
  ret void
}

; A <3 x i16> kernel argument stored whole. Each GCN run expects a single
; s_load_dwordx2 at its argument offset and the r600 runs expect three 16-bit
; vertex reads at offsets 44, 46 and 48.
; FUNC-LABEL: {{^}}v3i16_arg:
; HSA-GFX9: kernarg_segment_byte_size = 16
; HSA-GFX9: kernarg_segment_alignment = 4

; EGCM-DAG: VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 44
; EGCM-DAG: VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 46
; EGCM-DAG: VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 48

; SI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb

; HSA-GFX9: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x8
; MESA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x2c
define amdgpu_kernel void @v3i16_arg(<3 x i16> addrspace(1)* nocapture %out, <3 x i16> %in) nounwind {
entry:
  store <3 x i16> %in, <3 x i16> addrspace(1)* %out, align 4
  ret void
}

; A <3 x i32> kernel argument stored whole. Each GCN run expects one
; s_load_dwordx4 at its argument offset (0xd, 0x34 or 0x10) and the r600 runs
; expect reads of KC0[3].Y, KC0[3].Z and KC0[3].W.
; FUNC-LABEL: {{^}}v3i32_arg:
; HSA-GFX9: kernarg_segment_byte_size = 32
; HSA-GFX9: kernarg_segment_alignment = 4
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
; MESA-VI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x34
; HSA-GFX9: s_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x10
define amdgpu_kernel void @v3i32_arg(<3 x i32> addrspace(1)* nocapture %out, <3 x i32> %in) nounwind {
entry:
  store <3 x i32> %in, <3 x i32> addrspace(1)* %out, align 4
  ret void
}

; A <3 x float> kernel argument stored whole. The expectations match the
; <3 x i32> case, with one s_load_dwordx4 per GCN run and reads of KC0[3].Y,
; KC0[3].Z and KC0[3].W on r600.
; FUNC-LABEL: {{^}}v3f32_arg:
; HSA-GFX9: kernarg_segment_byte_size = 32
; HSA-GFX9: kernarg_segment_alignment = 4
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
; MESA-VI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x34
; HSA-GFX9: s_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x10
define amdgpu_kernel void @v3f32_arg(<3 x float> addrspace(1)* nocapture %out, <3 x float> %in) nounwind {
entry:
  store <3 x float> %in, <3 x float> addrspace(1)* %out, align 4
  ret void
}

; A <4 x i8> kernel argument stored whole. The GCN runs expect an
; s_load_dwordx2 and an s_load_dword in either order, and the r600 runs
; expect four byte-sized vertex reads.
; FUNC-LABEL: {{^}}v4i8_arg:
; HSA-GFX9: kernarg_segment_byte_size = 12
; HSA-GFX9: kernarg_segment_alignment = 4
; EGCM: VTX_READ_8
; EGCM: VTX_READ_8
; EGCM: VTX_READ_8
; EGCM: VTX_READ_8

; GCN-DAG: s_load_dwordx2 s
; GCN-DAG: s_load_dword s
define amdgpu_kernel void @v4i8_arg(<4 x i8> addrspace(1)* %out, <4 x i8> %in) {
entry:
  store <4 x i8> %in, <4 x i8> addrspace(1)* %out
  ret void
}

; A <4 x i16> kernel argument stored whole. Each GCN run expects two
; s_load_dwordx2 instructions in either order, one at the first kernarg
; offset and one at the vector offset, and the r600 runs expect four 16-bit
; vertex reads.
; FUNC-LABEL: {{^}}v4i16_arg:
; HSA-GFX9: kernarg_segment_byte_size = 16
; HSA-GFX9: kernarg_segment_alignment = 4
; EGCM: VTX_READ_16
; EGCM: VTX_READ_16
; EGCM: VTX_READ_16
; EGCM: VTX_READ_16

; SI-DAG: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0xb
; SI-DAG: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x9

; MESA-VI-DAG: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x24
; MESA-VI-DAG: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x2c

; HSA-GFX9-DAG: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x0
; HSA-GFX9-DAG: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x8
define amdgpu_kernel void @v4i16_arg(<4 x i16> addrspace(1)* %out, <4 x i16> %in) {
entry:
  store <4 x i16> %in, <4 x i16> addrspace(1)* %out
  ret void
}

; A <4 x i32> kernel argument stored whole. Each GCN run expects one
; s_load_dwordx4 at its argument offset (0xd, 0x34 or 0x10) and the r600 runs
; expect reads of KC0[3].Y through KC0[4].X.
; FUNC-LABEL: {{^}}v4i32_arg:
; HSA-GFX9: kernarg_segment_byte_size = 32
; HSA-GFX9: kernarg_segment_alignment = 4
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X

; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
; MESA-VI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x34
; HSA-GFX9: s_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x10
define amdgpu_kernel void @v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 x i32> %in) nounwind {
entry:
  store <4 x i32> %in, <4 x i32> addrspace(1)* %out, align 4
  ret void
}

; A <4 x float> kernel argument stored whole. The expectations match the
; <4 x i32> case, with one s_load_dwordx4 per GCN run and reads of KC0[3].Y
; through KC0[4].X on r600.
; FUNC-LABEL: {{^}}v4f32_arg:
; HSA-GFX9: kernarg_segment_byte_size = 32
; HSA-GFX9: kernarg_segment_alignment = 4
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X
; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
; MESA-VI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x34
; HSA-GFX9: s_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x10
define amdgpu_kernel void @v4f32_arg(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) nounwind {
entry:
  store <4 x float> %in, <4 x float> addrspace(1)* %out, align 4
  ret void
}

; A <8 x i8> kernel argument stored whole. The SI and VI prefixes both expect
; two back-to-back s_load_dwordx2 instructions. SI additionally forbids
; buffer, flat or global loads, and VI forbids lshl, _or and _sdwa
; instructions. The r600 runs expect eight byte-sized vertex reads.
; FIXME: Lots of unpack and re-pack junk on VI
; NOTE(review): the FIXME above looks stale, since the negative VI checks
; below reject the shift/or/sdwa repacking it describes - confirm.
; FUNC-LABEL: {{^}}v8i8_arg:
; HSA-GFX9: kernarg_segment_byte_size = 16
; HSA-GFX9: kernarg_segment_alignment = 4
; EGCM: VTX_READ_8
; EGCM: VTX_READ_8
; EGCM: VTX_READ_8
; EGCM: VTX_READ_8
; EGCM: VTX_READ_8
; EGCM: VTX_READ_8
; EGCM: VTX_READ_8
; EGCM: VTX_READ_8

; SI-NOT: {{buffer|flat|global}}_load
; SI: s_load_dwordx2 s
; SI-NEXT: s_load_dwordx2 s
; SI-NOT: {{buffer|flat|global}}_load

; VI: s_load_dwordx2 s
; VI-NEXT: s_load_dwordx2 s
; VI-NOT: lshl
; VI-NOT: _or
; VI-NOT: _sdwa
define amdgpu_kernel void @v8i8_arg(<8 x i8> addrspace(1)* %out, <8 x i8> %in) {
entry:
  store <8 x i8> %in, <8 x i8> addrspace(1)* %out
  ret void
}

; A <8 x i16> kernel argument stored whole. SI expects an s_load_dwordx4
; immediately followed by an s_load_dwordx2 and no buffer, flat or global
; loads. MESA-VI and the amdhsa run pin the s_load_dwordx4 offset (0x34 and
; 0x10). The r600 runs expect eight 16-bit vertex reads.
; FUNC-LABEL: {{^}}v8i16_arg:
; HSA-GFX9: kernarg_segment_byte_size = 32
; HSA-GFX9: kernarg_segment_alignment = 4
; EGCM: VTX_READ_16
; EGCM: VTX_READ_16
; EGCM: VTX_READ_16
; EGCM: VTX_READ_16
; EGCM: VTX_READ_16
; EGCM: VTX_READ_16
; EGCM: VTX_READ_16
; EGCM: VTX_READ_16

; SI: s_load_dwordx4
; SI-NEXT: s_load_dwordx2
; SI-NOT: {{buffer|flat|global}}_load

; MESA-VI: s_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x34

; HSA-GFX9: s_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x10
define amdgpu_kernel void @v8i16_arg(<8 x i16> addrspace(1)* %out, <8 x i16> %in) {
entry:
  store <8 x i16> %in, <8 x i16> addrspace(1)* %out
  ret void
}

; A <8 x i32> kernel argument stored whole. Each GCN run expects one
; s_load_dwordx8 at its argument offset (0x11, 0x44 or 0x20) and the r600
; runs expect reads of KC0[4].Y through KC0[6].X.
; FUNC-LABEL: {{^}}v8i32_arg:
; HSA-GFX9: kernarg_segment_byte_size = 64
; HSA-GFX9: kernarg_segment_alignment = 5
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].X
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Y
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X

; SI: s_load_dwordx8 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x11
; MESA-VI: s_load_dwordx8 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x44
; HSA-GFX9: s_load_dwordx8 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x20
define amdgpu_kernel void @v8i32_arg(<8 x i32> addrspace(1)* nocapture %out, <8 x i32> %in) nounwind {
entry:
  store <8 x i32> %in, <8 x i32> addrspace(1)* %out, align 4
  ret void
}

; A <8 x float> kernel argument stored whole. Each GCN run expects one
; s_load_dwordx8 at its argument offset and the r600 runs expect reads of
; KC0[4].Y through KC0[6].X.
; NOTE(review): the MESA-VI and amdhsa load checks mirror the <8 x i32> test
; and have not been validated against llc output - confirm before landing.
; FUNC-LABEL: {{^}}v8f32_arg:
; HSA-GFX9: kernarg_segment_byte_size = 64
; HSA-GFX9: kernarg_segment_alignment = 5
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].X
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Y
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
; SI: s_load_dwordx8 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x11
; MESA-VI: s_load_dwordx8 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x44
; HSA-GFX9: s_load_dwordx8 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x20
define amdgpu_kernel void @v8f32_arg(<8 x float> addrspace(1)* nocapture %out, <8 x float> %in) nounwind {
entry:
  store <8 x float> %in, <8 x float> addrspace(1)* %out, align 4
  ret void
}

Matt Arsenault90083d32018-06-07 09:54:49 +0000472; FIXME: Pack/repack on VI
473
Marek Olsakfa6607d2015-02-11 14:26:46 +0000474; FUNC-LABEL: {{^}}v16i8_arg:
Matt Arsenaultfab7d272018-12-07 20:57:43 +0000475; HSA-GFX9: kernarg_segment_byte_size = 32
476; HSA-GFX9: kernarg_segment_alignment = 4
Matt Arsenault72b0e382018-07-28 12:34:25 +0000477; EGCM: VTX_READ_8
478; EGCM: VTX_READ_8
479; EGCM: VTX_READ_8
480; EGCM: VTX_READ_8
481; EGCM: VTX_READ_8
482; EGCM: VTX_READ_8
483; EGCM: VTX_READ_8
484; EGCM: VTX_READ_8
485; EGCM: VTX_READ_8
486; EGCM: VTX_READ_8
487; EGCM: VTX_READ_8
488; EGCM: VTX_READ_8
489; EGCM: VTX_READ_8
490; EGCM: VTX_READ_8
491; EGCM: VTX_READ_8
492; EGCM: VTX_READ_8
Matt Arsenault7b4826e2018-05-30 16:17:51 +0000493
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000494; SI: s_load_dwordx4 s
495; SI-NEXT: s_load_dwordx2 s
Matt Arsenault90083d32018-06-07 09:54:49 +0000496; SI-NOT: {{buffer|flat|global}}_load
Matt Arsenault7b4826e2018-05-30 16:17:51 +0000497
Matt Arsenault90083d32018-06-07 09:54:49 +0000498
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000499; VI: s_load_dwordx4 s
500; VI-NOT: shr
501; VI-NOT: shl
502; VI-NOT: _sdwa
503; VI-NOT: _or_
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000504define amdgpu_kernel void @v16i8_arg(<16 x i8> addrspace(1)* %out, <16 x i8> %in) {
Tom Stellardaf775432013-10-23 00:44:32 +0000505entry:
506 store <16 x i8> %in, <16 x i8> addrspace(1)* %out
507 ret void
508}
509
; A <16 x i16> kernel argument stored whole. SI expects an s_load_dwordx8
; immediately followed by an s_load_dwordx2 and no buffer, flat or global
; loads. MESA-VI and the amdhsa run pin the s_load_dwordx8 offset (0x44 and
; 0x20). The r600 runs expect sixteen 16-bit vertex reads.
; FUNC-LABEL: {{^}}v16i16_arg:
; HSA-GFX9: kernarg_segment_byte_size = 64
; HSA-GFX9: kernarg_segment_alignment = 5
; EGCM: VTX_READ_16
; EGCM: VTX_READ_16
; EGCM: VTX_READ_16
; EGCM: VTX_READ_16
; EGCM: VTX_READ_16

; EGCM: VTX_READ_16
; EGCM: VTX_READ_16
; EGCM: VTX_READ_16
; EGCM: VTX_READ_16
; EGCM: VTX_READ_16
; EGCM: VTX_READ_16
; EGCM: VTX_READ_16
; EGCM: VTX_READ_16
; EGCM: VTX_READ_16
; EGCM: VTX_READ_16
; EGCM: VTX_READ_16

; SI: s_load_dwordx8 s
; SI-NEXT: s_load_dwordx2 s
; SI-NOT: {{buffer|flat|global}}_load

; MESA-VI: s_load_dwordx8 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x44

; HSA-GFX9: s_load_dwordx8 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x20
define amdgpu_kernel void @v16i16_arg(<16 x i16> addrspace(1)* %out, <16 x i16> %in) {
entry:
  store <16 x i16> %in, <16 x i16> addrspace(1)* %out
  ret void
}

; A <16 x i32> kernel argument stored whole. Each GCN run expects one
; s_load_dwordx16 at its argument offset (0x19, 0x64 or 0x40) and the r600
; runs expect reads of KC0[6].Y through KC0[10].X.
; FUNC-LABEL: {{^}}v16i32_arg:
; HSA-GFX9: kernarg_segment_byte_size = 128
; HSA-GFX9: kernarg_segment_alignment = 6
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].X
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Y
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Z
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].W
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].X
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Y
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Z
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].W
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].X
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Y
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
; SI: s_load_dwordx16 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x19
; MESA-VI: s_load_dwordx16 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x64
; HSA-GFX9: s_load_dwordx16 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x40
define amdgpu_kernel void @v16i32_arg(<16 x i32> addrspace(1)* nocapture %out, <16 x i32> %in) nounwind {
entry:
  store <16 x i32> %in, <16 x i32> addrspace(1)* %out, align 4
  ret void
}

; Floating-point twin of the <16 x i32> argument test: a <16 x float>
; kernel argument following a global pointer, stored straight through %out.
; The expected kernarg size, KC0 slots and scalar load offsets are the same
; as for the integer vector.
; FUNC-LABEL: {{^}}v16f32_arg:
; HSA-GFX9: kernarg_segment_byte_size = 128
; HSA-GFX9: kernarg_segment_alignment = 6
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].X
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Y
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Z
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].W
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].X
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Y
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Z
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].W
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].X
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Y
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
; SI: s_load_dwordx16 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x19
; MESA-VI: s_load_dwordx16 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x64
; HSA-GFX9: s_load_dwordx16 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x40
define amdgpu_kernel void @v16f32_arg(<16 x float> addrspace(1)* nocapture %out, <16 x float> %in) nounwind {
  ; Single store of the incoming vector; no explicit entry label is needed.
  store <16 x float> %in, <16 x float> addrspace(1)* %out, align 4
  ret void
}
Matt Arsenault74ef2772014-08-13 18:14:11 +0000600
; An i64 kernel argument following a global pointer, stored through %out
; with 8-byte alignment. The checks expect the pointer and the i64 to be
; fetched together with one 4-dword scalar load.
; FUNC-LABEL: {{^}}kernel_arg_i64:
; MESA-VI: s_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[0:1], 0x24
; HSA-GFX9: s_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x0

; MESA-GCN: buffer_store_dwordx2
define amdgpu_kernel void @kernel_arg_i64(i64 addrspace(1)* %out, i64 %a) nounwind {
entry:
  store i64 %a, i64 addrspace(1)* %out, align 8
  ret void
}
610
; A double kernel argument following a global pointer, stored through %out.
; As with the i64 case, the checks expect a single 4-dword scalar load that
; covers both the pointer and the 64-bit value.
; FUNC-LABEL: {{^}}f64_kernel_arg:
; SI-DAG: s_load_dwordx4 s[{{[0-9]:[0-9]}}], s[0:1], 0x9
; MESA-VI-DAG: s_load_dwordx4 s[{{[0-9]:[0-9]}}], s[0:1], 0x24
; MESA-GCN: buffer_store_dwordx2

; HSA-GFX9: s_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x0
define amdgpu_kernel void @f64_kernel_arg(double addrspace(1)* %out, double %in) {
  ; Store uses the default alignment for double.
  store double %in, double addrspace(1)* %out
  ret void
}
622
Tom Stellard79243d92014-10-01 17:15:17 +0000623; XFUNC-LABEL: {{^}}kernel_arg_v1i64:
Marek Olsakfa6607d2015-02-11 14:26:46 +0000624; XGCN: s_load_dwordx2
625; XGCN: s_load_dwordx2
626; XGCN: buffer_store_dwordx2
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000627; define amdgpu_kernel void @kernel_arg_v1i64(<1 x i64> addrspace(1)* %out, <1 x i64> %a) nounwind {
Matt Arsenault74ef2772014-08-13 18:14:11 +0000628; store <1 x i64> %a, <1 x i64> addrspace(1)* %out, align 8
629; ret void
630; }
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000631
; An i65 kernel argument (a non-power-of-two integer wider than 64 bits)
; following a global pointer. The kernel stores it through %out. The HSA
; checks expect a 24-byte kernarg segment and two 2-dword scalar loads, at
; offsets 0x0 and 0x8.
; FUNC-LABEL: {{^}}i65_arg:
; HSA-GFX9: kernarg_segment_byte_size = 24
; HSA-GFX9: kernarg_segment_alignment = 4
; HSA-GFX9: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x0
; HSA-GFX9: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x8
define amdgpu_kernel void @i65_arg(i65 addrspace(1)* nocapture %out, i65 %in) nounwind {
  ; Single store of the wide integer; no explicit entry label is needed.
  store i65 %in, i65 addrspace(1)* %out, align 4
  ret void
}
642
; An i1 kernel argument following a global pointer, stored as-is through
; %out. The checks expect the argument to be loaded as a full dword, masked
; with a scalar AND, and written back with a byte store.
; FUNC-LABEL: {{^}}i1_arg:
; HSA-GFX9: kernarg_segment_byte_size = 12
; HSA-GFX9: kernarg_segment_alignment = 4

; GCN: s_load_dword s
; GCN: s_and_b32
; GCN: {{buffer|flat|global}}_store_byte
define amdgpu_kernel void @i1_arg(i1 addrspace(1)* %out, i1 %x) nounwind {
entry:
  store i1 %x, i1 addrspace(1)* %out, align 1
  ret void
}
654
; An i1 kernel argument zero-extended to i32 and stored through %out.
; The dword store is checked under the common GCN prefix so that it is
; verified by every GCN configuration. The opcode alternation mirrors the
; sibling i1 tests, because the store instruction differs between the
; configurations (buffer, flat or global).
; FUNC-LABEL: {{^}}i1_arg_zext_i32:
; HSA-GFX9: kernarg_segment_byte_size = 12
; HSA-GFX9: kernarg_segment_alignment = 4

; GCN: s_load_dword
; GCN: {{buffer|flat|global}}_store_dword
define amdgpu_kernel void @i1_arg_zext_i32(i32 addrspace(1)* %out, i1 %x) nounwind {
  %ext = zext i1 %x to i32
  store i32 %ext, i32 addrspace(1)* %out, align 4
  ret void
}
666
; An i1 kernel argument zero-extended to i64 and stored through %out. The
; checks expect a single dword scalar load for the argument and a 2-dword
; store of the widened value.
; FUNC-LABEL: {{^}}i1_arg_zext_i64:
; HSA-GFX9: kernarg_segment_byte_size = 12
; HSA-GFX9: kernarg_segment_alignment = 4

; GCN: s_load_dword s
; GCN: {{buffer|flat|global}}_store_dwordx2
define amdgpu_kernel void @i1_arg_zext_i64(i64 addrspace(1)* %out, i1 %x) nounwind {
entry:
  %x.zext = zext i1 %x to i64
  store i64 %x.zext, i64 addrspace(1)* %out, align 8
  ret void
}
678
; An i1 kernel argument sign-extended to i32 and stored through %out. The
; checks expect a dword scalar load for the argument followed by a dword
; store; the store opcode differs between configurations, hence the
; alternation.
; FUNC-LABEL: {{^}}i1_arg_sext_i32:
; HSA-GFX9: kernarg_segment_byte_size = 12
; HSA-GFX9: kernarg_segment_alignment = 4

; GCN: s_load_dword
; GCN: {{buffer|flat|global}}_store_dword
define amdgpu_kernel void @i1_arg_sext_i32(i32 addrspace(1)* %out, i1 %x) nounwind {
  %ext = sext i1 %x to i32
  ; Pointer operand type is written with the usual space between the
  ; pointee type and the address-space qualifier.
  store i32 %ext, i32 addrspace(1)* %out, align 4
  ret void
}
690
; An i1 kernel argument sign-extended to i64 and stored through %out. The
; checks expect the extension to be done with a 64-bit scalar bitfield
; extract between the dword load and the 2-dword store.
; FUNC-LABEL: {{^}}i1_arg_sext_i64:
; HSA-GFX9: kernarg_segment_byte_size = 12
; HSA-GFX9: kernarg_segment_alignment = 4

; GCN: s_load_dword
; GCN: s_bfe_i64
; GCN: {{buffer|flat|global}}_store_dwordx2
define amdgpu_kernel void @i1_arg_sext_i64(i64 addrspace(1)* %out, i1 %x) nounwind {
entry:
  %x.sext = sext i1 %x to i64
  store i64 %x.sext, i64 addrspace(1)* %out, align 8
  ret void
}
Matt Arsenault29f30372018-07-05 17:01:20 +0000703
; A kernel whose only argument is an empty struct. The body just returns;
; the HSA check expects the kernarg segment to be zero bytes.
; FUNC-LABEL: {{^}}empty_struct_arg:
; HSA-GFX9: kernarg_segment_byte_size = 0
define amdgpu_kernel void @empty_struct_arg({} %in) nounwind {
entry:
  ret void
}
709
710; The correct load offsets for these:
711; load 4 from 0,
712; load 8 from 8
713; load 4 from 24
714; load 8 from 32
715
716; With the SelectionDAG argument lowering, the alignments for the
; struct members are not properly considered, making these wrong.
718
719; FIXME: Total argument size is computed wrong
; Two {i32, i64} struct arguments separated by an unnamed i8 argument.
; Every struct member is extracted and written out with a volatile store
; to a null global pointer, so each member needs its own argument load.
; The HSA checks pin those loads to offsets 0x0, 0x8, 0x18 and 0x20.
; FUNC-LABEL: {{^}}struct_argument_alignment:
; HSA-GFX9: kernarg_segment_byte_size = 40
; HSA-GFX9: s_load_dword s{{[0-9]+}}, s[4:5], 0x0
; HSA-GFX9: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x8
; HSA-GFX9: s_load_dword s{{[0-9]+}}, s[4:5], 0x18
; HSA-GFX9: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x20
define amdgpu_kernel void @struct_argument_alignment({i32, i64} %arg0, i8, {i32, i64} %arg1) {
  ; Members of the first struct.
  %arg0.i32 = extractvalue {i32, i64} %arg0, 0
  %arg0.i64 = extractvalue {i32, i64} %arg0, 1
  ; Members of the second struct.
  %arg1.i32 = extractvalue {i32, i64} %arg1, 0
  %arg1.i64 = extractvalue {i32, i64} %arg1, 1
  ; Volatile stores keep every extracted member live.
  store volatile i32 %arg0.i32, i32 addrspace(1)* null
  store volatile i64 %arg0.i64, i64 addrspace(1)* null
  store volatile i32 %arg1.i32, i32 addrspace(1)* null
  store volatile i64 %arg1.i64, i64 addrspace(1)* null
  ret void
}
737
; Packed variant of the struct alignment test: two <{i32, i64}> arguments
; separated by an unnamed i8. There is no padding between the i8 and the
; struct that follows it, but the total size is rounded up at the end to a
; multiple of 4 bytes. The checks expect the second struct's members to be
; fetched with global loads at byte offsets 13 and 17, and the first
; struct's members with scalar loads at 0x0 and 0x4.
; FUNC-LABEL: {{^}}packed_struct_argument_alignment:
; HSA-GFX9: kernarg_segment_byte_size = 28
; HSA-GFX9: global_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, off offset:13
; HSA-GFX9: global_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, off offset:17
; HSA-GFX9: s_load_dword s{{[0-9]+}}, s[4:5], 0x0
; HSA-GFX9: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x4
define amdgpu_kernel void @packed_struct_argument_alignment(<{i32, i64}> %arg0, i8, <{i32, i64}> %arg1) {
  ; Members of the first packed struct.
  %arg0.i32 = extractvalue <{i32, i64}> %arg0, 0
  %arg0.i64 = extractvalue <{i32, i64}> %arg0, 1
  ; Members of the second packed struct.
  %arg1.i32 = extractvalue <{i32, i64}> %arg1, 0
  %arg1.i64 = extractvalue <{i32, i64}> %arg1, 1
  ; Volatile stores keep every extracted member live.
  store volatile i32 %arg0.i32, i32 addrspace(1)* null
  store volatile i64 %arg0.i64, i64 addrspace(1)* null
  store volatile i32 %arg1.i32, i32 addrspace(1)* null
  store volatile i64 %arg1.i64, i64 addrspace(1)* null
  ret void
}
Matt Arsenault4bec7d42018-07-20 09:05:08 +0000757
; Same layout as the plain struct alignment test, extended with a second
; unnamed i8 and a trailing <4 x i32> argument. The extra check expects the
; vector to be loaded with one 4-dword scalar load at offset 0x30, after
; the four struct-member loads.
; GCN-LABEL: {{^}}struct_argument_alignment_after:
; HSA-GFX9: kernarg_segment_byte_size = 64
; HSA-GFX9: s_load_dword s{{[0-9]+}}, s[4:5], 0x0
; HSA-GFX9: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x8
; HSA-GFX9: s_load_dword s{{[0-9]+}}, s[4:5], 0x18
; HSA-GFX9: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x20
; HSA-GFX9: s_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x30
define amdgpu_kernel void @struct_argument_alignment_after({i32, i64} %arg0, i8, {i32, i64} %arg2, i8, <4 x i32> %arg4) {
  ; Members of the first struct.
  %arg0.i32 = extractvalue {i32, i64} %arg0, 0
  %arg0.i64 = extractvalue {i32, i64} %arg0, 1
  ; Members of the second struct.
  %arg2.i32 = extractvalue {i32, i64} %arg2, 0
  %arg2.i64 = extractvalue {i32, i64} %arg2, 1
  ; Volatile stores keep every member and the trailing vector live.
  store volatile i32 %arg0.i32, i32 addrspace(1)* null
  store volatile i64 %arg0.i64, i64 addrspace(1)* null
  store volatile i32 %arg2.i32, i32 addrspace(1)* null
  store volatile i64 %arg2.i64, i64 addrspace(1)* null
  store volatile <4 x i32> %arg4, <4 x i32> addrspace(1)* null
  ret void
}
777
; An i16 argument followed by a [3 x i32] array argument. Both are written
; out with volatile stores to an undef global pointer. The checks expect
; four separate dword scalar loads, at offsets 0x0, 0x4, 0x8 and 0xc.
; GCN-LABEL: {{^}}array_3xi32:
; HSA-GFX9: s_load_dword s{{[0-9]+}}, s[4:5], 0x0
; HSA-GFX9: s_load_dword s{{[0-9]+}}, s[4:5], 0x4
; HSA-GFX9: s_load_dword s{{[0-9]+}}, s[4:5], 0x8
; HSA-GFX9: s_load_dword s{{[0-9]+}}, s[4:5], 0xc
define amdgpu_kernel void @array_3xi32(i16 %arg0, [3 x i32] %arg1) {
entry:
  ; Scalar argument first, then the whole array as one aggregate store.
  store volatile i16 %arg0, i16 addrspace(1)* undef
  store volatile [3 x i32] %arg1, [3 x i32] addrspace(1)* undef
  ret void
}
788
; An i8 argument followed by a [3 x i16] array argument, both written out
; with volatile stores to an undef global pointer. The checks currently
; expect the array elements to be fetched with global ushort loads at byte
; offsets 2, 4 and 6.
; FIXME: Why not all scalar loads?
; GCN-LABEL: {{^}}array_3xi16:
; HSA-GFX9: global_load_ushort v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, off offset:2
; HSA-GFX9: global_load_ushort v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, off offset:4
; HSA-GFX9: global_load_ushort v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, off offset:6
define amdgpu_kernel void @array_3xi16(i8 %arg0, [3 x i16] %arg1) {
entry:
  ; Scalar argument first, then the whole array as one aggregate store.
  store volatile i8 %arg0, i8 addrspace(1)* undef
  store volatile [3 x i16] %arg1, [3 x i16] addrspace(1)* undef
  ret void
}
Matt Arsenaultb5613ec2018-12-07 22:12:17 +0000799
; An unnamed i8 argument followed by a one-element [1 x i8] array. The
; single array element is extracted and written out with a volatile store
; to an undef global pointer. The check expects it to be read with a global
; ubyte load at byte offset 1.
; GCN-LABEL: {{^}}small_array_round_down_offset:
; HSA-GFX9: global_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, off offset:1
define amdgpu_kernel void @small_array_round_down_offset(i8, [1 x i8] %arg) {
  %elt = extractvalue [1 x i8] %arg, 0
  store volatile i8 %elt, i8 addrspace(1)* undef
  ret void
}