; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; FIXME: Manually added checks for metadata nodes at bottom
; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -o - -amdgpu-lower-kernel-arguments %s | FileCheck -check-prefix=HSA %s
; RUN: opt -mtriple=amdgcn-- -S -o - -amdgpu-lower-kernel-arguments %s | FileCheck -check-prefix=MESA %s

; Kernel with no arguments: the pass leaves the body untouched.
define amdgpu_kernel void @kern_noargs() {
; HSA-LABEL: @kern_noargs(
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_noargs(
; MESA-NEXT:    ret void
;
  ret void
}

16define amdgpu_kernel void @kern_i8(i8 %arg) #0 {
17; HSA-LABEL: @kern_i8(
18; HSA-NEXT: [[KERN_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +000019; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I8_KERNARG_SEGMENT]], i64 0
20; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
21; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
22; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
23; HSA-NEXT: store i8 [[TMP2]], i8 addrspace(1)* undef, align 1
Matt Arsenault8c4a3522018-06-26 19:10:00 +000024; HSA-NEXT: ret void
25;
26; MESA-LABEL: @kern_i8(
27; MESA-NEXT: [[KERN_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +000028; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I8_KERNARG_SEGMENT]], i64 36
29; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
30; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
31; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
32; MESA-NEXT: store i8 [[TMP2]], i8 addrspace(1)* undef, align 1
Matt Arsenault8c4a3522018-06-26 19:10:00 +000033; MESA-NEXT: ret void
34;
35 store i8 %arg, i8 addrspace(1)* undef, align 1
36 ret void
37}
38
; i16 argument: loaded as an aligned i32 dword from the kernarg segment, then truncated.
define amdgpu_kernel void @kern_i16(i16 %arg) #0 {
; HSA-LABEL: @kern_i16(
; HSA-NEXT:    [[KERN_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I16_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; HSA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
; HSA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
; HSA-NEXT:    store i16 [[TMP2]], i16 addrspace(1)* undef, align 1
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_i16(
; MESA-NEXT:    [[KERN_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I16_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; MESA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
; MESA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
; MESA-NEXT:    store i16 [[TMP2]], i16 addrspace(1)* undef, align 1
; MESA-NEXT:    ret void
;
  store i16 %arg, i16 addrspace(1)* undef, align 1
  ret void
}

; half argument: loaded as an i32 dword, truncated to i16, then bitcast to half.
define amdgpu_kernel void @kern_f16(half %arg) #0 {
; HSA-LABEL: @kern_f16(
; HSA-NEXT:    [[KERN_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_F16_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; HSA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
; HSA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
; HSA-NEXT:    [[ARG_LOAD:%.*]] = bitcast i16 [[TMP2]] to half
; HSA-NEXT:    store half [[ARG_LOAD]], half addrspace(1)* undef, align 1
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_f16(
; MESA-NEXT:    [[KERN_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_F16_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; MESA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
; MESA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
; MESA-NEXT:    [[ARG_LOAD:%.*]] = bitcast i16 [[TMP2]] to half
; MESA-NEXT:    store half [[ARG_LOAD]], half addrspace(1)* undef, align 1
; MESA-NEXT:    ret void
;
  store half %arg, half addrspace(1)* undef, align 1
  ret void
}

; zeroext i8 argument: lowered the same as plain i8 (dword load + trunc).
define amdgpu_kernel void @kern_zeroext_i8(i8 zeroext %arg) #0 {
; HSA-LABEL: @kern_zeroext_i8(
; HSA-NEXT:    [[KERN_ZEROEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ZEROEXT_I8_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; HSA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
; HSA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
; HSA-NEXT:    store i8 [[TMP2]], i8 addrspace(1)* undef, align 1
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_zeroext_i8(
; MESA-NEXT:    [[KERN_ZEROEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ZEROEXT_I8_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; MESA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
; MESA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
; MESA-NEXT:    store i8 [[TMP2]], i8 addrspace(1)* undef, align 1
; MESA-NEXT:    ret void
;
  store i8 %arg, i8 addrspace(1)* undef, align 1
  ret void
}

; zeroext i16 argument: lowered the same as plain i16 (dword load + trunc).
define amdgpu_kernel void @kern_zeroext_i16(i16 zeroext %arg) #0 {
; HSA-LABEL: @kern_zeroext_i16(
; HSA-NEXT:    [[KERN_ZEROEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ZEROEXT_I16_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; HSA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
; HSA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
; HSA-NEXT:    store i16 [[TMP2]], i16 addrspace(1)* undef, align 1
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_zeroext_i16(
; MESA-NEXT:    [[KERN_ZEROEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ZEROEXT_I16_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; MESA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
; MESA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
; MESA-NEXT:    store i16 [[TMP2]], i16 addrspace(1)* undef, align 1
; MESA-NEXT:    ret void
;
  store i16 %arg, i16 addrspace(1)* undef, align 1
  ret void
}

; signext i8 argument: lowered the same as plain i8 (dword load + trunc).
define amdgpu_kernel void @kern_signext_i8(i8 signext %arg) #0 {
; HSA-LABEL: @kern_signext_i8(
; HSA-NEXT:    [[KERN_SIGNEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_SIGNEXT_I8_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; HSA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
; HSA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
; HSA-NEXT:    store i8 [[TMP2]], i8 addrspace(1)* undef, align 1
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_signext_i8(
; MESA-NEXT:    [[KERN_SIGNEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_SIGNEXT_I8_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; MESA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
; MESA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
; MESA-NEXT:    store i8 [[TMP2]], i8 addrspace(1)* undef, align 1
; MESA-NEXT:    ret void
;
  store i8 %arg, i8 addrspace(1)* undef, align 1
  ret void
}

; signext i16 argument: lowered the same as plain i16 (dword load + trunc).
define amdgpu_kernel void @kern_signext_i16(i16 signext %arg) #0 {
; HSA-LABEL: @kern_signext_i16(
; HSA-NEXT:    [[KERN_SIGNEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_SIGNEXT_I16_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; HSA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
; HSA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
; HSA-NEXT:    store i16 [[TMP2]], i16 addrspace(1)* undef, align 1
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_signext_i16(
; MESA-NEXT:    [[KERN_SIGNEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_SIGNEXT_I16_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; MESA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
; MESA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
; MESA-NEXT:    store i16 [[TMP2]], i16 addrspace(1)* undef, align 1
; MESA-NEXT:    ret void
;
  store i16 %arg, i16 addrspace(1)* undef, align 1
  ret void
}

; Two packed i8 arguments sharing one dword: second arg extracted with lshr 8.
define amdgpu_kernel void @kern_i8_i8(i8 %arg0, i8 %arg1) {
; HSA-LABEL: @kern_i8_i8(
; HSA-NEXT:    [[KERN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I8_I8_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; HSA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
; HSA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
; HSA-NEXT:    [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I8_I8_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; HSA-NEXT:    [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
; HSA-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
; HSA-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
; HSA-NEXT:    store volatile i8 [[TMP2]], i8 addrspace(1)* undef, align 1
; HSA-NEXT:    store volatile i8 [[TMP5]], i8 addrspace(1)* undef, align 1
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_i8_i8(
; MESA-NEXT:    [[KERN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I8_I8_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; MESA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
; MESA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
; MESA-NEXT:    [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I8_I8_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; MESA-NEXT:    [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
; MESA-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
; MESA-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
; MESA-NEXT:    store volatile i8 [[TMP2]], i8 addrspace(1)* undef, align 1
; MESA-NEXT:    store volatile i8 [[TMP5]], i8 addrspace(1)* undef, align 1
; MESA-NEXT:    ret void
;
  store volatile i8 %arg0, i8 addrspace(1)* undef, align 1
  store volatile i8 %arg1, i8 addrspace(1)* undef, align 1
  ret void
}

; <3 x i8> argument: dword load truncated to i24, then bitcast to the vector.
define amdgpu_kernel void @kern_v3i8(<3 x i8> %arg) {
; HSA-LABEL: @kern_v3i8(
; HSA-NEXT:    [[KERN_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V3I8_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; HSA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
; HSA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i24
; HSA-NEXT:    [[ARG_LOAD:%.*]] = bitcast i24 [[TMP2]] to <3 x i8>
; HSA-NEXT:    store <3 x i8> [[ARG_LOAD]], <3 x i8> addrspace(1)* undef, align 4
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_v3i8(
; MESA-NEXT:    [[KERN_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V3I8_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; MESA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
; MESA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i24
; MESA-NEXT:    [[ARG_LOAD:%.*]] = bitcast i24 [[TMP2]] to <3 x i8>
; MESA-NEXT:    store <3 x i8> [[ARG_LOAD]], <3 x i8> addrspace(1)* undef, align 4
; MESA-NEXT:    ret void
;
  store <3 x i8> %arg, <3 x i8> addrspace(1)* undef, align 4
  ret void
}

; i24 argument: loaded as a dword and truncated to i24.
define amdgpu_kernel void @kern_i24(i24 %arg0) {
; HSA-LABEL: @kern_i24(
; HSA-NEXT:    [[KERN_I24_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I24_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; HSA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
; HSA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i24
; HSA-NEXT:    store i24 [[TMP2]], i24 addrspace(1)* undef
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_i24(
; MESA-NEXT:    [[KERN_I24_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I24_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; MESA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
; MESA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i24
; MESA-NEXT:    store i24 [[TMP2]], i24 addrspace(1)* undef
; MESA-NEXT:    ret void
;
  store i24 %arg0, i24 addrspace(1)* undef
  ret void
}

; Naturally-aligned i32 argument: loaded directly, no align-down/trunc needed.
define amdgpu_kernel void @kern_i32(i32 %arg0) {
; HSA-LABEL: @kern_i32(
; HSA-NEXT:    [[KERN_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I32_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)*
; HSA-NEXT:    [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
; HSA-NEXT:    store i32 [[ARG0_LOAD]], i32 addrspace(1)* undef
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_i32(
; MESA-NEXT:    [[KERN_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I32_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)*
; MESA-NEXT:    [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
; MESA-NEXT:    store i32 [[ARG0_LOAD]], i32 addrspace(1)* undef
; MESA-NEXT:    ret void
;
  store i32 %arg0, i32 addrspace(1)* undef
  ret void
}

; Naturally-aligned float argument: loaded directly as float.
define amdgpu_kernel void @kern_f32(float %arg0) {
; HSA-LABEL: @kern_f32(
; HSA-NEXT:    [[KERN_F32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_F32_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to float addrspace(4)*
; HSA-NEXT:    [[ARG0_LOAD:%.*]] = load float, float addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
; HSA-NEXT:    store float [[ARG0_LOAD]], float addrspace(1)* undef
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_f32(
; MESA-NEXT:    [[KERN_F32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_F32_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to float addrspace(4)*
; MESA-NEXT:    [[ARG0_LOAD:%.*]] = load float, float addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
; MESA-NEXT:    store float [[ARG0_LOAD]], float addrspace(1)* undef
; MESA-NEXT:    ret void
;
  store float %arg0, float addrspace(1)* undef
  ret void
}

; <3 x i32> argument: over-read as <4 x i32>, then shuffled down to 3 elements.
define amdgpu_kernel void @kern_v3i32(<3 x i32> %arg0) {
; HSA-LABEL: @kern_v3i32(
; HSA-NEXT:    [[KERN_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(16) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V3I32_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to <4 x i32> addrspace(4)*
; HSA-NEXT:    [[TMP:%.*]] = load <4 x i32>, <4 x i32> addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
; HSA-NEXT:    [[ARG0_LOAD:%.*]] = shufflevector <4 x i32> [[TMP]], <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
; HSA-NEXT:    store <3 x i32> [[ARG0_LOAD]], <3 x i32> addrspace(1)* undef, align 4
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_v3i32(
; MESA-NEXT:    [[KERN_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(52) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V3I32_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to <4 x i32> addrspace(4)*
; MESA-NEXT:    [[TMP:%.*]] = load <4 x i32>, <4 x i32> addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
; MESA-NEXT:    [[ARG0_LOAD:%.*]] = shufflevector <4 x i32> [[TMP]], <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
; MESA-NEXT:    store <3 x i32> [[ARG0_LOAD]], <3 x i32> addrspace(1)* undef, align 4
; MESA-NEXT:    ret void
;
  store <3 x i32> %arg0, <3 x i32> addrspace(1)* undef, align 4
  ret void
}

; <8 x i32> argument: segment alignment rises to 32 to match the argument.
define amdgpu_kernel void @kern_v8i32(<8 x i32> %arg) #0 {
; HSA-LABEL: @kern_v8i32(
; HSA-NEXT:    [[KERN_V8I32_KERNARG_SEGMENT:%.*]] = call nonnull align 32 dereferenceable(32) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V8I32_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <8 x i32> addrspace(4)*
; HSA-NEXT:    [[ARG_LOAD:%.*]] = load <8 x i32>, <8 x i32> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
; HSA-NEXT:    store <8 x i32> [[ARG_LOAD]], <8 x i32> addrspace(1)* undef
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_v8i32(
; MESA-NEXT:    [[KERN_V8I32_KERNARG_SEGMENT:%.*]] = call nonnull align 32 dereferenceable(68) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V8I32_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <8 x i32> addrspace(4)*
; MESA-NEXT:    [[ARG_LOAD:%.*]] = load <8 x i32>, <8 x i32> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
; MESA-NEXT:    store <8 x i32> [[ARG_LOAD]], <8 x i32> addrspace(1)* undef
; MESA-NEXT:    ret void
;
  store <8 x i32> %arg, <8 x i32> addrspace(1)* undef
  ret void
}

; <8 x i64> argument: segment alignment rises to 64.
define amdgpu_kernel void @kern_v8i64(<8 x i64> %arg) #0 {
; HSA-LABEL: @kern_v8i64(
; HSA-NEXT:    [[KERN_V8I64_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V8I64_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <8 x i64> addrspace(4)*
; HSA-NEXT:    [[ARG_LOAD:%.*]] = load <8 x i64>, <8 x i64> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
; HSA-NEXT:    store <8 x i64> [[ARG_LOAD]], <8 x i64> addrspace(1)* undef
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_v8i64(
; MESA-NEXT:    [[KERN_V8I64_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(100) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V8I64_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <8 x i64> addrspace(4)*
; MESA-NEXT:    [[ARG_LOAD:%.*]] = load <8 x i64>, <8 x i64> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
; MESA-NEXT:    store <8 x i64> [[ARG_LOAD]], <8 x i64> addrspace(1)* undef
; MESA-NEXT:    ret void
;
  store <8 x i64> %arg, <8 x i64> addrspace(1)* undef
  ret void
}

; <16 x i64> argument: segment alignment rises to 128.
define amdgpu_kernel void @kern_v16i64(<16 x i64> %arg) #0 {
; HSA-LABEL: @kern_v16i64(
; HSA-NEXT:    [[KERN_V16I64_KERNARG_SEGMENT:%.*]] = call nonnull align 128 dereferenceable(128) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V16I64_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <16 x i64> addrspace(4)*
; HSA-NEXT:    [[ARG_LOAD:%.*]] = load <16 x i64>, <16 x i64> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
; HSA-NEXT:    store <16 x i64> [[ARG_LOAD]], <16 x i64> addrspace(1)* undef
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_v16i64(
; MESA-NEXT:    [[KERN_V16I64_KERNARG_SEGMENT:%.*]] = call nonnull align 128 dereferenceable(164) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V16I64_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <16 x i64> addrspace(4)*
; MESA-NEXT:    [[ARG_LOAD:%.*]] = load <16 x i64>, <16 x i64> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
; MESA-NEXT:    store <16 x i64> [[ARG_LOAD]], <16 x i64> addrspace(1)* undef
; MESA-NEXT:    ret void
;
  store <16 x i64> %arg, <16 x i64> addrspace(1)* undef
  ret void
}

; Mixed i32 + <3 x i32>: vector arg is padded to offset 16 and over-read as <4 x i32>.
define amdgpu_kernel void @kern_i32_v3i32(i32 %arg0, <3 x i32> %arg1) {
; HSA-LABEL: @kern_i32_v3i32(
; HSA-NEXT:    [[KERN_I32_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(32) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I32_V3I32_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)*
; HSA-NEXT:    [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
; HSA-NEXT:    [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I32_V3I32_KERNARG_SEGMENT]], i64 16
; HSA-NEXT:    [[ARG1_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET]] to <4 x i32> addrspace(4)*
; HSA-NEXT:    [[TMP:%.*]] = load <4 x i32>, <4 x i32> addrspace(4)* [[ARG1_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
; HSA-NEXT:    [[ARG1_LOAD:%.*]] = shufflevector <4 x i32> [[TMP]], <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
; HSA-NEXT:    store i32 [[ARG0_LOAD]], i32 addrspace(1)* undef
; HSA-NEXT:    store <3 x i32> [[ARG1_LOAD]], <3 x i32> addrspace(1)* undef, align 4
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_i32_v3i32(
; MESA-NEXT:    [[KERN_I32_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(68) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I32_V3I32_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)*
; MESA-NEXT:    [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
; MESA-NEXT:    [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I32_V3I32_KERNARG_SEGMENT]], i64 52
; MESA-NEXT:    [[ARG1_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET]] to <4 x i32> addrspace(4)*
; MESA-NEXT:    [[TMP:%.*]] = load <4 x i32>, <4 x i32> addrspace(4)* [[ARG1_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
; MESA-NEXT:    [[ARG1_LOAD:%.*]] = shufflevector <4 x i32> [[TMP]], <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
; MESA-NEXT:    store i32 [[ARG0_LOAD]], i32 addrspace(1)* undef
; MESA-NEXT:    store <3 x i32> [[ARG1_LOAD]], <3 x i32> addrspace(1)* undef, align 4
; MESA-NEXT:    ret void
;
  store i32 %arg0, i32 addrspace(1)* undef
  store <3 x i32> %arg1, <3 x i32> addrspace(1)* undef, align 4
  ret void
}

; Aggregate kernarg types used by the struct tests below.
423%struct.a = type { i32, i8, [4 x i8] }
424%struct.b.packed = type { i8, i32, [3 x i16], <2 x double> }
425
; Checks that a by-value struct kernarg is loaded directly as the struct type
; from the kernarg segment (offset 0 / align 16 on HSA, offset 36 / align 4 on
; MESA, which reserves 36 bytes for its ABI data before user args).
426define amdgpu_kernel void @kern_struct_a(%struct.a %arg0) {
427; HSA-LABEL: @kern_struct_a(
428; HSA-NEXT: [[KERN_STRUCT_A_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(12) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000429; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_STRUCT_A_KERNARG_SEGMENT]], i64 0
430; HSA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to [[STRUCT_A:%.*]] addrspace(4)*
431; HSA-NEXT: [[ARG0_LOAD:%.*]] = load [[STRUCT_A]], [[STRUCT_A]] addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000432; HSA-NEXT: store [[STRUCT_A]] %arg0.load, [[STRUCT_A]] addrspace(1)* undef
433; HSA-NEXT: ret void
434;
435; MESA-LABEL: @kern_struct_a(
436; MESA-NEXT: [[KERN_STRUCT_A_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(48) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000437; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_STRUCT_A_KERNARG_SEGMENT]], i64 36
438; MESA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to [[STRUCT_A:%.*]] addrspace(4)*
439; MESA-NEXT: [[ARG0_LOAD:%.*]] = load [[STRUCT_A]], [[STRUCT_A]] addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000440; MESA-NEXT: store [[STRUCT_A]] %arg0.load, [[STRUCT_A]] addrspace(1)* undef
441; MESA-NEXT: ret void
442;
443 store %struct.a %arg0, %struct.a addrspace(1)* undef
444 ret void
445}
446
; Same as @kern_struct_a but for a struct containing a <2 x double>: the whole
; aggregate is still loaded as one struct load from the kernarg segment.
447define amdgpu_kernel void @kern_struct_b_packed(%struct.b.packed %arg0) #0 {
448; HSA-LABEL: @kern_struct_b_packed(
449; HSA-NEXT: [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(32) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000450; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT]], i64 0
451; HSA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to [[STRUCT_B_PACKED:%.*]] addrspace(4)*
452; HSA-NEXT: [[ARG0_LOAD:%.*]] = load [[STRUCT_B_PACKED]], [[STRUCT_B_PACKED]] addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000453; HSA-NEXT: store [[STRUCT_B_PACKED]] %arg0.load, [[STRUCT_B_PACKED]] addrspace(1)* undef
454; HSA-NEXT: ret void
455;
456; MESA-LABEL: @kern_struct_b_packed(
457; MESA-NEXT: [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(68) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000458; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT]], i64 36
459; MESA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to [[STRUCT_B_PACKED:%.*]] addrspace(4)*
460; MESA-NEXT: [[ARG0_LOAD:%.*]] = load [[STRUCT_B_PACKED]], [[STRUCT_B_PACKED]] addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000461; MESA-NEXT: store [[STRUCT_B_PACKED]] %arg0.load, [[STRUCT_B_PACKED]] addrspace(1)* undef
462; MESA-NEXT: ret void
463;
464 store %struct.b.packed %arg0, %struct.b.packed addrspace(1)* undef
465 ret void
466}
467
; Checks that attribute set #1 (presumably "amdgpu-implicitarg-num-bytes" —
; defined outside this chunk, confirm at file bottom) grows the dereferenceable
; size of the kernarg segment: 48 bytes on HSA, 44 on MESA, for a single i32 arg.
468define amdgpu_kernel void @kern_implicit_arg_num_bytes(i32 %arg0) #1 {
469; HSA-LABEL: @kern_implicit_arg_num_bytes(
470; HSA-NEXT: [[KERN_IMPLICIT_ARG_NUM_BYTES_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(48) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000471; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_IMPLICIT_ARG_NUM_BYTES_KERNARG_SEGMENT]], i64 0
472; HSA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)*
473; HSA-NEXT: [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000474; HSA-NEXT: store i32 [[ARG0_LOAD]], i32 addrspace(1)* undef
475; HSA-NEXT: ret void
476;
477; MESA-LABEL: @kern_implicit_arg_num_bytes(
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000478; MESA-NEXT: [[KERN_IMPLICIT_ARG_NUM_BYTES_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
479; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_IMPLICIT_ARG_NUM_BYTES_KERNARG_SEGMENT]], i64 36
480; MESA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)*
481; MESA-NEXT: [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000482; MESA-NEXT: store i32 [[ARG0_LOAD]], i32 addrspace(1)* undef
483; MESA-NEXT: ret void
484;
485 store i32 %arg0, i32 addrspace(1)* undef
486 ret void
487}
488
; A leading unnamed <16 x i32> arg raises the segment alignment to 64; checks
; that %arg1's offset (64 HSA / 100 MESA) and the implicit-arg-extended segment
; size (112 / 108) are computed without over-aligning to the aggregate.
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000489define amdgpu_kernel void @kernel_implicitarg_no_struct_align(<16 x i32>, i32 %arg1) #1 {
490; HSA-LABEL: @kernel_implicitarg_no_struct_align(
491; HSA-NEXT: [[KERNEL_IMPLICITARG_NO_STRUCT_ALIGN_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(112) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
492; HSA-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERNEL_IMPLICITARG_NO_STRUCT_ALIGN_KERNARG_SEGMENT]], i64 64
493; HSA-NEXT: [[ARG1_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET]] to i32 addrspace(4)*
494; HSA-NEXT: [[ARG1_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
495; HSA-NEXT: store i32 [[ARG1_LOAD]], i32 addrspace(1)* undef
496; HSA-NEXT: ret void
497;
498; MESA-LABEL: @kernel_implicitarg_no_struct_align(
499; MESA-NEXT: [[KERNEL_IMPLICITARG_NO_STRUCT_ALIGN_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(108) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
500; MESA-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERNEL_IMPLICITARG_NO_STRUCT_ALIGN_KERNARG_SEGMENT]], i64 100
501; MESA-NEXT: [[ARG1_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET]] to i32 addrspace(4)*
502; MESA-NEXT: [[ARG1_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
503; MESA-NEXT: store i32 [[ARG1_LOAD]], i32 addrspace(1)* undef
504; MESA-NEXT: ret void
505;
506 store i32 %arg1, i32 addrspace(1)* undef
507 ret void
508}
509
; Checks that an addrspace(3) (LDS) pointer argument is still loaded from the
; kernarg segment like any other pointer-sized value.
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000510define amdgpu_kernel void @kern_lds_ptr(i32 addrspace(3)* %lds) #0 {
511; HSA-LABEL: @kern_lds_ptr(
512; HSA-NEXT: [[KERN_LDS_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000513; HSA-NEXT: [[LDS_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_LDS_PTR_KERNARG_SEGMENT]], i64 0
514; HSA-NEXT: [[LDS_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[LDS_KERNARG_OFFSET]] to i32 addrspace(3)* addrspace(4)*
515; HSA-NEXT: [[LDS_LOAD:%.*]] = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(4)* [[LDS_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000516; HSA-NEXT: store i32 0, i32 addrspace(3)* [[LDS_LOAD]], align 4
517; HSA-NEXT: ret void
518;
519; MESA-LABEL: @kern_lds_ptr(
520; MESA-NEXT: [[KERN_LDS_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000521; MESA-NEXT: [[LDS_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_LDS_PTR_KERNARG_SEGMENT]], i64 36
522; MESA-NEXT: [[LDS_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[LDS_KERNARG_OFFSET]] to i32 addrspace(3)* addrspace(4)*
523; MESA-NEXT: [[LDS_LOAD:%.*]] = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(4)* [[LDS_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000524; MESA-NEXT: store i32 0, i32 addrspace(3)* [[LDS_LOAD]], align 4
525; MESA-NEXT: ret void
526;
527 store i32 0, i32 addrspace(3)* %lds, align 4
528 ret void
529}
530
; Same LDS-pointer kernel but with attribute set #2 (defined outside this
; chunk — presumably an SI/older-target config, confirm at file bottom): the
; argument is NOT replaced with a kernarg load; the original %lds is used
; directly and only the segment intrinsic call is emitted.
531define amdgpu_kernel void @kern_lds_ptr_si(i32 addrspace(3)* %lds) #2 {
532; HSA-LABEL: @kern_lds_ptr_si(
533; HSA-NEXT: [[KERN_LDS_PTR_SI_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000534; HSA-NEXT: store i32 0, i32 addrspace(3)* [[LDS:%.*]], align 4
535; HSA-NEXT: ret void
536;
537; MESA-LABEL: @kern_lds_ptr_si(
538; MESA-NEXT: [[KERN_LDS_PTR_SI_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000539; MESA-NEXT: store i32 0, i32 addrspace(3)* [[LDS:%.*]], align 4
540; MESA-NEXT: ret void
541;
542 store i32 0, i32 addrspace(3)* %lds, align 4
543 ret void
544}
545
; Two packed i8 args sharing one dword: each is materialized by re-loading the
; same aligned i32 slot, then lshr (8 for %arg1) + trunc to i8.
546define amdgpu_kernel void @kern_realign_i8_i8(i8 %arg0, i8 %arg1) #0 {
547; HSA-LABEL: @kern_realign_i8_i8(
548; HSA-NEXT: [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000549; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT]], i64 0
550; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
551; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
552; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
553; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT]], i64 0
554; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
555; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
556; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
557; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
558; HSA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef
559; HSA-NEXT: store volatile i8 [[TMP5]], i8 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000560; HSA-NEXT: ret void
561;
562; MESA-LABEL: @kern_realign_i8_i8(
563; MESA-NEXT: [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000564; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT]], i64 36
565; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
566; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
567; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
568; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT]], i64 36
569; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
570; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
571; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000572; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000573; MESA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000574; MESA-NEXT: store volatile i8 [[TMP5]], i8 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000575; MESA-NEXT: ret void
576;
577 store volatile i8 %arg0, i8 addrspace(1)* undef
578 store volatile i8 %arg1, i8 addrspace(1)* undef
579 ret void
580}
581
; Three packed i8 args in one dword: extracted via lshr by 0/8/16 then trunc,
; each from its own re-load of the same aligned i32 slot.
582define amdgpu_kernel void @kern_realign_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2) #0 {
583; HSA-LABEL: @kern_realign_i8_i8_i8(
584; HSA-NEXT: [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000585; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 0
586; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
587; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
588; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
589; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 0
590; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
591; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
592; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
593; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
594; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 0
595; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
596; HSA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
597; HSA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000598; HSA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000599; HSA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef
600; HSA-NEXT: store volatile i8 [[TMP5]], i8 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000601; HSA-NEXT: store volatile i8 [[TMP8]], i8 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000602; HSA-NEXT: ret void
603;
604; MESA-LABEL: @kern_realign_i8_i8_i8(
605; MESA-NEXT: [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000606; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 36
607; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
608; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
609; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
610; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 36
611; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
612; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
613; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000614; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000615; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 36
616; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
617; MESA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
618; MESA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
619; MESA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8
620; MESA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000621; MESA-NEXT: store volatile i8 [[TMP5]], i8 addrspace(1)* undef
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000622; MESA-NEXT: store volatile i8 [[TMP8]], i8 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000623; MESA-NEXT: ret void
624;
625 store volatile i8 %arg0, i8 addrspace(1)* undef
626 store volatile i8 %arg1, i8 addrspace(1)* undef
627 store volatile i8 %arg2, i8 addrspace(1)* undef
628 ret void
629}
630
; Four packed i8 args filling one dword: extracted via lshr by 0/8/16/24 then
; trunc, each from its own re-load of the same aligned i32 slot.
631define amdgpu_kernel void @kern_realign_i8_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2, i8 %arg3) #0 {
632; HSA-LABEL: @kern_realign_i8_i8_i8_i8(
633; HSA-NEXT: [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000634; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0
635; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
636; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
637; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
638; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0
639; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
640; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
641; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
642; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
643; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0
644; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
645; HSA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
646; HSA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000647; HSA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000648; HSA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0
649; HSA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
650; HSA-NEXT: [[TMP9:%.*]] = load i32, i32 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
651; HSA-NEXT: [[TMP10:%.*]] = lshr i32 [[TMP9]], 24
652; HSA-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP10]] to i8
653; HSA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef
654; HSA-NEXT: store volatile i8 [[TMP5]], i8 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000655; HSA-NEXT: store volatile i8 [[TMP8]], i8 addrspace(1)* undef
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000656; HSA-NEXT: store volatile i8 [[TMP11]], i8 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000657; HSA-NEXT: ret void
658;
659; MESA-LABEL: @kern_realign_i8_i8_i8_i8(
660; MESA-NEXT: [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000661; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36
662; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
663; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
664; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
665; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36
666; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
667; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
668; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000669; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000670; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36
671; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
672; MESA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
673; MESA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
674; MESA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8
675; MESA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36
676; MESA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
677; MESA-NEXT: [[TMP9:%.*]] = load i32, i32 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
678; MESA-NEXT: [[TMP10:%.*]] = lshr i32 [[TMP9]], 24
679; MESA-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP10]] to i8
680; MESA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000681; MESA-NEXT: store volatile i8 [[TMP5]], i8 addrspace(1)* undef
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000682; MESA-NEXT: store volatile i8 [[TMP8]], i8 addrspace(1)* undef
683; MESA-NEXT: store volatile i8 [[TMP11]], i8 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000684; MESA-NEXT: ret void
685;
686 store volatile i8 %arg0, i8 addrspace(1)* undef
687 store volatile i8 %arg1, i8 addrspace(1)* undef
688 store volatile i8 %arg2, i8 addrspace(1)* undef
689 store volatile i8 %arg3, i8 addrspace(1)* undef
690 ret void
691}
692
; i8 followed by <3 x i8>: the vector lands in the next dword slot (offset 4
; HSA / 40 MESA), is loaded as i32, truncated to i24, and bitcast to <3 x i8>.
693define amdgpu_kernel void @kern_realign_i8_v3i8(i8 %arg0, <3 x i8> %arg1) #0 {
694; HSA-LABEL: @kern_realign_i8_v3i8(
695; HSA-NEXT: [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000696; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT]], i64 0
697; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
698; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
699; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
700; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT]], i64 4
701; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
702; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
703; HSA-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i24
704; HSA-NEXT: [[ARG1_LOAD:%.*]] = bitcast i24 [[TMP4]] to <3 x i8>
705; HSA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000706; HSA-NEXT: store volatile <3 x i8> [[ARG1_LOAD]], <3 x i8> addrspace(1)* undef
707; HSA-NEXT: ret void
708;
709; MESA-LABEL: @kern_realign_i8_v3i8(
710; MESA-NEXT: [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000711; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT]], i64 36
712; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
713; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
714; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
715; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT]], i64 40
716; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
717; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 8, !invariant.load !0
718; MESA-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i24
719; MESA-NEXT: [[ARG1_LOAD:%.*]] = bitcast i24 [[TMP4]] to <3 x i8>
720; MESA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000721; MESA-NEXT: store volatile <3 x i8> [[ARG1_LOAD]], <3 x i8> addrspace(1)* undef
722; MESA-NEXT: ret void
723;
724 store volatile i8 %arg0, i8 addrspace(1)* undef
725 store volatile <3 x i8> %arg1, <3 x i8> addrspace(1)* undef
726 ret void
727}
728
; i8 + i16 packed in one dword: the i16 is naturally aligned at byte 2, so it
; is extracted with lshr 16 + trunc to i16 from a re-load of the same slot.
729define amdgpu_kernel void @kern_realign_i8_i16(i8 %arg0, i16 %arg1) #0 {
730; HSA-LABEL: @kern_realign_i8_i16(
731; HSA-NEXT: [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000732; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT]], i64 0
733; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
734; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
735; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
736; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT]], i64 0
737; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
738; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
739; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 16
740; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
741; HSA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef
742; HSA-NEXT: store volatile i16 [[TMP5]], i16 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000743; HSA-NEXT: ret void
744;
745; MESA-LABEL: @kern_realign_i8_i16(
746; MESA-NEXT: [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000747; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT]], i64 36
748; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
749; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
750; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
751; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT]], i64 36
752; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
753; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
754; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 16
755; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
756; MESA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef
757; MESA-NEXT: store volatile i16 [[TMP5]], i16 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000758; MESA-NEXT: ret void
759;
760 store volatile i8 %arg0, i8 addrspace(1)* undef
761 store volatile i16 %arg1, i16 addrspace(1)* undef
762 ret void
763}
764
; Two i1 args share the first kernarg dword: both sides load one i32 (base
; offset 0 for HSA, 36 for MESA) and extract arg1 with lshr 8 + trunc to i1.
765define amdgpu_kernel void @kern_realign_i1_i1(i1 %arg0, i1 %arg1) #0 {
766; HSA-LABEL: @kern_realign_i1_i1(
767; HSA-NEXT: [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000768; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT]], i64 0
769; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
770; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
771; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
772; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT]], i64 0
773; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
774; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
775; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
776; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1
777; HSA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef
778; HSA-NEXT: store volatile i1 [[TMP5]], i1 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000779; HSA-NEXT: ret void
780;
781; MESA-LABEL: @kern_realign_i1_i1(
782; MESA-NEXT: [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000783; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT]], i64 36
784; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
785; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
786; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
787; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT]], i64 36
788; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
789; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
790; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000791; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000792; MESA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000793; MESA-NEXT: store volatile i1 [[TMP5]], i1 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000794; MESA-NEXT: ret void
795;
796 store volatile i1 %arg0, i1 addrspace(1)* undef
797 store volatile i1 %arg1, i1 addrspace(1)* undef
798 ret void
799}
800
; Three i1 args packed in one dword: arg1 and arg2 are extracted from the
; same i32 load with lshr 8 and lshr 16 respectively, then trunc'd to i1.
801define amdgpu_kernel void @kern_realign_i1_i1_i1(i1 %arg0, i1 %arg1, i1 %arg2) #0 {
802; HSA-LABEL: @kern_realign_i1_i1_i1(
803; HSA-NEXT: [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000804; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 0
805; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
806; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
807; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
808; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 0
809; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
810; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
811; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
812; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1
813; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 0
814; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
815; HSA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
816; HSA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000817; HSA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i1
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000818; HSA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef
819; HSA-NEXT: store volatile i1 [[TMP5]], i1 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000820; HSA-NEXT: store volatile i1 [[TMP8]], i1 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000821; HSA-NEXT: ret void
822;
823; MESA-LABEL: @kern_realign_i1_i1_i1(
824; MESA-NEXT: [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000825; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 36
826; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
827; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
828; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
829; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 36
830; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
831; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
832; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000833; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000834; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 36
835; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
836; MESA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
837; MESA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
838; MESA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i1
839; MESA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000840; MESA-NEXT: store volatile i1 [[TMP5]], i1 addrspace(1)* undef
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000841; MESA-NEXT: store volatile i1 [[TMP8]], i1 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000842; MESA-NEXT: ret void
843;
844 store volatile i1 %arg0, i1 addrspace(1)* undef
845 store volatile i1 %arg1, i1 addrspace(1)* undef
846 store volatile i1 %arg2, i1 addrspace(1)* undef
847 ret void
848}
849
; Four i1 args fill one dword: extracted at bit offsets 0, 8, 16, and 24
; (lshr + trunc) from repeated i32 loads of the same kernarg offset.
850define amdgpu_kernel void @kern_realign_i1_i1_i1_i1(i1 %arg0, i1 %arg1, i1 %arg2, i1 %arg3) #0 {
851; HSA-LABEL: @kern_realign_i1_i1_i1_i1(
852; HSA-NEXT: [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000853; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 0
854; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
855; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
856; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
857; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 0
858; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
859; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
860; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
861; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1
862; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 0
863; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
864; HSA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
865; HSA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000866; HSA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i1
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000867; HSA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 0
868; HSA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
869; HSA-NEXT: [[TMP9:%.*]] = load i32, i32 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
870; HSA-NEXT: [[TMP10:%.*]] = lshr i32 [[TMP9]], 24
871; HSA-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP10]] to i1
872; HSA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef
873; HSA-NEXT: store volatile i1 [[TMP5]], i1 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000874; HSA-NEXT: store volatile i1 [[TMP8]], i1 addrspace(1)* undef
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000875; HSA-NEXT: store volatile i1 [[TMP11]], i1 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000876; HSA-NEXT: ret void
877;
878; MESA-LABEL: @kern_realign_i1_i1_i1_i1(
879; MESA-NEXT: [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000880; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 36
881; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
882; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
883; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
884; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 36
885; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
886; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
887; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000888; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000889; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 36
890; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
891; MESA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
892; MESA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
893; MESA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i1
894; MESA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 36
895; MESA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
896; MESA-NEXT: [[TMP9:%.*]] = load i32, i32 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
897; MESA-NEXT: [[TMP10:%.*]] = lshr i32 [[TMP9]], 24
898; MESA-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP10]] to i1
899; MESA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000900; MESA-NEXT: store volatile i1 [[TMP5]], i1 addrspace(1)* undef
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000901; MESA-NEXT: store volatile i1 [[TMP8]], i1 addrspace(1)* undef
902; MESA-NEXT: store volatile i1 [[TMP11]], i1 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000903; MESA-NEXT: ret void
904;
905 store volatile i1 %arg0, i1 addrspace(1)* undef
906 store volatile i1 %arg1, i1 addrspace(1)* undef
907 store volatile i1 %arg2, i1 addrspace(1)* undef
908 store volatile i1 %arg3, i1 addrspace(1)* undef
909 ret void
910}
911
; i1 followed by <3 x i1>: the vector is placed in the next dword (offset 4
; HSA / 40 MESA) and reconstructed via trunc to i3 + bitcast to <3 x i1>.
912define amdgpu_kernel void @kern_realign_i1_v3i1(i1 %arg0, <3 x i1> %arg1) #0 {
913; HSA-LABEL: @kern_realign_i1_v3i1(
914; HSA-NEXT: [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000915; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT]], i64 0
916; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
917; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
918; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
919; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT]], i64 4
920; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
921; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
922; HSA-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
923; HSA-NEXT: [[ARG1_LOAD:%.*]] = bitcast i3 [[TMP4]] to <3 x i1>
924; HSA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000925; HSA-NEXT: store volatile <3 x i1> [[ARG1_LOAD]], <3 x i1> addrspace(1)* undef
926; HSA-NEXT: ret void
927;
928; MESA-LABEL: @kern_realign_i1_v3i1(
929; MESA-NEXT: [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000930; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT]], i64 36
931; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
932; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
933; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
934; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT]], i64 40
935; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
936; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 8, !invariant.load !0
937; MESA-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
938; MESA-NEXT: [[ARG1_LOAD:%.*]] = bitcast i3 [[TMP4]] to <3 x i1>
939; MESA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000940; MESA-NEXT: store volatile <3 x i1> [[ARG1_LOAD]], <3 x i1> addrspace(1)* undef
941; MESA-NEXT: ret void
942;
943 store volatile i1 %arg0, i1 addrspace(1)* undef
944 store volatile <3 x i1> %arg1, <3 x i1> addrspace(1)* undef
945 ret void
946}
947
; i1 then i16 in the same dword: the i16 sits at byte offset 2 and is
; extracted with lshr 16 + trunc to i16 from the shared i32 load.
948define amdgpu_kernel void @kern_realign_i1_i16(i1 %arg0, i16 %arg1) #0 {
949; HSA-LABEL: @kern_realign_i1_i16(
950; HSA-NEXT: [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000951; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT]], i64 0
952; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
953; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
954; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
955; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT]], i64 0
956; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
957; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
958; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 16
959; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
960; HSA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef
961; HSA-NEXT: store volatile i16 [[TMP5]], i16 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000962; HSA-NEXT: ret void
963;
964; MESA-LABEL: @kern_realign_i1_i16(
965; MESA-NEXT: [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000966; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT]], i64 36
967; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
968; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
969; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
970; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT]], i64 36
971; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
972; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
973; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 16
974; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
975; MESA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef
976; MESA-NEXT: store volatile i16 [[TMP5]], i16 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000977; MESA-NEXT: ret void
978;
979 store volatile i1 %arg0, i1 addrspace(1)* undef
980 store volatile i16 %arg1, i16 addrspace(1)* undef
981 ret void
982}
983
; Eight i8 slots across two dwords: args 0-3 come from the first i32 at bit
; offsets 0/8/16/24, args 5-7 from the second i32 (offset 4 HSA / 40 MESA)
; at 8/16/24. Note %arg4 is never stored, so no load is generated for it.
984define amdgpu_kernel void @kern_realign_i8_i8_i8_i8_i8_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2, i8 %arg3, i8 %arg4, i8 %arg5, i8 %arg6, i8 %arg7) #0 {
985; HSA-LABEL: @kern_realign_i8_i8_i8_i8_i8_i8_i8_i8(
986; HSA-NEXT: [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000987; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0
988; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
989; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
990; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
991; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0
992; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
993; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
994; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
995; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
996; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0
997; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
998; HSA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
999; HSA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
Matt Arsenault8c4a3522018-06-26 19:10:00 +00001000; HSA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001001; HSA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0
1002; HSA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
1003; HSA-NEXT: [[TMP9:%.*]] = load i32, i32 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
1004; HSA-NEXT: [[TMP10:%.*]] = lshr i32 [[TMP9]], 24
1005; HSA-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP10]] to i8
1006; HSA-NEXT: [[ARG5_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 4
1007; HSA-NEXT: [[ARG5_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG5_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
1008; HSA-NEXT: [[TMP12:%.*]] = load i32, i32 addrspace(4)* [[ARG5_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
1009; HSA-NEXT: [[TMP13:%.*]] = lshr i32 [[TMP12]], 8
1010; HSA-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i8
1011; HSA-NEXT: [[ARG6_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 4
1012; HSA-NEXT: [[ARG6_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG6_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
1013; HSA-NEXT: [[TMP15:%.*]] = load i32, i32 addrspace(4)* [[ARG6_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
1014; HSA-NEXT: [[TMP16:%.*]] = lshr i32 [[TMP15]], 16
1015; HSA-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i8
1016; HSA-NEXT: [[ARG7_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 4
1017; HSA-NEXT: [[ARG7_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG7_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
1018; HSA-NEXT: [[TMP18:%.*]] = load i32, i32 addrspace(4)* [[ARG7_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
1019; HSA-NEXT: [[TMP19:%.*]] = lshr i32 [[TMP18]], 24
Matt Arsenault8c4a3522018-06-26 19:10:00 +00001020; HSA-NEXT: [[TMP20:%.*]] = trunc i32 [[TMP19]] to i8
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001021; HSA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef
1022; HSA-NEXT: store volatile i8 [[TMP5]], i8 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +00001023; HSA-NEXT: store volatile i8 [[TMP8]], i8 addrspace(1)* undef
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001024; HSA-NEXT: store volatile i8 [[TMP11]], i8 addrspace(1)* undef
1025; HSA-NEXT: store volatile i8 [[TMP14]], i8 addrspace(1)* undef
1026; HSA-NEXT: store volatile i8 [[TMP17]], i8 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +00001027; HSA-NEXT: store volatile i8 [[TMP20]], i8 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +00001028; HSA-NEXT: ret void
1029;
1030; MESA-LABEL: @kern_realign_i8_i8_i8_i8_i8_i8_i8_i8(
1031; MESA-NEXT: [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001032; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36
1033; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
1034; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
1035; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
1036; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36
1037; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
1038; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
1039; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
Matt Arsenault8c4a3522018-06-26 19:10:00 +00001040; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001041; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36
1042; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
1043; MESA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
1044; MESA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
1045; MESA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8
1046; MESA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36
1047; MESA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
1048; MESA-NEXT: [[TMP9:%.*]] = load i32, i32 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
1049; MESA-NEXT: [[TMP10:%.*]] = lshr i32 [[TMP9]], 24
1050; MESA-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP10]] to i8
1051; MESA-NEXT: [[ARG5_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 40
1052; MESA-NEXT: [[ARG5_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG5_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
1053; MESA-NEXT: [[TMP12:%.*]] = load i32, i32 addrspace(4)* [[ARG5_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 8, !invariant.load !0
1054; MESA-NEXT: [[TMP13:%.*]] = lshr i32 [[TMP12]], 8
1055; MESA-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i8
1056; MESA-NEXT: [[ARG6_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 40
1057; MESA-NEXT: [[ARG6_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG6_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
1058; MESA-NEXT: [[TMP15:%.*]] = load i32, i32 addrspace(4)* [[ARG6_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 8, !invariant.load !0
1059; MESA-NEXT: [[TMP16:%.*]] = lshr i32 [[TMP15]], 16
Matt Arsenault8c4a3522018-06-26 19:10:00 +00001060; MESA-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i8
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001061; MESA-NEXT: [[ARG7_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 40
1062; MESA-NEXT: [[ARG7_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG7_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
1063; MESA-NEXT: [[TMP18:%.*]] = load i32, i32 addrspace(4)* [[ARG7_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 8, !invariant.load !0
1064; MESA-NEXT: [[TMP19:%.*]] = lshr i32 [[TMP18]], 24
1065; MESA-NEXT: [[TMP20:%.*]] = trunc i32 [[TMP19]] to i8
1066; MESA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +00001067; MESA-NEXT: store volatile i8 [[TMP5]], i8 addrspace(1)* undef
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001068; MESA-NEXT: store volatile i8 [[TMP8]], i8 addrspace(1)* undef
1069; MESA-NEXT: store volatile i8 [[TMP11]], i8 addrspace(1)* undef
1070; MESA-NEXT: store volatile i8 [[TMP14]], i8 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +00001071; MESA-NEXT: store volatile i8 [[TMP17]], i8 addrspace(1)* undef
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001072; MESA-NEXT: store volatile i8 [[TMP20]], i8 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +00001073; MESA-NEXT: ret void
1074;
1075 store volatile i8 %arg0, i8 addrspace(1)* undef
1076 store volatile i8 %arg1, i8 addrspace(1)* undef
1077 store volatile i8 %arg2, i8 addrspace(1)* undef
1078 store volatile i8 %arg3, i8 addrspace(1)* undef
1079 store volatile i8 %arg5, i8 addrspace(1)* undef
1080 store volatile i8 %arg6, i8 addrspace(1)* undef
1081 store volatile i8 %arg7, i8 addrspace(1)* undef
1082 ret void
1083}
1084
; Two half args in one dword: each is recovered as i16 (arg1 via lshr 16)
; and then bitcast i16 -> half before the volatile stores.
1085define amdgpu_kernel void @kern_realign_f16_f16(half %arg0, half %arg1) #0 {
1086; HSA-LABEL: @kern_realign_f16_f16(
1087; HSA-NEXT: [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001088; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT]], i64 0
1089; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
1090; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
1091; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
1092; HSA-NEXT: [[ARG0_LOAD:%.*]] = bitcast i16 [[TMP2]] to half
1093; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT]], i64 0
1094; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
1095; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
1096; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 16
1097; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
1098; HSA-NEXT: [[ARG1_LOAD:%.*]] = bitcast i16 [[TMP5]] to half
Matt Arsenault8c4a3522018-06-26 19:10:00 +00001099; HSA-NEXT: store volatile half [[ARG0_LOAD]], half addrspace(1)* undef
1100; HSA-NEXT: store volatile half [[ARG1_LOAD]], half addrspace(1)* undef
1101; HSA-NEXT: ret void
1102;
1103; MESA-LABEL: @kern_realign_f16_f16(
1104; MESA-NEXT: [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001105; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT]], i64 36
1106; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
1107; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
1108; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
1109; MESA-NEXT: [[ARG0_LOAD:%.*]] = bitcast i16 [[TMP2]] to half
1110; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT]], i64 36
1111; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
1112; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
1113; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 16
Matt Arsenault8c4a3522018-06-26 19:10:00 +00001114; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001115; MESA-NEXT: [[ARG1_LOAD:%.*]] = bitcast i16 [[TMP5]] to half
Matt Arsenault8c4a3522018-06-26 19:10:00 +00001116; MESA-NEXT: store volatile half [[ARG0_LOAD]], half addrspace(1)* undef
1117; MESA-NEXT: store volatile half [[ARG1_LOAD]], half addrspace(1)* undef
1118; MESA-NEXT: ret void
1119;
1120 store volatile half %arg0, half addrspace(1)* undef
1121 store volatile half %arg1, half addrspace(1)* undef
1122 ret void
1123}
1124
; Plain global-pointer kernel argument: replaced by a load from the kernarg
; segment (byte offset 0 in the HSA run; 36 in the MESA run, which reserves
; 36 implicit-argument bytes first, hence dereferenceable(44) vs (8)).
1125define amdgpu_kernel void @kern_global_ptr(i8 addrspace(1)* %ptr) #0 {
1126; HSA-LABEL: @kern_global_ptr(
1127; HSA-NEXT: [[KERN_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001128; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_KERNARG_SEGMENT]], i64 0
1129; HSA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
1130; HSA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
Matt Arsenault8c4a3522018-06-26 19:10:00 +00001131; HSA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef
1132; HSA-NEXT: ret void
1133;
1134; MESA-LABEL: @kern_global_ptr(
1135; MESA-NEXT: [[KERN_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001136; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_KERNARG_SEGMENT]], i64 36
1137; MESA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
1138; MESA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
Matt Arsenault8c4a3522018-06-26 19:10:00 +00001139; MESA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef
1140; MESA-NEXT: ret void
1141;
1142 store volatile i8 addrspace(1)* %ptr, i8 addrspace(1)* addrspace(1)* undef
1143 ret void
1144}
1145
; The dereferenceable(42) parameter attribute is preserved across lowering as
; !dereferenceable metadata on the kernarg load (node !1 = i64 42, checked at
; the bottom of the file).
1146define amdgpu_kernel void @kern_global_ptr_dereferencable(i8 addrspace(1)* dereferenceable(42) %ptr) #0 {
1147; HSA-LABEL: @kern_global_ptr_dereferencable(
1148; HSA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001149; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT]], i64 0
1150; HSA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
1151; HSA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0, !dereferenceable !1
Matt Arsenault8c4a3522018-06-26 19:10:00 +00001152; HSA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef
1153; HSA-NEXT: ret void
1154;
1155; MESA-LABEL: @kern_global_ptr_dereferencable(
1156; MESA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001157; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT]], i64 36
1158; MESA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
Matt Arsenault72b0e382018-07-28 12:34:25 +00001159; MESA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0, !dereferenceable !1
Matt Arsenault8c4a3522018-06-26 19:10:00 +00001160; MESA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef
1161; MESA-NEXT: ret void
1162;
1163 store volatile i8 addrspace(1)* %ptr, i8 addrspace(1)* addrspace(1)* undef
1164 ret void
1165}
1166
; dereferenceable_or_null(128) is likewise carried over as
; !dereferenceable_or_null metadata on the kernarg load (node !2 = i64 128).
1167define amdgpu_kernel void @kern_global_ptr_dereferencable_or_null(i8 addrspace(1)* dereferenceable_or_null(128) %ptr) #0 {
1168; HSA-LABEL: @kern_global_ptr_dereferencable_or_null(
1169; HSA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001170; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT]], i64 0
1171; HSA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
1172; HSA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0, !dereferenceable_or_null !2
Matt Arsenault8c4a3522018-06-26 19:10:00 +00001173; HSA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef
1174; HSA-NEXT: ret void
1175;
1176; MESA-LABEL: @kern_global_ptr_dereferencable_or_null(
1177; MESA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001178; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT]], i64 36
1179; MESA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
Matt Arsenault72b0e382018-07-28 12:34:25 +00001180; MESA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0, !dereferenceable_or_null !2
Matt Arsenault8c4a3522018-06-26 19:10:00 +00001181; MESA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef
1182; MESA-NEXT: ret void
1183;
1184 store volatile i8 addrspace(1)* %ptr, i8 addrspace(1)* addrspace(1)* undef
1185 ret void
1186}
1187
; The nonnull parameter attribute becomes !nonnull metadata on the kernarg
; load (reusing the empty node !0, which is also the !invariant.load node).
1188define amdgpu_kernel void @kern_nonnull_global_ptr(i8 addrspace(1)* nonnull %ptr) #0 {
1189; HSA-LABEL: @kern_nonnull_global_ptr(
1190; HSA-NEXT: [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001191; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT]], i64 0
1192; HSA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
1193; HSA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0, !nonnull !0
Matt Arsenault8c4a3522018-06-26 19:10:00 +00001194; HSA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef
1195; HSA-NEXT: ret void
1196;
1197; MESA-LABEL: @kern_nonnull_global_ptr(
1198; MESA-NEXT: [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001199; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT]], i64 36
1200; MESA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
1201; MESA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0, !nonnull !0
Matt Arsenault8c4a3522018-06-26 19:10:00 +00001202; MESA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef
1203; MESA-NEXT: ret void
1204;
1205 store volatile i8 addrspace(1)* %ptr, i8 addrspace(1)* addrspace(1)* undef
1206 ret void
1207}
1208
; The align parameter attribute is carried over as !align metadata on the
; kernarg load (node !3 = i64 1024).
; NOTE(review): the function name says align32 but the attribute is
; align 1024 — presumably the name is stale; confirm before renaming.
1209define amdgpu_kernel void @kern_align32_global_ptr(i8 addrspace(1)* align 1024 %ptr) #0 {
1210; HSA-LABEL: @kern_align32_global_ptr(
1211; HSA-NEXT: [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001212; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT]], i64 0
1213; HSA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
1214; HSA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0, !align !3
Matt Arsenault8c4a3522018-06-26 19:10:00 +00001215; HSA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef
1216; HSA-NEXT: ret void
1217;
1218; MESA-LABEL: @kern_align32_global_ptr(
1219; MESA-NEXT: [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001220; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT]], i64 36
1221; MESA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
Matt Arsenault72b0e382018-07-28 12:34:25 +00001222; MESA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0, !align !3
Matt Arsenault8c4a3522018-06-26 19:10:00 +00001223; MESA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef
1224; MESA-NEXT: ret void
1225;
1226 store volatile i8 addrspace(1)* %ptr, i8 addrspace(1)* addrspace(1)* undef
1227 ret void
1228}
1229
; A noalias pointer argument is not rewritten into a kernarg load here: the
; checks show the original argument stored directly, although the kernarg
; segment intrinsic call is still emitted.
1230define amdgpu_kernel void @kern_noalias_global_ptr(i8 addrspace(1)* noalias %ptr) #0 {
1231; HSA-LABEL: @kern_noalias_global_ptr(
1232; HSA-NEXT: [[KERN_NOALIAS_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenault8c4a3522018-06-26 19:10:00 +00001233; HSA-NEXT: store volatile i8 addrspace(1)* [[PTR:%.*]], i8 addrspace(1)* addrspace(1)* undef
1234; HSA-NEXT: ret void
1235;
1236; MESA-LABEL: @kern_noalias_global_ptr(
1237; MESA-NEXT: [[KERN_NOALIAS_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenault8c4a3522018-06-26 19:10:00 +00001238; MESA-NEXT: store volatile i8 addrspace(1)* [[PTR:%.*]], i8 addrspace(1)* addrspace(1)* undef
1239; MESA-NEXT: ret void
1240;
1241 store volatile i8 addrspace(1)* %ptr, i8 addrspace(1)* addrspace(1)* undef
1242 ret void
1243}
1244
; Two noalias pointer arguments: as above, neither is rewritten into a
; kernarg load; the segment size grows to cover both (16 bytes explicit for
; the HSA run, 52 total for the MESA run with its 36 implicit bytes).
1245define amdgpu_kernel void @kern_noalias_global_ptr_x2(i8 addrspace(1)* noalias %ptr0, i8 addrspace(1)* noalias %ptr1) #0 {
1246; HSA-LABEL: @kern_noalias_global_ptr_x2(
1247; HSA-NEXT: [[KERN_NOALIAS_GLOBAL_PTR_X2_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(16) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenault8c4a3522018-06-26 19:10:00 +00001248; HSA-NEXT: store volatile i8 addrspace(1)* [[PTR0:%.*]], i8 addrspace(1)* addrspace(1)* undef
1249; HSA-NEXT: store volatile i8 addrspace(1)* [[PTR1:%.*]], i8 addrspace(1)* addrspace(1)* undef
1250; HSA-NEXT: ret void
1251;
1252; MESA-LABEL: @kern_noalias_global_ptr_x2(
1253; MESA-NEXT: [[KERN_NOALIAS_GLOBAL_PTR_X2_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(52) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenault8c4a3522018-06-26 19:10:00 +00001254; MESA-NEXT: store volatile i8 addrspace(1)* [[PTR0:%.*]], i8 addrspace(1)* addrspace(1)* undef
1255; MESA-NEXT: store volatile i8 addrspace(1)* [[PTR1:%.*]], i8 addrspace(1)* addrspace(1)* undef
1256; MESA-NEXT: ret void
1257;
1258 store volatile i8 addrspace(1)* %ptr0, i8 addrspace(1)* addrspace(1)* undef
1259 store volatile i8 addrspace(1)* %ptr1, i8 addrspace(1)* addrspace(1)* undef
1260 ret void
1261}
1262
; First-class { i8, i8 } struct argument: loaded from the kernarg segment as
; a whole aggregate, with the elements re-created via extractvalue.
Matt Arsenault513e0c02018-06-28 10:18:11 +00001263define amdgpu_kernel void @struct_i8_i8_arg({i8, i8} %in) #0 {
1264; HSA-LABEL: @struct_i8_i8_arg(
1265; HSA-NEXT: entry:
1266; HSA-NEXT: [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001267; HSA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT]], i64 0
1268; HSA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to { i8, i8 } addrspace(4)*
1269; HSA-NEXT: [[IN_LOAD:%.*]] = load { i8, i8 }, { i8, i8 } addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
Matt Arsenault513e0c02018-06-28 10:18:11 +00001270; HSA-NEXT: [[ELT0:%.*]] = extractvalue { i8, i8 } [[IN_LOAD]], 0
1271; HSA-NEXT: [[ELT1:%.*]] = extractvalue { i8, i8 } [[IN_LOAD]], 1
1272; HSA-NEXT: store volatile i8 [[ELT0]], i8 addrspace(1)* null, align 4
1273; HSA-NEXT: store volatile i8 [[ELT1]], i8 addrspace(1)* null, align 4
1274; HSA-NEXT: ret void
1275;
1276; MESA-LABEL: @struct_i8_i8_arg(
1277; MESA-NEXT: entry:
1278; MESA-NEXT: [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001279; MESA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT]], i64 36
1280; MESA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to { i8, i8 } addrspace(4)*
1281; MESA-NEXT: [[IN_LOAD:%.*]] = load { i8, i8 }, { i8, i8 } addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
Matt Arsenault513e0c02018-06-28 10:18:11 +00001282; MESA-NEXT: [[ELT0:%.*]] = extractvalue { i8, i8 } [[IN_LOAD]], 0
1283; MESA-NEXT: [[ELT1:%.*]] = extractvalue { i8, i8 } [[IN_LOAD]], 1
1284; MESA-NEXT: store volatile i8 [[ELT0]], i8 addrspace(1)* null, align 4
1285; MESA-NEXT: store volatile i8 [[ELT1]], i8 addrspace(1)* null, align 4
1286; MESA-NEXT: ret void
1287;
1288entry:
1289 %elt0 = extractvalue {i8, i8} %in, 0
1290 %elt1 = extractvalue {i8, i8} %in, 1
1291 store volatile i8 %elt0, i8 addrspace(1)* null, align 4
1292 store volatile i8 %elt1, i8 addrspace(1)* null, align 4
1293 ret void
1294}
1295
; { i8, i16 } struct argument (elements of different widths): still loaded
; as one aggregate from the kernarg segment and split with extractvalue.
1296define amdgpu_kernel void @struct_i8_i16_arg({i8, i16} %in) #0 {
1297; HSA-LABEL: @struct_i8_i16_arg(
1298; HSA-NEXT: entry:
1299; HSA-NEXT: [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001300; HSA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT]], i64 0
1301; HSA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to { i8, i16 } addrspace(4)*
1302; HSA-NEXT: [[IN_LOAD:%.*]] = load { i8, i16 }, { i8, i16 } addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
Matt Arsenault513e0c02018-06-28 10:18:11 +00001303; HSA-NEXT: [[ELT0:%.*]] = extractvalue { i8, i16 } [[IN_LOAD]], 0
1304; HSA-NEXT: [[ELT1:%.*]] = extractvalue { i8, i16 } [[IN_LOAD]], 1
1305; HSA-NEXT: store volatile i8 [[ELT0]], i8 addrspace(1)* null, align 4
1306; HSA-NEXT: store volatile i16 [[ELT1]], i16 addrspace(1)* null, align 4
1307; HSA-NEXT: ret void
1308;
1309; MESA-LABEL: @struct_i8_i16_arg(
1310; MESA-NEXT: entry:
1311; MESA-NEXT: [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001312; MESA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT]], i64 36
1313; MESA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to { i8, i16 } addrspace(4)*
1314; MESA-NEXT: [[IN_LOAD:%.*]] = load { i8, i16 }, { i8, i16 } addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
Matt Arsenault513e0c02018-06-28 10:18:11 +00001315; MESA-NEXT: [[ELT0:%.*]] = extractvalue { i8, i16 } [[IN_LOAD]], 0
1316; MESA-NEXT: [[ELT1:%.*]] = extractvalue { i8, i16 } [[IN_LOAD]], 1
1317; MESA-NEXT: store volatile i8 [[ELT0]], i8 addrspace(1)* null, align 4
1318; MESA-NEXT: store volatile i16 [[ELT1]], i16 addrspace(1)* null, align 4
1319; MESA-NEXT: ret void
1320;
1321entry:
1322 %elt0 = extractvalue {i8, i16} %in, 0
1323 %elt1 = extractvalue {i8, i16} %in, 1
1324 store volatile i8 %elt0, i8 addrspace(1)* null, align 4
1325 store volatile i16 %elt1, i16 addrspace(1)* null, align 4
1326 ret void
1327}
1328
; [2 x i8] array argument: handled like the struct cases — one aggregate
; load from the kernarg segment followed by extractvalue per element.
1329define amdgpu_kernel void @array_2xi8_arg([2 x i8] %in) #0 {
1330; HSA-LABEL: @array_2xi8_arg(
1331; HSA-NEXT: entry:
1332; HSA-NEXT: [[ARRAY_2XI8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001333; HSA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[ARRAY_2XI8_ARG_KERNARG_SEGMENT]], i64 0
1334; HSA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to [2 x i8] addrspace(4)*
1335; HSA-NEXT: [[IN_LOAD:%.*]] = load [2 x i8], [2 x i8] addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
Matt Arsenault513e0c02018-06-28 10:18:11 +00001336; HSA-NEXT: [[ELT0:%.*]] = extractvalue [2 x i8] [[IN_LOAD]], 0
1337; HSA-NEXT: [[ELT1:%.*]] = extractvalue [2 x i8] [[IN_LOAD]], 1
1338; HSA-NEXT: store volatile i8 [[ELT0]], i8 addrspace(1)* null, align 4
1339; HSA-NEXT: store volatile i8 [[ELT1]], i8 addrspace(1)* null, align 4
1340; HSA-NEXT: ret void
1341;
1342; MESA-LABEL: @array_2xi8_arg(
1343; MESA-NEXT: entry:
1344; MESA-NEXT: [[ARRAY_2XI8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001345; MESA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[ARRAY_2XI8_ARG_KERNARG_SEGMENT]], i64 36
1346; MESA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to [2 x i8] addrspace(4)*
1347; MESA-NEXT: [[IN_LOAD:%.*]] = load [2 x i8], [2 x i8] addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
Matt Arsenault513e0c02018-06-28 10:18:11 +00001348; MESA-NEXT: [[ELT0:%.*]] = extractvalue [2 x i8] [[IN_LOAD]], 0
1349; MESA-NEXT: [[ELT1:%.*]] = extractvalue [2 x i8] [[IN_LOAD]], 1
1350; MESA-NEXT: store volatile i8 [[ELT0]], i8 addrspace(1)* null, align 4
1351; MESA-NEXT: store volatile i8 [[ELT1]], i8 addrspace(1)* null, align 4
1352; MESA-NEXT: ret void
1353;
1354entry:
1355 %elt0 = extractvalue [2 x i8] %in, 0
1356 %elt1 = extractvalue [2 x i8] %in, 1
1357 store volatile i8 %elt0, i8 addrspace(1)* null, align 4
1358 store volatile i8 %elt1, i8 addrspace(1)* null, align 4
1359 ret void
1360}
1361
; [2 x i1] array argument: even sub-byte element types are loaded as a whole
; aggregate from the kernarg segment and split with extractvalue.
1362define amdgpu_kernel void @array_2xi1_arg([2 x i1] %in) #0 {
1363; HSA-LABEL: @array_2xi1_arg(
1364; HSA-NEXT: entry:
1365; HSA-NEXT: [[ARRAY_2XI1_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001366; HSA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[ARRAY_2XI1_ARG_KERNARG_SEGMENT]], i64 0
1367; HSA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to [2 x i1] addrspace(4)*
1368; HSA-NEXT: [[IN_LOAD:%.*]] = load [2 x i1], [2 x i1] addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
Matt Arsenault513e0c02018-06-28 10:18:11 +00001369; HSA-NEXT: [[ELT0:%.*]] = extractvalue [2 x i1] [[IN_LOAD]], 0
1370; HSA-NEXT: [[ELT1:%.*]] = extractvalue [2 x i1] [[IN_LOAD]], 1
1371; HSA-NEXT: store volatile i1 [[ELT0]], i1 addrspace(1)* null, align 4
1372; HSA-NEXT: store volatile i1 [[ELT1]], i1 addrspace(1)* null, align 4
1373; HSA-NEXT: ret void
1374;
1375; MESA-LABEL: @array_2xi1_arg(
1376; MESA-NEXT: entry:
1377; MESA-NEXT: [[ARRAY_2XI1_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001378; MESA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[ARRAY_2XI1_ARG_KERNARG_SEGMENT]], i64 36
1379; MESA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to [2 x i1] addrspace(4)*
1380; MESA-NEXT: [[IN_LOAD:%.*]] = load [2 x i1], [2 x i1] addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
Matt Arsenault513e0c02018-06-28 10:18:11 +00001381; MESA-NEXT: [[ELT0:%.*]] = extractvalue [2 x i1] [[IN_LOAD]], 0
1382; MESA-NEXT: [[ELT1:%.*]] = extractvalue [2 x i1] [[IN_LOAD]], 1
1383; MESA-NEXT: store volatile i1 [[ELT0]], i1 addrspace(1)* null, align 4
1384; MESA-NEXT: store volatile i1 [[ELT1]], i1 addrspace(1)* null, align 4
1385; MESA-NEXT: ret void
1386;
1387entry:
1388 %elt0 = extractvalue [2 x i1] %in, 0
1389 %elt1 = extractvalue [2 x i1] %in, 1
1390 store volatile i1 %elt0, i1 addrspace(1)* null, align 4
1391 store volatile i1 %elt1, i1 addrspace(1)* null, align 4
1392 ret void
1393}
1394
; A zero-sized {} argument produces no kernarg access: the HSA run emits
; nothing, while the MESA run still emits the segment intrinsic for its
; 36 implicit-argument bytes (dereferenceable(36)).
1395define amdgpu_kernel void @only_empty_struct({} %empty) #0 {
1396; HSA-LABEL: @only_empty_struct(
1397; HSA-NEXT: ret void
1398;
1399; MESA-LABEL: @only_empty_struct(
1400; MESA-NEXT: [[ONLY_EMPTY_STRUCT_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(36) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenault513e0c02018-06-28 10:18:11 +00001401; MESA-NEXT: ret void
1402;
1403 ret void
1404}
1405
; A zero-sized {} argument followed by an i32: the empty struct occupies no
; kernarg bytes, so the i32 is loaded from offset 0 (HSA) / 36 (MESA).
1406define amdgpu_kernel void @empty_struct_with_other({} %empty, i32 %arg1) #0 {
1407; HSA-LABEL: @empty_struct_with_other(
1408; HSA-NEXT: [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001409; HSA-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT]], i64 0
1410; HSA-NEXT: [[ARG1_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET]] to i32 addrspace(4)*
1411; HSA-NEXT: [[ARG1_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
Matt Arsenault513e0c02018-06-28 10:18:11 +00001412; HSA-NEXT: store i32 [[ARG1_LOAD]], i32 addrspace(1)* undef
1413; HSA-NEXT: ret void
1414;
1415; MESA-LABEL: @empty_struct_with_other(
1416; MESA-NEXT: [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001417; MESA-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT]], i64 36
1418; MESA-NEXT: [[ARG1_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET]] to i32 addrspace(4)*
1419; MESA-NEXT: [[ARG1_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
Matt Arsenault513e0c02018-06-28 10:18:11 +00001420; MESA-NEXT: store i32 [[ARG1_LOAD]], i32 addrspace(1)* undef
1421; MESA-NEXT: ret void
1422;
1423 store i32 %arg1, i32 addrspace(1)* undef
1424 ret void
1425}
1426
; #0: default attribute group used by the kernels above (kaveri target-cpu).
Matt Arsenault8c4a3522018-06-26 19:10:00 +00001427attributes #0 = { nounwind "target-cpu"="kaveri" }
; #1: same, plus an explicit 40-byte implicit-argument area.
1428attributes #1 = { nounwind "target-cpu"="kaveri" "amdgpu-implicitarg-num-bytes"="40" }
; #2: tahiti target-cpu variant.
1429attributes #2 = { nounwind "target-cpu"="tahiti" }
1430
Matt Arsenault72b0e382018-07-28 12:34:25 +00001431; GCN: !0 = !{}
1432; GCN: !1 = !{i64 42}
1433; GCN: !2 = !{i64 128}
1434; GCN: !3 = !{i64 1024}