; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; FIXME: Manually added checks for metadata nodes at bottom
; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -o - -amdgpu-lower-kernel-arguments %s | FileCheck -check-prefix=HSA %s
; RUN: opt -mtriple=amdgcn-- -S -o - -amdgpu-lower-kernel-arguments %s | FileCheck -check-prefix=MESA %s
5
; Sanity check: a kernel with no arguments gets no kernarg segment loads at all.
define amdgpu_kernel void @kern_noargs() {
; HSA-LABEL: @kern_noargs(
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_noargs(
; MESA-NEXT:    ret void
;
  ret void
}
15
; i8 arg: loaded from the kernarg segment as a full i32 dword and truncated.
; HSA places args at offset 0; MESA checks use a 36-byte base offset.
define amdgpu_kernel void @kern_i8(i8 %arg) #0 {
; HSA-LABEL: @kern_i8(
; HSA-NEXT:    [[KERN_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I8_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; HSA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
; HSA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
; HSA-NEXT:    store i8 [[TMP2]], i8 addrspace(1)* undef, align 1
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_i8(
; MESA-NEXT:    [[KERN_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I8_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; MESA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
; MESA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
; MESA-NEXT:    store i8 [[TMP2]], i8 addrspace(1)* undef, align 1
; MESA-NEXT:    ret void
;
  store i8 %arg, i8 addrspace(1)* undef, align 1
  ret void
}
38
; i16 arg: same dword-load-and-truncate pattern as the i8 case.
define amdgpu_kernel void @kern_i16(i16 %arg) #0 {
; HSA-LABEL: @kern_i16(
; HSA-NEXT:    [[KERN_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I16_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; HSA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
; HSA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
; HSA-NEXT:    store i16 [[TMP2]], i16 addrspace(1)* undef, align 1
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_i16(
; MESA-NEXT:    [[KERN_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I16_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; MESA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
; MESA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
; MESA-NEXT:    store i16 [[TMP2]], i16 addrspace(1)* undef, align 1
; MESA-NEXT:    ret void
;
  store i16 %arg, i16 addrspace(1)* undef, align 1
  ret void
}
61
; half arg: dword load, truncate to i16, then bitcast back to half.
define amdgpu_kernel void @kern_f16(half %arg) #0 {
; HSA-LABEL: @kern_f16(
; HSA-NEXT:    [[KERN_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_F16_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; HSA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
; HSA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
; HSA-NEXT:    [[ARG_LOAD:%.*]] = bitcast i16 [[TMP2]] to half
; HSA-NEXT:    store half [[ARG_LOAD]], half addrspace(1)* undef, align 1
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_f16(
; MESA-NEXT:    [[KERN_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_F16_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; MESA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
; MESA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
; MESA-NEXT:    [[ARG_LOAD:%.*]] = bitcast i16 [[TMP2]] to half
; MESA-NEXT:    store half [[ARG_LOAD]], half addrspace(1)* undef, align 1
; MESA-NEXT:    ret void
;
  store half %arg, half addrspace(1)* undef, align 1
  ret void
}
86
; zeroext i8 arg: the zeroext attribute does not change the lowering here —
; identical dword-load-and-truncate sequence as the plain i8 case.
define amdgpu_kernel void @kern_zeroext_i8(i8 zeroext %arg) #0 {
; HSA-LABEL: @kern_zeroext_i8(
; HSA-NEXT:    [[KERN_ZEROEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ZEROEXT_I8_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; HSA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
; HSA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
; HSA-NEXT:    store i8 [[TMP2]], i8 addrspace(1)* undef, align 1
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_zeroext_i8(
; MESA-NEXT:    [[KERN_ZEROEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ZEROEXT_I8_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; MESA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
; MESA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
; MESA-NEXT:    store i8 [[TMP2]], i8 addrspace(1)* undef, align 1
; MESA-NEXT:    ret void
;
  store i8 %arg, i8 addrspace(1)* undef, align 1
  ret void
}
109
; zeroext i16 arg: same lowering as plain i16 (attribute has no effect here).
define amdgpu_kernel void @kern_zeroext_i16(i16 zeroext %arg) #0 {
; HSA-LABEL: @kern_zeroext_i16(
; HSA-NEXT:    [[KERN_ZEROEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ZEROEXT_I16_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; HSA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
; HSA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
; HSA-NEXT:    store i16 [[TMP2]], i16 addrspace(1)* undef, align 1
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_zeroext_i16(
; MESA-NEXT:    [[KERN_ZEROEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ZEROEXT_I16_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; MESA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
; MESA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
; MESA-NEXT:    store i16 [[TMP2]], i16 addrspace(1)* undef, align 1
; MESA-NEXT:    ret void
;
  store i16 %arg, i16 addrspace(1)* undef, align 1
  ret void
}
132
; signext i8 arg: same lowering as plain i8 (attribute has no effect here).
define amdgpu_kernel void @kern_signext_i8(i8 signext %arg) #0 {
; HSA-LABEL: @kern_signext_i8(
; HSA-NEXT:    [[KERN_SIGNEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_SIGNEXT_I8_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; HSA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
; HSA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
; HSA-NEXT:    store i8 [[TMP2]], i8 addrspace(1)* undef, align 1
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_signext_i8(
; MESA-NEXT:    [[KERN_SIGNEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_SIGNEXT_I8_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; MESA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
; MESA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
; MESA-NEXT:    store i8 [[TMP2]], i8 addrspace(1)* undef, align 1
; MESA-NEXT:    ret void
;
  store i8 %arg, i8 addrspace(1)* undef, align 1
  ret void
}
155
; signext i16 arg: same lowering as plain i16 (attribute has no effect here).
define amdgpu_kernel void @kern_signext_i16(i16 signext %arg) #0 {
; HSA-LABEL: @kern_signext_i16(
; HSA-NEXT:    [[KERN_SIGNEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_SIGNEXT_I16_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; HSA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
; HSA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
; HSA-NEXT:    store i16 [[TMP2]], i16 addrspace(1)* undef, align 1
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_signext_i16(
; MESA-NEXT:    [[KERN_SIGNEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_SIGNEXT_I16_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; MESA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
; MESA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
; MESA-NEXT:    store i16 [[TMP2]], i16 addrspace(1)* undef, align 1
; MESA-NEXT:    ret void
;
  store i16 %arg, i16 addrspace(1)* undef, align 1
  ret void
}
178
; Two i8 args packed into the same dword: both loads read the same i32;
; the second arg is extracted with an lshr by 8 before the trunc.
define amdgpu_kernel void @kern_i8_i8(i8 %arg0, i8 %arg1) {
; HSA-LABEL: @kern_i8_i8(
; HSA-NEXT:    [[KERN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I8_I8_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; HSA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
; HSA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
; HSA-NEXT:    [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I8_I8_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; HSA-NEXT:    [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
; HSA-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
; HSA-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
; HSA-NEXT:    store volatile i8 [[TMP2]], i8 addrspace(1)* undef, align 1
; HSA-NEXT:    store volatile i8 [[TMP5]], i8 addrspace(1)* undef, align 1
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_i8_i8(
; MESA-NEXT:    [[KERN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I8_I8_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; MESA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
; MESA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
; MESA-NEXT:    [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I8_I8_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; MESA-NEXT:    [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
; MESA-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
; MESA-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
; MESA-NEXT:    store volatile i8 [[TMP2]], i8 addrspace(1)* undef, align 1
; MESA-NEXT:    store volatile i8 [[TMP5]], i8 addrspace(1)* undef, align 1
; MESA-NEXT:    ret void
;
  store volatile i8 %arg0, i8 addrspace(1)* undef, align 1
  store volatile i8 %arg1, i8 addrspace(1)* undef, align 1
  ret void
}
214
; <3 x i8> arg: loaded as an i32 dword, truncated to i24, then bitcast
; to the vector type.
define amdgpu_kernel void @kern_v3i8(<3 x i8> %arg) {
; HSA-LABEL: @kern_v3i8(
; HSA-NEXT:    [[KERN_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V3I8_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; HSA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
; HSA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i24
; HSA-NEXT:    [[ARG_LOAD:%.*]] = bitcast i24 [[TMP2]] to <3 x i8>
; HSA-NEXT:    store <3 x i8> [[ARG_LOAD]], <3 x i8> addrspace(1)* undef, align 4
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_v3i8(
; MESA-NEXT:    [[KERN_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V3I8_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; MESA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
; MESA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i24
; MESA-NEXT:    [[ARG_LOAD:%.*]] = bitcast i24 [[TMP2]] to <3 x i8>
; MESA-NEXT:    store <3 x i8> [[ARG_LOAD]], <3 x i8> addrspace(1)* undef, align 4
; MESA-NEXT:    ret void
;
  store <3 x i8> %arg, <3 x i8> addrspace(1)* undef, align 4
  ret void
}
239
; i24 arg: non-power-of-two integer loaded as a dword and truncated to i24.
define amdgpu_kernel void @kern_i24(i24 %arg0) {
; HSA-LABEL: @kern_i24(
; HSA-NEXT:    [[KERN_I24_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I24_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; HSA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
; HSA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i24
; HSA-NEXT:    store i24 [[TMP2]], i24 addrspace(1)* undef
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_i24(
; MESA-NEXT:    [[KERN_I24_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I24_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
; MESA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
; MESA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i24
; MESA-NEXT:    store i24 [[TMP2]], i24 addrspace(1)* undef
; MESA-NEXT:    ret void
;
  store i24 %arg0, i24 addrspace(1)* undef
  ret void
}
262
; i32 arg: exactly dword-sized, so it is loaded directly with no align-down
; or truncation (note the plain _OFFSET names, not _ALIGN_DOWN).
define amdgpu_kernel void @kern_i32(i32 %arg0) {
; HSA-LABEL: @kern_i32(
; HSA-NEXT:    [[KERN_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I32_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)*
; HSA-NEXT:    [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
; HSA-NEXT:    store i32 [[ARG0_LOAD]], i32 addrspace(1)* undef
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_i32(
; MESA-NEXT:    [[KERN_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I32_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)*
; MESA-NEXT:    [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
; MESA-NEXT:    store i32 [[ARG0_LOAD]], i32 addrspace(1)* undef
; MESA-NEXT:    ret void
;
  store i32 %arg0, i32 addrspace(1)* undef
  ret void
}
283
; float arg: dword-sized, loaded directly as float.
define amdgpu_kernel void @kern_f32(float %arg0) {
; HSA-LABEL: @kern_f32(
; HSA-NEXT:    [[KERN_F32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_F32_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to float addrspace(4)*
; HSA-NEXT:    [[ARG0_LOAD:%.*]] = load float, float addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
; HSA-NEXT:    store float [[ARG0_LOAD]], float addrspace(1)* undef
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_f32(
; MESA-NEXT:    [[KERN_F32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_F32_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to float addrspace(4)*
; MESA-NEXT:    [[ARG0_LOAD:%.*]] = load float, float addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
; MESA-NEXT:    store float [[ARG0_LOAD]], float addrspace(1)* undef
; MESA-NEXT:    ret void
;
  store float %arg0, float addrspace(1)* undef
  ret void
}
304
; <3 x i32> arg: over-read as <4 x i32> (the ABI slot is 16 bytes) and the
; three live lanes are extracted with a shufflevector.
define amdgpu_kernel void @kern_v3i32(<3 x i32> %arg0) {
; HSA-LABEL: @kern_v3i32(
; HSA-NEXT:    [[KERN_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(16) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V3I32_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to <3 x i32> addrspace(4)*
; HSA-NEXT:    [[TMP1:%.*]] = bitcast <3 x i32> addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]] to <4 x i32> addrspace(4)*
; HSA-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32> addrspace(4)* [[TMP1]], align 16, !invariant.load !0
; HSA-NEXT:    [[ARG0_LOAD:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
; HSA-NEXT:    store <3 x i32> [[ARG0_LOAD]], <3 x i32> addrspace(1)* undef, align 4
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_v3i32(
; MESA-NEXT:    [[KERN_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(52) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V3I32_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to <3 x i32> addrspace(4)*
; MESA-NEXT:    [[TMP1:%.*]] = bitcast <3 x i32> addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]] to <4 x i32> addrspace(4)*
; MESA-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32> addrspace(4)* [[TMP1]], align 4, !invariant.load !0
; MESA-NEXT:    [[ARG0_LOAD:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
; MESA-NEXT:    store <3 x i32> [[ARG0_LOAD]], <3 x i32> addrspace(1)* undef, align 4
; MESA-NEXT:    ret void
;
  store <3 x i32> %arg0, <3 x i32> addrspace(1)* undef, align 4
  ret void
}
329
; <8 x i32> arg: 32-byte argument; segment alignment rises to 32 and the
; vector is loaded in one piece.
define amdgpu_kernel void @kern_v8i32(<8 x i32> %arg) #0 {
; HSA-LABEL: @kern_v8i32(
; HSA-NEXT:    [[KERN_V8I32_KERNARG_SEGMENT:%.*]] = call nonnull align 32 dereferenceable(32) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V8I32_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <8 x i32> addrspace(4)*
; HSA-NEXT:    [[ARG_LOAD:%.*]] = load <8 x i32>, <8 x i32> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
; HSA-NEXT:    store <8 x i32> [[ARG_LOAD]], <8 x i32> addrspace(1)* undef
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_v8i32(
; MESA-NEXT:    [[KERN_V8I32_KERNARG_SEGMENT:%.*]] = call nonnull align 32 dereferenceable(68) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V8I32_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <8 x i32> addrspace(4)*
; MESA-NEXT:    [[ARG_LOAD:%.*]] = load <8 x i32>, <8 x i32> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
; MESA-NEXT:    store <8 x i32> [[ARG_LOAD]], <8 x i32> addrspace(1)* undef
; MESA-NEXT:    ret void
;
  store <8 x i32> %arg, <8 x i32> addrspace(1)* undef
  ret void
}
350
; <8 x i64> arg: 64-byte argument; segment alignment rises to 64.
define amdgpu_kernel void @kern_v8i64(<8 x i64> %arg) #0 {
; HSA-LABEL: @kern_v8i64(
; HSA-NEXT:    [[KERN_V8I64_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V8I64_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <8 x i64> addrspace(4)*
; HSA-NEXT:    [[ARG_LOAD:%.*]] = load <8 x i64>, <8 x i64> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
; HSA-NEXT:    store <8 x i64> [[ARG_LOAD]], <8 x i64> addrspace(1)* undef
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_v8i64(
; MESA-NEXT:    [[KERN_V8I64_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(100) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V8I64_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <8 x i64> addrspace(4)*
; MESA-NEXT:    [[ARG_LOAD:%.*]] = load <8 x i64>, <8 x i64> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
; MESA-NEXT:    store <8 x i64> [[ARG_LOAD]], <8 x i64> addrspace(1)* undef
; MESA-NEXT:    ret void
;
  store <8 x i64> %arg, <8 x i64> addrspace(1)* undef
  ret void
}
371
; <16 x i64> arg: 128-byte argument; segment alignment rises to 128.
define amdgpu_kernel void @kern_v16i64(<16 x i64> %arg) #0 {
; HSA-LABEL: @kern_v16i64(
; HSA-NEXT:    [[KERN_V16I64_KERNARG_SEGMENT:%.*]] = call nonnull align 128 dereferenceable(128) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V16I64_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <16 x i64> addrspace(4)*
; HSA-NEXT:    [[ARG_LOAD:%.*]] = load <16 x i64>, <16 x i64> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
; HSA-NEXT:    store <16 x i64> [[ARG_LOAD]], <16 x i64> addrspace(1)* undef
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_v16i64(
; MESA-NEXT:    [[KERN_V16I64_KERNARG_SEGMENT:%.*]] = call nonnull align 128 dereferenceable(164) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V16I64_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <16 x i64> addrspace(4)*
; MESA-NEXT:    [[ARG_LOAD:%.*]] = load <16 x i64>, <16 x i64> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
; MESA-NEXT:    store <16 x i64> [[ARG_LOAD]], <16 x i64> addrspace(1)* undef
; MESA-NEXT:    ret void
;
  store <16 x i64> %arg, <16 x i64> addrspace(1)* undef
  ret void
}
392
; Mixed i32 + <3 x i32> args: the vector is padded up to its 16-byte slot,
; so arg1 lands at offset 16 (HSA) / 52 (MESA) and is over-read as <4 x i32>.
define amdgpu_kernel void @kern_i32_v3i32(i32 %arg0, <3 x i32> %arg1) {
; HSA-LABEL: @kern_i32_v3i32(
; HSA-NEXT:    [[KERN_I32_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(32) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I32_V3I32_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)*
; HSA-NEXT:    [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
; HSA-NEXT:    [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I32_V3I32_KERNARG_SEGMENT]], i64 16
; HSA-NEXT:    [[ARG1_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET]] to <3 x i32> addrspace(4)*
; HSA-NEXT:    [[TMP1:%.*]] = bitcast <3 x i32> addrspace(4)* [[ARG1_KERNARG_OFFSET_CAST]] to <4 x i32> addrspace(4)*
; HSA-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32> addrspace(4)* [[TMP1]], align 16, !invariant.load !0
; HSA-NEXT:    [[ARG1_LOAD:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
; HSA-NEXT:    store i32 [[ARG0_LOAD]], i32 addrspace(1)* undef
; HSA-NEXT:    store <3 x i32> [[ARG1_LOAD]], <3 x i32> addrspace(1)* undef, align 4
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_i32_v3i32(
; MESA-NEXT:    [[KERN_I32_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(68) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I32_V3I32_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)*
; MESA-NEXT:    [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
; MESA-NEXT:    [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I32_V3I32_KERNARG_SEGMENT]], i64 52
; MESA-NEXT:    [[ARG1_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET]] to <3 x i32> addrspace(4)*
; MESA-NEXT:    [[TMP1:%.*]] = bitcast <3 x i32> addrspace(4)* [[ARG1_KERNARG_OFFSET_CAST]] to <4 x i32> addrspace(4)*
; MESA-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32> addrspace(4)* [[TMP1]], align 4, !invariant.load !0
; MESA-NEXT:    [[ARG1_LOAD:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
; MESA-NEXT:    store i32 [[ARG0_LOAD]], i32 addrspace(1)* undef
; MESA-NEXT:    store <3 x i32> [[ARG1_LOAD]], <3 x i32> addrspace(1)* undef, align 4
; MESA-NEXT:    ret void
;
  store i32 %arg0, i32 addrspace(1)* undef
  store <3 x i32> %arg1, <3 x i32> addrspace(1)* undef, align 4
  ret void
}
426
427%struct.a = type { i32, i8, [4 x i8] }
428%struct.b.packed = type { i8, i32, [3 x i16], <2 x double> }
429
430define amdgpu_kernel void @kern_struct_a(%struct.a %arg0) {
431; HSA-LABEL: @kern_struct_a(
432; HSA-NEXT: [[KERN_STRUCT_A_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(12) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000433; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_STRUCT_A_KERNARG_SEGMENT]], i64 0
434; HSA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to [[STRUCT_A:%.*]] addrspace(4)*
435; HSA-NEXT: [[ARG0_LOAD:%.*]] = load [[STRUCT_A]], [[STRUCT_A]] addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000436; HSA-NEXT: store [[STRUCT_A]] %arg0.load, [[STRUCT_A]] addrspace(1)* undef
437; HSA-NEXT: ret void
438;
439; MESA-LABEL: @kern_struct_a(
440; MESA-NEXT: [[KERN_STRUCT_A_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(48) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000441; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_STRUCT_A_KERNARG_SEGMENT]], i64 36
442; MESA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to [[STRUCT_A:%.*]] addrspace(4)*
443; MESA-NEXT: [[ARG0_LOAD:%.*]] = load [[STRUCT_A]], [[STRUCT_A]] addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000444; MESA-NEXT: store [[STRUCT_A]] %arg0.load, [[STRUCT_A]] addrspace(1)* undef
445; MESA-NEXT: ret void
446;
447 store %struct.a %arg0, %struct.a addrspace(1)* undef
448 ret void
449}
450
451define amdgpu_kernel void @kern_struct_b_packed(%struct.b.packed %arg0) #0 {
452; HSA-LABEL: @kern_struct_b_packed(
453; HSA-NEXT: [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(32) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000454; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT]], i64 0
455; HSA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to [[STRUCT_B_PACKED:%.*]] addrspace(4)*
456; HSA-NEXT: [[ARG0_LOAD:%.*]] = load [[STRUCT_B_PACKED]], [[STRUCT_B_PACKED]] addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000457; HSA-NEXT: store [[STRUCT_B_PACKED]] %arg0.load, [[STRUCT_B_PACKED]] addrspace(1)* undef
458; HSA-NEXT: ret void
459;
460; MESA-LABEL: @kern_struct_b_packed(
461; MESA-NEXT: [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(68) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000462; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT]], i64 36
463; MESA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to [[STRUCT_B_PACKED:%.*]] addrspace(4)*
464; MESA-NEXT: [[ARG0_LOAD:%.*]] = load [[STRUCT_B_PACKED]], [[STRUCT_B_PACKED]] addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000465; MESA-NEXT: store [[STRUCT_B_PACKED]] %arg0.load, [[STRUCT_B_PACKED]] addrspace(1)* undef
466; MESA-NEXT: ret void
467;
468 store %struct.b.packed %arg0, %struct.b.packed addrspace(1)* undef
469 ret void
470}
471
472define amdgpu_kernel void @kern_implicit_arg_num_bytes(i32 %arg0) #1 {
473; HSA-LABEL: @kern_implicit_arg_num_bytes(
474; HSA-NEXT: [[KERN_IMPLICIT_ARG_NUM_BYTES_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(48) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000475; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_IMPLICIT_ARG_NUM_BYTES_KERNARG_SEGMENT]], i64 0
476; HSA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)*
477; HSA-NEXT: [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000478; HSA-NEXT: store i32 [[ARG0_LOAD]], i32 addrspace(1)* undef
479; HSA-NEXT: ret void
480;
481; MESA-LABEL: @kern_implicit_arg_num_bytes(
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000482; MESA-NEXT: [[KERN_IMPLICIT_ARG_NUM_BYTES_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
483; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_IMPLICIT_ARG_NUM_BYTES_KERNARG_SEGMENT]], i64 36
484; MESA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)*
485; MESA-NEXT: [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000486; MESA-NEXT: store i32 [[ARG0_LOAD]], i32 addrspace(1)* undef
487; MESA-NEXT: ret void
488;
489 store i32 %arg0, i32 addrspace(1)* undef
490 ret void
491}
492
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000493define amdgpu_kernel void @kernel_implicitarg_no_struct_align(<16 x i32>, i32 %arg1) #1 {
494; HSA-LABEL: @kernel_implicitarg_no_struct_align(
495; HSA-NEXT: [[KERNEL_IMPLICITARG_NO_STRUCT_ALIGN_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(112) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
496; HSA-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERNEL_IMPLICITARG_NO_STRUCT_ALIGN_KERNARG_SEGMENT]], i64 64
497; HSA-NEXT: [[ARG1_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET]] to i32 addrspace(4)*
498; HSA-NEXT: [[ARG1_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
499; HSA-NEXT: store i32 [[ARG1_LOAD]], i32 addrspace(1)* undef
500; HSA-NEXT: ret void
501;
502; MESA-LABEL: @kernel_implicitarg_no_struct_align(
503; MESA-NEXT: [[KERNEL_IMPLICITARG_NO_STRUCT_ALIGN_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(108) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
504; MESA-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERNEL_IMPLICITARG_NO_STRUCT_ALIGN_KERNARG_SEGMENT]], i64 100
505; MESA-NEXT: [[ARG1_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET]] to i32 addrspace(4)*
506; MESA-NEXT: [[ARG1_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
507; MESA-NEXT: store i32 [[ARG1_LOAD]], i32 addrspace(1)* undef
508; MESA-NEXT: ret void
509;
510 store i32 %arg1, i32 addrspace(1)* undef
511 ret void
512}
513
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000514define amdgpu_kernel void @kern_lds_ptr(i32 addrspace(3)* %lds) #0 {
515; HSA-LABEL: @kern_lds_ptr(
516; HSA-NEXT: [[KERN_LDS_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000517; HSA-NEXT: [[LDS_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_LDS_PTR_KERNARG_SEGMENT]], i64 0
518; HSA-NEXT: [[LDS_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[LDS_KERNARG_OFFSET]] to i32 addrspace(3)* addrspace(4)*
519; HSA-NEXT: [[LDS_LOAD:%.*]] = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(4)* [[LDS_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000520; HSA-NEXT: store i32 0, i32 addrspace(3)* [[LDS_LOAD]], align 4
521; HSA-NEXT: ret void
522;
523; MESA-LABEL: @kern_lds_ptr(
524; MESA-NEXT: [[KERN_LDS_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000525; MESA-NEXT: [[LDS_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_LDS_PTR_KERNARG_SEGMENT]], i64 36
526; MESA-NEXT: [[LDS_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[LDS_KERNARG_OFFSET]] to i32 addrspace(3)* addrspace(4)*
527; MESA-NEXT: [[LDS_LOAD:%.*]] = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(4)* [[LDS_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000528; MESA-NEXT: store i32 0, i32 addrspace(3)* [[LDS_LOAD]], align 4
529; MESA-NEXT: ret void
530;
531 store i32 0, i32 addrspace(3)* %lds, align 4
532 ret void
533}
534
535define amdgpu_kernel void @kern_lds_ptr_si(i32 addrspace(3)* %lds) #2 {
536; HSA-LABEL: @kern_lds_ptr_si(
537; HSA-NEXT: [[KERN_LDS_PTR_SI_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000538; HSA-NEXT: store i32 0, i32 addrspace(3)* [[LDS:%.*]], align 4
539; HSA-NEXT: ret void
540;
541; MESA-LABEL: @kern_lds_ptr_si(
542; MESA-NEXT: [[KERN_LDS_PTR_SI_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000543; MESA-NEXT: store i32 0, i32 addrspace(3)* [[LDS:%.*]], align 4
544; MESA-NEXT: ret void
545;
546 store i32 0, i32 addrspace(3)* %lds, align 4
547 ret void
548}
549
550define amdgpu_kernel void @kern_realign_i8_i8(i8 %arg0, i8 %arg1) #0 {
551; HSA-LABEL: @kern_realign_i8_i8(
552; HSA-NEXT: [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000553; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT]], i64 0
554; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
555; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
556; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
557; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT]], i64 0
558; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
559; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
560; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
561; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
562; HSA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef
563; HSA-NEXT: store volatile i8 [[TMP5]], i8 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000564; HSA-NEXT: ret void
565;
566; MESA-LABEL: @kern_realign_i8_i8(
567; MESA-NEXT: [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000568; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT]], i64 36
569; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
570; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
571; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
572; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT]], i64 36
573; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
574; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
575; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000576; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000577; MESA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000578; MESA-NEXT: store volatile i8 [[TMP5]], i8 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000579; MESA-NEXT: ret void
580;
581 store volatile i8 %arg0, i8 addrspace(1)* undef
582 store volatile i8 %arg1, i8 addrspace(1)* undef
583 ret void
584}
585
586define amdgpu_kernel void @kern_realign_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2) #0 {
587; HSA-LABEL: @kern_realign_i8_i8_i8(
588; HSA-NEXT: [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000589; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 0
590; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
591; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
592; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
593; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 0
594; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
595; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
596; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
597; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
598; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 0
599; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
600; HSA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
601; HSA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000602; HSA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000603; HSA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef
604; HSA-NEXT: store volatile i8 [[TMP5]], i8 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000605; HSA-NEXT: store volatile i8 [[TMP8]], i8 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000606; HSA-NEXT: ret void
607;
608; MESA-LABEL: @kern_realign_i8_i8_i8(
609; MESA-NEXT: [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000610; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 36
611; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
612; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
613; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
614; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 36
615; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
616; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
617; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000618; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000619; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 36
620; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
621; MESA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
622; MESA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
623; MESA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8
624; MESA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000625; MESA-NEXT: store volatile i8 [[TMP5]], i8 addrspace(1)* undef
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000626; MESA-NEXT: store volatile i8 [[TMP8]], i8 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000627; MESA-NEXT: ret void
628;
629 store volatile i8 %arg0, i8 addrspace(1)* undef
630 store volatile i8 %arg1, i8 addrspace(1)* undef
631 store volatile i8 %arg2, i8 addrspace(1)* undef
632 ret void
633}
634
635define amdgpu_kernel void @kern_realign_i8_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2, i8 %arg3) #0 {
636; HSA-LABEL: @kern_realign_i8_i8_i8_i8(
637; HSA-NEXT: [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000638; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0
639; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
640; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
641; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
642; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0
643; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
644; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
645; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
646; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
647; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0
648; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
649; HSA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
650; HSA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000651; HSA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000652; HSA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0
653; HSA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
654; HSA-NEXT: [[TMP9:%.*]] = load i32, i32 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
655; HSA-NEXT: [[TMP10:%.*]] = lshr i32 [[TMP9]], 24
656; HSA-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP10]] to i8
657; HSA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef
658; HSA-NEXT: store volatile i8 [[TMP5]], i8 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000659; HSA-NEXT: store volatile i8 [[TMP8]], i8 addrspace(1)* undef
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000660; HSA-NEXT: store volatile i8 [[TMP11]], i8 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000661; HSA-NEXT: ret void
662;
663; MESA-LABEL: @kern_realign_i8_i8_i8_i8(
664; MESA-NEXT: [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000665; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36
666; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
667; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
668; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
669; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36
670; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
671; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
672; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000673; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000674; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36
675; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
676; MESA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
677; MESA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
678; MESA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8
679; MESA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36
680; MESA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
681; MESA-NEXT: [[TMP9:%.*]] = load i32, i32 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
682; MESA-NEXT: [[TMP10:%.*]] = lshr i32 [[TMP9]], 24
683; MESA-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP10]] to i8
684; MESA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000685; MESA-NEXT: store volatile i8 [[TMP5]], i8 addrspace(1)* undef
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000686; MESA-NEXT: store volatile i8 [[TMP8]], i8 addrspace(1)* undef
687; MESA-NEXT: store volatile i8 [[TMP11]], i8 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000688; MESA-NEXT: ret void
689;
690 store volatile i8 %arg0, i8 addrspace(1)* undef
691 store volatile i8 %arg1, i8 addrspace(1)* undef
692 store volatile i8 %arg2, i8 addrspace(1)* undef
693 store volatile i8 %arg3, i8 addrspace(1)* undef
694 ret void
695}
696
697define amdgpu_kernel void @kern_realign_i8_v3i8(i8 %arg0, <3 x i8> %arg1) #0 {
698; HSA-LABEL: @kern_realign_i8_v3i8(
699; HSA-NEXT: [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000700; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT]], i64 0
701; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
702; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
703; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
704; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT]], i64 4
705; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
706; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
707; HSA-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i24
708; HSA-NEXT: [[ARG1_LOAD:%.*]] = bitcast i24 [[TMP4]] to <3 x i8>
709; HSA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000710; HSA-NEXT: store volatile <3 x i8> [[ARG1_LOAD]], <3 x i8> addrspace(1)* undef
711; HSA-NEXT: ret void
712;
713; MESA-LABEL: @kern_realign_i8_v3i8(
714; MESA-NEXT: [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000715; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT]], i64 36
716; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
717; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
718; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
719; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT]], i64 40
720; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
721; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 8, !invariant.load !0
722; MESA-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i24
723; MESA-NEXT: [[ARG1_LOAD:%.*]] = bitcast i24 [[TMP4]] to <3 x i8>
724; MESA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000725; MESA-NEXT: store volatile <3 x i8> [[ARG1_LOAD]], <3 x i8> addrspace(1)* undef
726; MESA-NEXT: ret void
727;
728 store volatile i8 %arg0, i8 addrspace(1)* undef
729 store volatile <3 x i8> %arg1, <3 x i8> addrspace(1)* undef
730 ret void
731}
732
733define amdgpu_kernel void @kern_realign_i8_i16(i8 %arg0, i16 %arg1) #0 {
734; HSA-LABEL: @kern_realign_i8_i16(
735; HSA-NEXT: [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000736; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT]], i64 0
737; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
738; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
739; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
740; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT]], i64 0
741; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
742; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
743; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 16
744; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
745; HSA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef
746; HSA-NEXT: store volatile i16 [[TMP5]], i16 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000747; HSA-NEXT: ret void
748;
749; MESA-LABEL: @kern_realign_i8_i16(
750; MESA-NEXT: [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000751; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT]], i64 36
752; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
753; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
754; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
755; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT]], i64 36
756; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
757; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
758; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 16
759; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
760; MESA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef
761; MESA-NEXT: store volatile i16 [[TMP5]], i16 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000762; MESA-NEXT: ret void
763;
764 store volatile i8 %arg0, i8 addrspace(1)* undef
765 store volatile i16 %arg1, i16 addrspace(1)* undef
766 ret void
767}
768
; Two i1 kernel arguments share one kernarg dword: the pass emits a single
; aligned i32 load per argument from the same base offset, then extracts
; %arg1 with lshr 8 + trunc. HSA prefix loads from segment offset 0 with
; align 16; MESA prefix adds a 36-byte offset and loads with align 4.
769define amdgpu_kernel void @kern_realign_i1_i1(i1 %arg0, i1 %arg1) #0 {
770; HSA-LABEL: @kern_realign_i1_i1(
771; HSA-NEXT: [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000772; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT]], i64 0
773; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
774; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
775; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
776; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT]], i64 0
777; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
778; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
779; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
780; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1
781; HSA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef
782; HSA-NEXT: store volatile i1 [[TMP5]], i1 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000783; HSA-NEXT: ret void
784;
785; MESA-LABEL: @kern_realign_i1_i1(
786; MESA-NEXT: [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000787; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT]], i64 36
788; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
789; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
790; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
791; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT]], i64 36
792; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
793; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
794; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000795; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000796; MESA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000797; MESA-NEXT: store volatile i1 [[TMP5]], i1 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000798; MESA-NEXT: ret void
799;
800 store volatile i1 %arg0, i1 addrspace(1)* undef
801 store volatile i1 %arg1, i1 addrspace(1)* undef
802 ret void
803}
804
; Three i1 arguments packed into one kernarg dword. Each argument re-loads
; the same dword; byte lanes are selected with lshr 8 and lshr 16 before
; trunc to i1. Offsets/alignment differ per OS prefix (HSA base 0 align 16,
; MESA base 36 align 4).
805define amdgpu_kernel void @kern_realign_i1_i1_i1(i1 %arg0, i1 %arg1, i1 %arg2) #0 {
806; HSA-LABEL: @kern_realign_i1_i1_i1(
807; HSA-NEXT: [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000808; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 0
809; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
810; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
811; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
812; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 0
813; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
814; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
815; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
816; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1
817; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 0
818; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
819; HSA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
820; HSA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000821; HSA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i1
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000822; HSA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef
823; HSA-NEXT: store volatile i1 [[TMP5]], i1 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000824; HSA-NEXT: store volatile i1 [[TMP8]], i1 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000825; HSA-NEXT: ret void
826;
827; MESA-LABEL: @kern_realign_i1_i1_i1(
828; MESA-NEXT: [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000829; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 36
830; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
831; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
832; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
833; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 36
834; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
835; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
836; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000837; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000838; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 36
839; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
840; MESA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
841; MESA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
842; MESA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i1
843; MESA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000844; MESA-NEXT: store volatile i1 [[TMP5]], i1 addrspace(1)* undef
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000845; MESA-NEXT: store volatile i1 [[TMP8]], i1 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000846; MESA-NEXT: ret void
847;
848 store volatile i1 %arg0, i1 addrspace(1)* undef
849 store volatile i1 %arg1, i1 addrspace(1)* undef
850 store volatile i1 %arg2, i1 addrspace(1)* undef
851 ret void
852}
853
; Four i1 arguments fill all four byte lanes of one kernarg dword; lanes are
; extracted with lshr by 8, 16 and 24 before trunc to i1. Same per-prefix
; base offset and alignment pattern as the other realign tests.
854define amdgpu_kernel void @kern_realign_i1_i1_i1_i1(i1 %arg0, i1 %arg1, i1 %arg2, i1 %arg3) #0 {
855; HSA-LABEL: @kern_realign_i1_i1_i1_i1(
856; HSA-NEXT: [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000857; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 0
858; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
859; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
860; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
861; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 0
862; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
863; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
864; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
865; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1
866; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 0
867; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
868; HSA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
869; HSA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000870; HSA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i1
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000871; HSA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 0
872; HSA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
873; HSA-NEXT: [[TMP9:%.*]] = load i32, i32 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
874; HSA-NEXT: [[TMP10:%.*]] = lshr i32 [[TMP9]], 24
875; HSA-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP10]] to i1
876; HSA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef
877; HSA-NEXT: store volatile i1 [[TMP5]], i1 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000878; HSA-NEXT: store volatile i1 [[TMP8]], i1 addrspace(1)* undef
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000879; HSA-NEXT: store volatile i1 [[TMP11]], i1 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000880; HSA-NEXT: ret void
881;
882; MESA-LABEL: @kern_realign_i1_i1_i1_i1(
883; MESA-NEXT: [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000884; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 36
885; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
886; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
887; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
888; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 36
889; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
890; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
891; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000892; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000893; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 36
894; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
895; MESA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
896; MESA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
897; MESA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i1
898; MESA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 36
899; MESA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
900; MESA-NEXT: [[TMP9:%.*]] = load i32, i32 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
901; MESA-NEXT: [[TMP10:%.*]] = lshr i32 [[TMP9]], 24
902; MESA-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP10]] to i1
903; MESA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000904; MESA-NEXT: store volatile i1 [[TMP5]], i1 addrspace(1)* undef
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000905; MESA-NEXT: store volatile i1 [[TMP8]], i1 addrspace(1)* undef
906; MESA-NEXT: store volatile i1 [[TMP11]], i1 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000907; MESA-NEXT: ret void
908;
909 store volatile i1 %arg0, i1 addrspace(1)* undef
910 store volatile i1 %arg1, i1 addrspace(1)* undef
911 store volatile i1 %arg2, i1 addrspace(1)* undef
912 store volatile i1 %arg3, i1 addrspace(1)* undef
913 ret void
914}
915
; Mixed i1 + <3 x i1> arguments: the vector does NOT share the first dword.
; It is placed in its own dword (offset 4 for HSA, 40 for MESA), loaded as
; i32, truncated to i3 and bitcast to <3 x i1>.
916define amdgpu_kernel void @kern_realign_i1_v3i1(i1 %arg0, <3 x i1> %arg1) #0 {
917; HSA-LABEL: @kern_realign_i1_v3i1(
918; HSA-NEXT: [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000919; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT]], i64 0
920; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
921; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
922; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
923; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT]], i64 4
924; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
925; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
926; HSA-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
927; HSA-NEXT: [[ARG1_LOAD:%.*]] = bitcast i3 [[TMP4]] to <3 x i1>
928; HSA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000929; HSA-NEXT: store volatile <3 x i1> [[ARG1_LOAD]], <3 x i1> addrspace(1)* undef
930; HSA-NEXT: ret void
931;
932; MESA-LABEL: @kern_realign_i1_v3i1(
933; MESA-NEXT: [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000934; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT]], i64 36
935; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
936; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
937; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
938; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT]], i64 40
939; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
940; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 8, !invariant.load !0
941; MESA-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
942; MESA-NEXT: [[ARG1_LOAD:%.*]] = bitcast i3 [[TMP4]] to <3 x i1>
943; MESA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000944; MESA-NEXT: store volatile <3 x i1> [[ARG1_LOAD]], <3 x i1> addrspace(1)* undef
945; MESA-NEXT: ret void
946;
947 store volatile i1 %arg0, i1 addrspace(1)* undef
948 store volatile <3 x i1> %arg1, <3 x i1> addrspace(1)* undef
949 ret void
950}
951
; i1 followed by i16 in the same kernarg dword: the i16 is aligned to the
; upper half, so it is extracted with lshr 16 + trunc to i16.
952define amdgpu_kernel void @kern_realign_i1_i16(i1 %arg0, i16 %arg1) #0 {
953; HSA-LABEL: @kern_realign_i1_i16(
954; HSA-NEXT: [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000955; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT]], i64 0
956; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
957; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
958; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
959; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT]], i64 0
960; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
961; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
962; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 16
963; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
964; HSA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef
965; HSA-NEXT: store volatile i16 [[TMP5]], i16 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000966; HSA-NEXT: ret void
967;
968; MESA-LABEL: @kern_realign_i1_i16(
969; MESA-NEXT: [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000970; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT]], i64 36
971; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
972; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
973; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1
974; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT]], i64 36
975; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
976; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
977; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 16
978; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
979; MESA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef
980; MESA-NEXT: store volatile i16 [[TMP5]], i16 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +0000981; MESA-NEXT: ret void
982;
983 store volatile i1 %arg0, i1 addrspace(1)* undef
984 store volatile i16 %arg1, i16 addrspace(1)* undef
985 ret void
986}
987
; Eight i8 arguments spanning two kernarg dwords (args 0-3 in the first,
; args 4-7 in the second at offset 4 / 40). The IR body never stores %arg4,
; and accordingly no ARG4 load is expected in the checks — only the used
; byte lanes are materialized (lshr 8/16/24 + trunc per lane).
988define amdgpu_kernel void @kern_realign_i8_i8_i8_i8_i8_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2, i8 %arg3, i8 %arg4, i8 %arg5, i8 %arg6, i8 %arg7) #0 {
989; HSA-LABEL: @kern_realign_i8_i8_i8_i8_i8_i8_i8_i8(
990; HSA-NEXT: [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +0000991; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0
992; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
993; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
994; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
995; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0
996; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
997; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
998; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
999; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
1000; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0
1001; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
1002; HSA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
1003; HSA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
Matt Arsenault8c4a3522018-06-26 19:10:00 +00001004; HSA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001005; HSA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0
1006; HSA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
1007; HSA-NEXT: [[TMP9:%.*]] = load i32, i32 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
1008; HSA-NEXT: [[TMP10:%.*]] = lshr i32 [[TMP9]], 24
1009; HSA-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP10]] to i8
1010; HSA-NEXT: [[ARG5_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 4
1011; HSA-NEXT: [[ARG5_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG5_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
1012; HSA-NEXT: [[TMP12:%.*]] = load i32, i32 addrspace(4)* [[ARG5_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
1013; HSA-NEXT: [[TMP13:%.*]] = lshr i32 [[TMP12]], 8
1014; HSA-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i8
1015; HSA-NEXT: [[ARG6_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 4
1016; HSA-NEXT: [[ARG6_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG6_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
1017; HSA-NEXT: [[TMP15:%.*]] = load i32, i32 addrspace(4)* [[ARG6_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
1018; HSA-NEXT: [[TMP16:%.*]] = lshr i32 [[TMP15]], 16
1019; HSA-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i8
1020; HSA-NEXT: [[ARG7_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 4
1021; HSA-NEXT: [[ARG7_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG7_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
1022; HSA-NEXT: [[TMP18:%.*]] = load i32, i32 addrspace(4)* [[ARG7_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
1023; HSA-NEXT: [[TMP19:%.*]] = lshr i32 [[TMP18]], 24
Matt Arsenault8c4a3522018-06-26 19:10:00 +00001024; HSA-NEXT: [[TMP20:%.*]] = trunc i32 [[TMP19]] to i8
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001025; HSA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef
1026; HSA-NEXT: store volatile i8 [[TMP5]], i8 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +00001027; HSA-NEXT: store volatile i8 [[TMP8]], i8 addrspace(1)* undef
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001028; HSA-NEXT: store volatile i8 [[TMP11]], i8 addrspace(1)* undef
1029; HSA-NEXT: store volatile i8 [[TMP14]], i8 addrspace(1)* undef
1030; HSA-NEXT: store volatile i8 [[TMP17]], i8 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +00001031; HSA-NEXT: store volatile i8 [[TMP20]], i8 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +00001032; HSA-NEXT: ret void
1033;
1034; MESA-LABEL: @kern_realign_i8_i8_i8_i8_i8_i8_i8_i8(
1035; MESA-NEXT: [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001036; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36
1037; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
1038; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
1039; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
1040; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36
1041; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
1042; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
1043; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
Matt Arsenault8c4a3522018-06-26 19:10:00 +00001044; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001045; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36
1046; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
1047; MESA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
1048; MESA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
1049; MESA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8
1050; MESA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36
1051; MESA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
1052; MESA-NEXT: [[TMP9:%.*]] = load i32, i32 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
1053; MESA-NEXT: [[TMP10:%.*]] = lshr i32 [[TMP9]], 24
1054; MESA-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP10]] to i8
1055; MESA-NEXT: [[ARG5_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 40
1056; MESA-NEXT: [[ARG5_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG5_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
1057; MESA-NEXT: [[TMP12:%.*]] = load i32, i32 addrspace(4)* [[ARG5_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 8, !invariant.load !0
1058; MESA-NEXT: [[TMP13:%.*]] = lshr i32 [[TMP12]], 8
1059; MESA-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i8
1060; MESA-NEXT: [[ARG6_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 40
1061; MESA-NEXT: [[ARG6_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG6_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
1062; MESA-NEXT: [[TMP15:%.*]] = load i32, i32 addrspace(4)* [[ARG6_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 8, !invariant.load !0
1063; MESA-NEXT: [[TMP16:%.*]] = lshr i32 [[TMP15]], 16
Matt Arsenault8c4a3522018-06-26 19:10:00 +00001064; MESA-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i8
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001065; MESA-NEXT: [[ARG7_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 40
1066; MESA-NEXT: [[ARG7_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG7_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
1067; MESA-NEXT: [[TMP18:%.*]] = load i32, i32 addrspace(4)* [[ARG7_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 8, !invariant.load !0
1068; MESA-NEXT: [[TMP19:%.*]] = lshr i32 [[TMP18]], 24
1069; MESA-NEXT: [[TMP20:%.*]] = trunc i32 [[TMP19]] to i8
1070; MESA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +00001071; MESA-NEXT: store volatile i8 [[TMP5]], i8 addrspace(1)* undef
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001072; MESA-NEXT: store volatile i8 [[TMP8]], i8 addrspace(1)* undef
1073; MESA-NEXT: store volatile i8 [[TMP11]], i8 addrspace(1)* undef
1074; MESA-NEXT: store volatile i8 [[TMP14]], i8 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +00001075; MESA-NEXT: store volatile i8 [[TMP17]], i8 addrspace(1)* undef
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001076; MESA-NEXT: store volatile i8 [[TMP20]], i8 addrspace(1)* undef
Matt Arsenault8c4a3522018-06-26 19:10:00 +00001077; MESA-NEXT: ret void
1078;
1079 store volatile i8 %arg0, i8 addrspace(1)* undef
1080 store volatile i8 %arg1, i8 addrspace(1)* undef
1081 store volatile i8 %arg2, i8 addrspace(1)* undef
1082 store volatile i8 %arg3, i8 addrspace(1)* undef
1083 store volatile i8 %arg5, i8 addrspace(1)* undef
1084 store volatile i8 %arg6, i8 addrspace(1)* undef
1085 store volatile i8 %arg7, i8 addrspace(1)* undef
1086 ret void
1087}
1088
; Two half arguments in one kernarg dword: each 16-bit lane is extracted
; (lshr 16 for the second), truncated to i16 and bitcast back to half.
1089define amdgpu_kernel void @kern_realign_f16_f16(half %arg0, half %arg1) #0 {
1090; HSA-LABEL: @kern_realign_f16_f16(
1091; HSA-NEXT: [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001092; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT]], i64 0
1093; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
1094; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
1095; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
1096; HSA-NEXT: [[ARG0_LOAD:%.*]] = bitcast i16 [[TMP2]] to half
1097; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT]], i64 0
1098; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
1099; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
1100; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 16
1101; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
1102; HSA-NEXT: [[ARG1_LOAD:%.*]] = bitcast i16 [[TMP5]] to half
Matt Arsenault8c4a3522018-06-26 19:10:00 +00001103; HSA-NEXT: store volatile half [[ARG0_LOAD]], half addrspace(1)* undef
1104; HSA-NEXT: store volatile half [[ARG1_LOAD]], half addrspace(1)* undef
1105; HSA-NEXT: ret void
1106;
1107; MESA-LABEL: @kern_realign_f16_f16(
1108; MESA-NEXT: [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001109; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT]], i64 36
1110; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
1111; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
1112; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
1113; MESA-NEXT: [[ARG0_LOAD:%.*]] = bitcast i16 [[TMP2]] to half
1114; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT]], i64 36
1115; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
1116; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
1117; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 16
Matt Arsenault8c4a3522018-06-26 19:10:00 +00001118; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001119; MESA-NEXT: [[ARG1_LOAD:%.*]] = bitcast i16 [[TMP5]] to half
Matt Arsenault8c4a3522018-06-26 19:10:00 +00001120; MESA-NEXT: store volatile half [[ARG0_LOAD]], half addrspace(1)* undef
1121; MESA-NEXT: store volatile half [[ARG1_LOAD]], half addrspace(1)* undef
1122; MESA-NEXT: ret void
1123;
1124 store volatile half %arg0, half addrspace(1)* undef
1125 store volatile half %arg1, half addrspace(1)* undef
1126 ret void
1127}
1128
1129define amdgpu_kernel void @kern_global_ptr(i8 addrspace(1)* %ptr) #0 {
1130; HSA-LABEL: @kern_global_ptr(
1131; HSA-NEXT: [[KERN_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001132; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_KERNARG_SEGMENT]], i64 0
1133; HSA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
1134; HSA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
Matt Arsenault8c4a3522018-06-26 19:10:00 +00001135; HSA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef
1136; HSA-NEXT: ret void
1137;
1138; MESA-LABEL: @kern_global_ptr(
1139; MESA-NEXT: [[KERN_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001140; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_KERNARG_SEGMENT]], i64 36
1141; MESA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
1142; MESA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
Matt Arsenault8c4a3522018-06-26 19:10:00 +00001143; MESA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef
1144; MESA-NEXT: ret void
1145;
1146 store volatile i8 addrspace(1)* %ptr, i8 addrspace(1)* addrspace(1)* undef
1147 ret void
1148}
1149
1150define amdgpu_kernel void @kern_global_ptr_dereferencable(i8 addrspace(1)* dereferenceable(42) %ptr) #0 {
1151; HSA-LABEL: @kern_global_ptr_dereferencable(
1152; HSA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001153; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT]], i64 0
1154; HSA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
1155; HSA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0, !dereferenceable !1
Matt Arsenault8c4a3522018-06-26 19:10:00 +00001156; HSA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef
1157; HSA-NEXT: ret void
1158;
1159; MESA-LABEL: @kern_global_ptr_dereferencable(
1160; MESA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001161; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT]], i64 36
1162; MESA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
Matt Arsenault72b0e382018-07-28 12:34:25 +00001163; MESA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0, !dereferenceable !1
Matt Arsenault8c4a3522018-06-26 19:10:00 +00001164; MESA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef
1165; MESA-NEXT: ret void
1166;
1167 store volatile i8 addrspace(1)* %ptr, i8 addrspace(1)* addrspace(1)* undef
1168 ret void
1169}
1170
1171define amdgpu_kernel void @kern_global_ptr_dereferencable_or_null(i8 addrspace(1)* dereferenceable_or_null(128) %ptr) #0 {
1172; HSA-LABEL: @kern_global_ptr_dereferencable_or_null(
1173; HSA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001174; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT]], i64 0
1175; HSA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
1176; HSA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0, !dereferenceable_or_null !2
Matt Arsenault8c4a3522018-06-26 19:10:00 +00001177; HSA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef
1178; HSA-NEXT: ret void
1179;
1180; MESA-LABEL: @kern_global_ptr_dereferencable_or_null(
1181; MESA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001182; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT]], i64 36
1183; MESA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
Matt Arsenault72b0e382018-07-28 12:34:25 +00001184; MESA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0, !dereferenceable_or_null !2
Matt Arsenault8c4a3522018-06-26 19:10:00 +00001185; MESA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef
1186; MESA-NEXT: ret void
1187;
1188 store volatile i8 addrspace(1)* %ptr, i8 addrspace(1)* addrspace(1)* undef
1189 ret void
1190}
1191
1192define amdgpu_kernel void @kern_nonnull_global_ptr(i8 addrspace(1)* nonnull %ptr) #0 {
1193; HSA-LABEL: @kern_nonnull_global_ptr(
1194; HSA-NEXT: [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001195; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT]], i64 0
1196; HSA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
1197; HSA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0, !nonnull !0
Matt Arsenault8c4a3522018-06-26 19:10:00 +00001198; HSA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef
1199; HSA-NEXT: ret void
1200;
1201; MESA-LABEL: @kern_nonnull_global_ptr(
1202; MESA-NEXT: [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001203; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT]], i64 36
1204; MESA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
1205; MESA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0, !nonnull !0
Matt Arsenault8c4a3522018-06-26 19:10:00 +00001206; MESA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef
1207; MESA-NEXT: ret void
1208;
1209 store volatile i8 addrspace(1)* %ptr, i8 addrspace(1)* addrspace(1)* undef
1210 ret void
1211}
1212
1213define amdgpu_kernel void @kern_align32_global_ptr(i8 addrspace(1)* align 1024 %ptr) #0 {
1214; HSA-LABEL: @kern_align32_global_ptr(
1215; HSA-NEXT: [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001216; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT]], i64 0
1217; HSA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
1218; HSA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0, !align !3
Matt Arsenault8c4a3522018-06-26 19:10:00 +00001219; HSA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef
1220; HSA-NEXT: ret void
1221;
1222; MESA-LABEL: @kern_align32_global_ptr(
1223; MESA-NEXT: [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001224; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT]], i64 36
1225; MESA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
Matt Arsenault72b0e382018-07-28 12:34:25 +00001226; MESA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0, !align !3
Matt Arsenault8c4a3522018-06-26 19:10:00 +00001227; MESA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef
1228; MESA-NEXT: ret void
1229;
1230 store volatile i8 addrspace(1)* %ptr, i8 addrspace(1)* addrspace(1)* undef
1231 ret void
1232}
1233
1234define amdgpu_kernel void @kern_noalias_global_ptr(i8 addrspace(1)* noalias %ptr) #0 {
1235; HSA-LABEL: @kern_noalias_global_ptr(
1236; HSA-NEXT: [[KERN_NOALIAS_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenault8c4a3522018-06-26 19:10:00 +00001237; HSA-NEXT: store volatile i8 addrspace(1)* [[PTR:%.*]], i8 addrspace(1)* addrspace(1)* undef
1238; HSA-NEXT: ret void
1239;
1240; MESA-LABEL: @kern_noalias_global_ptr(
1241; MESA-NEXT: [[KERN_NOALIAS_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenault8c4a3522018-06-26 19:10:00 +00001242; MESA-NEXT: store volatile i8 addrspace(1)* [[PTR:%.*]], i8 addrspace(1)* addrspace(1)* undef
1243; MESA-NEXT: ret void
1244;
1245 store volatile i8 addrspace(1)* %ptr, i8 addrspace(1)* addrspace(1)* undef
1246 ret void
1247}
1248
1249define amdgpu_kernel void @kern_noalias_global_ptr_x2(i8 addrspace(1)* noalias %ptr0, i8 addrspace(1)* noalias %ptr1) #0 {
1250; HSA-LABEL: @kern_noalias_global_ptr_x2(
1251; HSA-NEXT: [[KERN_NOALIAS_GLOBAL_PTR_X2_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(16) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenault8c4a3522018-06-26 19:10:00 +00001252; HSA-NEXT: store volatile i8 addrspace(1)* [[PTR0:%.*]], i8 addrspace(1)* addrspace(1)* undef
1253; HSA-NEXT: store volatile i8 addrspace(1)* [[PTR1:%.*]], i8 addrspace(1)* addrspace(1)* undef
1254; HSA-NEXT: ret void
1255;
1256; MESA-LABEL: @kern_noalias_global_ptr_x2(
1257; MESA-NEXT: [[KERN_NOALIAS_GLOBAL_PTR_X2_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(52) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenault8c4a3522018-06-26 19:10:00 +00001258; MESA-NEXT: store volatile i8 addrspace(1)* [[PTR0:%.*]], i8 addrspace(1)* addrspace(1)* undef
1259; MESA-NEXT: store volatile i8 addrspace(1)* [[PTR1:%.*]], i8 addrspace(1)* addrspace(1)* undef
1260; MESA-NEXT: ret void
1261;
1262 store volatile i8 addrspace(1)* %ptr0, i8 addrspace(1)* addrspace(1)* undef
1263 store volatile i8 addrspace(1)* %ptr1, i8 addrspace(1)* addrspace(1)* undef
1264 ret void
1265}
1266
Matt Arsenault513e0c02018-06-28 10:18:11 +00001267define amdgpu_kernel void @struct_i8_i8_arg({i8, i8} %in) #0 {
1268; HSA-LABEL: @struct_i8_i8_arg(
1269; HSA-NEXT: entry:
1270; HSA-NEXT: [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001271; HSA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT]], i64 0
1272; HSA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to { i8, i8 } addrspace(4)*
1273; HSA-NEXT: [[IN_LOAD:%.*]] = load { i8, i8 }, { i8, i8 } addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
Matt Arsenault513e0c02018-06-28 10:18:11 +00001274; HSA-NEXT: [[ELT0:%.*]] = extractvalue { i8, i8 } [[IN_LOAD]], 0
1275; HSA-NEXT: [[ELT1:%.*]] = extractvalue { i8, i8 } [[IN_LOAD]], 1
1276; HSA-NEXT: store volatile i8 [[ELT0]], i8 addrspace(1)* null, align 4
1277; HSA-NEXT: store volatile i8 [[ELT1]], i8 addrspace(1)* null, align 4
1278; HSA-NEXT: ret void
1279;
1280; MESA-LABEL: @struct_i8_i8_arg(
1281; MESA-NEXT: entry:
1282; MESA-NEXT: [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001283; MESA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT]], i64 36
1284; MESA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to { i8, i8 } addrspace(4)*
1285; MESA-NEXT: [[IN_LOAD:%.*]] = load { i8, i8 }, { i8, i8 } addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
Matt Arsenault513e0c02018-06-28 10:18:11 +00001286; MESA-NEXT: [[ELT0:%.*]] = extractvalue { i8, i8 } [[IN_LOAD]], 0
1287; MESA-NEXT: [[ELT1:%.*]] = extractvalue { i8, i8 } [[IN_LOAD]], 1
1288; MESA-NEXT: store volatile i8 [[ELT0]], i8 addrspace(1)* null, align 4
1289; MESA-NEXT: store volatile i8 [[ELT1]], i8 addrspace(1)* null, align 4
1290; MESA-NEXT: ret void
1291;
1292entry:
1293 %elt0 = extractvalue {i8, i8} %in, 0
1294 %elt1 = extractvalue {i8, i8} %in, 1
1295 store volatile i8 %elt0, i8 addrspace(1)* null, align 4
1296 store volatile i8 %elt1, i8 addrspace(1)* null, align 4
1297 ret void
1298}
1299
1300define amdgpu_kernel void @struct_i8_i16_arg({i8, i16} %in) #0 {
1301; HSA-LABEL: @struct_i8_i16_arg(
1302; HSA-NEXT: entry:
1303; HSA-NEXT: [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001304; HSA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT]], i64 0
1305; HSA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to { i8, i16 } addrspace(4)*
1306; HSA-NEXT: [[IN_LOAD:%.*]] = load { i8, i16 }, { i8, i16 } addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
Matt Arsenault513e0c02018-06-28 10:18:11 +00001307; HSA-NEXT: [[ELT0:%.*]] = extractvalue { i8, i16 } [[IN_LOAD]], 0
1308; HSA-NEXT: [[ELT1:%.*]] = extractvalue { i8, i16 } [[IN_LOAD]], 1
1309; HSA-NEXT: store volatile i8 [[ELT0]], i8 addrspace(1)* null, align 4
1310; HSA-NEXT: store volatile i16 [[ELT1]], i16 addrspace(1)* null, align 4
1311; HSA-NEXT: ret void
1312;
1313; MESA-LABEL: @struct_i8_i16_arg(
1314; MESA-NEXT: entry:
1315; MESA-NEXT: [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001316; MESA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT]], i64 36
1317; MESA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to { i8, i16 } addrspace(4)*
1318; MESA-NEXT: [[IN_LOAD:%.*]] = load { i8, i16 }, { i8, i16 } addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
Matt Arsenault513e0c02018-06-28 10:18:11 +00001319; MESA-NEXT: [[ELT0:%.*]] = extractvalue { i8, i16 } [[IN_LOAD]], 0
1320; MESA-NEXT: [[ELT1:%.*]] = extractvalue { i8, i16 } [[IN_LOAD]], 1
1321; MESA-NEXT: store volatile i8 [[ELT0]], i8 addrspace(1)* null, align 4
1322; MESA-NEXT: store volatile i16 [[ELT1]], i16 addrspace(1)* null, align 4
1323; MESA-NEXT: ret void
1324;
1325entry:
1326 %elt0 = extractvalue {i8, i16} %in, 0
1327 %elt1 = extractvalue {i8, i16} %in, 1
1328 store volatile i8 %elt0, i8 addrspace(1)* null, align 4
1329 store volatile i16 %elt1, i16 addrspace(1)* null, align 4
1330 ret void
1331}
1332
1333define amdgpu_kernel void @array_2xi8_arg([2 x i8] %in) #0 {
1334; HSA-LABEL: @array_2xi8_arg(
1335; HSA-NEXT: entry:
1336; HSA-NEXT: [[ARRAY_2XI8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001337; HSA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[ARRAY_2XI8_ARG_KERNARG_SEGMENT]], i64 0
1338; HSA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to [2 x i8] addrspace(4)*
1339; HSA-NEXT: [[IN_LOAD:%.*]] = load [2 x i8], [2 x i8] addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
Matt Arsenault513e0c02018-06-28 10:18:11 +00001340; HSA-NEXT: [[ELT0:%.*]] = extractvalue [2 x i8] [[IN_LOAD]], 0
1341; HSA-NEXT: [[ELT1:%.*]] = extractvalue [2 x i8] [[IN_LOAD]], 1
1342; HSA-NEXT: store volatile i8 [[ELT0]], i8 addrspace(1)* null, align 4
1343; HSA-NEXT: store volatile i8 [[ELT1]], i8 addrspace(1)* null, align 4
1344; HSA-NEXT: ret void
1345;
1346; MESA-LABEL: @array_2xi8_arg(
1347; MESA-NEXT: entry:
1348; MESA-NEXT: [[ARRAY_2XI8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001349; MESA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[ARRAY_2XI8_ARG_KERNARG_SEGMENT]], i64 36
1350; MESA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to [2 x i8] addrspace(4)*
1351; MESA-NEXT: [[IN_LOAD:%.*]] = load [2 x i8], [2 x i8] addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
Matt Arsenault513e0c02018-06-28 10:18:11 +00001352; MESA-NEXT: [[ELT0:%.*]] = extractvalue [2 x i8] [[IN_LOAD]], 0
1353; MESA-NEXT: [[ELT1:%.*]] = extractvalue [2 x i8] [[IN_LOAD]], 1
1354; MESA-NEXT: store volatile i8 [[ELT0]], i8 addrspace(1)* null, align 4
1355; MESA-NEXT: store volatile i8 [[ELT1]], i8 addrspace(1)* null, align 4
1356; MESA-NEXT: ret void
1357;
1358entry:
1359 %elt0 = extractvalue [2 x i8] %in, 0
1360 %elt1 = extractvalue [2 x i8] %in, 1
1361 store volatile i8 %elt0, i8 addrspace(1)* null, align 4
1362 store volatile i8 %elt1, i8 addrspace(1)* null, align 4
1363 ret void
1364}
1365
1366define amdgpu_kernel void @array_2xi1_arg([2 x i1] %in) #0 {
1367; HSA-LABEL: @array_2xi1_arg(
1368; HSA-NEXT: entry:
1369; HSA-NEXT: [[ARRAY_2XI1_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001370; HSA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[ARRAY_2XI1_ARG_KERNARG_SEGMENT]], i64 0
1371; HSA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to [2 x i1] addrspace(4)*
1372; HSA-NEXT: [[IN_LOAD:%.*]] = load [2 x i1], [2 x i1] addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
Matt Arsenault513e0c02018-06-28 10:18:11 +00001373; HSA-NEXT: [[ELT0:%.*]] = extractvalue [2 x i1] [[IN_LOAD]], 0
1374; HSA-NEXT: [[ELT1:%.*]] = extractvalue [2 x i1] [[IN_LOAD]], 1
1375; HSA-NEXT: store volatile i1 [[ELT0]], i1 addrspace(1)* null, align 4
1376; HSA-NEXT: store volatile i1 [[ELT1]], i1 addrspace(1)* null, align 4
1377; HSA-NEXT: ret void
1378;
1379; MESA-LABEL: @array_2xi1_arg(
1380; MESA-NEXT: entry:
1381; MESA-NEXT: [[ARRAY_2XI1_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001382; MESA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[ARRAY_2XI1_ARG_KERNARG_SEGMENT]], i64 36
1383; MESA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to [2 x i1] addrspace(4)*
1384; MESA-NEXT: [[IN_LOAD:%.*]] = load [2 x i1], [2 x i1] addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
Matt Arsenault513e0c02018-06-28 10:18:11 +00001385; MESA-NEXT: [[ELT0:%.*]] = extractvalue [2 x i1] [[IN_LOAD]], 0
1386; MESA-NEXT: [[ELT1:%.*]] = extractvalue [2 x i1] [[IN_LOAD]], 1
1387; MESA-NEXT: store volatile i1 [[ELT0]], i1 addrspace(1)* null, align 4
1388; MESA-NEXT: store volatile i1 [[ELT1]], i1 addrspace(1)* null, align 4
1389; MESA-NEXT: ret void
1390;
1391entry:
1392 %elt0 = extractvalue [2 x i1] %in, 0
1393 %elt1 = extractvalue [2 x i1] %in, 1
1394 store volatile i1 %elt0, i1 addrspace(1)* null, align 4
1395 store volatile i1 %elt1, i1 addrspace(1)* null, align 4
1396 ret void
1397}
1398
1399define amdgpu_kernel void @only_empty_struct({} %empty) #0 {
1400; HSA-LABEL: @only_empty_struct(
1401; HSA-NEXT: ret void
1402;
1403; MESA-LABEL: @only_empty_struct(
1404; MESA-NEXT: [[ONLY_EMPTY_STRUCT_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(36) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenault513e0c02018-06-28 10:18:11 +00001405; MESA-NEXT: ret void
1406;
1407 ret void
1408}
1409
1410define amdgpu_kernel void @empty_struct_with_other({} %empty, i32 %arg1) #0 {
1411; HSA-LABEL: @empty_struct_with_other(
1412; HSA-NEXT: [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001413; HSA-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT]], i64 0
1414; HSA-NEXT: [[ARG1_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET]] to i32 addrspace(4)*
1415; HSA-NEXT: [[ARG1_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
Matt Arsenault513e0c02018-06-28 10:18:11 +00001416; HSA-NEXT: store i32 [[ARG1_LOAD]], i32 addrspace(1)* undef
1417; HSA-NEXT: ret void
1418;
1419; MESA-LABEL: @empty_struct_with_other(
1420; MESA-NEXT: [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
Matt Arsenaultf5be3ad2018-06-29 17:31:42 +00001421; MESA-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT]], i64 36
1422; MESA-NEXT: [[ARG1_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET]] to i32 addrspace(4)*
1423; MESA-NEXT: [[ARG1_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
Matt Arsenault513e0c02018-06-28 10:18:11 +00001424; MESA-NEXT: store i32 [[ARG1_LOAD]], i32 addrspace(1)* undef
1425; MESA-NEXT: ret void
1426;
1427 store i32 %arg1, i32 addrspace(1)* undef
1428 ret void
1429}
1430
Matt Arsenault8c4a3522018-06-26 19:10:00 +00001431attributes #0 = { nounwind "target-cpu"="kaveri" }
1432attributes #1 = { nounwind "target-cpu"="kaveri" "amdgpu-implicitarg-num-bytes"="40" }
1433attributes #2 = { nounwind "target-cpu"="tahiti" }
1434
Matt Arsenault72b0e382018-07-28 12:34:25 +00001435; GCN: 0 = !{}
1436; GCN: !1 = !{i64 42}
1437; GCN: !2 = !{i64 128}
1438; GCN: !3 = !{i64 1024}