// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -S -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown-opencl -S -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s

// Enable the optional double- and half-precision extensions; the doubleN
// and halfN typedefs below use those element types.
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#pragma OPENCL EXTENSION cl_khr_fp16 : enable

// Vector typedefs with 2, 3, 4, 8 and 16 lanes for every element type
// exercised by the kernels in this file.
typedef char __attribute__((ext_vector_type(2))) char2;
typedef char __attribute__((ext_vector_type(3))) char3;
typedef char __attribute__((ext_vector_type(4))) char4;
typedef char __attribute__((ext_vector_type(8))) char8;
typedef char __attribute__((ext_vector_type(16))) char16;

typedef short __attribute__((ext_vector_type(2))) short2;
typedef short __attribute__((ext_vector_type(3))) short3;
typedef short __attribute__((ext_vector_type(4))) short4;
typedef short __attribute__((ext_vector_type(8))) short8;
typedef short __attribute__((ext_vector_type(16))) short16;

typedef int __attribute__((ext_vector_type(2))) int2;
typedef int __attribute__((ext_vector_type(3))) int3;
typedef int __attribute__((ext_vector_type(4))) int4;
typedef int __attribute__((ext_vector_type(8))) int8;
typedef int __attribute__((ext_vector_type(16))) int16;

typedef long __attribute__((ext_vector_type(2))) long2;
typedef long __attribute__((ext_vector_type(3))) long3;
typedef long __attribute__((ext_vector_type(4))) long4;
typedef long __attribute__((ext_vector_type(8))) long8;
typedef long __attribute__((ext_vector_type(16))) long16;

typedef half __attribute__((ext_vector_type(2))) half2;
typedef half __attribute__((ext_vector_type(3))) half3;
typedef half __attribute__((ext_vector_type(4))) half4;
typedef half __attribute__((ext_vector_type(8))) half8;
typedef half __attribute__((ext_vector_type(16))) half16;

typedef float __attribute__((ext_vector_type(2))) float2;
typedef float __attribute__((ext_vector_type(3))) float3;
typedef float __attribute__((ext_vector_type(4))) float4;
typedef float __attribute__((ext_vector_type(8))) float8;
typedef float __attribute__((ext_vector_type(16))) float16;

typedef double __attribute__((ext_vector_type(2))) double2;
typedef double __attribute__((ext_vector_type(3))) double3;
typedef double __attribute__((ext_vector_type(4))) double4;
typedef double __attribute__((ext_vector_type(8))) double8;
typedef double __attribute__((ext_vector_type(16))) double16;

// CHECK: @local_memory_alignment_global.lds_i8 = internal addrspace(3) global [4 x i8] undef, align 1
// CHECK: @local_memory_alignment_global.lds_v2i8 = internal addrspace(3) global [4 x <2 x i8>] undef, align 2
// CHECK: @local_memory_alignment_global.lds_v3i8 = internal addrspace(3) global [4 x <3 x i8>] undef, align 4
// CHECK: @local_memory_alignment_global.lds_v4i8 = internal addrspace(3) global [4 x <4 x i8>] undef, align 4
// CHECK: @local_memory_alignment_global.lds_v8i8 = internal addrspace(3) global [4 x <8 x i8>] undef, align 8
// CHECK: @local_memory_alignment_global.lds_v16i8 = internal addrspace(3) global [4 x <16 x i8>] undef, align 16
// CHECK: @local_memory_alignment_global.lds_i16 = internal addrspace(3) global [4 x i16] undef, align 2
// CHECK: @local_memory_alignment_global.lds_v2i16 = internal addrspace(3) global [4 x <2 x i16>] undef, align 4
// CHECK: @local_memory_alignment_global.lds_v3i16 = internal addrspace(3) global [4 x <3 x i16>] undef, align 8
// CHECK: @local_memory_alignment_global.lds_v4i16 = internal addrspace(3) global [4 x <4 x i16>] undef, align 8
// CHECK: @local_memory_alignment_global.lds_v8i16 = internal addrspace(3) global [4 x <8 x i16>] undef, align 16
// CHECK: @local_memory_alignment_global.lds_v16i16 = internal addrspace(3) global [4 x <16 x i16>] undef, align 32
// CHECK: @local_memory_alignment_global.lds_i32 = internal addrspace(3) global [4 x i32] undef, align 4
// CHECK: @local_memory_alignment_global.lds_v2i32 = internal addrspace(3) global [4 x <2 x i32>] undef, align 8
// CHECK: @local_memory_alignment_global.lds_v3i32 = internal addrspace(3) global [4 x <3 x i32>] undef, align 16
// CHECK: @local_memory_alignment_global.lds_v4i32 = internal addrspace(3) global [4 x <4 x i32>] undef, align 16
// CHECK: @local_memory_alignment_global.lds_v8i32 = internal addrspace(3) global [4 x <8 x i32>] undef, align 32
// CHECK: @local_memory_alignment_global.lds_v16i32 = internal addrspace(3) global [4 x <16 x i32>] undef, align 64
// CHECK: @local_memory_alignment_global.lds_i64 = internal addrspace(3) global [4 x i64] undef, align 8
// CHECK: @local_memory_alignment_global.lds_v2i64 = internal addrspace(3) global [4 x <2 x i64>] undef, align 16
// CHECK: @local_memory_alignment_global.lds_v3i64 = internal addrspace(3) global [4 x <3 x i64>] undef, align 32
// CHECK: @local_memory_alignment_global.lds_v4i64 = internal addrspace(3) global [4 x <4 x i64>] undef, align 32
// CHECK: @local_memory_alignment_global.lds_v8i64 = internal addrspace(3) global [4 x <8 x i64>] undef, align 64
// CHECK: @local_memory_alignment_global.lds_v16i64 = internal addrspace(3) global [4 x <16 x i64>] undef, align 128
// CHECK: @local_memory_alignment_global.lds_f16 = internal addrspace(3) global [4 x half] undef, align 2
// CHECK: @local_memory_alignment_global.lds_v2f16 = internal addrspace(3) global [4 x <2 x half>] undef, align 4
// CHECK: @local_memory_alignment_global.lds_v3f16 = internal addrspace(3) global [4 x <3 x half>] undef, align 8
// CHECK: @local_memory_alignment_global.lds_v4f16 = internal addrspace(3) global [4 x <4 x half>] undef, align 8
// CHECK: @local_memory_alignment_global.lds_v8f16 = internal addrspace(3) global [4 x <8 x half>] undef, align 16
// CHECK: @local_memory_alignment_global.lds_v16f16 = internal addrspace(3) global [4 x <16 x half>] undef, align 32
// CHECK: @local_memory_alignment_global.lds_f32 = internal addrspace(3) global [4 x float] undef, align 4
// CHECK: @local_memory_alignment_global.lds_v2f32 = internal addrspace(3) global [4 x <2 x float>] undef, align 8
// CHECK: @local_memory_alignment_global.lds_v3f32 = internal addrspace(3) global [4 x <3 x float>] undef, align 16
// CHECK: @local_memory_alignment_global.lds_v4f32 = internal addrspace(3) global [4 x <4 x float>] undef, align 16
// CHECK: @local_memory_alignment_global.lds_v8f32 = internal addrspace(3) global [4 x <8 x float>] undef, align 32
// CHECK: @local_memory_alignment_global.lds_v16f32 = internal addrspace(3) global [4 x <16 x float>] undef, align 64
// CHECK: @local_memory_alignment_global.lds_f64 = internal addrspace(3) global [4 x double] undef, align 8
// CHECK: @local_memory_alignment_global.lds_v2f64 = internal addrspace(3) global [4 x <2 x double>] undef, align 16
// CHECK: @local_memory_alignment_global.lds_v3f64 = internal addrspace(3) global [4 x <3 x double>] undef, align 32
// CHECK: @local_memory_alignment_global.lds_v4f64 = internal addrspace(3) global [4 x <4 x double>] undef, align 32
// CHECK: @local_memory_alignment_global.lds_v8f64 = internal addrspace(3) global [4 x <8 x double>] undef, align 64
// CHECK: @local_memory_alignment_global.lds_v16f64 = internal addrspace(3) global [4 x <16 x double>] undef, align 128


// CHECK-LABEL: @local_memory_alignment_global(
// CHECK: store volatile i8 0, i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @local_memory_alignment_global.lds_i8, i32 0, i32 0), align 1
// CHECK: store volatile <2 x i8> zeroinitializer, <2 x i8> addrspace(3)* getelementptr inbounds ([4 x <2 x i8>], [4 x <2 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v2i8, i32 0, i32 0), align 2
// CHECK: store volatile <4 x i8> <i8 0, i8 0, i8 0, i8 undef>, <4 x i8> addrspace(3)* bitcast ([4 x <3 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v3i8 to <4 x i8> addrspace(3)*), align 4
// CHECK: store volatile <4 x i8> zeroinitializer, <4 x i8> addrspace(3)* getelementptr inbounds ([4 x <4 x i8>], [4 x <4 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v4i8, i32 0, i32 0), align 4
// CHECK: store volatile <8 x i8> zeroinitializer, <8 x i8> addrspace(3)* getelementptr inbounds ([4 x <8 x i8>], [4 x <8 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v8i8, i32 0, i32 0), align 8
// CHECK: store volatile <16 x i8> zeroinitializer, <16 x i8> addrspace(3)* getelementptr inbounds ([4 x <16 x i8>], [4 x <16 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v16i8, i32 0, i32 0), align 16
// CHECK: store volatile i16 0, i16 addrspace(3)* getelementptr inbounds ([4 x i16], [4 x i16] addrspace(3)* @local_memory_alignment_global.lds_i16, i32 0, i32 0), align 2
// CHECK: store volatile <2 x i16> zeroinitializer, <2 x i16> addrspace(3)* getelementptr inbounds ([4 x <2 x i16>], [4 x <2 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v2i16, i32 0, i32 0), align 4
// CHECK: store volatile <4 x i16> <i16 0, i16 0, i16 0, i16 undef>, <4 x i16> addrspace(3)* bitcast ([4 x <3 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v3i16 to <4 x i16> addrspace(3)*), align 8
// CHECK: store volatile <4 x i16> zeroinitializer, <4 x i16> addrspace(3)* getelementptr inbounds ([4 x <4 x i16>], [4 x <4 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v4i16, i32 0, i32 0), align 8
// CHECK: store volatile <8 x i16> zeroinitializer, <8 x i16> addrspace(3)* getelementptr inbounds ([4 x <8 x i16>], [4 x <8 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v8i16, i32 0, i32 0), align 16
// CHECK: store volatile <16 x i16> zeroinitializer, <16 x i16> addrspace(3)* getelementptr inbounds ([4 x <16 x i16>], [4 x <16 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v16i16, i32 0, i32 0), align 32
// CHECK: store volatile i32 0, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @local_memory_alignment_global.lds_i32, i32 0, i32 0), align 4
// CHECK: store volatile <2 x i32> zeroinitializer, <2 x i32> addrspace(3)* getelementptr inbounds ([4 x <2 x i32>], [4 x <2 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v2i32, i32 0, i32 0), align 8
// CHECK: store volatile <4 x i32> <i32 0, i32 0, i32 0, i32 undef>, <4 x i32> addrspace(3)* bitcast ([4 x <3 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v3i32 to <4 x i32> addrspace(3)*), align 16
// CHECK: store volatile <4 x i32> zeroinitializer, <4 x i32> addrspace(3)* getelementptr inbounds ([4 x <4 x i32>], [4 x <4 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v4i32, i32 0, i32 0), align 16
// CHECK: store volatile <8 x i32> zeroinitializer, <8 x i32> addrspace(3)* getelementptr inbounds ([4 x <8 x i32>], [4 x <8 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v8i32, i32 0, i32 0), align 32
// CHECK: store volatile <16 x i32> zeroinitializer, <16 x i32> addrspace(3)* getelementptr inbounds ([4 x <16 x i32>], [4 x <16 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v16i32, i32 0, i32 0), align 64
// CHECK: store volatile i64 0, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @local_memory_alignment_global.lds_i64, i32 0, i32 0), align 8
// CHECK: store volatile <2 x i64> zeroinitializer, <2 x i64> addrspace(3)* getelementptr inbounds ([4 x <2 x i64>], [4 x <2 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v2i64, i32 0, i32 0), align 16
// CHECK: store volatile <4 x i64> <i64 0, i64 0, i64 0, i64 undef>, <4 x i64> addrspace(3)* bitcast ([4 x <3 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v3i64 to <4 x i64> addrspace(3)*), align 32
// CHECK: store volatile <4 x i64> zeroinitializer, <4 x i64> addrspace(3)* getelementptr inbounds ([4 x <4 x i64>], [4 x <4 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v4i64, i32 0, i32 0), align 32
// CHECK: store volatile <8 x i64> zeroinitializer, <8 x i64> addrspace(3)* getelementptr inbounds ([4 x <8 x i64>], [4 x <8 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v8i64, i32 0, i32 0), align 64
// CHECK: store volatile <16 x i64> zeroinitializer, <16 x i64> addrspace(3)* getelementptr inbounds ([4 x <16 x i64>], [4 x <16 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v16i64, i32 0, i32 0), align 128
// CHECK: store volatile half 0xH0000, half addrspace(3)* getelementptr inbounds ([4 x half], [4 x half] addrspace(3)* @local_memory_alignment_global.lds_f16, i32 0, i32 0), align 2
// CHECK: store volatile <2 x half> zeroinitializer, <2 x half> addrspace(3)* getelementptr inbounds ([4 x <2 x half>], [4 x <2 x half>] addrspace(3)* @local_memory_alignment_global.lds_v2f16, i32 0, i32 0), align 4
// CHECK: store volatile <4 x half> <half 0xH0000, half 0xH0000, half 0xH0000, half undef>, <4 x half> addrspace(3)* bitcast ([4 x <3 x half>] addrspace(3)* @local_memory_alignment_global.lds_v3f16 to <4 x half> addrspace(3)*), align 8
// CHECK: store volatile <4 x half> zeroinitializer, <4 x half> addrspace(3)* getelementptr inbounds ([4 x <4 x half>], [4 x <4 x half>] addrspace(3)* @local_memory_alignment_global.lds_v4f16, i32 0, i32 0), align 8
// CHECK: store volatile <8 x half> zeroinitializer, <8 x half> addrspace(3)* getelementptr inbounds ([4 x <8 x half>], [4 x <8 x half>] addrspace(3)* @local_memory_alignment_global.lds_v8f16, i32 0, i32 0), align 16
// CHECK: store volatile <16 x half> zeroinitializer, <16 x half> addrspace(3)* getelementptr inbounds ([4 x <16 x half>], [4 x <16 x half>] addrspace(3)* @local_memory_alignment_global.lds_v16f16, i32 0, i32 0), align 32
// CHECK: store volatile float 0.000000e+00, float addrspace(3)* getelementptr inbounds ([4 x float], [4 x float] addrspace(3)* @local_memory_alignment_global.lds_f32, i32 0, i32 0), align 4
// CHECK: store volatile <2 x float> zeroinitializer, <2 x float> addrspace(3)* getelementptr inbounds ([4 x <2 x float>], [4 x <2 x float>] addrspace(3)* @local_memory_alignment_global.lds_v2f32, i32 0, i32 0), align 8
// CHECK: store volatile <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef>, <4 x float> addrspace(3)* bitcast ([4 x <3 x float>] addrspace(3)* @local_memory_alignment_global.lds_v3f32 to <4 x float> addrspace(3)*), align 16
// CHECK: store volatile <4 x float> zeroinitializer, <4 x float> addrspace(3)* getelementptr inbounds ([4 x <4 x float>], [4 x <4 x float>] addrspace(3)* @local_memory_alignment_global.lds_v4f32, i32 0, i32 0), align 16
// CHECK: store volatile <8 x float> zeroinitializer, <8 x float> addrspace(3)* getelementptr inbounds ([4 x <8 x float>], [4 x <8 x float>] addrspace(3)* @local_memory_alignment_global.lds_v8f32, i32 0, i32 0), align 32
// CHECK: store volatile <16 x float> zeroinitializer, <16 x float> addrspace(3)* getelementptr inbounds ([4 x <16 x float>], [4 x <16 x float>] addrspace(3)* @local_memory_alignment_global.lds_v16f32, i32 0, i32 0), align 64
// CHECK: store volatile double 0.000000e+00, double addrspace(3)* getelementptr inbounds ([4 x double], [4 x double] addrspace(3)* @local_memory_alignment_global.lds_f64, i32 0, i32 0), align 8
// CHECK: store volatile <2 x double> zeroinitializer, <2 x double> addrspace(3)* getelementptr inbounds ([4 x <2 x double>], [4 x <2 x double>] addrspace(3)* @local_memory_alignment_global.lds_v2f64, i32 0, i32 0), align 16
// CHECK: store volatile <4 x double> <double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double undef>, <4 x double> addrspace(3)* bitcast ([4 x <3 x double>] addrspace(3)* @local_memory_alignment_global.lds_v3f64 to <4 x double> addrspace(3)*), align 32
// CHECK: store volatile <4 x double> zeroinitializer, <4 x double> addrspace(3)* getelementptr inbounds ([4 x <4 x double>], [4 x <4 x double>] addrspace(3)* @local_memory_alignment_global.lds_v4f64, i32 0, i32 0), align 32
// CHECK: store volatile <8 x double> zeroinitializer, <8 x double> addrspace(3)* getelementptr inbounds ([4 x <8 x double>], [4 x <8 x double>] addrspace(3)* @local_memory_alignment_global.lds_v8f64, i32 0, i32 0), align 64
// CHECK: store volatile <16 x double> zeroinitializer, <16 x double> addrspace(3)* getelementptr inbounds ([4 x <16 x double>], [4 x <16 x double>] addrspace(3)* @local_memory_alignment_global.lds_v16f64, i32 0, i32 0), align 128
// Declares one 4-element volatile local array per scalar and vector type,
// then stores zero through each array name (i.e. to element 0).
// The FileCheck expectations preceding this kernel list the resulting
// globals and stores in this same order, so keep the declaration and
// store order unchanged.
kernel void local_memory_alignment_global()
{
  volatile local char lds_i8[4];
  volatile local char2 lds_v2i8[4];
  volatile local char3 lds_v3i8[4];
  volatile local char4 lds_v4i8[4];
  volatile local char8 lds_v8i8[4];
  volatile local char16 lds_v16i8[4];

  volatile local short lds_i16[4];
  volatile local short2 lds_v2i16[4];
  volatile local short3 lds_v3i16[4];
  volatile local short4 lds_v4i16[4];
  volatile local short8 lds_v8i16[4];
  volatile local short16 lds_v16i16[4];

  volatile local int lds_i32[4];
  volatile local int2 lds_v2i32[4];
  volatile local int3 lds_v3i32[4];
  volatile local int4 lds_v4i32[4];
  volatile local int8 lds_v8i32[4];
  volatile local int16 lds_v16i32[4];

  volatile local long lds_i64[4];
  volatile local long2 lds_v2i64[4];
  volatile local long3 lds_v3i64[4];
  volatile local long4 lds_v4i64[4];
  volatile local long8 lds_v8i64[4];
  volatile local long16 lds_v16i64[4];

  volatile local half lds_f16[4];
  volatile local half2 lds_v2f16[4];
  volatile local half3 lds_v3f16[4];
  volatile local half4 lds_v4f16[4];
  volatile local half8 lds_v8f16[4];
  volatile local half16 lds_v16f16[4];

  volatile local float lds_f32[4];
  volatile local float2 lds_v2f32[4];
  volatile local float3 lds_v3f32[4];
  volatile local float4 lds_v4f32[4];
  volatile local float8 lds_v8f32[4];
  volatile local float16 lds_v16f32[4];

  volatile local double lds_f64[4];
  volatile local double2 lds_v2f64[4];
  volatile local double3 lds_v3f64[4];
  volatile local double4 lds_v4f64[4];
  volatile local double8 lds_v8f64[4];
  volatile local double16 lds_v16f64[4];

  // One volatile store per variable, in declaration order.
  *lds_i8 = 0;
  *lds_v2i8 = 0;
  *lds_v3i8 = 0;
  *lds_v4i8 = 0;
  *lds_v8i8 = 0;
  *lds_v16i8 = 0;

  *lds_i16 = 0;
  *lds_v2i16 = 0;
  *lds_v3i16 = 0;
  *lds_v4i16 = 0;
  *lds_v8i16 = 0;
  *lds_v16i16 = 0;

  *lds_i32 = 0;
  *lds_v2i32 = 0;
  *lds_v3i32 = 0;
  *lds_v4i32 = 0;
  *lds_v8i32 = 0;
  *lds_v16i32 = 0;

  *lds_i64 = 0;
  *lds_v2i64 = 0;
  *lds_v3i64 = 0;
  *lds_v4i64 = 0;
  *lds_v8i64 = 0;
  *lds_v16i64 = 0;

  *lds_f16 = 0;
  *lds_v2f16 = 0;
  *lds_v3f16 = 0;
  *lds_v4f16 = 0;
  *lds_v8f16 = 0;
  *lds_v16f16 = 0;

  *lds_f32 = 0;
  *lds_v2f32 = 0;
  *lds_v3f32 = 0;
  *lds_v4f32 = 0;
  *lds_v8f32 = 0;
  *lds_v16f32 = 0;

  *lds_f64 = 0;
  *lds_v2f64 = 0;
  *lds_v3f64 = 0;
  *lds_v4f64 = 0;
  *lds_v8f64 = 0;
  *lds_v16f64 = 0;
}

// Takes one volatile local pointer per scalar and vector type and stores
// zero through each pointer, in parameter order.
// NOTE(review): no FileCheck expectations for this kernel are visible in
// this file; confirm whether checks were intended.
kernel void local_memory_alignment_arg(
  volatile local char* lds_i8,
  volatile local char2* lds_v2i8,
  volatile local char3* lds_v3i8,
  volatile local char4* lds_v4i8,
  volatile local char8* lds_v8i8,
  volatile local char16* lds_v16i8,

  volatile local short* lds_i16,
  volatile local short2* lds_v2i16,
  volatile local short3* lds_v3i16,
  volatile local short4* lds_v4i16,
  volatile local short8* lds_v8i16,
  volatile local short16* lds_v16i16,

  volatile local int* lds_i32,
  volatile local int2* lds_v2i32,
  volatile local int3* lds_v3i32,
  volatile local int4* lds_v4i32,
  volatile local int8* lds_v8i32,
  volatile local int16* lds_v16i32,

  volatile local long* lds_i64,
  volatile local long2* lds_v2i64,
  volatile local long3* lds_v3i64,
  volatile local long4* lds_v4i64,
  volatile local long8* lds_v8i64,
  volatile local long16* lds_v16i64,

  volatile local half* lds_f16,
  volatile local half2* lds_v2f16,
  volatile local half3* lds_v3f16,
  volatile local half4* lds_v4f16,
  volatile local half8* lds_v8f16,
  volatile local half16* lds_v16f16,

  volatile local float* lds_f32,
  volatile local float2* lds_v2f32,
  volatile local float3* lds_v3f32,
  volatile local float4* lds_v4f32,
  volatile local float8* lds_v8f32,
  volatile local float16* lds_v16f32,

  volatile local double* lds_f64,
  volatile local double2* lds_v2f64,
  volatile local double3* lds_v3f64,
  volatile local double4* lds_v4f64,
  volatile local double8* lds_v8f64,
  volatile local double16* lds_v16f64)
{
  *lds_i8 = 0;
  *lds_v2i8 = 0;
  *lds_v3i8 = 0;
  *lds_v4i8 = 0;
  *lds_v8i8 = 0;
  *lds_v16i8 = 0;

  *lds_i16 = 0;
  *lds_v2i16 = 0;
  *lds_v3i16 = 0;
  *lds_v4i16 = 0;
  *lds_v8i16 = 0;
  *lds_v16i16 = 0;

  *lds_i32 = 0;
  *lds_v2i32 = 0;
  *lds_v3i32 = 0;
  *lds_v4i32 = 0;
  *lds_v8i32 = 0;
  *lds_v16i32 = 0;

  *lds_i64 = 0;
  *lds_v2i64 = 0;
  *lds_v3i64 = 0;
  *lds_v4i64 = 0;
  *lds_v8i64 = 0;
  *lds_v16i64 = 0;

  *lds_f16 = 0;
  *lds_v2f16 = 0;
  *lds_v3f16 = 0;
  *lds_v4f16 = 0;
  *lds_v8f16 = 0;
  *lds_v16f16 = 0;

  *lds_f32 = 0;
  *lds_v2f32 = 0;
  *lds_v3f32 = 0;
  *lds_v4f32 = 0;
  *lds_v8f32 = 0;
  *lds_v16f32 = 0;

  *lds_f64 = 0;
  *lds_v2f64 = 0;
  *lds_v3f64 = 0;
  *lds_v4f64 = 0;
  *lds_v8f64 = 0;
  *lds_v16f64 = 0;
}

// CHECK-LABEL: @private_memory_alignment_alloca(
// CHECK: %private_i8 = alloca [4 x i8], align 1, addrspace(5)
// CHECK: %private_v2i8 = alloca [4 x <2 x i8>], align 2, addrspace(5)
// CHECK: %private_v3i8 = alloca [4 x <3 x i8>], align 4, addrspace(5)
// CHECK: %private_v4i8 = alloca [4 x <4 x i8>], align 4, addrspace(5)
// CHECK: %private_v8i8 = alloca [4 x <8 x i8>], align 8, addrspace(5)
// CHECK: %private_v16i8 = alloca [4 x <16 x i8>], align 16, addrspace(5)
// CHECK: %private_i16 = alloca [4 x i16], align 2, addrspace(5)
// CHECK: %private_v2i16 = alloca [4 x <2 x i16>], align 4, addrspace(5)
// CHECK: %private_v3i16 = alloca [4 x <3 x i16>], align 8, addrspace(5)
// CHECK: %private_v4i16 = alloca [4 x <4 x i16>], align 8, addrspace(5)
// CHECK: %private_v8i16 = alloca [4 x <8 x i16>], align 16, addrspace(5)
// CHECK: %private_v16i16 = alloca [4 x <16 x i16>], align 32, addrspace(5)
// CHECK: %private_i32 = alloca [4 x i32], align 4, addrspace(5)
// CHECK: %private_v2i32 = alloca [4 x <2 x i32>], align 8, addrspace(5)
// CHECK: %private_v3i32 = alloca [4 x <3 x i32>], align 16, addrspace(5)
// CHECK: %private_v4i32 = alloca [4 x <4 x i32>], align 16, addrspace(5)
// CHECK: %private_v8i32 = alloca [4 x <8 x i32>], align 32, addrspace(5)
// CHECK: %private_v16i32 = alloca [4 x <16 x i32>], align 64, addrspace(5)
// CHECK: %private_i64 = alloca [4 x i64], align 8, addrspace(5)
// CHECK: %private_v2i64 = alloca [4 x <2 x i64>], align 16, addrspace(5)
// CHECK: %private_v3i64 = alloca [4 x <3 x i64>], align 32, addrspace(5)
// CHECK: %private_v4i64 = alloca [4 x <4 x i64>], align 32, addrspace(5)
// CHECK: %private_v8i64 = alloca [4 x <8 x i64>], align 64, addrspace(5)
// CHECK: %private_v16i64 = alloca [4 x <16 x i64>], align 128, addrspace(5)
// CHECK: %private_f16 = alloca [4 x half], align 2, addrspace(5)
// CHECK: %private_v2f16 = alloca [4 x <2 x half>], align 4, addrspace(5)
// CHECK: %private_v3f16 = alloca [4 x <3 x half>], align 8, addrspace(5)
// CHECK: %private_v4f16 = alloca [4 x <4 x half>], align 8, addrspace(5)
// CHECK: %private_v8f16 = alloca [4 x <8 x half>], align 16, addrspace(5)
// CHECK: %private_v16f16 = alloca [4 x <16 x half>], align 32, addrspace(5)
// CHECK: %private_f32 = alloca [4 x float], align 4, addrspace(5)
// CHECK: %private_v2f32 = alloca [4 x <2 x float>], align 8, addrspace(5)
// CHECK: %private_v3f32 = alloca [4 x <3 x float>], align 16, addrspace(5)
// CHECK: %private_v4f32 = alloca [4 x <4 x float>], align 16, addrspace(5)
// CHECK: %private_v8f32 = alloca [4 x <8 x float>], align 32, addrspace(5)
// CHECK: %private_v16f32 = alloca [4 x <16 x float>], align 64, addrspace(5)
// CHECK: %private_f64 = alloca [4 x double], align 8, addrspace(5)
// CHECK: %private_v2f64 = alloca [4 x <2 x double>], align 16, addrspace(5)
// CHECK: %private_v3f64 = alloca [4 x <3 x double>], align 32, addrspace(5)
// CHECK: %private_v4f64 = alloca [4 x <4 x double>], align 32, addrspace(5)
// CHECK: %private_v8f64 = alloca [4 x <8 x double>], align 64, addrspace(5)
// CHECK: %private_v16f64 = alloca [4 x <16 x double>], align 128, addrspace(5)

// CHECK: store volatile i8 0, i8 addrspace(5)* %arraydecay, align 1
// CHECK: store volatile <2 x i8> zeroinitializer, <2 x i8> addrspace(5)* %arraydecay{{[0-9]+}}, align 2
// CHECK: store volatile <4 x i8> <i8 0, i8 0, i8 0, i8 undef>, <4 x i8> addrspace(5)* %storetmp, align 4
// CHECK: store volatile <4 x i8> zeroinitializer, <4 x i8> addrspace(5)* %arraydecay{{[0-9]+}}, align 4
// CHECK: store volatile <8 x i8> zeroinitializer, <8 x i8> addrspace(5)* %arraydecay{{[0-9]+}}, align 8
// CHECK: store volatile <16 x i8> zeroinitializer, <16 x i8> addrspace(5)* %arraydecay{{[0-9]+}}, align 16
// CHECK: store volatile i16 0, i16 addrspace(5)* %arraydecay{{[0-9]+}}, align 2
// CHECK: store volatile <2 x i16> zeroinitializer, <2 x i16> addrspace(5)* %arraydecay{{[0-9]+}}, align 4
// CHECK: store volatile <4 x i16> <i16 0, i16 0, i16 0, i16 undef>, <4 x i16> addrspace(5)* %storetmp{{[0-9]+}}, align 8
// CHECK: store volatile <4 x i16> zeroinitializer, <4 x i16> addrspace(5)* %arraydecay{{[0-9]+}}, align 8
// CHECK: store volatile <8 x i16> zeroinitializer, <8 x i16> addrspace(5)* %arraydecay{{[0-9]+}}, align 16
// CHECK: store volatile <16 x i16> zeroinitializer, <16 x i16> addrspace(5)* %arraydecay{{[0-9]+}}, align 32
// CHECK: store volatile i32 0, i32 addrspace(5)* %arraydecay{{[0-9]+}}, align 4
// CHECK: store volatile <2 x i32> zeroinitializer, <2 x i32> addrspace(5)* %arraydecay{{[0-9]+}}, align 8
// CHECK: store volatile <4 x i32> <i32 0, i32 0, i32 0, i32 undef>, <4 x i32> addrspace(5)* %storetmp16, align 16
// CHECK: store volatile <4 x i32> zeroinitializer, <4 x i32> addrspace(5)* %arraydecay{{[0-9]+}}, align 16
// CHECK: store volatile <8 x i32> zeroinitializer, <8 x i32> addrspace(5)* %arraydecay{{[0-9]+}}, align 32
// CHECK: store volatile <16 x i32> zeroinitializer, <16 x i32> addrspace(5)* %arraydecay{{[0-9]+}}, align 64
// CHECK: store volatile i64 0, i64 addrspace(5)* %arraydecay{{[0-9]+}}, align 8
// CHECK: store volatile <2 x i64> zeroinitializer, <2 x i64> addrspace(5)* %arraydecay{{[0-9]+}}, align 16
// CHECK: store volatile <4 x i64> <i64 0, i64 0, i64 0, i64 undef>, <4 x i64> addrspace(5)* %storetmp23, align 32
// CHECK: store volatile <4 x i64> zeroinitializer, <4 x i64> addrspace(5)* %arraydecay{{[0-9]+}}, align 32
// CHECK: store volatile <8 x i64> zeroinitializer, <8 x i64> addrspace(5)* %arraydecay{{[0-9]+}}, align 64
// CHECK: store volatile <16 x i64> zeroinitializer, <16 x i64> addrspace(5)* %arraydecay{{[0-9]+}}, align 128
// CHECK: store volatile half 0xH0000, half addrspace(5)* %arraydecay{{[0-9]+}}, align 2
// CHECK: store volatile <2 x half> zeroinitializer, <2 x half> addrspace(5)* %arraydecay{{[0-9]+}}, align 4
// CHECK: store volatile <4 x half> <half 0xH0000, half 0xH0000, half 0xH0000, half undef>, <4 x half> addrspace(5)* %storetmp{{[0-9]+}}, align 8
// CHECK: store volatile <4 x half> zeroinitializer, <4 x half> addrspace(5)* %arraydecay{{[0-9]+}}, align 8
// CHECK: store volatile <8 x half> zeroinitializer, <8 x half> addrspace(5)* %arraydecay{{[0-9]+}}, align 16
// CHECK: store volatile <16 x half> zeroinitializer, <16 x half> addrspace(5)* %arraydecay{{[0-9]+}}, align 32
// CHECK: store volatile float 0.000000e+00, float addrspace(5)* %arraydecay34, align 4
// CHECK: store volatile <2 x float> zeroinitializer, <2 x float> addrspace(5)* %arraydecay{{[0-9]+}}, align 8
// CHECK: store volatile <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef>, <4 x float> addrspace(5)* %storetmp{{[0-9]+}}, align 16
// CHECK: store volatile <4 x float> zeroinitializer, <4 x float> addrspace(5)* %arraydecay{{[0-9]+}}, align 16
// CHECK: store volatile <8 x float> zeroinitializer, <8 x float> addrspace(5)* %arraydecay{{[0-9]+}}, align 32
// CHECK: store volatile <16 x float> zeroinitializer, <16 x float> addrspace(5)* %arraydecay{{[0-9]+}}, align 64
// CHECK: store volatile double 0.000000e+00, double addrspace(5)* %arraydecay{{[0-9]+}}, align 8
// CHECK: store volatile <2 x double> zeroinitializer, <2 x double> addrspace(5)* %arraydecay{{[0-9]+}}, align 16
// CHECK: store volatile <4 x double> <double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double undef>, <4 x double> addrspace(5)* %storetmp{{[0-9]+}}, align 32
// CHECK: store volatile <4 x double> zeroinitializer, <4 x double> addrspace(5)* %arraydecay{{[0-9]+}}, align 32
// CHECK: store volatile <8 x double> zeroinitializer, <8 x double> addrspace(5)* %arraydecay{{[0-9]+}}, align 64
// CHECK: store volatile <16 x double> zeroinitializer, <16 x double> addrspace(5)* %arraydecay{{[0-9]+}}, align 128
// Declares one 4-element volatile private array per scalar and vector type,
// then stores zero through each array name (i.e. to element 0).
// The FileCheck expectations preceding this kernel list the allocas and
// stores in this same order, and a few of them name specific IR values
// (%storetmp16, %storetmp23, %arraydecay34), so keep the declaration and
// store order unchanged.
kernel void private_memory_alignment_alloca()
{
  volatile private char private_i8[4];
  volatile private char2 private_v2i8[4];
  volatile private char3 private_v3i8[4];
  volatile private char4 private_v4i8[4];
  volatile private char8 private_v8i8[4];
  volatile private char16 private_v16i8[4];

  volatile private short private_i16[4];
  volatile private short2 private_v2i16[4];
  volatile private short3 private_v3i16[4];
  volatile private short4 private_v4i16[4];
  volatile private short8 private_v8i16[4];
  volatile private short16 private_v16i16[4];

  volatile private int private_i32[4];
  volatile private int2 private_v2i32[4];
  volatile private int3 private_v3i32[4];
  volatile private int4 private_v4i32[4];
  volatile private int8 private_v8i32[4];
  volatile private int16 private_v16i32[4];

  volatile private long private_i64[4];
  volatile private long2 private_v2i64[4];
  volatile private long3 private_v3i64[4];
  volatile private long4 private_v4i64[4];
  volatile private long8 private_v8i64[4];
  volatile private long16 private_v16i64[4];

  volatile private half private_f16[4];
  volatile private half2 private_v2f16[4];
  volatile private half3 private_v3f16[4];
  volatile private half4 private_v4f16[4];
  volatile private half8 private_v8f16[4];
  volatile private half16 private_v16f16[4];

  volatile private float private_f32[4];
  volatile private float2 private_v2f32[4];
  volatile private float3 private_v3f32[4];
  volatile private float4 private_v4f32[4];
  volatile private float8 private_v8f32[4];
  volatile private float16 private_v16f32[4];

  volatile private double private_f64[4];
  volatile private double2 private_v2f64[4];
  volatile private double3 private_v3f64[4];
  volatile private double4 private_v4f64[4];
  volatile private double8 private_v8f64[4];
  volatile private double16 private_v16f64[4];

  // One volatile store per variable, in declaration order.
  *private_i8 = 0;
  *private_v2i8 = 0;
  *private_v3i8 = 0;
  *private_v4i8 = 0;
  *private_v8i8 = 0;
  *private_v16i8 = 0;

  *private_i16 = 0;
  *private_v2i16 = 0;
  *private_v3i16 = 0;
  *private_v4i16 = 0;
  *private_v8i16 = 0;
  *private_v16i16 = 0;

  *private_i32 = 0;
  *private_v2i32 = 0;
  *private_v3i32 = 0;
  *private_v4i32 = 0;
  *private_v8i32 = 0;
  *private_v16i32 = 0;

  *private_i64 = 0;
  *private_v2i64 = 0;
  *private_v3i64 = 0;
  *private_v4i64 = 0;
  *private_v8i64 = 0;
  *private_v16i64 = 0;

  *private_f16 = 0;
  *private_v2f16 = 0;
  *private_v3f16 = 0;
  *private_v4f16 = 0;
  *private_v8f16 = 0;
  *private_v16f16 = 0;

  *private_f32 = 0;
  *private_v2f32 = 0;
  *private_v3f32 = 0;
  *private_v4f32 = 0;
  *private_v8f32 = 0;
  *private_v16f32 = 0;

  *private_f64 = 0;
  *private_v2f64 = 0;
  *private_v3f64 = 0;
  *private_v4f64 = 0;
  *private_v8f64 = 0;
  *private_v16f64 = 0;
}