Matt Arsenault | 77ce553 | 2017-02-07 04:28:02 +0000 | [diff] [blame] | 1 | // REQUIRES: amdgpu-registered-target |
Yaxun Liu | af3d4db | 2017-05-23 16:15:53 +0000 | [diff] [blame] | 2 | // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -S -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s |
Yaxun Liu | 6d96f163 | 2017-05-18 18:51:09 +0000 | [diff] [blame] | 3 | // RUN: %clang_cc1 -triple amdgcn-unknown-unknown-opencl -S -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s |
Matt Arsenault | 77ce553 | 2017-02-07 04:28:02 +0000 | [diff] [blame] | 4 | |
// Enable the OpenCL extensions named cl_khr_fp64 and cl_khr_fp16; the
// declarations below use `double` and `half` as vector element types.
| 5 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable |
| 6 | #pragma OPENCL EXTENSION cl_khr_fp16 : enable |
| 7 | |
// Vector aliases declared with the ext_vector_type attribute. For each element
// type there is one alias per element count 2, 3, 4, 8 and 16, named
// <element type><count>. The kernels in this file use them as the element
// types of their arrays and pointer parameters.
//
// char vectors
| 8 | typedef char __attribute__((ext_vector_type(2))) char2; |
| 9 | typedef char __attribute__((ext_vector_type(3))) char3; |
| 10 | typedef char __attribute__((ext_vector_type(4))) char4; |
| 11 | typedef char __attribute__((ext_vector_type(8))) char8; |
| 12 | typedef char __attribute__((ext_vector_type(16))) char16; |
| 13 | |
// short vectors
| 14 | typedef short __attribute__((ext_vector_type(2))) short2; |
| 15 | typedef short __attribute__((ext_vector_type(3))) short3; |
| 16 | typedef short __attribute__((ext_vector_type(4))) short4; |
| 17 | typedef short __attribute__((ext_vector_type(8))) short8; |
| 18 | typedef short __attribute__((ext_vector_type(16))) short16; |
| 19 | |
// int vectors
| 20 | typedef int __attribute__((ext_vector_type(2))) int2; |
| 21 | typedef int __attribute__((ext_vector_type(3))) int3; |
| 22 | typedef int __attribute__((ext_vector_type(4))) int4; |
| 23 | typedef int __attribute__((ext_vector_type(8))) int8; |
| 24 | typedef int __attribute__((ext_vector_type(16))) int16; |
| 25 | |
// long vectors
| 26 | typedef long __attribute__((ext_vector_type(2))) long2; |
| 27 | typedef long __attribute__((ext_vector_type(3))) long3; |
| 28 | typedef long __attribute__((ext_vector_type(4))) long4; |
| 29 | typedef long __attribute__((ext_vector_type(8))) long8; |
| 30 | typedef long __attribute__((ext_vector_type(16))) long16; |
| 31 | |
// half vectors
| 32 | typedef half __attribute__((ext_vector_type(2))) half2; |
| 33 | typedef half __attribute__((ext_vector_type(3))) half3; |
| 34 | typedef half __attribute__((ext_vector_type(4))) half4; |
| 35 | typedef half __attribute__((ext_vector_type(8))) half8; |
| 36 | typedef half __attribute__((ext_vector_type(16))) half16; |
| 37 | |
// float vectors
| 38 | typedef float __attribute__((ext_vector_type(2))) float2; |
| 39 | typedef float __attribute__((ext_vector_type(3))) float3; |
| 40 | typedef float __attribute__((ext_vector_type(4))) float4; |
| 41 | typedef float __attribute__((ext_vector_type(8))) float8; |
| 42 | typedef float __attribute__((ext_vector_type(16))) float16; |
| 43 | |
// double vectors
| 44 | typedef double __attribute__((ext_vector_type(2))) double2; |
| 45 | typedef double __attribute__((ext_vector_type(3))) double3; |
| 46 | typedef double __attribute__((ext_vector_type(4))) double4; |
| 47 | typedef double __attribute__((ext_vector_type(8))) double8; |
| 48 | typedef double __attribute__((ext_vector_type(16))) double16; |
| 49 | |
| 50 | // CHECK: @local_memory_alignment_global.lds_i8 = internal addrspace(3) global [4 x i8] undef, align 1 |
| 51 | // CHECK: @local_memory_alignment_global.lds_v2i8 = internal addrspace(3) global [4 x <2 x i8>] undef, align 2 |
| 52 | // CHECK: @local_memory_alignment_global.lds_v3i8 = internal addrspace(3) global [4 x <3 x i8>] undef, align 4 |
| 53 | // CHECK: @local_memory_alignment_global.lds_v4i8 = internal addrspace(3) global [4 x <4 x i8>] undef, align 4 |
| 54 | // CHECK: @local_memory_alignment_global.lds_v8i8 = internal addrspace(3) global [4 x <8 x i8>] undef, align 8 |
| 55 | // CHECK: @local_memory_alignment_global.lds_v16i8 = internal addrspace(3) global [4 x <16 x i8>] undef, align 16 |
| 56 | // CHECK: @local_memory_alignment_global.lds_i16 = internal addrspace(3) global [4 x i16] undef, align 2 |
| 57 | // CHECK: @local_memory_alignment_global.lds_v2i16 = internal addrspace(3) global [4 x <2 x i16>] undef, align 4 |
| 58 | // CHECK: @local_memory_alignment_global.lds_v3i16 = internal addrspace(3) global [4 x <3 x i16>] undef, align 8 |
| 59 | // CHECK: @local_memory_alignment_global.lds_v4i16 = internal addrspace(3) global [4 x <4 x i16>] undef, align 8 |
| 60 | // CHECK: @local_memory_alignment_global.lds_v8i16 = internal addrspace(3) global [4 x <8 x i16>] undef, align 16 |
| 61 | // CHECK: @local_memory_alignment_global.lds_v16i16 = internal addrspace(3) global [4 x <16 x i16>] undef, align 32 |
| 62 | // CHECK: @local_memory_alignment_global.lds_i32 = internal addrspace(3) global [4 x i32] undef, align 4 |
| 63 | // CHECK: @local_memory_alignment_global.lds_v2i32 = internal addrspace(3) global [4 x <2 x i32>] undef, align 8 |
| 64 | // CHECK: @local_memory_alignment_global.lds_v3i32 = internal addrspace(3) global [4 x <3 x i32>] undef, align 16 |
| 65 | // CHECK: @local_memory_alignment_global.lds_v4i32 = internal addrspace(3) global [4 x <4 x i32>] undef, align 16 |
| 66 | // CHECK: @local_memory_alignment_global.lds_v8i32 = internal addrspace(3) global [4 x <8 x i32>] undef, align 32 |
| 67 | // CHECK: @local_memory_alignment_global.lds_v16i32 = internal addrspace(3) global [4 x <16 x i32>] undef, align 64 |
| 68 | // CHECK: @local_memory_alignment_global.lds_i64 = internal addrspace(3) global [4 x i64] undef, align 8 |
| 69 | // CHECK: @local_memory_alignment_global.lds_v2i64 = internal addrspace(3) global [4 x <2 x i64>] undef, align 16 |
| 70 | // CHECK: @local_memory_alignment_global.lds_v3i64 = internal addrspace(3) global [4 x <3 x i64>] undef, align 32 |
| 71 | // CHECK: @local_memory_alignment_global.lds_v4i64 = internal addrspace(3) global [4 x <4 x i64>] undef, align 32 |
| 72 | // CHECK: @local_memory_alignment_global.lds_v8i64 = internal addrspace(3) global [4 x <8 x i64>] undef, align 64 |
| 73 | // CHECK: @local_memory_alignment_global.lds_v16i64 = internal addrspace(3) global [4 x <16 x i64>] undef, align 128 |
| 74 | // CHECK: @local_memory_alignment_global.lds_f16 = internal addrspace(3) global [4 x half] undef, align 2 |
| 75 | // CHECK: @local_memory_alignment_global.lds_v2f16 = internal addrspace(3) global [4 x <2 x half>] undef, align 4 |
| 76 | // CHECK: @local_memory_alignment_global.lds_v3f16 = internal addrspace(3) global [4 x <3 x half>] undef, align 8 |
| 77 | // CHECK: @local_memory_alignment_global.lds_v4f16 = internal addrspace(3) global [4 x <4 x half>] undef, align 8 |
| 78 | // CHECK: @local_memory_alignment_global.lds_v8f16 = internal addrspace(3) global [4 x <8 x half>] undef, align 16 |
| 79 | // CHECK: @local_memory_alignment_global.lds_v16f16 = internal addrspace(3) global [4 x <16 x half>] undef, align 32 |
| 80 | // CHECK: @local_memory_alignment_global.lds_f32 = internal addrspace(3) global [4 x float] undef, align 4 |
| 81 | // CHECK: @local_memory_alignment_global.lds_v2f32 = internal addrspace(3) global [4 x <2 x float>] undef, align 8 |
| 82 | // CHECK: @local_memory_alignment_global.lds_v3f32 = internal addrspace(3) global [4 x <3 x float>] undef, align 16 |
| 83 | // CHECK: @local_memory_alignment_global.lds_v4f32 = internal addrspace(3) global [4 x <4 x float>] undef, align 16 |
| 84 | // CHECK: @local_memory_alignment_global.lds_v8f32 = internal addrspace(3) global [4 x <8 x float>] undef, align 32 |
| 85 | // CHECK: @local_memory_alignment_global.lds_v16f32 = internal addrspace(3) global [4 x <16 x float>] undef, align 64 |
| 86 | // CHECK: @local_memory_alignment_global.lds_f64 = internal addrspace(3) global [4 x double] undef, align 8 |
| 87 | // CHECK: @local_memory_alignment_global.lds_v2f64 = internal addrspace(3) global [4 x <2 x double>] undef, align 16 |
| 88 | // CHECK: @local_memory_alignment_global.lds_v3f64 = internal addrspace(3) global [4 x <3 x double>] undef, align 32 |
| 89 | // CHECK: @local_memory_alignment_global.lds_v4f64 = internal addrspace(3) global [4 x <4 x double>] undef, align 32 |
| 90 | // CHECK: @local_memory_alignment_global.lds_v8f64 = internal addrspace(3) global [4 x <8 x double>] undef, align 64 |
| 91 | // CHECK: @local_memory_alignment_global.lds_v16f64 = internal addrspace(3) global [4 x <16 x double>] undef, align 128 |
| 92 | |
| 93 | |
| 94 | // CHECK-LABEL: @local_memory_alignment_global( |
| 95 | // CHECK: store volatile i8 0, i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @local_memory_alignment_global.lds_i8, i32 0, i32 0), align 1 |
| 96 | // CHECK: store volatile <2 x i8> zeroinitializer, <2 x i8> addrspace(3)* getelementptr inbounds ([4 x <2 x i8>], [4 x <2 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v2i8, i32 0, i32 0), align 2 |
| 97 | // CHECK: store volatile <4 x i8> <i8 0, i8 0, i8 0, i8 undef>, <4 x i8> addrspace(3)* bitcast ([4 x <3 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v3i8 to <4 x i8> addrspace(3)*), align 4 |
| 98 | // CHECK: store volatile <4 x i8> zeroinitializer, <4 x i8> addrspace(3)* getelementptr inbounds ([4 x <4 x i8>], [4 x <4 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v4i8, i32 0, i32 0), align 4 |
| 99 | // CHECK: store volatile <8 x i8> zeroinitializer, <8 x i8> addrspace(3)* getelementptr inbounds ([4 x <8 x i8>], [4 x <8 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v8i8, i32 0, i32 0), align 8 |
| 100 | // CHECK: store volatile <16 x i8> zeroinitializer, <16 x i8> addrspace(3)* getelementptr inbounds ([4 x <16 x i8>], [4 x <16 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v16i8, i32 0, i32 0), align 16 |
| 101 | // CHECK: store volatile i16 0, i16 addrspace(3)* getelementptr inbounds ([4 x i16], [4 x i16] addrspace(3)* @local_memory_alignment_global.lds_i16, i32 0, i32 0), align 2 |
| 102 | // CHECK: store volatile <2 x i16> zeroinitializer, <2 x i16> addrspace(3)* getelementptr inbounds ([4 x <2 x i16>], [4 x <2 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v2i16, i32 0, i32 0), align 4 |
| 103 | // CHECK: store volatile <4 x i16> <i16 0, i16 0, i16 0, i16 undef>, <4 x i16> addrspace(3)* bitcast ([4 x <3 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v3i16 to <4 x i16> addrspace(3)*), align 8 |
| 104 | // CHECK: store volatile <4 x i16> zeroinitializer, <4 x i16> addrspace(3)* getelementptr inbounds ([4 x <4 x i16>], [4 x <4 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v4i16, i32 0, i32 0), align 8 |
| 105 | // CHECK: store volatile <8 x i16> zeroinitializer, <8 x i16> addrspace(3)* getelementptr inbounds ([4 x <8 x i16>], [4 x <8 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v8i16, i32 0, i32 0), align 16 |
| 106 | // CHECK: store volatile <16 x i16> zeroinitializer, <16 x i16> addrspace(3)* getelementptr inbounds ([4 x <16 x i16>], [4 x <16 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v16i16, i32 0, i32 0), align 32 |
| 107 | // CHECK: store volatile i32 0, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @local_memory_alignment_global.lds_i32, i32 0, i32 0), align 4 |
| 108 | // CHECK: store volatile <2 x i32> zeroinitializer, <2 x i32> addrspace(3)* getelementptr inbounds ([4 x <2 x i32>], [4 x <2 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v2i32, i32 0, i32 0), align 8 |
| 109 | // CHECK: store volatile <4 x i32> <i32 0, i32 0, i32 0, i32 undef>, <4 x i32> addrspace(3)* bitcast ([4 x <3 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v3i32 to <4 x i32> addrspace(3)*), align 16 |
| 110 | // CHECK: store volatile <4 x i32> zeroinitializer, <4 x i32> addrspace(3)* getelementptr inbounds ([4 x <4 x i32>], [4 x <4 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v4i32, i32 0, i32 0), align 16 |
| 111 | // CHECK: store volatile <8 x i32> zeroinitializer, <8 x i32> addrspace(3)* getelementptr inbounds ([4 x <8 x i32>], [4 x <8 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v8i32, i32 0, i32 0), align 32 |
| 112 | // CHECK: store volatile <16 x i32> zeroinitializer, <16 x i32> addrspace(3)* getelementptr inbounds ([4 x <16 x i32>], [4 x <16 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v16i32, i32 0, i32 0), align 64 |
| 113 | // CHECK: store volatile i64 0, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @local_memory_alignment_global.lds_i64, i32 0, i32 0), align 8 |
| 114 | // CHECK: store volatile <2 x i64> zeroinitializer, <2 x i64> addrspace(3)* getelementptr inbounds ([4 x <2 x i64>], [4 x <2 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v2i64, i32 0, i32 0), align 16 |
| 115 | // CHECK: store volatile <4 x i64> <i64 0, i64 0, i64 0, i64 undef>, <4 x i64> addrspace(3)* bitcast ([4 x <3 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v3i64 to <4 x i64> addrspace(3)*), align 32 |
| 116 | // CHECK: store volatile <4 x i64> zeroinitializer, <4 x i64> addrspace(3)* getelementptr inbounds ([4 x <4 x i64>], [4 x <4 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v4i64, i32 0, i32 0), align 32 |
| 117 | // CHECK: store volatile <8 x i64> zeroinitializer, <8 x i64> addrspace(3)* getelementptr inbounds ([4 x <8 x i64>], [4 x <8 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v8i64, i32 0, i32 0), align 64 |
| 118 | // CHECK: store volatile <16 x i64> zeroinitializer, <16 x i64> addrspace(3)* getelementptr inbounds ([4 x <16 x i64>], [4 x <16 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v16i64, i32 0, i32 0), align 128 |
| 119 | // CHECK: store volatile half 0xH0000, half addrspace(3)* getelementptr inbounds ([4 x half], [4 x half] addrspace(3)* @local_memory_alignment_global.lds_f16, i32 0, i32 0), align 2 |
| 120 | // CHECK: store volatile <2 x half> zeroinitializer, <2 x half> addrspace(3)* getelementptr inbounds ([4 x <2 x half>], [4 x <2 x half>] addrspace(3)* @local_memory_alignment_global.lds_v2f16, i32 0, i32 0), align 4 |
| 121 | // CHECK: store volatile <4 x half> <half 0xH0000, half 0xH0000, half 0xH0000, half undef>, <4 x half> addrspace(3)* bitcast ([4 x <3 x half>] addrspace(3)* @local_memory_alignment_global.lds_v3f16 to <4 x half> addrspace(3)*), align 8 |
| 122 | // CHECK: store volatile <4 x half> zeroinitializer, <4 x half> addrspace(3)* getelementptr inbounds ([4 x <4 x half>], [4 x <4 x half>] addrspace(3)* @local_memory_alignment_global.lds_v4f16, i32 0, i32 0), align 8 |
| 123 | // CHECK: store volatile <8 x half> zeroinitializer, <8 x half> addrspace(3)* getelementptr inbounds ([4 x <8 x half>], [4 x <8 x half>] addrspace(3)* @local_memory_alignment_global.lds_v8f16, i32 0, i32 0), align 16 |
| 124 | // CHECK: store volatile <16 x half> zeroinitializer, <16 x half> addrspace(3)* getelementptr inbounds ([4 x <16 x half>], [4 x <16 x half>] addrspace(3)* @local_memory_alignment_global.lds_v16f16, i32 0, i32 0), align 32 |
| 125 | // CHECK: store volatile float 0.000000e+00, float addrspace(3)* getelementptr inbounds ([4 x float], [4 x float] addrspace(3)* @local_memory_alignment_global.lds_f32, i32 0, i32 0), align 4 |
| 126 | // CHECK: store volatile <2 x float> zeroinitializer, <2 x float> addrspace(3)* getelementptr inbounds ([4 x <2 x float>], [4 x <2 x float>] addrspace(3)* @local_memory_alignment_global.lds_v2f32, i32 0, i32 0), align 8 |
| 127 | // CHECK: store volatile <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef>, <4 x float> addrspace(3)* bitcast ([4 x <3 x float>] addrspace(3)* @local_memory_alignment_global.lds_v3f32 to <4 x float> addrspace(3)*), align 16 |
| 128 | // CHECK: store volatile <4 x float> zeroinitializer, <4 x float> addrspace(3)* getelementptr inbounds ([4 x <4 x float>], [4 x <4 x float>] addrspace(3)* @local_memory_alignment_global.lds_v4f32, i32 0, i32 0), align 16 |
| 129 | // CHECK: store volatile <8 x float> zeroinitializer, <8 x float> addrspace(3)* getelementptr inbounds ([4 x <8 x float>], [4 x <8 x float>] addrspace(3)* @local_memory_alignment_global.lds_v8f32, i32 0, i32 0), align 32 |
| 130 | // CHECK: store volatile <16 x float> zeroinitializer, <16 x float> addrspace(3)* getelementptr inbounds ([4 x <16 x float>], [4 x <16 x float>] addrspace(3)* @local_memory_alignment_global.lds_v16f32, i32 0, i32 0), align 64 |
| 131 | // CHECK: store volatile double 0.000000e+00, double addrspace(3)* getelementptr inbounds ([4 x double], [4 x double] addrspace(3)* @local_memory_alignment_global.lds_f64, i32 0, i32 0), align 8 |
| 132 | // CHECK: store volatile <2 x double> zeroinitializer, <2 x double> addrspace(3)* getelementptr inbounds ([4 x <2 x double>], [4 x <2 x double>] addrspace(3)* @local_memory_alignment_global.lds_v2f64, i32 0, i32 0), align 16 |
| 133 | // CHECK: store volatile <4 x double> <double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double undef>, <4 x double> addrspace(3)* bitcast ([4 x <3 x double>] addrspace(3)* @local_memory_alignment_global.lds_v3f64 to <4 x double> addrspace(3)*), align 32 |
| 134 | // CHECK: store volatile <4 x double> zeroinitializer, <4 x double> addrspace(3)* getelementptr inbounds ([4 x <4 x double>], [4 x <4 x double>] addrspace(3)* @local_memory_alignment_global.lds_v4f64, i32 0, i32 0), align 32 |
| 135 | // CHECK: store volatile <8 x double> zeroinitializer, <8 x double> addrspace(3)* getelementptr inbounds ([4 x <8 x double>], [4 x <8 x double>] addrspace(3)* @local_memory_alignment_global.lds_v8f64, i32 0, i32 0), align 64 |
| 136 | // CHECK: store volatile <16 x double> zeroinitializer, <16 x double> addrspace(3)* getelementptr inbounds ([4 x <16 x double>], [4 x <16 x double>] addrspace(3)* @local_memory_alignment_global.lds_v16f64, i32 0, i32 0), align 128 |
// Test kernel: declares one `volatile local` array of four elements for every
// scalar type and every 2/3/4/8/16-element vector type, then stores 0 through
// the first element of each array (`*array`).
//
// The CHECK lines preceding this kernel pin the expected IR:
//   - each array is an internal addrspace(3) global named
//     @local_memory_alignment_global.<variable name> with the listed alignment,
//     so the variable names here must stay in sync with those lines;
//   - each assignment is a `store volatile` with the same alignment, emitted in
//     the order the assignments appear below;
//   - stores to 3-element vectors are expected as 4-element vector stores whose
//     last lane is undef.
| 137 | kernel void local_memory_alignment_global() |
| 138 | { |
// char scalar and vectors (i8 in the expected IR)
| 139 | volatile local char lds_i8[4]; |
| 140 | volatile local char2 lds_v2i8[4]; |
| 141 | volatile local char3 lds_v3i8[4]; |
| 142 | volatile local char4 lds_v4i8[4]; |
| 143 | volatile local char8 lds_v8i8[4]; |
| 144 | volatile local char16 lds_v16i8[4]; |
| 145 | |
// short scalar and vectors (i16 in the expected IR)
| 146 | volatile local short lds_i16[4]; |
| 147 | volatile local short2 lds_v2i16[4]; |
| 148 | volatile local short3 lds_v3i16[4]; |
| 149 | volatile local short4 lds_v4i16[4]; |
| 150 | volatile local short8 lds_v8i16[4]; |
| 151 | volatile local short16 lds_v16i16[4]; |
| 152 | |
// int scalar and vectors (i32 in the expected IR)
| 153 | volatile local int lds_i32[4]; |
| 154 | volatile local int2 lds_v2i32[4]; |
| 155 | volatile local int3 lds_v3i32[4]; |
| 156 | volatile local int4 lds_v4i32[4]; |
| 157 | volatile local int8 lds_v8i32[4]; |
| 158 | volatile local int16 lds_v16i32[4]; |
| 159 | |
// long scalar and vectors (i64 in the expected IR)
| 160 | volatile local long lds_i64[4]; |
| 161 | volatile local long2 lds_v2i64[4]; |
| 162 | volatile local long3 lds_v3i64[4]; |
| 163 | volatile local long4 lds_v4i64[4]; |
| 164 | volatile local long8 lds_v8i64[4]; |
| 165 | volatile local long16 lds_v16i64[4]; |
| 166 | |
// half scalar and vectors
| 167 | volatile local half lds_f16[4]; |
| 168 | volatile local half2 lds_v2f16[4]; |
| 169 | volatile local half3 lds_v3f16[4]; |
| 170 | volatile local half4 lds_v4f16[4]; |
| 171 | volatile local half8 lds_v8f16[4]; |
| 172 | volatile local half16 lds_v16f16[4]; |
| 173 | |
// float scalar and vectors
| 174 | volatile local float lds_f32[4]; |
| 175 | volatile local float2 lds_v2f32[4]; |
| 176 | volatile local float3 lds_v3f32[4]; |
| 177 | volatile local float4 lds_v4f32[4]; |
| 178 | volatile local float8 lds_v8f32[4]; |
| 179 | volatile local float16 lds_v16f32[4]; |
| 180 | |
// double scalar and vectors
| 181 | volatile local double lds_f64[4]; |
| 182 | volatile local double2 lds_v2f64[4]; |
| 183 | volatile local double3 lds_v3f64[4]; |
| 184 | volatile local double4 lds_v4f64[4]; |
| 185 | volatile local double8 lds_v8f64[4]; |
| 186 | volatile local double16 lds_v16f64[4]; |
| 187 | |
// One zero store per array, in declaration order. The arrays are volatile and
// the CHECK lines expect every store to appear as `store volatile`. Assigning
// the scalar 0 to a vector element is expected as a zeroinitializer store.
| 188 | *lds_i8 = 0; |
| 189 | *lds_v2i8 = 0; |
| 190 | *lds_v3i8 = 0; |
| 191 | *lds_v4i8 = 0; |
| 192 | *lds_v8i8 = 0; |
| 193 | *lds_v16i8 = 0; |
| 194 | |
| 195 | *lds_i16 = 0; |
| 196 | *lds_v2i16 = 0; |
| 197 | *lds_v3i16 = 0; |
| 198 | *lds_v4i16 = 0; |
| 199 | *lds_v8i16 = 0; |
| 200 | *lds_v16i16 = 0; |
| 201 | |
| 202 | *lds_i32 = 0; |
| 203 | *lds_v2i32 = 0; |
| 204 | *lds_v3i32 = 0; |
| 205 | *lds_v4i32 = 0; |
| 206 | *lds_v8i32 = 0; |
| 207 | *lds_v16i32 = 0; |
| 208 | |
| 209 | *lds_i64 = 0; |
| 210 | *lds_v2i64 = 0; |
| 211 | *lds_v3i64 = 0; |
| 212 | *lds_v4i64 = 0; |
| 213 | *lds_v8i64 = 0; |
| 214 | *lds_v16i64 = 0; |
| 215 | |
| 216 | *lds_f16 = 0; |
| 217 | *lds_v2f16 = 0; |
| 218 | *lds_v3f16 = 0; |
| 219 | *lds_v4f16 = 0; |
| 220 | *lds_v8f16 = 0; |
| 221 | *lds_v16f16 = 0; |
| 222 | |
| 223 | *lds_f32 = 0; |
| 224 | *lds_v2f32 = 0; |
| 225 | *lds_v3f32 = 0; |
| 226 | *lds_v4f32 = 0; |
| 227 | *lds_v8f32 = 0; |
| 228 | *lds_v16f32 = 0; |
| 229 | |
| 230 | *lds_f64 = 0; |
| 231 | *lds_v2f64 = 0; |
| 232 | *lds_v3f64 = 0; |
| 233 | *lds_v4f64 = 0; |
| 234 | *lds_v8f64 = 0; |
| 235 | *lds_v16f64 = 0; |
| 236 | } |
| 237 | |
// Test kernel: takes one `volatile local` pointer parameter for every scalar
// type and every 2/3/4/8/16-element vector type, and stores 0 through each
// pointer. It performs the same sequence of stores as
// local_memory_alignment_global, but through kernel arguments instead of
// arrays declared inside the kernel.
//
// NOTE(review): no CHECK or CHECK-LABEL line in the visible part of this file
// references local_memory_alignment_arg, so the IR generated for this kernel
// appears to go unverified -- confirm whether that is intentional.
| 238 | kernel void local_memory_alignment_arg( |
// char scalar and vector pointers
| 239 | volatile local char* lds_i8, |
| 240 | volatile local char2* lds_v2i8, |
| 241 | volatile local char3* lds_v3i8, |
| 242 | volatile local char4* lds_v4i8, |
| 243 | volatile local char8* lds_v8i8, |
| 244 | volatile local char16* lds_v16i8, |
| 245 | |
// short scalar and vector pointers
| 246 | volatile local short* lds_i16, |
| 247 | volatile local short2* lds_v2i16, |
| 248 | volatile local short3* lds_v3i16, |
| 249 | volatile local short4* lds_v4i16, |
| 250 | volatile local short8* lds_v8i16, |
| 251 | volatile local short16* lds_v16i16, |
| 252 | |
// int scalar and vector pointers
| 253 | volatile local int* lds_i32, |
| 254 | volatile local int2* lds_v2i32, |
| 255 | volatile local int3* lds_v3i32, |
| 256 | volatile local int4* lds_v4i32, |
| 257 | volatile local int8* lds_v8i32, |
| 258 | volatile local int16* lds_v16i32, |
| 259 | |
// long scalar and vector pointers
| 260 | volatile local long* lds_i64, |
| 261 | volatile local long2* lds_v2i64, |
| 262 | volatile local long3* lds_v3i64, |
| 263 | volatile local long4* lds_v4i64, |
| 264 | volatile local long8* lds_v8i64, |
| 265 | volatile local long16* lds_v16i64, |
| 266 | |
// half scalar and vector pointers
| 267 | volatile local half* lds_f16, |
| 268 | volatile local half2* lds_v2f16, |
| 269 | volatile local half3* lds_v3f16, |
| 270 | volatile local half4* lds_v4f16, |
| 271 | volatile local half8* lds_v8f16, |
| 272 | volatile local half16* lds_v16f16, |
| 273 | |
// float scalar and vector pointers
| 274 | volatile local float* lds_f32, |
| 275 | volatile local float2* lds_v2f32, |
| 276 | volatile local float3* lds_v3f32, |
| 277 | volatile local float4* lds_v4f32, |
| 278 | volatile local float8* lds_v8f32, |
| 279 | volatile local float16* lds_v16f32, |
| 280 | |
// double scalar and vector pointers
| 281 | volatile local double* lds_f64, |
| 282 | volatile local double2* lds_v2f64, |
| 283 | volatile local double3* lds_v3f64, |
| 284 | volatile local double4* lds_v4f64, |
| 285 | volatile local double8* lds_v8f64, |
| 286 | volatile local double16* lds_v16f64) |
| 287 | { |
// One zero store through each pointer, in parameter order.
| 288 | *lds_i8 = 0; |
| 289 | *lds_v2i8 = 0; |
| 290 | *lds_v3i8 = 0; |
| 291 | *lds_v4i8 = 0; |
| 292 | *lds_v8i8 = 0; |
| 293 | *lds_v16i8 = 0; |
| 294 | |
| 295 | *lds_i16 = 0; |
| 296 | *lds_v2i16 = 0; |
| 297 | *lds_v3i16 = 0; |
| 298 | *lds_v4i16 = 0; |
| 299 | *lds_v8i16 = 0; |
| 300 | *lds_v16i16 = 0; |
| 301 | |
| 302 | *lds_i32 = 0; |
| 303 | *lds_v2i32 = 0; |
| 304 | *lds_v3i32 = 0; |
| 305 | *lds_v4i32 = 0; |
| 306 | *lds_v8i32 = 0; |
| 307 | *lds_v16i32 = 0; |
| 308 | |
| 309 | *lds_i64 = 0; |
| 310 | *lds_v2i64 = 0; |
| 311 | *lds_v3i64 = 0; |
| 312 | *lds_v4i64 = 0; |
| 313 | *lds_v8i64 = 0; |
| 314 | *lds_v16i64 = 0; |
| 315 | |
| 316 | *lds_f16 = 0; |
| 317 | *lds_v2f16 = 0; |
| 318 | *lds_v3f16 = 0; |
| 319 | *lds_v4f16 = 0; |
| 320 | *lds_v8f16 = 0; |
| 321 | *lds_v16f16 = 0; |
| 322 | |
| 323 | *lds_f32 = 0; |
| 324 | *lds_v2f32 = 0; |
| 325 | *lds_v3f32 = 0; |
| 326 | *lds_v4f32 = 0; |
| 327 | *lds_v8f32 = 0; |
| 328 | *lds_v16f32 = 0; |
| 329 | |
| 330 | *lds_f64 = 0; |
| 331 | *lds_v2f64 = 0; |
| 332 | *lds_v3f64 = 0; |
| 333 | *lds_v4f64 = 0; |
| 334 | *lds_v8f64 = 0; |
| 335 | *lds_v16f64 = 0; |
| 336 | } |
| 337 | |
| 338 | // CHECK-LABEL: @private_memory_alignment_alloca( |
Yaxun Liu | f5f45e5 | 2018-02-02 16:08:24 +0000 | [diff] [blame] | 339 | // CHECK: %private_i8 = alloca [4 x i8], align 1, addrspace(5) |
| 340 | // CHECK: %private_v2i8 = alloca [4 x <2 x i8>], align 2, addrspace(5) |
| 341 | // CHECK: %private_v3i8 = alloca [4 x <3 x i8>], align 4, addrspace(5) |
| 342 | // CHECK: %private_v4i8 = alloca [4 x <4 x i8>], align 4, addrspace(5) |
| 343 | // CHECK: %private_v8i8 = alloca [4 x <8 x i8>], align 8, addrspace(5) |
| 344 | // CHECK: %private_v16i8 = alloca [4 x <16 x i8>], align 16, addrspace(5) |
| 345 | // CHECK: %private_i16 = alloca [4 x i16], align 2, addrspace(5) |
| 346 | // CHECK: %private_v2i16 = alloca [4 x <2 x i16>], align 4, addrspace(5) |
| 347 | // CHECK: %private_v3i16 = alloca [4 x <3 x i16>], align 8, addrspace(5) |
| 348 | // CHECK: %private_v4i16 = alloca [4 x <4 x i16>], align 8, addrspace(5) |
| 349 | // CHECK: %private_v8i16 = alloca [4 x <8 x i16>], align 16, addrspace(5) |
| 350 | // CHECK: %private_v16i16 = alloca [4 x <16 x i16>], align 32, addrspace(5) |
| 351 | // CHECK: %private_i32 = alloca [4 x i32], align 4, addrspace(5) |
| 352 | // CHECK: %private_v2i32 = alloca [4 x <2 x i32>], align 8, addrspace(5) |
| 353 | // CHECK: %private_v3i32 = alloca [4 x <3 x i32>], align 16, addrspace(5) |
| 354 | // CHECK: %private_v4i32 = alloca [4 x <4 x i32>], align 16, addrspace(5) |
| 355 | // CHECK: %private_v8i32 = alloca [4 x <8 x i32>], align 32, addrspace(5) |
| 356 | // CHECK: %private_v16i32 = alloca [4 x <16 x i32>], align 64, addrspace(5) |
| 357 | // CHECK: %private_i64 = alloca [4 x i64], align 8, addrspace(5) |
| 358 | // CHECK: %private_v2i64 = alloca [4 x <2 x i64>], align 16, addrspace(5) |
| 359 | // CHECK: %private_v3i64 = alloca [4 x <3 x i64>], align 32, addrspace(5) |
| 360 | // CHECK: %private_v4i64 = alloca [4 x <4 x i64>], align 32, addrspace(5) |
| 361 | // CHECK: %private_v8i64 = alloca [4 x <8 x i64>], align 64, addrspace(5) |
| 362 | // CHECK: %private_v16i64 = alloca [4 x <16 x i64>], align 128, addrspace(5) |
| 363 | // CHECK: %private_f16 = alloca [4 x half], align 2, addrspace(5) |
| 364 | // CHECK: %private_v2f16 = alloca [4 x <2 x half>], align 4, addrspace(5) |
| 365 | // CHECK: %private_v3f16 = alloca [4 x <3 x half>], align 8, addrspace(5) |
| 366 | // CHECK: %private_v4f16 = alloca [4 x <4 x half>], align 8, addrspace(5) |
| 367 | // CHECK: %private_v8f16 = alloca [4 x <8 x half>], align 16, addrspace(5) |
| 368 | // CHECK: %private_v16f16 = alloca [4 x <16 x half>], align 32, addrspace(5) |
| 369 | // CHECK: %private_f32 = alloca [4 x float], align 4, addrspace(5) |
| 370 | // CHECK: %private_v2f32 = alloca [4 x <2 x float>], align 8, addrspace(5) |
| 371 | // CHECK: %private_v3f32 = alloca [4 x <3 x float>], align 16, addrspace(5) |
| 372 | // CHECK: %private_v4f32 = alloca [4 x <4 x float>], align 16, addrspace(5) |
| 373 | // CHECK: %private_v8f32 = alloca [4 x <8 x float>], align 32, addrspace(5) |
| 374 | // CHECK: %private_v16f32 = alloca [4 x <16 x float>], align 64, addrspace(5) |
| 375 | // CHECK: %private_f64 = alloca [4 x double], align 8, addrspace(5) |
| 376 | // CHECK: %private_v2f64 = alloca [4 x <2 x double>], align 16, addrspace(5) |
| 377 | // CHECK: %private_v3f64 = alloca [4 x <3 x double>], align 32, addrspace(5) |
| 378 | // CHECK: %private_v4f64 = alloca [4 x <4 x double>], align 32, addrspace(5) |
| 379 | // CHECK: %private_v8f64 = alloca [4 x <8 x double>], align 64, addrspace(5) |
| 380 | // CHECK: %private_v16f64 = alloca [4 x <16 x double>], align 128, addrspace(5) |
Matt Arsenault | 77ce553 | 2017-02-07 04:28:02 +0000 | [diff] [blame] | 381 | |
Yaxun Liu | f5f45e5 | 2018-02-02 16:08:24 +0000 | [diff] [blame] | 382 | // CHECK: store volatile i8 0, i8 addrspace(5)* %arraydecay, align 1 |
| 383 | // CHECK: store volatile <2 x i8> zeroinitializer, <2 x i8> addrspace(5)* %arraydecay{{[0-9]+}}, align 2 |
| 384 | // CHECK: store volatile <4 x i8> <i8 0, i8 0, i8 0, i8 undef>, <4 x i8> addrspace(5)* %storetmp, align 4 |
| 385 | // CHECK: store volatile <4 x i8> zeroinitializer, <4 x i8> addrspace(5)* %arraydecay{{[0-9]+}}, align 4 |
| 386 | // CHECK: store volatile <8 x i8> zeroinitializer, <8 x i8> addrspace(5)* %arraydecay{{[0-9]+}}, align 8 |
| 387 | // CHECK: store volatile <16 x i8> zeroinitializer, <16 x i8> addrspace(5)* %arraydecay{{[0-9]+}}, align 16 |
| 388 | // CHECK: store volatile i16 0, i16 addrspace(5)* %arraydecay{{[0-9]+}}, align 2 |
| 389 | // CHECK: store volatile <2 x i16> zeroinitializer, <2 x i16> addrspace(5)* %arraydecay{{[0-9]+}}, align 4 |
| 390 | // CHECK: store volatile <4 x i16> <i16 0, i16 0, i16 0, i16 undef>, <4 x i16> addrspace(5)* %storetmp{{[0-9]+}}, align 8 |
| 391 | // CHECK: store volatile <4 x i16> zeroinitializer, <4 x i16> addrspace(5)* %arraydecay{{[0-9]+}}, align 8 |
| 392 | // CHECK: store volatile <8 x i16> zeroinitializer, <8 x i16> addrspace(5)* %arraydecay{{[0-9]+}}, align 16 |
| 393 | // CHECK: store volatile <16 x i16> zeroinitializer, <16 x i16> addrspace(5)* %arraydecay{{[0-9]+}}, align 32 |
| 394 | // CHECK: store volatile i32 0, i32 addrspace(5)* %arraydecay{{[0-9]+}}, align 4 |
| 395 | // CHECK: store volatile <2 x i32> zeroinitializer, <2 x i32> addrspace(5)* %arraydecay{{[0-9]+}}, align 8 |
| 396 | // CHECK: store volatile <4 x i32> <i32 0, i32 0, i32 0, i32 undef>, <4 x i32> addrspace(5)* %storetmp{{[0-9]+}}, align 16 |
| 397 | // CHECK: store volatile <4 x i32> zeroinitializer, <4 x i32> addrspace(5)* %arraydecay{{[0-9]+}}, align 16 |
| 398 | // CHECK: store volatile <8 x i32> zeroinitializer, <8 x i32> addrspace(5)* %arraydecay{{[0-9]+}}, align 32 |
| 399 | // CHECK: store volatile <16 x i32> zeroinitializer, <16 x i32> addrspace(5)* %arraydecay{{[0-9]+}}, align 64 |
| 400 | // CHECK: store volatile i64 0, i64 addrspace(5)* %arraydecay{{[0-9]+}}, align 8 |
| 401 | // CHECK: store volatile <2 x i64> zeroinitializer, <2 x i64> addrspace(5)* %arraydecay{{[0-9]+}}, align 16 |
| 402 | // CHECK: store volatile <4 x i64> <i64 0, i64 0, i64 0, i64 undef>, <4 x i64> addrspace(5)* %storetmp{{[0-9]+}}, align 32 |
| 403 | // CHECK: store volatile <4 x i64> zeroinitializer, <4 x i64> addrspace(5)* %arraydecay{{[0-9]+}}, align 32 |
| 404 | // CHECK: store volatile <8 x i64> zeroinitializer, <8 x i64> addrspace(5)* %arraydecay{{[0-9]+}}, align 64 |
| 405 | // CHECK: store volatile <16 x i64> zeroinitializer, <16 x i64> addrspace(5)* %arraydecay{{[0-9]+}}, align 128 |
| 406 | // CHECK: store volatile half 0xH0000, half addrspace(5)* %arraydecay{{[0-9]+}}, align 2 |
| 407 | // CHECK: store volatile <2 x half> zeroinitializer, <2 x half> addrspace(5)* %arraydecay{{[0-9]+}}, align 4 |
| 408 | // CHECK: store volatile <4 x half> <half 0xH0000, half 0xH0000, half 0xH0000, half undef>, <4 x half> addrspace(5)* %storetmp{{[0-9]+}}, align 8 |
| 409 | // CHECK: store volatile <4 x half> zeroinitializer, <4 x half> addrspace(5)* %arraydecay{{[0-9]+}}, align 8 |
| 410 | // CHECK: store volatile <8 x half> zeroinitializer, <8 x half> addrspace(5)* %arraydecay{{[0-9]+}}, align 16 |
| 411 | // CHECK: store volatile <16 x half> zeroinitializer, <16 x half> addrspace(5)* %arraydecay{{[0-9]+}}, align 32 |
| 412 | // CHECK: store volatile float 0.000000e+00, float addrspace(5)* %arraydecay{{[0-9]+}}, align 4 |
| 413 | // CHECK: store volatile <2 x float> zeroinitializer, <2 x float> addrspace(5)* %arraydecay{{[0-9]+}}, align 8 |
| 414 | // CHECK: store volatile <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef>, <4 x float> addrspace(5)* %storetmp{{[0-9]+}}, align 16 |
| 415 | // CHECK: store volatile <4 x float> zeroinitializer, <4 x float> addrspace(5)* %arraydecay{{[0-9]+}}, align 16 |
| 416 | // CHECK: store volatile <8 x float> zeroinitializer, <8 x float> addrspace(5)* %arraydecay{{[0-9]+}}, align 32 |
| 417 | // CHECK: store volatile <16 x float> zeroinitializer, <16 x float> addrspace(5)* %arraydecay{{[0-9]+}}, align 64 |
| 418 | // CHECK: store volatile double 0.000000e+00, double addrspace(5)* %arraydecay{{[0-9]+}}, align 8 |
| 419 | // CHECK: store volatile <2 x double> zeroinitializer, <2 x double> addrspace(5)* %arraydecay{{[0-9]+}}, align 16 |
| 420 | // CHECK: store volatile <4 x double> <double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double undef>, <4 x double> addrspace(5)* %storetmp{{[0-9]+}}, align 32 |
| 421 | // CHECK: store volatile <4 x double> zeroinitializer, <4 x double> addrspace(5)* %arraydecay{{[0-9]+}}, align 32 |
| 422 | // CHECK: store volatile <8 x double> zeroinitializer, <8 x double> addrspace(5)* %arraydecay{{[0-9]+}}, align 64 |
| 423 | // CHECK: store volatile <16 x double> zeroinitializer, <16 x double> addrspace(5)* %arraydecay{{[0-9]+}}, align 128 |
// Kernel under test. For each element type (char, short, int, long, half,
// float, double) it declares a scalar array plus 2-, 3-, 4-, 8- and
// 16-element vector arrays, every one a 4-element `volatile private` array,
// and then assigns 0 through each array name. The CHECK lines that precede
// this kernel spell out the `store volatile` instruction (and its `align`
// value) expected for these assignments.
//
// NOTE(review): do not rename the locals or reorder the statements without
// updating the CHECK lines -- FileCheck matches its directives in order and
// the patterns mention generated value names (%arraydecay, %storetmp).
Matt Arsenault | 77ce553 | 2017-02-07 04:28:02 +0000 | [diff] [blame] | 424 | kernel void private_memory_alignment_alloca() |
| 425 | { |
// 8-bit integer scalar and vector arrays.
| 426 | volatile private char private_i8[4]; |
| 427 | volatile private char2 private_v2i8[4]; |
| 428 | volatile private char3 private_v3i8[4]; |
| 429 | volatile private char4 private_v4i8[4]; |
| 430 | volatile private char8 private_v8i8[4]; |
| 431 | volatile private char16 private_v16i8[4]; |
| 432 | |
// 16-bit integer scalar and vector arrays.
| 433 | volatile private short private_i16[4]; |
| 434 | volatile private short2 private_v2i16[4]; |
| 435 | volatile private short3 private_v3i16[4]; |
| 436 | volatile private short4 private_v4i16[4]; |
| 437 | volatile private short8 private_v8i16[4]; |
| 438 | volatile private short16 private_v16i16[4]; |
| 439 | |
// 32-bit integer scalar and vector arrays.
| 440 | volatile private int private_i32[4]; |
| 441 | volatile private int2 private_v2i32[4]; |
| 442 | volatile private int3 private_v3i32[4]; |
| 443 | volatile private int4 private_v4i32[4]; |
| 444 | volatile private int8 private_v8i32[4]; |
| 445 | volatile private int16 private_v16i32[4]; |
| 446 | |
// 64-bit integer scalar and vector arrays.
| 447 | volatile private long private_i64[4]; |
| 448 | volatile private long2 private_v2i64[4]; |
| 449 | volatile private long3 private_v3i64[4]; |
| 450 | volatile private long4 private_v4i64[4]; |
| 451 | volatile private long8 private_v8i64[4]; |
| 452 | volatile private long16 private_v16i64[4]; |
| 453 | |
// Half-precision scalar and vector arrays.
| 454 | volatile private half private_f16[4]; |
| 455 | volatile private half2 private_v2f16[4]; |
| 456 | volatile private half3 private_v3f16[4]; |
| 457 | volatile private half4 private_v4f16[4]; |
| 458 | volatile private half8 private_v8f16[4]; |
| 459 | volatile private half16 private_v16f16[4]; |
| 460 | |
// Single-precision scalar and vector arrays.
| 461 | volatile private float private_f32[4]; |
| 462 | volatile private float2 private_v2f32[4]; |
| 463 | volatile private float3 private_v3f32[4]; |
| 464 | volatile private float4 private_v4f32[4]; |
| 465 | volatile private float8 private_v8f32[4]; |
| 466 | volatile private float16 private_v16f32[4]; |
| 467 | |
// Double-precision scalar and vector arrays.
| 468 | volatile private double private_f64[4]; |
| 469 | volatile private double2 private_v2f64[4]; |
| 470 | volatile private double3 private_v3f64[4]; |
| 471 | volatile private double4 private_v4f64[4]; |
| 472 | volatile private double8 private_v8f64[4]; |
| 473 | volatile private double16 private_v16f64[4]; |
| 474 | |
// Store 0 to the first element of every array, in declaration order.
// Dereferencing the array name goes through array-to-pointer decay, which is
// where the %arraydecay operands in the CHECK lines come from.
// For the half3/float3/double3 arrays the visible CHECK lines expect a
// <4 x T> store with an undef fourth lane through a %storetmp pointer.
| 475 | *private_i8 = 0; |
| 476 | *private_v2i8 = 0; |
| 477 | *private_v3i8 = 0; |
| 478 | *private_v4i8 = 0; |
| 479 | *private_v8i8 = 0; |
| 480 | *private_v16i8 = 0; |
| 481 | |
| 482 | *private_i16 = 0; |
| 483 | *private_v2i16 = 0; |
| 484 | *private_v3i16 = 0; |
| 485 | *private_v4i16 = 0; |
| 486 | *private_v8i16 = 0; |
| 487 | *private_v16i16 = 0; |
| 488 | |
| 489 | *private_i32 = 0; |
| 490 | *private_v2i32 = 0; |
| 491 | *private_v3i32 = 0; |
| 492 | *private_v4i32 = 0; |
| 493 | *private_v8i32 = 0; |
| 494 | *private_v16i32 = 0; |
| 495 | |
| 496 | *private_i64 = 0; |
| 497 | *private_v2i64 = 0; |
| 498 | *private_v3i64 = 0; |
| 499 | *private_v4i64 = 0; |
| 500 | *private_v8i64 = 0; |
| 501 | *private_v16i64 = 0; |
| 502 | |
| 503 | *private_f16 = 0; |
| 504 | *private_v2f16 = 0; |
| 505 | *private_v3f16 = 0; |
| 506 | *private_v4f16 = 0; |
| 507 | *private_v8f16 = 0; |
| 508 | *private_v16f16 = 0; |
| 509 | |
| 510 | *private_f32 = 0; |
| 511 | *private_v2f32 = 0; |
| 512 | *private_v3f32 = 0; |
| 513 | *private_v4f32 = 0; |
| 514 | *private_v8f32 = 0; |
| 515 | *private_v16f32 = 0; |
| 516 | |
| 517 | *private_f64 = 0; |
| 518 | *private_v2f64 = 0; |
| 519 | *private_v3f64 = 0; |
| 520 | *private_v4f64 = 0; |
| 521 | *private_v8f64 = 0; |
| 522 | *private_v16f64 = 0; |
| 523 | } |