; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SI-NOHSA -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI  -check-prefix=VI-NOHSA -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

; Codegen checks for the llvm.r600.read.local.size.{x,y,z} intrinsics.
; The file is compiled three times: amdgcn for SI, amdgcn for tonga (the VI
; prefixes) and r600 for redwood (the EG prefix). The two amdgcn invocations
; share the GCN and GCN-NOHSA prefixes, and all three enable the FUNC labels.

; Stores the X component of the local size to %out.
; The r600 checks expect the stored register to be filled from KC0[1].Z.
; The amdgcn checks expect a scalar load from s[0:1] at offset 0x6 for SI or
; 0x18 for VI, copied into a VGPR that is then written with buffer_store_dword.
; FUNC-LABEL: {{^}}local_size_x:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV [[VAL]], KC0[1].Z

; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x6
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x18

; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN: buffer_store_dword [[VVAL]]
define void @local_size_x(i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.local.size.x() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; Writes the Y component of the local size to %out.
; Expected source of the value: KC0[1].W on r600, and a scalar load from
; s[0:1] at offset 0x7 (SI) or 0x1c (VI) on amdgcn.
; FUNC-LABEL: {{^}}local_size_y:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV [[VAL]], KC0[1].W

; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x7
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1c
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN: buffer_store_dword [[VVAL]]
define void @local_size_y(i32 addrspace(1)* %out) {
entry:
  %size.y = call i32 @llvm.r600.read.local.size.y() #0
  store i32 %size.y, i32 addrspace(1)* %out
  ret void
}

; Writes the Z component of the local size to %out.
; Expected source of the value: KC0[2].X on r600, and a scalar load from
; s[0:1] at offset 0x8 (SI) or 0x20 (VI) on amdgcn.
; FUNC-LABEL: {{^}}local_size_z:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV [[VAL]], KC0[2].X

; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x20
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN: buffer_store_dword [[VVAL]]
define void @local_size_z(i32 addrspace(1)* %out) {
entry:
  %size.z = call i32 @llvm.r600.read.local.size.z() #0
  store i32 %size.z, i32 addrspace(1)* %out
  ret void
}

; Multiplies the X and Y components of the local size and stores the product.
; The amdgcn checks expect both components to be loaded as scalars, Y to be
; copied into a VGPR, and the multiply to be selected as v_mul_u32_u24_e32
; with X as the first source operand.
; FUNC-LABEL: {{^}}local_size_xy:
; SI-NOHSA-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x6
; SI-NOHSA-DAG: s_load_dword [[Y:s[0-9]+]], s[0:1], 0x7
; VI-NOHSA-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x18
; VI-NOHSA-DAG: s_load_dword [[Y:s[0-9]+]], s[0:1], 0x1c
; GCN-DAG: v_mov_b32_e32 [[VY:v[0-9]+]], [[Y]]
; GCN: v_mul_u32_u24_e32 [[VAL:v[0-9]+]], [[X]], [[VY]]
; GCN: buffer_store_dword [[VAL]]
define void @local_size_xy(i32 addrspace(1)* %out) {
entry:
  %size.x = call i32 @llvm.r600.read.local.size.x() #0
  %size.y = call i32 @llvm.r600.read.local.size.y() #0
  %product = mul i32 %size.x, %size.y
  store i32 %product, i32 addrspace(1)* %out
  ret void
}

; Multiplies the X and Z components of the local size and stores the product.
; The amdgcn checks expect both components to be loaded as scalars, Z to be
; copied into a VGPR, and the multiply to be selected as v_mul_u32_u24_e32
; with X as the first source operand.
; FUNC-LABEL: {{^}}local_size_xz:

; SI-NOHSA-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x6
; SI-NOHSA-DAG: s_load_dword [[Z:s[0-9]+]], s[0:1], 0x8
; VI-NOHSA-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x18
; VI-NOHSA-DAG: s_load_dword [[Z:s[0-9]+]], s[0:1], 0x20
; GCN-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], [[Z]]
; GCN: v_mul_u32_u24_e32 [[VAL:v[0-9]+]], [[X]], [[VZ]]
; GCN: buffer_store_dword [[VAL]]
define void @local_size_xz(i32 addrspace(1)* %out) {
entry:
  %x = call i32 @llvm.r600.read.local.size.x() #0
  %z = call i32 @llvm.r600.read.local.size.z() #0
  %val = mul i32 %x, %z
  store i32 %val, i32 addrspace(1)* %out
  ret void
}

; Multiplies the Y and Z components of the local size and stores the product.
; The amdgcn checks expect both components to be loaded as scalars, Z to be
; copied into a VGPR, and the multiply to be selected as v_mul_u32_u24_e32
; with Y as the first source operand.
; FUNC-LABEL: {{^}}local_size_yz:

; SI-NOHSA-DAG: s_load_dword [[Y:s[0-9]+]], s[0:1], 0x7
; SI-NOHSA-DAG: s_load_dword [[Z:s[0-9]+]], s[0:1], 0x8
; VI-NOHSA-DAG: s_load_dword [[Y:s[0-9]+]], s[0:1], 0x1c
; VI-NOHSA-DAG: s_load_dword [[Z:s[0-9]+]], s[0:1], 0x20
; GCN-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], [[Z]]
; GCN: v_mul_u32_u24_e32 [[VAL:v[0-9]+]], [[Y]], [[VZ]]
; GCN: buffer_store_dword [[VAL]]
define void @local_size_yz(i32 addrspace(1)* %out) {
entry:
  %y = call i32 @llvm.r600.read.local.size.y() #0
  %z = call i32 @llvm.r600.read.local.size.z() #0
  %val = mul i32 %y, %z
  store i32 %val, i32 addrspace(1)* %out
  ret void
}

; Computes x * y + z over the three local size components and stores it.
; The amdgcn checks expect all three components to be loaded as scalars, Y and
; Z to be copied into VGPRs, and the multiply-add to be selected as a single
; v_mad_u32_u24 with X as the first source operand.
; FUNC-LABEL: {{^}}local_size_xyz:

; SI-NOHSA-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x6
; SI-NOHSA-DAG: s_load_dword [[Y:s[0-9]+]], s[0:1], 0x7
; SI-NOHSA-DAG: s_load_dword [[Z:s[0-9]+]], s[0:1], 0x8
; VI-NOHSA-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x18
; VI-NOHSA-DAG: s_load_dword [[Y:s[0-9]+]], s[0:1], 0x1c
; VI-NOHSA-DAG: s_load_dword [[Z:s[0-9]+]], s[0:1], 0x20
; GCN-DAG: v_mov_b32_e32 [[VY:v[0-9]+]], [[Y]]
; GCN-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], [[Z]]
; GCN: v_mad_u32_u24 [[VAL:v[0-9]+]], [[X]], [[VY]], [[VZ]]
; GCN: buffer_store_dword [[VAL]]
define void @local_size_xyz(i32 addrspace(1)* %out) {
entry:
  %x = call i32 @llvm.r600.read.local.size.x() #0
  %y = call i32 @llvm.r600.read.local.size.y() #0
  %z = call i32 @llvm.r600.read.local.size.z() #0
  %xy = mul i32 %x, %y
  %xyz = add i32 %xy, %z
  store i32 %xyz, i32 addrspace(1)* %out
  ret void
}

; Clears the upper 16 bits of the X local size with a shl/lshr pair before
; storing it. The checks require that no 0xffff mask appears between the
; scalar load and the move into a VGPR, and that the store immediately
; follows that move, so the masking is expected to fold away entirely
; (presumably because the upper bits are known to be zero; confirm in the
; backend).
; FUNC-LABEL: {{^}}local_size_x_known_bits:
; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x6
; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x18
; GCN-NOT: 0xffff
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NEXT: buffer_store_dword [[VVAL]]
define void @local_size_x_known_bits(i32 addrspace(1)* %out) {
entry:
  %size.x = call i32 @llvm.r600.read.local.size.x() #0
  %shifted.up = shl i32 %size.x, 16
  %low16 = lshr i32 %shifted.up, 16
  store i32 %low16, i32 addrspace(1)* %out
  ret void
}

; Clears the upper 16 bits of the Y local size with a shl/lshr pair before
; storing it. The checks require that no 0xffff mask appears between the
; scalar load and the move into a VGPR, and that the store immediately
; follows that move, so the masking is expected to fold away entirely
; (presumably because the upper bits are known to be zero; confirm in the
; backend).
; FUNC-LABEL: {{^}}local_size_y_known_bits:
; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x7
; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1c
; GCN-NOT: 0xffff
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NEXT: buffer_store_dword [[VVAL]]
define void @local_size_y_known_bits(i32 addrspace(1)* %out) {
entry:
  %size.y = call i32 @llvm.r600.read.local.size.y() #0
  %shifted.up = shl i32 %size.y, 16
  %low16 = lshr i32 %shifted.up, 16
  store i32 %low16, i32 addrspace(1)* %out
  ret void
}

; Clears the upper 16 bits of the Z local size with a shl/lshr pair before
; storing it. The checks require that no 0xffff mask appears between the
; scalar load and the move into a VGPR, and that the store immediately
; follows that move, so the masking is expected to fold away entirely
; (presumably because the upper bits are known to be zero; confirm in the
; backend).
; FUNC-LABEL: {{^}}local_size_z_known_bits:
; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x20
; GCN-NOT: 0xffff
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NEXT: buffer_store_dword [[VVAL]]
define void @local_size_z_known_bits(i32 addrspace(1)* %out) {
entry:
  %size.z = call i32 @llvm.r600.read.local.size.z() #0
  %shifted.up = shl i32 %size.z, 16
  %low16 = lshr i32 %shifted.up, 16
  store i32 %low16, i32 addrspace(1)* %out
  ret void
}

; Declarations of the three local size intrinsics called by the kernels in
; this file. Each takes no arguments and returns an i32.
declare i32 @llvm.r600.read.local.size.x() #0
declare i32 @llvm.r600.read.local.size.y() #0
declare i32 @llvm.r600.read.local.size.z() #0

; Attribute group #0 is attached to the declarations above and to every call
; site of these intrinsics in this file.
attributes #0 = { nounwind readnone }