| Tim Renouf | 35484c9 | 2018-08-21 11:06:05 +0000 | [diff] [blame] | 1 | ;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck -check-prefixes=GCN,VERDE %s | 
|  | 2 | ;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefix=GCN %s | 
|  | 3 |  | 
|  |  | ; Every index and offset operand is constant zero, so a zeroed VGPR is expected as the idxen operand. | 
|  |  | ; In these checks the format operand decodes as dfmt + 16 * nfmt (44 -> 12/2, 61 -> 13/3, 78 -> 14/4), | 
|  |  | ; and the final operand selects the cache bits (0 -> none, 1 -> glc, 2 -> slc). The last call stores | 
|  |  | ; the same data as the third one through the v4f32 overload, with no cache bits set. | 
|  | 4 | ; GCN-LABEL: {{^}}tbuffer_store: | 
|  | 5 | ; GCN: v_mov_b32_e32 [[ZEROREG:v[0-9]+]], 0 | 
|  | 6 | ; GCN: tbuffer_store_format_xyzw v[0:3], [[ZEROREG]], s[0:3], dfmt:12, nfmt:2, 0 idxen | 
|  | 7 | ; GCN: tbuffer_store_format_xyzw v[4:7], [[ZEROREG]], s[0:3], dfmt:13, nfmt:3, 0 idxen glc | 
|  | 8 | ; GCN: tbuffer_store_format_xyzw v[8:11], [[ZEROREG]], s[0:3], dfmt:14, nfmt:4, 0 idxen slc | 
|  | 9 | ; GCN: tbuffer_store_format_xyzw v[8:11], [[ZEROREG]], s[0:3], dfmt:14, nfmt:4, 0 idxen | 
|  | 10 | define amdgpu_ps void @tbuffer_store(<4 x i32> inreg, <4 x float>, <4 x float>, <4 x float>) { | 
|  | 11 | main_body: | 
|  | 12 | %in1 = bitcast <4 x float> %1 to <4 x i32> | 
|  | 13 | %in2 = bitcast <4 x float> %2 to <4 x i32> | 
|  | 14 | %in3 = bitcast <4 x float> %3 to <4 x i32> | 
|  | 15 | call void @llvm.amdgcn.struct.tbuffer.store.v4i32(<4 x i32> %in1, <4 x i32> %0, i32 0, i32 0, i32 0, i32 44, i32 0) | 
|  | 16 | call void @llvm.amdgcn.struct.tbuffer.store.v4i32(<4 x i32> %in2, <4 x i32> %0, i32 0, i32 0, i32 0, i32 61, i32 1) | 
|  | 17 | call void @llvm.amdgcn.struct.tbuffer.store.v4i32(<4 x i32> %in3, <4 x i32> %0, i32 0, i32 0, i32 0, i32 78, i32 2) | 
|  | 18 | call void @llvm.amdgcn.struct.tbuffer.store.v4f32(<4 x float> %3, <4 x i32> %0, i32 0, i32 0, i32 0, i32 78, i32 0) | 
|  | 19 | ret void | 
|  | 20 | } | 
|  | 21 |  | 
|  |  | ; A constant 42 in the offset operand (fourth argument) is expected to appear as the | 
|  |  | ; instruction's immediate "offset:42", with no offen and a zeroed VGPR as the index. | 
|  | 22 | ; GCN-LABEL: {{^}}tbuffer_store_immoffs: | 
|  | 23 | ; GCN: v_mov_b32_e32 [[ZEROREG:v[0-9]+]], 0 | 
|  | 24 | ; GCN: tbuffer_store_format_xyzw v[0:3], [[ZEROREG]], s[0:3], dfmt:5, nfmt:7, 0 idxen offset:42 | 
|  | 25 | define amdgpu_ps void @tbuffer_store_immoffs(<4 x i32> inreg, <4 x float>) { | 
|  | 26 | main_body: | 
|  | 27 | %in1 = bitcast <4 x float> %1 to <4 x i32> | 
|  | 28 | call void @llvm.amdgcn.struct.tbuffer.store.v4i32(<4 x i32> %in1, <4 x i32> %0, i32 0, i32 42, i32 0, i32 117, i32 0) | 
|  | 29 | ret void | 
|  | 30 | } | 
|  | 31 |  | 
|  |  | ; Same as the immediate-offset case, but the fifth argument is an inreg %soffset: an SGPR is | 
|  |  | ; expected in the scalar-offset slot (where the other tests have a literal 0) together with | 
|  |  | ; the immediate "offset:42". | 
|  | 32 | ; GCN-LABEL: {{^}}tbuffer_store_scalar_and_imm_offs: | 
|  | 33 | ; GCN: v_mov_b32_e32 [[ZEROREG:v[0-9]+]], 0 | 
|  | 34 | ; GCN: tbuffer_store_format_xyzw v[0:3], [[ZEROREG]], s[0:3], dfmt:5, nfmt:7, {{s[0-9]+}} idxen offset:42 | 
|  | 35 | define amdgpu_ps void @tbuffer_store_scalar_and_imm_offs(<4 x i32> inreg, <4 x float> %vdata, i32 inreg %soffset) { | 
|  | 36 | main_body: | 
|  | 37 | %in1 = bitcast <4 x float> %vdata to <4 x i32> | 
|  | 38 | call void @llvm.amdgcn.struct.tbuffer.store.v4i32(<4 x i32> %in1, <4 x i32> %0, i32 0, i32 42, i32 %soffset, i32 117, i32 0) | 
|  | 39 | ret void | 
|  | 40 | } | 
|  | 41 |  | 
|  |  | ; A variable %vindex (third argument) is expected to be used directly from its incoming | 
|  |  | ; VGPR (v4) as the idxen operand, with no zero-register materialization. | 
|  | 42 | ; GCN-LABEL: {{^}}buffer_store_idx: | 
|  | 43 | ; GCN: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], dfmt:15, nfmt:2, 0 idxen | 
|  | 44 | define amdgpu_ps void @buffer_store_idx(<4 x i32> inreg, <4 x float> %vdata, i32 %vindex) { | 
|  | 45 | main_body: | 
|  | 46 | %in1 = bitcast <4 x float> %vdata to <4 x i32> | 
|  | 47 | call void @llvm.amdgcn.struct.tbuffer.store.v4i32(<4 x i32> %in1, <4 x i32> %0, i32 %vindex, i32 0, i32 0, i32 47, i32 0) | 
|  | 48 | ret void | 
|  | 49 | } | 
|  | 50 |  | 
|  |  | ; Only %voffset (fourth argument) is variable; the index is constant 0. The address operand | 
|  |  | ; is still expected to be a VGPR range, used with both idxen and offen. | 
|  | 51 | ; GCN-LABEL: {{^}}buffer_store_ofs: | 
|  | 52 | ; GCN: tbuffer_store_format_xyzw v[0:3], {{v\[[0-9]+:[0-9]+\]}}, s[0:3], dfmt:3, nfmt:7, 0 idxen offen | 
|  | 53 | define amdgpu_ps void @buffer_store_ofs(<4 x i32> inreg, <4 x float> %vdata, i32 %voffset) { | 
|  | 54 | main_body: | 
|  | 55 | %in1 = bitcast <4 x float> %vdata to <4 x i32> | 
|  | 56 | call void @llvm.amdgcn.struct.tbuffer.store.v4i32(<4 x i32> %in1, <4 x i32> %0, i32 0, i32 %voffset, i32 0, i32 115, i32 0) | 
|  | 57 | ret void | 
|  | 58 | } | 
|  | 59 |  | 
|  |  | ; Both %vindex and %voffset are variable: they are expected to be passed together as the | 
|  |  | ; register pair v[4:5] with idxen and offen set. | 
|  | 60 | ; GCN-LABEL: {{^}}buffer_store_both: | 
|  | 61 | ; GCN: tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], dfmt:6, nfmt:4, 0 idxen offen | 
|  | 62 | define amdgpu_ps void @buffer_store_both(<4 x i32> inreg, <4 x float> %vdata, i32 %vindex, i32 %voffset) { | 
|  | 63 | main_body: | 
|  | 64 | %in1 = bitcast <4 x float> %vdata to <4 x i32> | 
|  | 65 | call void @llvm.amdgcn.struct.tbuffer.store.v4i32(<4 x i32> %in1, <4 x i32> %0, i32 %vindex, i32 %voffset, i32 0, i32 70, i32 0) | 
|  | 66 | ret void | 
|  | 67 | } | 
|  | 68 |  | 
|  | 69 | ; Ideally, the register allocator would avoid the wait here: on verde an s_waitcnt expcnt(0) is | 
|  |  | ; expected between the first store and the load, whose result lands in v[0:3], the same | 
|  |  | ; registers the first store takes its data from. The second store then consumes the loaded | 
|  |  | ; value, so an s_waitcnt vmcnt(0) is expected before it on both targets. | 
|  | 70 | ; | 
|  | 71 | ; GCN-LABEL: {{^}}buffer_store_wait: | 
|  | 72 | ; GCN: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], dfmt:15, nfmt:3, 0 idxen | 
|  | 73 | ; VERDE: s_waitcnt expcnt(0) | 
|  | 74 | ; GCN: buffer_load_format_xyzw v[0:3], v5, s[0:3], 0 idxen | 
|  | 75 | ; GCN: s_waitcnt vmcnt(0) | 
|  | 76 | ; GCN: tbuffer_store_format_xyzw v[0:3], v6, s[0:3], dfmt:14, nfmt:2, 0 idxen | 
|  | 77 | define amdgpu_ps void @buffer_store_wait(<4 x i32> inreg, <4 x float> %vdata, i32 %vindex.1, i32 %vindex.2, i32 %vindex.3) { | 
|  | 78 | main_body: | 
|  | 79 | %in1 = bitcast <4 x float> %vdata to <4 x i32> | 
|  | 80 | call void @llvm.amdgcn.struct.tbuffer.store.v4i32(<4 x i32> %in1, <4 x i32> %0, i32 %vindex.1, i32 0, i32 0, i32 63, i32 0) | 
|  | 81 | %data = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 %vindex.2, i32 0, i1 0, i1 0) | 
|  | 82 | %data.i = bitcast <4 x float> %data to <4 x i32> | 
|  | 83 | call void @llvm.amdgcn.struct.tbuffer.store.v4i32(<4 x i32> %data.i, <4 x i32> %0, i32 %vindex.3, i32 0, i32 0, i32 46, i32 0) | 
|  | 84 | ret void | 
|  | 85 | } | 
|  | 86 |  | 
|  |  | ; Scalar i32 data: the single-component tbuffer_store_format_x form is expected, taking | 
|  |  | ; its data from one VGPR (v0) and its index from v1. | 
|  | 87 | ; GCN-LABEL: {{^}}buffer_store_x1: | 
|  | 88 | ; GCN: tbuffer_store_format_x v0, v1, s[0:3], dfmt:13, nfmt:7, 0 idxen | 
|  | 89 | define amdgpu_ps void @buffer_store_x1(<4 x i32> inreg %rsrc, float %data, i32 %vindex) { | 
|  | 90 | main_body: | 
|  | 91 | %data.i = bitcast float %data to i32 | 
|  | 92 | call void @llvm.amdgcn.struct.tbuffer.store.i32(i32 %data.i, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 125, i32 0) | 
|  | 93 | ret void | 
|  | 94 | } | 
|  | 95 |  | 
|  |  | ; Two-element <2 x i32> data: the two-component tbuffer_store_format_xy form is expected, | 
|  |  | ; taking its data from v[0:1] and its index from v2. | 
|  | 96 | ; GCN-LABEL: {{^}}buffer_store_x2: | 
|  | 97 | ; GCN: tbuffer_store_format_xy v[0:1], v2, s[0:3], dfmt:1, nfmt:2, 0 idxen | 
|  | 98 | define amdgpu_ps void @buffer_store_x2(<4 x i32> inreg %rsrc, <2 x float> %data, i32 %vindex) { | 
|  | 99 | main_body: | 
|  | 100 | %data.i = bitcast <2 x float> %data to <2 x i32> | 
|  | 101 | call void @llvm.amdgcn.struct.tbuffer.store.v2i32(<2 x i32> %data.i, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 33, i32 0) | 
|  | 102 | ret void | 
|  | 103 | } | 
|  | 104 |  | 
|  |  | ; Intrinsic declarations used by the tests in this file. As the tests use them, the tbuffer | 
|  |  | ; store operands are, in order: data, %rsrc, %vindex, %voffset, %soffset, format, and a final | 
|  |  | ; i32 whose values 1 and 2 correspond to glc and slc in the expected output. | 
|  | 105 | declare void @llvm.amdgcn.struct.tbuffer.store.i32(i32, <4 x i32>, i32, i32, i32, i32, i32) #0 | 
|  | 106 | declare void @llvm.amdgcn.struct.tbuffer.store.v2i32(<2 x i32>, <4 x i32>, i32, i32, i32, i32, i32) #0 | 
|  | 107 | declare void @llvm.amdgcn.struct.tbuffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32, i32, i32) #0 | 
|  | 108 | declare void @llvm.amdgcn.struct.tbuffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32, i32) #0 | 
|  | 109 | declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #1 | 
|  | 110 |  | 
|  | 111 | attributes #0 = { nounwind } | 
|  | 112 | attributes #1 = { nounwind readonly } | 
|  | 113 |  | 
|  | 114 |  |