; blob: a547f007ce2a780e0215d1cf14731cc2c9120561
; Compile this file with llc for the amdgcn target on two CPUs (verde and
; tonga) with the machine verifier enabled, and match the emitted assembly
; against the check directives attached to each function below.
;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s

; Three <4 x float> loads from the same resource descriptor with constant-zero
; index and offset operands:
;   - both i1 flags clear  -> plain buffer_load_dwordx4
;   - 4th operand (i1) set -> "glc" modifier expected
;   - 5th operand (i1) set -> "slc" modifier expected
; All three results are packed into the returned aggregate, so each load is
; used and is expected to land in its own VGPR quad (v[0:3], v[4:7], v[8:11]).
;CHECK-LABEL: {{^}}buffer_load:
;CHECK: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
;CHECK: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 glc
;CHECK: buffer_load_dwordx4 v[8:11], off, s[0:3], 0 slc
;CHECK: s_waitcnt
define amdgpu_ps {<4 x float>, <4 x float>, <4 x float>} @buffer_load(<4 x i32> inreg) {
main_body:
  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %0, i32 0, i32 0, i1 0, i1 0)
  %data_glc = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %0, i32 0, i32 0, i1 1, i1 0)
  %data_slc = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %0, i32 0, i32 0, i1 0, i1 1)
  %r0 = insertvalue {<4 x float>, <4 x float>, <4 x float>} undef, <4 x float> %data, 0
  %r1 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r0, <4 x float> %data_glc, 1
  %r2 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r1, <4 x float> %data_slc, 2
  ret {<4 x float>, <4 x float>, <4 x float>} %r2
}

; Constant offset operand (42) with a zero index: the constant is expected to
; be encoded directly in the instruction's "offset:" field, with no address
; VGPR ("off") and a zero scalar offset.
;CHECK-LABEL: {{^}}buffer_load_immoffs:
;CHECK: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:42
;CHECK: s_waitcnt
define amdgpu_ps <4 x float> @buffer_load_immoffs(<4 x i32> inreg) {
main_body:
  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %0, i32 0, i32 42, i1 0, i1 0)
  ret <4 x float> %data
}

; Constant offset operand of 8192: the expected code splits it into an SGPR
; holding 0x1fff (used as the scalar offset operand) plus "offset:1", i.e.
; 0x1fff + 1 = 8192.
; NOTE(review): presumably 8192 is too large for the instruction's immediate
; offset field, which is why a split is expected -- confirm against the ISA.
;CHECK-LABEL: {{^}}buffer_load_immoffs_large:
;CHECK: s_movk_i32 [[OFFSET:s[0-9]+]], 0x1fff
;CHECK: buffer_load_dwordx4 v[0:3], off, s[0:3], [[OFFSET]] offset:1
;CHECK: s_waitcnt
define amdgpu_ps <4 x float> @buffer_load_immoffs_large(<4 x i32> inreg) {
main_body:
  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %0, i32 0, i32 8192, i1 0, i1 0)
  ret <4 x float> %data
}

; Non-constant index operand (2nd intrinsic operand) taken from the i32
; function argument, with a zero offset: expects the index in v0 and the
; "idxen" modifier.
;CHECK-LABEL: {{^}}buffer_load_idx:
;CHECK: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 idxen
;CHECK: s_waitcnt
define amdgpu_ps <4 x float> @buffer_load_idx(<4 x i32> inreg, i32) {
main_body:
  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %0, i32 %1, i32 0, i1 0, i1 0)
  ret <4 x float> %data
}

; Non-constant offset operand (3rd intrinsic operand) taken from the i32
; function argument, with a zero index: expects the offset in v0 and the
; "offen" modifier.
;CHECK-LABEL: {{^}}buffer_load_ofs:
;CHECK: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen
;CHECK: s_waitcnt
define amdgpu_ps <4 x float> @buffer_load_ofs(<4 x i32> inreg, i32) {
main_body:
  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %0, i32 0, i32 %1, i1 0, i1 0)
  ret <4 x float> %data
}

; Offset operand computed as (argument + 58): the constant addend is expected
; to be folded into "offset:58" while the variable part stays in v0 with the
; "offen" modifier, so no separate add instruction is needed for the address.
;CHECK-LABEL: {{^}}buffer_load_ofs_imm:
;CHECK: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:58
;CHECK: s_waitcnt
define amdgpu_ps <4 x float> @buffer_load_ofs_imm(<4 x i32> inreg, i32) {
main_body:
  %ofs = add i32 %1, 58
  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %0, i32 0, i32 %ofs, i1 0, i1 0)
  ret <4 x float> %data
}

; Both index and offset operands are non-constant and are passed in argument
; order (first i32 -> index, second i32 -> offset): expects the incoming
; register pair v[0:1] to be used directly with "idxen offen".
;CHECK-LABEL: {{^}}buffer_load_both:
;CHECK: buffer_load_dwordx4 v[0:3], v[0:1], s[0:3], 0 idxen offen
;CHECK: s_waitcnt
define amdgpu_ps <4 x float> @buffer_load_both(<4 x i32> inreg, i32, i32) {
main_body:
  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %0, i32 %1, i32 %2, i1 0, i1 0)
  ret <4 x float> %data
}

; Same as buffer_load_both but with the two i32 arguments swapped at the call
; (second i32 -> index, first i32 -> offset). The expected code copies v0 into
; v2 so that the address pair can be formed as v[1:2].
;CHECK-LABEL: {{^}}buffer_load_both_reversed:
;CHECK: v_mov_b32_e32 v2, v0
;CHECK: buffer_load_dwordx4 v[0:3], v[1:2], s[0:3], 0 idxen offen
;CHECK: s_waitcnt
define amdgpu_ps <4 x float> @buffer_load_both_reversed(<4 x i32> inreg, i32, i32) {
main_body:
  %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %0, i32 %2, i32 %1, i1 0, i1 0)
  ret <4 x float> %data
}

; Scalar (float) variant of the intrinsic with non-constant index and offset:
; expects a single-dword load (buffer_load_dword) into v0.
;CHECK-LABEL: {{^}}buffer_load_x1:
;CHECK: buffer_load_dword v0, v[0:1], s[0:3], 0 idxen offen
;CHECK: s_waitcnt
define amdgpu_ps float @buffer_load_x1(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) {
main_body:
  %data = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 0, i1 0)
  ret float %data
}

; <2 x float> variant of the intrinsic with non-constant index and offset:
; expects a two-dword load (buffer_load_dwordx2) into v[0:1].
;CHECK-LABEL: {{^}}buffer_load_x2:
;CHECK: buffer_load_dwordx2 v[0:1], v[0:1], s[0:3], 0 idxen offen
;CHECK: s_waitcnt
define amdgpu_ps <2 x float> @buffer_load_x2(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) {
main_body:
  %data = call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 0, i1 0)
  ret <2 x float> %data
}

; Declarations of the buffer-load intrinsic overloads exercised above (float,
; <2 x float> and <4 x float> results). As used in this file the operands are:
; <4 x i32> resource, i32 index, i32 offset, then two i1 flags that the checks
; above associate with the "glc" and "slc" modifiers respectively.
declare float @llvm.amdgcn.buffer.load.f32(<4 x i32>, i32, i32, i1, i1) #0
declare <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32>, i32, i32, i1, i1) #0
declare <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32>, i32, i32, i1, i1) #0

; The loads are declared as non-throwing and read-only.
attributes #0 = { nounwind readonly }