; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx700 -verify-machineinstrs <%s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SICI %s

; Check that an addrspace(1) (const) load with various combinations of
; uniform, nonuniform and constant address components all load with an
; addr64 mubuf with no readfirstlane.

; Constant table of six <3 x float> rows in addrspace(1). The tests that
; reference @indexable directly use it as the fixed base address of their
; getelementptr.
@indexable = internal unnamed_addr addrspace(1) constant [6 x <3 x float>] [
  <3 x float> <float 1.000000e+00, float 0.000000e+00, float 0.000000e+00>,
  <3 x float> <float 0.000000e+00, float 1.000000e+00, float 0.000000e+00>,
  <3 x float> <float 0.000000e+00, float 0.000000e+00, float 1.000000e+00>,
  <3 x float> <float 0.000000e+00, float 1.000000e+00, float 1.000000e+00>,
  <3 x float> <float 1.000000e+00, float 0.000000e+00, float 1.000000e+00>,
  <3 x float> <float 1.000000e+00, float 1.000000e+00, float 0.000000e+00>]

; Row index comes from %arg18 (a plain, non-inreg argument) and the base is the
; global table @indexable. The whole <3 x float> row is loaded and element 0 is
; returned.
; GCN-LABEL: {{^}}nonuniform_uniform:
; GCN-NOT: readfirstlane
; SICI: buffer_load_dwordx4 {{.*}} addr64

define amdgpu_ps float @nonuniform_uniform(i32 %arg18) {
.entry:
  ; Sign-extend the 32-bit index so it can be used as an i64 GEP index.
  %tmp31 = sext i32 %arg18 to i64
  %tmp32 = getelementptr [6 x <3 x float>], [6 x <3 x float>] addrspace(1)* @indexable, i64 0, i64 %tmp31
  %tmp33 = load <3 x float>, <3 x float> addrspace(1)* %tmp32, align 16
  %tmp34 = extractelement <3 x float> %tmp33, i32 0
  ret float %tmp34
}

; Row index is the inreg argument %offset (the uniform component named by the
; test); the base pointer is built from the non-inreg argument %arg18. The
; whole <3 x float> row is loaded and element 0 is returned.
; GCN-LABEL: {{^}}uniform_nonuniform:
; GCN-NOT: readfirstlane
; SICI: buffer_load_dwordx4 {{.*}} addr64

define amdgpu_ps float @uniform_nonuniform(i32 inreg %offset, i32 %arg18) {
.entry:
  ; Turn the 32-bit argument into an addrspace(1) pointer to use as the base.
  %tmp1 = zext i32 %arg18 to i64
  %tmp2 = inttoptr i64 %tmp1 to [6 x <3 x float>] addrspace(1)*
  %tmp32 = getelementptr [6 x <3 x float>], [6 x <3 x float>] addrspace(1)* %tmp2, i32 0, i32 %offset
  %tmp33 = load <3 x float>, <3 x float> addrspace(1)* %tmp32, align 16
  %tmp34 = extractelement <3 x float> %tmp33, i32 0
  ret float %tmp34
}

; Row index is the constant 1; the base pointer is built from the non-inreg
; argument %arg18. The whole <3 x float> row is loaded and element 0 is
; returned.
; GCN-LABEL: {{^}}const_nonuniform:
; GCN-NOT: readfirstlane
; SICI: buffer_load_dwordx4 {{.*}} addr64

define amdgpu_ps float @const_nonuniform(i32 %arg18) {
.entry:
  ; Turn the 32-bit argument into an addrspace(1) pointer to use as the base.
  %tmp1 = zext i32 %arg18 to i64
  %tmp2 = inttoptr i64 %tmp1 to [6 x <3 x float>] addrspace(1)*
  %tmp32 = getelementptr [6 x <3 x float>], [6 x <3 x float>] addrspace(1)* %tmp2, i32 0, i32 1
  %tmp33 = load <3 x float>, <3 x float> addrspace(1)* %tmp32, align 16
  %tmp34 = extractelement <3 x float> %tmp33, i32 0
  ret float %tmp34
}

; Both the row index (%offset) and the value the base pointer is built from
; (%arg18) are plain, non-inreg arguments. The whole <3 x float> row is loaded
; and element 0 is returned.
; GCN-LABEL: {{^}}nonuniform_nonuniform:
; GCN-NOT: readfirstlane
; SICI: buffer_load_dwordx4 {{.*}} addr64

define amdgpu_ps float @nonuniform_nonuniform(i32 %offset, i32 %arg18) {
.entry:
  ; Turn the 32-bit argument into an addrspace(1) pointer to use as the base.
  %tmp1 = zext i32 %arg18 to i64
  %tmp2 = inttoptr i64 %tmp1 to [6 x <3 x float>] addrspace(1)*
  %tmp32 = getelementptr [6 x <3 x float>], [6 x <3 x float>] addrspace(1)* %tmp2, i32 0, i32 %offset
  %tmp33 = load <3 x float>, <3 x float> addrspace(1)* %tmp32, align 16
  %tmp34 = extractelement <3 x float> %tmp33, i32 0
  ret float %tmp34
}

; Row index comes from %arg18 (a plain, non-inreg argument), the base is the
; global table @indexable, and the element index within the row is the
; constant 1. Only that single float is loaded.
; GCN-LABEL: {{^}}nonuniform_uniform_const:
; GCN-NOT: readfirstlane
; SICI: buffer_load_dword {{.*}} addr64

define amdgpu_ps float @nonuniform_uniform_const(i32 %arg18) {
.entry:
  ; Sign-extend the 32-bit index so it can be used as an i64 GEP index.
  %tmp31 = sext i32 %arg18 to i64
  %tmp32 = getelementptr [6 x <3 x float>], [6 x <3 x float>] addrspace(1)* @indexable, i64 0, i64 %tmp31, i64 1
  %tmp33 = load float, float addrspace(1)* %tmp32, align 4
  ret float %tmp33
}

; Row index is the inreg argument %offset (the uniform component named by the
; test), the base pointer is built from the non-inreg argument %arg18, and the
; element index within the row is the constant 1. Only that single float is
; loaded.
; GCN-LABEL: {{^}}uniform_nonuniform_const:
; GCN-NOT: readfirstlane
; SICI: buffer_load_dword {{.*}} addr64

define amdgpu_ps float @uniform_nonuniform_const(i32 inreg %offset, i32 %arg18) {
.entry:
  ; Turn the 32-bit argument into an addrspace(1) pointer to use as the base.
  %tmp1 = zext i32 %arg18 to i64
  %tmp2 = inttoptr i64 %tmp1 to [6 x <3 x float>] addrspace(1)*
  %tmp32 = getelementptr [6 x <3 x float>], [6 x <3 x float>] addrspace(1)* %tmp2, i32 0, i32 %offset, i32 1
  %tmp33 = load float, float addrspace(1)* %tmp32, align 4
  ret float %tmp33
}

; Both the row index (%offset) and the value the base pointer is built from
; (%arg18) are plain, non-inreg arguments; the element index within the row is
; the constant 1. Only that single float is loaded.
; GCN-LABEL: {{^}}nonuniform_nonuniform_const:
; GCN-NOT: readfirstlane
; SICI: buffer_load_dword {{.*}} addr64

define amdgpu_ps float @nonuniform_nonuniform_const(i32 %offset, i32 %arg18) {
.entry:
  ; Turn the 32-bit argument into an addrspace(1) pointer to use as the base.
  %tmp1 = zext i32 %arg18 to i64
  %tmp2 = inttoptr i64 %tmp1 to [6 x <3 x float>] addrspace(1)*
  %tmp32 = getelementptr [6 x <3 x float>], [6 x <3 x float>] addrspace(1)* %tmp2, i32 0, i32 %offset, i32 1
  %tmp33 = load float, float addrspace(1)* %tmp32, align 4
  ret float %tmp33
}