blob: c74c0fa15855db2fa5ffb0a06d374dab52b104f3 [file] [log] [blame]
Matt Arsenault3ea06332017-02-22 00:02:21 +00001; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
2; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s
Nicolai Haehnlef2c64db2016-02-18 16:44:18 +00003
; Image load addressed by a four-component integer coordinate, returning all
; four channels (dmask 15).
; GCN-LABEL: {{^}}image_load_v4i32:
; GCN: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm
; GCN: s_waitcnt vmcnt(0)
define amdgpu_ps <4 x float> @image_load_v4i32(<8 x i32> inreg %rsrc, <4 x i32> %c) #0 {
entry:
  %texel = call <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false)
  ret <4 x float> %texel
}
12
; Same four-channel load, but the coordinate is only two components wide, so
; the address operand is expected to be a register pair.
; GCN-LABEL: {{^}}image_load_v2i32:
; GCN: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm
; GCN: s_waitcnt vmcnt(0)
define amdgpu_ps <4 x float> @image_load_v2i32(<8 x i32> inreg %rsrc, <2 x i32> %c) #0 {
entry:
  %texel = call <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32> %c, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false)
  ret <4 x float> %texel
}
21
; Four-channel load with a scalar i32 coordinate; the address operand is
; expected to be a single register.
; GCN-LABEL: {{^}}image_load_i32:
; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm
; GCN: s_waitcnt vmcnt(0)
define amdgpu_ps <4 x float> @image_load_i32(<8 x i32> inreg %rsrc, i32 %c) #0 {
entry:
  %texel = call <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32 %c, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false)
  ret <4 x float> %texel
}
30
; The .mip flavour of the load intrinsic must select image_load_mip.
; GCN-LABEL: {{^}}image_load_mip:
; GCN: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm
; GCN: s_waitcnt vmcnt(0)
define amdgpu_ps <4 x float> @image_load_mip(<8 x i32> inreg %rsrc, <4 x i32> %c) #0 {
entry:
  %texel = call <4 x float> @llvm.amdgcn.image.load.mip.v4f32.v4i32.v8i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false)
  ret <4 x float> %texel
}
39
; The IR requests all four channels (dmask 15) but only element 0 of the
; result is used; the checks expect the emitted load to carry dmask 0x1 and
; write a single register.
; GCN-LABEL: {{^}}image_load_1:
; GCN: image_load v0, v[0:3], s[0:7] dmask:0x1 unorm
; GCN: s_waitcnt vmcnt(0)
define amdgpu_ps float @image_load_1(<8 x i32> inreg %rsrc, <4 x i32> %c) #0 {
entry:
  %texel = call <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false)
  %first = extractelement <4 x float> %texel, i32 0
  ret float %first
}
49
; Scalar float result (dmask 1) loaded with a two-component coordinate.
; GCN-LABEL: {{^}}image_load_f32_v2i32:
; GCN: image_load {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 unorm
; GCN: s_waitcnt vmcnt(0)
define amdgpu_ps float @image_load_f32_v2i32(<8 x i32> inreg %rsrc, <2 x i32> %c) #0 {
entry:
  %value = call float @llvm.amdgcn.image.load.f32.v2i32.v8i32(<2 x i32> %c, <8 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false)
  ret float %value
}
58
; Two-channel result (dmask 3) loaded with a four-component coordinate.
; GCN-LABEL: {{^}}image_load_v2f32_v4i32:
; GCN: image_load {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x3 unorm
; GCN: s_waitcnt vmcnt(0)
define amdgpu_ps <2 x float> @image_load_v2f32_v4i32(<8 x i32> inreg %rsrc, <4 x i32> %c) #0 {
entry:
  %pair = call <2 x float> @llvm.amdgcn.image.load.v2f32.v4i32.v8i32(<4 x i32> %c, <8 x i32> %rsrc, i32 3, i1 false, i1 false, i1 false, i1 false)
  ret <2 x float> %pair
}
67
; Four-channel store (dmask 15) addressed by a four-component coordinate.
; GCN-LABEL: {{^}}image_store_v4i32:
; GCN: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm
define amdgpu_ps void @image_store_v4i32(<8 x i32> inreg %rsrc, <4 x float> %data, <4 x i32> %coords) #0 {
entry:
  call void @llvm.amdgcn.image.store.v4f32.v4i32.v8i32(<4 x float> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false)
  ret void
}
75
; Four-channel store addressed by a two-component coordinate; the address
; operand is expected to be a register pair.
; GCN-LABEL: {{^}}image_store_v2i32:
; GCN: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm
define amdgpu_ps void @image_store_v2i32(<8 x i32> inreg %rsrc, <4 x float> %data, <2 x i32> %coords) #0 {
entry:
  call void @llvm.amdgcn.image.store.v4f32.v2i32.v8i32(<4 x float> %data, <2 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false)
  ret void
}
83
; Four-channel store addressed by a scalar i32 coordinate; the address
; operand is expected to be a single register.
; GCN-LABEL: {{^}}image_store_i32:
; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm
define amdgpu_ps void @image_store_i32(<8 x i32> inreg %rsrc, <4 x float> %data, i32 %coords) #0 {
entry:
  call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %data, i32 %coords, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false)
  ret void
}
91
; Single-channel float store (dmask 1) addressed by a scalar i32 coordinate.
; GCN-LABEL: {{^}}image_store_f32_i32:
; GCN: image_store {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 unorm
define amdgpu_ps void @image_store_f32_i32(<8 x i32> inreg %rsrc, float %data, i32 %coords) #0 {
entry:
  call void @llvm.amdgcn.image.store.f32.i32.v8i32(float %data, i32 %coords, <8 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false)
  ret void
}
99
; Two-channel store (dmask 3) addressed by a four-component coordinate.
; GCN-LABEL: {{^}}image_store_v2f32_v4i32:
; GCN: image_store {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x3 unorm
define amdgpu_ps void @image_store_v2f32_v4i32(<8 x i32> inreg %rsrc, <2 x float> %data, <4 x i32> %coords) #0 {
entry:
  call void @llvm.amdgcn.image.store.v2f32.v4i32.v8i32(<2 x float> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 3, i1 false, i1 false, i1 false, i1 false)
  ret void
}
107
; The .mip flavour of the store intrinsic must select image_store_mip.
; GCN-LABEL: {{^}}image_store_mip:
; GCN: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm
define amdgpu_ps void @image_store_mip(<8 x i32> inreg %rsrc, <4 x float> %data, <4 x i32> %coords) #0 {
entry:
  call void @llvm.amdgcn.image.store.mip.v4f32.v4i32.v8i32(<4 x float> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false)
  ret void
}
115
; Queries resource info with undef operands and exports all four returned
; components so that the query result is used.
; GCN-LABEL: {{^}}getresinfo:
; GCN: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
define amdgpu_ps void @getresinfo() #0 {
entry:
  %info = call <4 x float> @llvm.amdgcn.image.getresinfo.v4f32.i32.v8i32(i32 undef, <8 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false)
  %info.x = extractelement <4 x float> %info, i32 0
  %info.y = extractelement <4 x float> %info, i32 1
  %info.z = extractelement <4 x float> %info, i32 2
  %info.w = extractelement <4 x float> %info, i32 3
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %info.x, float %info.y, float %info.z, float %info.w, i1 true, i1 true) #0
  ret void
}
128
; Store, load, then store the loaded value, each through a different resource
; descriptor. The checks expect the load to write v[0:3], the same registers
; the first store used as its data operand, with a wait between the two.
; Ideally, the register allocator would avoid the wait here
;
; GCN-LABEL: {{^}}image_store_wait:
; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm
; GCN: s_waitcnt vmcnt(0) expcnt(0)
; GCN: image_load v[0:3], v4, s[8:15] dmask:0xf unorm
; GCN: s_waitcnt vmcnt(0)
; GCN: image_store v[0:3], v4, s[16:23] dmask:0xf unorm
define amdgpu_ps void @image_store_wait(<8 x i32> inreg %arg, <8 x i32> inreg %arg1, <8 x i32> inreg %arg2, <4 x float> %arg3, i32 %arg4) #0 {
entry:
  call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %arg3, i32 %arg4, <8 x i32> %arg, i32 15, i1 false, i1 false, i1 false, i1 false)
  %loaded = call <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32 %arg4, <8 x i32> %arg1, i32 15, i1 false, i1 false, i1 false, i1 false)
  call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %loaded, i32 %arg4, <8 x i32> %arg2, i32 15, i1 false, i1 false, i1 false, i1 false)
  ret void
}
144
; Two zero stores to LDS (at %lds and at %lds + 4 floats) sit on either side of
; an image load. The checks expect both stores to be emitted as a single
; ds_write2_b32.
;
; SI won't merge ds memory operations, because of the signed offset bug, so
; we only have check lines for VI.
; VI-LABEL: {{^}}image_load_mmo:
; VI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
; VI: ds_write2_b32 v{{[0-9]+}}, [[ZERO]], [[ZERO]] offset1:4
define amdgpu_ps void @image_load_mmo(float addrspace(3)* %lds, <2 x i32> %c, <8 x i32> inreg %rsrc) #0 {
bb:
  store float 0.000000e+00, float addrspace(3)* %lds
  ; The result is a single float, so request one channel (dmask 1), matching
  ; the other float-returning load in this file.
  %tex = call float @llvm.amdgcn.image.load.f32.v2i32.v8i32(<2 x i32> %c, <8 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false)
  %tmp2 = getelementptr float, float addrspace(3)* %lds, i32 4
  store float 0.000000e+00, float addrspace(3)* %tmp2
  ; Export the loaded value so the image load stays live.
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tex, float %tex, float %tex, float %tex, i1 true, i1 true) #0
  ret void
}
Changpeng Fang8236fe12016-11-14 18:33:18 +0000159
; Image load intrinsics, ordered by result width and then coordinate width.
declare float @llvm.amdgcn.image.load.f32.v2i32.v8i32(<2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
declare <2 x float> @llvm.amdgcn.image.load.v2f32.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
declare <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32, <8 x i32>, i32, i1, i1, i1, i1) #1
declare <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
declare <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
declare <4 x float> @llvm.amdgcn.image.load.mip.v4f32.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
declare <4 x float> @llvm.amdgcn.image.getresinfo.v4f32.i32.v8i32(i32, <8 x i32>, i32, i1, i1, i1, i1) #1

; Image store intrinsics, ordered by data width and then coordinate width.
declare void @llvm.amdgcn.image.store.f32.i32.v8i32(float, i32, <8 x i32>, i32, i1, i1, i1, i1) #0
declare void @llvm.amdgcn.image.store.v2f32.v4i32.v8i32(<2 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #0
declare void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float>, i32, <8 x i32>, i32, i1, i1, i1, i1) #0
declare void @llvm.amdgcn.image.store.v4f32.v2i32.v8i32(<4 x float>, <2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #0
declare void @llvm.amdgcn.image.store.v4f32.v4i32.v8i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #0
declare void @llvm.amdgcn.image.store.mip.v4f32.v4i32.v8i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #0

; Export intrinsic used by the tests that need to keep a loaded value live.
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0

; #0 is used by the test functions, the store intrinsics and the export;
; #1 is used by the load and getresinfo intrinsics.
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }