; blob: c50b508ed92fea27556be65a079312ab4bfb7788
; Three llc invocations share the GCN prefix. The tonga run adds UNPACKED;
; the gfx810 and gfx900 runs both add PACKED, plus GFX81 and GFX9 respectively.
; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=UNPACKED %s
; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX81 %s
; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX9 %s


; Scalar half result (dmask 0x1, d16 modifier expected on every target).
; The register written by image_sample is captured as HALF and must be the
; register that is stored: via flat_store_short under the UNPACKED and GFX81
; prefixes, and via global_store_short under the GFX9 prefix.
; GCN-LABEL: {{^}}image_sample_f16:
; GCN: image_sample v[[HALF:[0-9]+]], v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x1 d16

; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]]

; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]]

; GFX9: global_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]], off
define amdgpu_kernel void @image_sample_f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) {
main_body:
  %tex = call half @llvm.amdgcn.image.sample.f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 1, i1 0, i1 0, i1 0, i1 0, i1 0)
  store half %tex, half addrspace(1)* %out
  ret void
}

; Two-component half result (dmask 0x3); only element 1 is extracted and stored.
; Under the UNPACKED prefix the result is a register range and its last
; register (HI) is stored directly. Under the PACKED prefix the result is a
; single register (DATA): the GFX81 checks expect a right shift by 16 before
; flat_store_short, while the GFX9 checks expect global_store_short_d16_hi
; applied to DATA itself.
; GCN-LABEL: {{^}}image_sample_v2f16:
; UNPACKED: image_sample v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x3 d16
; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]]

; PACKED: image_sample v[[DATA:[0-9]+]], v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x3 d16

; GFX81: v_lshrrev_b32_e32 v[[HI:[0-9]+]], 16, v[[DATA]]
; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]]

; GFX9: global_store_short_d16_hi v[{{[0-9]+:[0-9]+}}], v[[DATA]], off
define amdgpu_kernel void @image_sample_v2f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) {
main_body:
  %tex = call <2 x half> @llvm.amdgcn.image.sample.v2f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 3, i1 0, i1 0, i1 0, i1 0, i1 0)
  %elt = extractelement <2 x half> %tex, i32 1
  store half %elt, half addrspace(1)* %out
  ret void
}

; Four-component half result (dmask 0xf); only element 3 is extracted and stored.
; Both the UNPACKED and PACKED patterns capture the last register of the
; image_sample result range as HI. UNPACKED expects HI to be stored directly;
; GFX81 expects HI shifted right by 16 into HALF before flat_store_short;
; GFX9 expects global_store_short_d16_hi applied to HI.
; GCN-LABEL: {{^}}image_sample_v4f16:
; UNPACKED: image_sample v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16
; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]]

; PACKED: image_sample v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16

; GFX81: v_lshrrev_b32_e32 v[[HALF:[0-9]+]], 16, v[[HI]]
; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]]

; GFX9: global_store_short_d16_hi v[{{[0-9]+:[0-9]+}}], v[[HI]], off
define amdgpu_kernel void @image_sample_v4f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) {
main_body:
  %tex = call <4 x half> @llvm.amdgcn.image.sample.v4f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0)
  %elt = extractelement <4 x half> %tex, i32 3
  store half %elt, half addrspace(1)* %out
  ret void
}

; image_sample_cl with a four-component half result (dmask 0xf); only element 3
; is extracted and stored. The last register of the result range is captured as
; HI: UNPACKED stores it directly, GFX81 shifts it right by 16 into HALF before
; flat_store_short, and GFX9 uses global_store_short_d16_hi on HI.
; GCN-LABEL: {{^}}image_sample_cl_v4f16:
; UNPACKED: image_sample_cl v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16
; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]]

; PACKED: image_sample_cl v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16

; GFX81: v_lshrrev_b32_e32 v[[HALF:[0-9]+]], 16, v[[HI]]
; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]]

; GFX9: global_store_short_d16_hi v[{{[0-9]+:[0-9]+}}], v[[HI]], off
define amdgpu_kernel void @image_sample_cl_v4f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) {
main_body:
  %tex = call <4 x half> @llvm.amdgcn.image.sample.cl.v4f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0)
  %elt = extractelement <4 x half> %tex, i32 3
  store half %elt, half addrspace(1)* %out
  ret void
}

; image_sample_c with a four-component half result (dmask 0xf); only element 3
; is extracted and stored. The last register of the result range is captured as
; HI: UNPACKED stores it directly, GFX81 shifts it right by 16 into HALF before
; flat_store_short, and GFX9 uses global_store_short_d16_hi on HI.
; GCN-LABEL: {{^}}image_sample_c_v4f16:
; UNPACKED: image_sample_c v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16
; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]]

; PACKED: image_sample_c v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16

; GFX81: v_lshrrev_b32_e32 v[[HALF:[0-9]+]], 16, v[[HI]]
; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]]

; GFX9: global_store_short_d16_hi v[{{[0-9]+:[0-9]+}}], v[[HI]], off
define amdgpu_kernel void @image_sample_c_v4f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) {
main_body:
  %tex = call <4 x half> @llvm.amdgcn.image.sample.c.v4f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0)
  %elt = extractelement <4 x half> %tex, i32 3
  store half %elt, half addrspace(1)* %out
  ret void
}

; image_sample_o with a four-component half result (dmask 0xf); only element 3
; is extracted and stored. The last register of the result range is captured as
; HI: UNPACKED stores it directly, GFX81 shifts it right by 16 into HALF before
; flat_store_short, and GFX9 uses global_store_short_d16_hi on HI.
; GCN-LABEL: {{^}}image_sample_o_v4f16:
; UNPACKED: image_sample_o v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16
; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]]

; PACKED: image_sample_o v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16

; GFX81: v_lshrrev_b32_e32 v[[HALF:[0-9]+]], 16, v[[HI]]
; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]]

; GFX9: global_store_short_d16_hi v[{{[0-9]+:[0-9]+}}], v[[HI]], off
define amdgpu_kernel void @image_sample_o_v4f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) {
main_body:
  %tex = call <4 x half> @llvm.amdgcn.image.sample.o.v4f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0)
  %elt = extractelement <4 x half> %tex, i32 3
  store half %elt, half addrspace(1)* %out
  ret void
}

; image_sample_c_o with a four-component half result (dmask 0xf); only element
; 3 is extracted and stored. The last register of the result range is captured
; as HI: UNPACKED stores it directly, GFX81 shifts it right by 16 into HALF
; before flat_store_short, and GFX9 uses global_store_short_d16_hi on HI.
; GCN-LABEL: {{^}}image_sample_c_o_v4f16:
; UNPACKED: image_sample_c_o v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16
; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]]

; PACKED: image_sample_c_o v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16

; GFX81: v_lshrrev_b32_e32 v[[HALF:[0-9]+]], 16, v[[HI]]
; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]]

; GFX9: global_store_short_d16_hi v[{{[0-9]+:[0-9]+}}], v[[HI]], off
define amdgpu_kernel void @image_sample_c_o_v4f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) {
main_body:
  %tex = call <4 x half> @llvm.amdgcn.image.sample.c.o.v4f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0)
  %elt = extractelement <4 x half> %tex, i32 3
  store half %elt, half addrspace(1)* %out
  ret void
}

; Declarations of the image sample intrinsics called by the kernels in this
; file. All share one operand list: <4 x float>, <8 x i32>, <4 x i32>, an i32,
; and five i1 operands. They differ only in the sample variant and in the
; half / <2 x half> / <4 x half> return type.
declare half @llvm.amdgcn.image.sample.f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1)
declare <2 x half> @llvm.amdgcn.image.sample.v2f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1)
declare <4 x half> @llvm.amdgcn.image.sample.v4f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1)


declare <4 x half> @llvm.amdgcn.image.sample.cl.v4f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1)
declare <4 x half> @llvm.amdgcn.image.sample.c.v4f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1)
declare <4 x half> @llvm.amdgcn.image.sample.o.v4f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1)
declare <4 x half> @llvm.amdgcn.image.sample.c.o.v4f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1)