blob: f43a799de2bf537981c2d905ea70f64bf03d6d0a [file] [log] [blame]
Nicolai Haehnle2f5a7382018-04-04 10:58:54 +00001; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=UNPACKED %s
2; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX81 %s
3; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX9 %s
4
; Scalar half load through the 2D image load intrinsic.
; The call passes 1 as its first i32 operand and yields a single half, which
; the shader returns unchanged. All three run configurations share the common
; GCN prefix here and expect one d16 image_load with dmask 0x1 that writes v0,
; takes v[0:1] as its address operand and s[0:7] as its last register operand.
5; GCN-LABEL: {{^}}image_load_f16:
6; GCN: image_load v0, v[0:1], s[0:7] dmask:0x1 unorm d16{{$}}
7define amdgpu_ps half @image_load_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
8main_body:
9 %tex = call half @llvm.amdgcn.image.load.2d.f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
10 ret half %tex
11}
12
; Two-component half load through the 2D image load intrinsic.
; The call passes 3 as its first i32 operand; the <2 x half> result is bitcast
; to float so it can be returned as one 32-bit value.
; The tonga run (prefix UNPACKED) expects a two-register destination v[0:1],
; while the gfx810 and gfx900 runs (prefix PACKED) expect a single register v0.
; Both expect dmask 0x3 with the d16 modifier.
13; GCN-LABEL: {{^}}image_load_v2f16:
14; UNPACKED: image_load v[0:1], v[0:1], s[0:7] dmask:0x3 unorm d16{{$}}
15; PACKED: image_load v0, v[0:1], s[0:7] dmask:0x3 unorm d16{{$}}
16define amdgpu_ps float @image_load_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
17main_body:
18 %tex = call <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
19 %r = bitcast <2 x half> %tex to float
20 ret float %r
21}
22
; Four-component half load through the 2D image load intrinsic.
; The call passes 15 as its first i32 operand; the <4 x half> result is bitcast
; to <2 x float> for the return.
; The tonga run (prefix UNPACKED) expects a four-register destination v[0:3],
; while the gfx810 and gfx900 runs (prefix PACKED) expect two registers v[0:1].
; Both expect dmask 0xf with the d16 modifier.
23; GCN-LABEL: {{^}}image_load_v4f16:
24; UNPACKED: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm d16{{$}}
25; PACKED: image_load v[0:1], v[0:1], s[0:7] dmask:0xf unorm d16{{$}}
26define amdgpu_ps <2 x float> @image_load_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
27main_body:
28 %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
29 %r = bitcast <4 x half> %tex to <2 x float>
30 ret <2 x float> %r
31}
32
; Four-component half load through the 2D "load.mip" intrinsic, which takes an
; extra i32 operand (%mip) after the two coordinates.
; The expected instruction is image_load_mip and its address operand is the
; four-register range v[0:3] in every run.
; NOTE(review): only three address values (%s, %t, %mip) are passed; the
; fourth address register is presumably padding - confirm.
; Destination width differs per run: v[0:3] for tonga (prefix UNPACKED) and
; v[0:1] for gfx810/gfx900 (prefix PACKED).
33; GCN-LABEL: {{^}}image_load_mip_v4f16:
34; UNPACKED: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm d16{{$}}
35; PACKED: image_load_mip v[0:1], v[0:3], s[0:7] dmask:0xf unorm d16{{$}}
36define amdgpu_ps <2 x float> @image_load_mip_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %mip) {
37main_body:
38 %tex = call <4 x half> @llvm.amdgcn.image.load.mip.2d.v4f16.i32(i32 15, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
39 %r = bitcast <4 x half> %tex to <2 x float>
40 ret <2 x float> %r
41}
42
; Two-component half load through the 3D image load intrinsic, which takes
; three coordinate operands (%s, %t, %r).
; The expected instruction uses the four-register address range v[0:3] in
; every run.
; NOTE(review): the fourth address register is presumably padding - confirm.
; Destination is v[0:1] for tonga (prefix UNPACKED) and v0 for gfx810/gfx900
; (prefix PACKED). The <2 x half> result is bitcast to float for the return.
43; GCN-LABEL: {{^}}image_load_3d_v2f16:
44; UNPACKED: image_load v[0:1], v[0:3], s[0:7] dmask:0x3 unorm d16{{$}}
45; PACKED: image_load v0, v[0:3], s[0:7] dmask:0x3 unorm d16{{$}}
46define amdgpu_ps float @image_load_3d_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r) {
47main_body:
48 %tex = call <2 x half> @llvm.amdgcn.image.load.3d.v2f16.i32(i32 3, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
49 %x = bitcast <2 x half> %tex to float
50 ret float %x
51}
52
; Scalar half store through the 2D image store intrinsic.
; The half %data is passed as the first operand, followed by 1 as the first
; i32 operand. All run configurations share the common GCN prefix and expect
; one d16 image_store with dmask 0x1 whose data operand is v2 and whose
; address operand is v[0:1].
; The label pattern ends with ':' so that it only matches this function's
; own label and not any longer symbol that merely starts with the same name.
53; GCN-LABEL: {{^}}image_store_f16:
54; GCN: image_store v2, v[0:1], s[0:7] dmask:0x1 unorm d16{{$}}
55define amdgpu_ps void @image_store_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, half %data) {
56main_body:
57 call void @llvm.amdgcn.image.store.2d.f16.i32(half %data, i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
58 ret void
59}
60
; Two-component half store through the 2D image store intrinsic.
; The float argument %in is bitcast to <2 x half> and stored with 3 as the
; first i32 operand.
; The tonga run (prefix UNPACKED) expects a v_lshrrev_b32_e32 and a
; v_and_b32_e32 ahead of the store (presumably splitting the two halves into
; separate registers - confirm) and accepts any data register range.
; The gfx810 and gfx900 runs (prefix PACKED) expect the data in v2 directly.
; The label pattern ends with ':' so that it only matches this function's
; own label and not any longer symbol that merely starts with the same name.
61; GCN-LABEL: {{^}}image_store_v2f16:
62; UNPACKED: v_lshrrev_b32_e32
63; UNPACKED: v_and_b32_e32
64; UNPACKED: image_store v[{{[0-9:]+}}], v[0:1], s[0:7] dmask:0x3 unorm d16{{$}}
65; PACKED: image_store v2, v[0:1], s[0:7] dmask:0x3 unorm d16{{$}}
66define amdgpu_ps void @image_store_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, float %in) {
67main_body:
68 %data = bitcast float %in to <2 x half>
69 call void @llvm.amdgcn.image.store.2d.v2f16.i32(<2 x half> %data, i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
70 ret void
71}
72
; Four-component half store through the 2D image store intrinsic.
; The <2 x float> argument %in is bitcast to <4 x half> and stored with 15 as
; the first i32 operand.
; The tonga run (prefix UNPACKED) expects two v_lshrrev_b32_e32 /
; v_and_b32_e32 pairs ahead of the store (presumably one pair per packed
; input register - confirm) and accepts any data register range.
; The gfx810 and gfx900 runs (prefix PACKED) expect the data in v[2:3].
; The label pattern ends with ':' so that it only matches this function's
; own label and not any longer symbol that merely starts with the same name.
73; GCN-LABEL: {{^}}image_store_v4f16:
74; UNPACKED: v_lshrrev_b32_e32
75; UNPACKED: v_and_b32_e32
76; UNPACKED: v_lshrrev_b32_e32
77; UNPACKED: v_and_b32_e32
78; UNPACKED: image_store v[{{[0-9:]+}}], v[0:1], s[0:7] dmask:0xf unorm d16{{$}}
79; PACKED: image_store v[2:3], v[0:1], s[0:7] dmask:0xf unorm d16{{$}}
80define amdgpu_ps void @image_store_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, <2 x float> %in) {
81main_body:
82 %data = bitcast <2 x float> %in to <4 x half>
83 call void @llvm.amdgcn.image.store.2d.v4f16.i32(<4 x half> %data, i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
84 ret void
85}
86
; Four-component half store through the 1D "store.mip" intrinsic, which takes
; one coordinate (%s) followed by %mip.
; The <2 x float> argument %in is bitcast to <4 x half> and stored with 15 as
; the first i32 operand; the expected instruction is image_store_mip with
; address operand v[0:1].
; The tonga run (prefix UNPACKED) expects two v_lshrrev_b32_e32 /
; v_and_b32_e32 pairs ahead of the store (presumably one pair per packed
; input register - confirm) and accepts any data register range.
; The gfx810 and gfx900 runs (prefix PACKED) expect the data in v[2:3].
; The label pattern ends with ':' so that it only matches this function's
; own label and not any longer symbol that merely starts with the same name.
87; GCN-LABEL: {{^}}image_store_mip_1d_v4f16:
88; UNPACKED: v_lshrrev_b32_e32
89; UNPACKED: v_and_b32_e32
90; UNPACKED: v_lshrrev_b32_e32
91; UNPACKED: v_and_b32_e32
92; UNPACKED: image_store_mip v[{{[0-9:]+}}], v[0:1], s[0:7] dmask:0xf unorm d16{{$}}
93; PACKED: image_store_mip v[2:3], v[0:1], s[0:7] dmask:0xf unorm d16{{$}}
94define amdgpu_ps void @image_store_mip_1d_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %mip, <2 x float> %in) {
95main_body:
96 %data = bitcast <2 x float> %in to <4 x half>
97 call void @llvm.amdgcn.image.store.mip.1d.v4f16.i32(<4 x half> %data, i32 15, i32 %s, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
98 ret void
99}
100
; Declarations of the d16 image intrinsics used by this test.
; Load intrinsics return half / <2 x half> / <4 x half> and carry attribute
; group #1; store intrinsics take the data value as their first parameter,
; return void and carry attribute group #0.
; NOTE(review): the store.3d.v2f16 declaration has no caller in the visible
; part of this file - either a matching test function is missing or the
; declaration is left over; confirm before removing.
101declare half @llvm.amdgcn.image.load.2d.f16.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
102declare <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
103declare <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
104declare <4 x half> @llvm.amdgcn.image.load.mip.2d.v4f16.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
105declare <2 x half> @llvm.amdgcn.image.load.3d.v2f16.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
106
107declare void @llvm.amdgcn.image.store.2d.f16.i32(half, i32, i32, i32, <8 x i32>, i32, i32) #0
108declare void @llvm.amdgcn.image.store.2d.v2f16.i32(<2 x half>, i32, i32, i32, <8 x i32>, i32, i32) #0
109declare void @llvm.amdgcn.image.store.2d.v4f16.i32(<4 x half>, i32, i32, i32, <8 x i32>, i32, i32) #0
110declare void @llvm.amdgcn.image.store.mip.1d.v4f16.i32(<4 x half>, i32, i32, i32, <8 x i32>, i32, i32) #0
111declare void @llvm.amdgcn.image.store.3d.v2f16.i32(<2 x half>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
112
; Attribute groups referenced by the intrinsic declarations:
;   #0 (nounwind)          - attached to the image store declarations
;   #1 (nounwind readonly) - attached to the image load declarations
;   #2 (nounwind readnone) - not referenced anywhere in the visible part of
;                            this file; NOTE(review): possibly a leftover.
113attributes #0 = { nounwind }
114attributes #1 = { nounwind readonly }
115attributes #2 = { nounwind readnone }