blob: f8737e6f572716ca897b00f5f2ebd2248d012eb2 [file] [log] [blame]
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
Matt Arsenault2ba54c32013-10-30 23:30:05 +00002
; FIXME: The expectations below are probably wrong; an align-1 access
; probably needs to expand to 8-bit reads and writes.
;
; Copies one i32 between two addrspace(3) pointers, with both the load and
; the store marked align 1.
; SI-LABEL: {{^}}unaligned_load_store_i32:
; SI: ds_read_u16
; SI: ds_read_u16
; SI: ds_write_b32
; SI: s_endpgm
define void @unaligned_load_store_i32(i32 addrspace(3)* %p, i32 addrspace(3)* %r) nounwind {
  %word = load i32 addrspace(3)* %p, align 1
  store i32 %word, i32 addrspace(3)* %r, align 1
  ret void
}
14
; Copies a <4 x i32> between two addrspace(3) pointers, with both the load and
; the store marked align 1. The checks expect eight ds_read_u16 followed by
; four ds_write_b32.
; SI-LABEL: {{^}}unaligned_load_store_v4i32:
; SI: ds_read_u16
; SI: ds_read_u16
; SI: ds_read_u16
; SI: ds_read_u16
; SI: ds_read_u16
; SI: ds_read_u16
; SI: ds_read_u16
; SI: ds_read_u16
; SI: ds_write_b32
; SI: ds_write_b32
; SI: ds_write_b32
; SI: ds_write_b32
; SI: s_endpgm
define void @unaligned_load_store_v4i32(<4 x i32> addrspace(3)* %p, <4 x i32> addrspace(3)* %r) nounwind {
  %vec = load <4 x i32> addrspace(3)* %p, align 1
  store <4 x i32> %vec, <4 x i32> addrspace(3)* %r, align 1
  ret void
}
Matt Arsenault6f2a5262014-07-27 17:46:40 +000034
; Loads an i64 from addrspace(3) with only 4-byte alignment and stores it to
; addrspace(1). The check expects the load to be selected as ds_read2_b32.
; SI-LABEL: {{^}}load_lds_i64_align_4:
; SI: ds_read2_b32
; SI: s_endpgm
define void @load_lds_i64_align_4(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
  %loaded = load i64 addrspace(3)* %in, align 4
  store i64 %loaded, i64 addrspace(1)* %out, align 8
  ret void
}
43
; Loads an i64 from addrspace(3) at i64 element index 4 (byte offset 32) with
; only 4-byte alignment. The check expects the constant offset to be folded
; into the instruction as offset0:8 offset1:9.
;
; The label pattern ends in ':' like the other labels in this file, and the
; address register is matched with [0-9]+ so a multi-digit VGPR number does
; not break the test.
; SI-LABEL: {{^}}load_lds_i64_align_4_with_offset:
; SI: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset0:8 offset1:9
; SI: s_endpgm
define void @load_lds_i64_align_4_with_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
  %ptr = getelementptr i64 addrspace(3)* %in, i32 4
  %val = load i64 addrspace(3)* %ptr, align 4
  store i64 %val, i64 addrspace(1)* %out, align 8
  ret void
}
53
; Tests the case where the lo offset fits in 8 bits but the hi offset needs
; 9 bits: the i64 is loaded from i32 element index 255, so its two halves sit
; at dword indices 255 and 256. The check expects offset0:0 offset1:1.
;
; The address register is matched with [0-9]+ so a multi-digit VGPR number
; does not break the test.
; SI-LABEL: {{^}}load_lds_i64_align_4_with_split_offset:
; SI: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset0:0 offset1:1
; SI: s_endpgm
define void @load_lds_i64_align_4_with_split_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
  ; Step through an i32 view of the pointer so the i64 starts at dword 255.
  %ptr = bitcast i64 addrspace(3)* %in to i32 addrspace(3)*
  %ptr255 = getelementptr i32 addrspace(3)* %ptr, i32 255
  %ptri64 = bitcast i32 addrspace(3)* %ptr255 to i64 addrspace(3)*
  %val = load i64 addrspace(3)* %ptri64, align 4
  store i64 %val, i64 addrspace(1)* %out, align 8
  ret void
}
66
; FIXME: Need to fix this case (i64 load from addrspace(3) with align 1);
; the test below stays commented out until then.
; define void @load_lds_i64_align_1(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
;   %val = load i64 addrspace(3)* %in, align 1
;   store i64 %val, i64 addrspace(1)* %out, align 8
;   ret void
; }
Tom Stellardf3fc5552014-08-22 18:49:35 +000073
; Stores the i64 argument to addrspace(3) with only 4-byte alignment. The
; check expects the store to be selected as ds_write2_b32.
; SI-LABEL: {{^}}store_lds_i64_align_4:
; SI: ds_write2_b32
; SI: s_endpgm
define void @store_lds_i64_align_4(i64 addrspace(3)* %out,
                                   i64 %val) #0 {
  store i64 %val, i64 addrspace(3)* %out, align 4
  ret void
}
81
; Stores an i64 zero to addrspace(3) at i64 element index 4 (byte offset 32)
; with only 4-byte alignment. The check expects the constant offset to be
; folded into the instruction as offset0:8 offset1:9.
;
; The label pattern ends in ':' like the other labels in this file.
; SI-LABEL: {{^}}store_lds_i64_align_4_with_offset:
; SI: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:8 offset1:9
; SI: s_endpgm
define void @store_lds_i64_align_4_with_offset(i64 addrspace(3)* %out) #0 {
  %ptr = getelementptr i64 addrspace(3)* %out, i32 4
  store i64 0, i64 addrspace(3)* %ptr, align 4
  ret void
}
90
; Tests the case where the lo offset fits in 8 bits but the hi offset needs
; 9 bits: the i64 is stored at i32 element index 255, so its two halves sit
; at dword indices 255 and 256. The check expects offset0:0 offset1:1.
; SI-LABEL: {{^}}store_lds_i64_align_4_with_split_offset:
; SI: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
; SI: s_endpgm
define void @store_lds_i64_align_4_with_split_offset(i64 addrspace(3)* %out) #0 {
  ; Step through an i32 view of the pointer so the i64 starts at dword 255.
  %ptr = bitcast i64 addrspace(3)* %out to i32 addrspace(3)*
  %ptr255 = getelementptr i32 addrspace(3)* %ptr, i32 255
  %ptri64 = bitcast i32 addrspace(3)* %ptr255 to i64 addrspace(3)*
  ; Store through the offset pointer. Storing to %out, as this test used to,
  ; left the three instructions above dead and never exercised the split
  ; offset.
  store i64 0, i64 addrspace(3)* %ptri64, align 4
  ret void
}