; RUN: llc -march=amdgcn -mattr=+max-private-element-size-16 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SICIVI %s
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+max-private-element-size-16 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SICIVI %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+max-private-element-size-16 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s

; Test addressing modes when the scratch base is not a frame index.
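; A small constant address should fold into the 12-bit unsigned
; immediate offset field of the MUBUF instruction, with the scratch
; wave offset SGPR as the base, rather than occupying a VGPR.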

; GCN-LABEL: {{^}}store_private_offset_i8:
; GCN: buffer_store_byte v{{[0-9]+}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @store_private_offset_i8() #0 {
  store volatile i8 5, i8 addrspace(5)* inttoptr (i32 8 to i8 addrspace(5)*)
  ret void
}

; GCN-LABEL: {{^}}store_private_offset_i16:
; GCN: buffer_store_short v{{[0-9]+}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @store_private_offset_i16() #0 {
  store volatile i16 5, i16 addrspace(5)* inttoptr (i32 8 to i16 addrspace(5)*)
  ret void
}

; GCN-LABEL: {{^}}store_private_offset_i32:
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @store_private_offset_i32() #0 {
  store volatile i32 5, i32 addrspace(5)* inttoptr (i32 8 to i32 addrspace(5)*)
  ret void
}

; GCN-LABEL: {{^}}store_private_offset_v2i32:
; GCN: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @store_private_offset_v2i32() #0 {
  store volatile <2 x i32> <i32 5, i32 10>, <2 x i32> addrspace(5)* inttoptr (i32 8 to <2 x i32> addrspace(5)*)
  ret void
}

; GCN-LABEL: {{^}}store_private_offset_v4i32:
; GCN: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @store_private_offset_v4i32() #0 {
  store volatile <4 x i32> <i32 5, i32 10, i32 15, i32 0>, <4 x i32> addrspace(5)* inttoptr (i32 8 to <4 x i32> addrspace(5)*)
  ret void
}

; GCN-LABEL: {{^}}load_private_offset_i8:
; GCN: buffer_load_ubyte v{{[0-9]+}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @load_private_offset_i8() #0 {
  %load = load volatile i8, i8 addrspace(5)* inttoptr (i32 8 to i8 addrspace(5)*)
  ret void
}

; GCN-LABEL: {{^}}sextload_private_offset_i8:
; GCN: buffer_load_sbyte v{{[0-9]+}}, off, s[4:7], s8 offset:8
define amdgpu_kernel void @sextload_private_offset_i8(i32 addrspace(1)* %out) #0 {
  %load = load volatile i8, i8 addrspace(5)* inttoptr (i32 8 to i8 addrspace(5)*)
  %sextload = sext i8 %load to i32
  store i32 %sextload, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}zextload_private_offset_i8:
; GCN: buffer_load_ubyte v{{[0-9]+}}, off, s[4:7], s8 offset:8
define amdgpu_kernel void @zextload_private_offset_i8(i32 addrspace(1)* %out) #0 {
  %load = load volatile i8, i8 addrspace(5)* inttoptr (i32 8 to i8 addrspace(5)*)
  %zextload = zext i8 %load to i32
  store i32 %zextload, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}load_private_offset_i16:
; GCN: buffer_load_ushort v{{[0-9]+}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @load_private_offset_i16() #0 {
  %load = load volatile i16, i16 addrspace(5)* inttoptr (i32 8 to i16 addrspace(5)*)
  ret void
}

; GCN-LABEL: {{^}}sextload_private_offset_i16:
; GCN: buffer_load_sshort v{{[0-9]+}}, off, s[4:7], s8 offset:8
define amdgpu_kernel void @sextload_private_offset_i16(i32 addrspace(1)* %out) #0 {
  %load = load volatile i16, i16 addrspace(5)* inttoptr (i32 8 to i16 addrspace(5)*)
  %sextload = sext i16 %load to i32
  store i32 %sextload, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}zextload_private_offset_i16:
; GCN: buffer_load_ushort v{{[0-9]+}}, off, s[4:7], s8 offset:8
define amdgpu_kernel void @zextload_private_offset_i16(i32 addrspace(1)* %out) #0 {
  %load = load volatile i16, i16 addrspace(5)* inttoptr (i32 8 to i16 addrspace(5)*)
  %zextload = zext i16 %load to i32
  store i32 %zextload, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}load_private_offset_i32:
; GCN: buffer_load_dword v{{[0-9]+}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @load_private_offset_i32() #0 {
  %load = load volatile i32, i32 addrspace(5)* inttoptr (i32 8 to i32 addrspace(5)*)
  ret void
}

; GCN-LABEL: {{^}}load_private_offset_v2i32:
; GCN: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @load_private_offset_v2i32() #0 {
  %load = load volatile <2 x i32>, <2 x i32> addrspace(5)* inttoptr (i32 8 to <2 x i32> addrspace(5)*)
  ret void
}

; GCN-LABEL: {{^}}load_private_offset_v4i32:
; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @load_private_offset_v4i32() #0 {
  %load = load volatile <4 x i32>, <4 x i32> addrspace(5)* inttoptr (i32 8 to <4 x i32> addrspace(5)*)
  ret void
}

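; 4095 is the largest value that fits in the 12-bit MUBUF immediate
; offset field.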
; GCN-LABEL: {{^}}store_private_offset_i8_max_offset:
; GCN: buffer_store_byte v{{[0-9]+}}, off, s[4:7], s2 offset:4095
define amdgpu_kernel void @store_private_offset_i8_max_offset() #0 {
  store volatile i8 5, i8 addrspace(5)* inttoptr (i32 4095 to i8 addrspace(5)*)
  ret void
}

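; 4096 no longer fits in the immediate offset field, so the address has
; to be materialized in a VGPR and used with offen.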
; GCN-LABEL: {{^}}store_private_offset_i8_max_offset_plus1:
; GCN: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x1000
; GCN: buffer_store_byte v{{[0-9]+}}, [[OFFSET]], s[4:7], s2 offen{{$}}
define amdgpu_kernel void @store_private_offset_i8_max_offset_plus1() #0 {
  store volatile i8 5, i8 addrspace(5)* inttoptr (i32 4096 to i8 addrspace(5)*)
  ret void
}

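; For 4097 the 0x1000 base is materialized in a VGPR and the remaining
; 1 folds back into the immediate offset.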
; GCN-LABEL: {{^}}store_private_offset_i8_max_offset_plus2:
; GCN: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x1000
; GCN: buffer_store_byte v{{[0-9]+}}, [[OFFSET]], s[4:7], s2 offen offset:1{{$}}
define amdgpu_kernel void @store_private_offset_i8_max_offset_plus2() #0 {
  store volatile i8 5, i8 addrspace(5)* inttoptr (i32 4097 to i8 addrspace(5)*)
  ret void
}

; MUBUF used for stack access has bounds checking enabled before gfx9,
; so a possibly negative base index can't be used for the vgpr offset.
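; On gfx9 the constant part of the index can instead fold into the
; immediate offset, leaving a single add.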

; GCN-LABEL: {{^}}store_private_unknown_bits_vaddr:
; SICIVI: v_add_{{i|u}}32_e32 [[ADDR0:v[0-9]+]], vcc, 4
; SICIVI: v_add_{{i|u}}32_e32 [[ADDR1:v[0-9]+]], vcc, 32, [[ADDR0]]
; SICIVI: buffer_store_dword v{{[0-9]+}}, [[ADDR1]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}

; GFX9: v_add_u32_e32 [[ADDR:v[0-9]+]], 4,
; GFX9: buffer_store_dword v{{[0-9]+}}, [[ADDR]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:32
define amdgpu_kernel void @store_private_unknown_bits_vaddr() #0 {
  %alloca = alloca [16 x i32], align 4, addrspace(5)
  %vaddr = load volatile i32, i32 addrspace(1)* undef
  %vaddr.off = add i32 %vaddr, 8
  %gep = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %vaddr.off
  store volatile i32 9, i32 addrspace(5)* %gep
  ret void
}

attributes #0 = { nounwind }