; blob: d74bd5aa15ac1354ae818dfa840f94a0bd1334ab [file] [log] [blame]
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; Codegen test for stores to addrspace(3) memory whose address is a constant
; minus the scaled workitem id. Each kernel below is paired with the
; instruction sequence the checks expect: a v_sub_i32 producing the base
; address and a ds_write carrying whatever immediate offset the checks list.
;
; Only the GCN prefix is enabled on the command line above because it is the
; only prefix any check line in this file uses.

; Workitem id in the x dimension; the checks expect it to arrive in v0.
declare i32 @llvm.amdgcn.workitem.id.x() #0

; 256 x i32 array in addrspace(3), used as the base of the GEP-addressed store.
@lds.obj = addrspace(3) global [256 x i32] undef, align 4

; Stores 123 (0x7b) to @lds.obj[(0 - workitem_id) + 3].
; The checks expect the negated, scaled id to be computed as 0 - (id << 2) and
; the constant element index 3 (3 * 4 bytes) to show up as offset:12 on the
; ds_write_b32.
;
; The kernel carries #1 (nounwind) because it writes memory; the readnone
; group #0 belongs on the workitem-id call, matching the other kernels here.

; GCN-LABEL: {{^}}write_ds_sub0_offset0_global:
; GCN: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 2, v0
; GCN: v_sub_i32_e32 [[BASEPTR:v[0-9]+]], vcc, 0, [[SHL]]
; GCN: v_mov_b32_e32 [[VAL:v[0-9]+]], 0x7b
; GCN: ds_write_b32 [[BASEPTR]], [[VAL]] offset:12
define amdgpu_kernel void @write_ds_sub0_offset0_global() #1 {
entry:
  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
  %sub1 = sub i32 0, %x.i
  %tmp0 = getelementptr [256 x i32], [256 x i32] addrspace(3)* @lds.obj, i32 0, i32 %sub1
  %arrayidx = getelementptr inbounds i32, i32 addrspace(3)* %tmp0, i32 3
  store i32 123, i32 addrspace(3)* %arrayidx
  ret void
}

; Stores the byte 13 to address 65535 + ((0 - workitem_id) << 2).
; The checks expect the base to be 0 - (id << 2) and the constant 65535 to be
; carried as offset:65535 on the ds_write_b8.
; NOTE(review): the name and the paired _p1 kernel suggest 65535 is the largest
; offset expected to fold; confirm against the target's DS encoding.

; GCN-LABEL: {{^}}add_x_shl_neg_to_sub_max_offset:
; GCN-DAG: v_lshlrev_b32_e32 [[SCALED:v[0-9]+]], 2, v0
; GCN-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SCALED]]
; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 13
; GCN: ds_write_b8 [[NEG]], [[K]] offset:65535
define amdgpu_kernel void @add_x_shl_neg_to_sub_max_offset() #1 {
  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
  %neg = sub i32 0, %x.i
  %shl = shl i32 %neg, 2
  %add = add i32 65535, %shl
  %ptr = inttoptr i32 %add to i8 addrspace(3)*
  store i8 13, i8 addrspace(3)* %ptr
  ret void
}

; Same shape as add_x_shl_neg_to_sub_max_offset, but with the constant 65536
; (0x10000). Here the checks expect the constant to be the first source of the
; v_sub_i32 and the ds_write_b8 to carry no offset at all (the trailing {{$}}
; anchors the match at end of line).

; GCN-LABEL: {{^}}add_x_shl_neg_to_sub_max_offset_p1:
; GCN-DAG: v_lshlrev_b32_e32 [[SCALED:v[0-9]+]], 2, v0
; GCN-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0x10000, [[SCALED]]
; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 13
; GCN: ds_write_b8 [[NEG]], [[K]]{{$}}
define amdgpu_kernel void @add_x_shl_neg_to_sub_max_offset_p1() #1 {
  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
  %neg = sub i32 0, %x.i
  %shl = shl i32 %neg, 2
  %add = add i32 65536, %shl
  %ptr = inttoptr i32 %add to i8 addrspace(3)*
  store i8 13, i8 addrspace(3)* %ptr
  ret void
}

; The negated, scaled id feeds two different adds (constants 123 and 456), each
; used as the address of a volatile i32 store of 13.
; The checks expect a single v_sub_i32 (no further v_sub before either store)
; whose result is the base of both ds_write_b32 instructions, with the two
; constants appearing as offset:123 and offset:456.

; GCN-LABEL: {{^}}add_x_shl_neg_to_sub_multi_use:
; GCN-DAG: v_lshlrev_b32_e32 [[SCALED:v[0-9]+]], 2, v0
; GCN-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SCALED]]
; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 13
; GCN-NOT: v_sub
; GCN: ds_write_b32 [[NEG]], [[K]] offset:123{{$}}
; GCN-NOT: v_sub
; GCN: ds_write_b32 [[NEG]], [[K]] offset:456{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @add_x_shl_neg_to_sub_multi_use() #1 {
  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
  %neg = sub i32 0, %x.i
  %shl = shl i32 %neg, 2
  %add0 = add i32 123, %shl
  %add1 = add i32 456, %shl
  %ptr0 = inttoptr i32 %add0 to i32 addrspace(3)*
  store volatile i32 13, i32 addrspace(3)* %ptr0
  %ptr1 = inttoptr i32 %add1 to i32 addrspace(3)*
  store volatile i32 13, i32 addrspace(3)* %ptr1
  ret void
}

; One address (123 + ((0 - workitem_id) << 2)) is used by two volatile i32
; stores of 13.
; The checks expect a single v_sub_i32 and two ds_write_b32 instructions that
; both use its result as the base with offset:123.

; GCN-LABEL: {{^}}add_x_shl_neg_to_sub_multi_use_same_offset:
; GCN-DAG: v_lshlrev_b32_e32 [[SCALED:v[0-9]+]], 2, v0
; GCN-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SCALED]]
; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 13
; GCN-NOT: v_sub
; GCN: ds_write_b32 [[NEG]], [[K]] offset:123{{$}}
; GCN-NOT: v_sub
; GCN: ds_write_b32 [[NEG]], [[K]] offset:123{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @add_x_shl_neg_to_sub_multi_use_same_offset() #1 {
  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
  %neg = sub i32 0, %x.i
  %shl = shl i32 %neg, 2
  %add = add i32 123, %shl
  %ptr = inttoptr i32 %add to i32 addrspace(3)*
  store volatile i32 13, i32 addrspace(3)* %ptr
  store volatile i32 13, i32 addrspace(3)* %ptr
  ret void
}

; Stores the i64 value 123 with only 4-byte alignment to address
; 1019 + ((0 - workitem_id) << 2).
; The checks expect the store to be emitted as one ds_write2_b32 based on
; 0 - (id << 2), with the constant carried as offset0:254 offset1:255.

; GCN-LABEL: {{^}}add_x_shl_neg_to_sub_misaligned_i64_max_offset:
; GCN-DAG: v_lshlrev_b32_e32 [[SCALED:v[0-9]+]], 2, v0
; GCN-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SCALED]]
; GCN: ds_write2_b32 [[NEG]], {{v[0-9]+}}, {{v[0-9]+}} offset0:254 offset1:255
define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset() #1 {
  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
  %neg = sub i32 0, %x.i
  %shl = shl i32 %neg, 2
  %add = add i32 1019, %shl
  %ptr = inttoptr i32 %add to i64 addrspace(3)*
  store i64 123, i64 addrspace(3)* %ptr, align 4
  ret void
}

; Same shape as add_x_shl_neg_to_sub_misaligned_i64_max_offset, but with the
; constant 1020 (0x3fc). Here the checks expect the constant to be the first
; source of the v_sub_i32 and the ds_write2_b32 to carry only offset1:1.

; GCN-LABEL: {{^}}add_x_shl_neg_to_sub_misaligned_i64_max_offset_p1:
; GCN-DAG: v_lshlrev_b32_e32 [[SCALED:v[0-9]+]], 2, v0
; GCN-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0x3fc, [[SCALED]]
; GCN: ds_write2_b32 [[NEG]], {{v[0-9]+}}, {{v[0-9]+}} offset1:1{{$}}
define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset_p1() #1 {
  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
  %neg = sub i32 0, %x.i
  %shl = shl i32 %neg, 2
  %add = add i32 1020, %shl
  %ptr = inttoptr i32 %add to i64 addrspace(3)*
  store i64 123, i64 addrspace(3)* %ptr, align 4
  ret void
}

; Attribute groups referenced by the declarations, definitions and calls above.
; #0: does not unwind and accesses no memory.
; #1: does not unwind.
; #2: does not unwind and is convergent; no use of it is visible in this file.
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
attributes #2 = { nounwind convergent }