; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=gfx902 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s

; add1: stores v + zext(x >u y) back to the loaded address.
; The default amdgcn run expects the compare result to be consumed directly
; as the carry-in of a v_addc whose other addend is the literal 0, with no
; v_cndmask following it. The gfx902 run expects the v_addc_co_u32 form
; writing its carry-out to vcc.
; GCN-LABEL: {{^}}add1:
; GCN: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN: v_addc_u32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, 0, v{{[0-9]+}}, [[CC]]
; GCN-NOT: v_cndmask

; GFX9-LABEL: {{^}}add1:
; GFX9: v_addc_co_u32_e{{32|64}} v{{[0-9]+}}, vcc
define amdgpu_kernel void @add1(i32 addrspace(1)* nocapture %arg) {
bb:
  %x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %y = tail call i32 @llvm.amdgcn.workitem.id.y()
  %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x
  %v = load i32, i32 addrspace(1)* %gep, align 4
  %cmp = icmp ugt i32 %x, %y
  %ext = zext i1 %cmp to i32
  %add = add i32 %v, %ext
  store i32 %add, i32 addrspace(1)* %gep, align 4
  ret void
}

; add1_i16: same v + zext(x >u y) pattern as a non-kernel function whose
; 32-bit sum is truncated and returned as i16. The %dst argument is not
; referenced by the body.
; Both runs expect the same add-with-carry selection as for a plain i32 result.
; GCN-LABEL: {{^}}add1_i16:
; GCN: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN: v_addc_u32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, 0, v{{[0-9]+}}, [[CC]]
; GCN-NOT: v_cndmask

; GFX9-LABEL: {{^}}add1_i16:
; GFX9: v_addc_co_u32_e{{32|64}} v{{[0-9]+}}, vcc
define i16 @add1_i16(i32 addrspace(1)* nocapture %arg, i16 addrspace(1)* nocapture %dst) {
bb:
  %x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %y = tail call i32 @llvm.amdgcn.workitem.id.y()
  %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x
  %v = load i32, i32 addrspace(1)* %gep, align 4
  %cmp = icmp ugt i32 %x, %y
  %ext = zext i1 %cmp to i32
  %add = add i32 %v, %ext
  %trunc = trunc i32 %add to i16
  ret i16 %trunc
}

; sub1: stores v + sext(x >u y); the sign-extended i1 is -1 when the compare
; is true, so the add acts as a conditional decrement.
; The default amdgcn run expects the compare to write vcc and a
; v_subbrev with a literal 0 operand to consume it, with no v_cndmask
; following. The gfx902 run expects the v_subbrev_co_u32 form.
; GCN-LABEL: {{^}}sub1:
; GCN: v_cmp_gt_u32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: v_subbrev_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
; GCN-NOT: v_cndmask

; GFX9-LABEL: {{^}}sub1:
; GFX9: v_subbrev_co_u32_e{{32|64}} v{{[0-9]+}}, vcc
define amdgpu_kernel void @sub1(i32 addrspace(1)* nocapture %arg) {
bb:
  %x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %y = tail call i32 @llvm.amdgcn.workitem.id.y()
  %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x
  %v = load i32, i32 addrspace(1)* %gep, align 4
  %cmp = icmp ugt i32 %x, %y
  %ext = sext i1 %cmp to i32
  %add = add i32 %v, %ext
  store i32 %add, i32 addrspace(1)* %gep, align 4
  ret void
}

; add_adde: stores (v + zext(x >u y)) + a, i.e. the carry-style add comes
; first and the add of the kernel argument %a second.
; The default amdgcn run expects one v_addc taking two register addends plus
; the compare result as carry-in; the -NOT lines reject a v_cndmask or a
; further v_add after that v_addc. The gfx902 run expects v_addc_co_u32.
; GCN-LABEL: {{^}}add_adde:
; GCN: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN: v_addc_u32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CC]]
; GCN-NOT: v_cndmask
; GCN-NOT: v_add

; GFX9-LABEL: {{^}}add_adde:
; GFX9: v_addc_co_u32_e{{32|64}} v{{[0-9]+}}, vcc
define amdgpu_kernel void @add_adde(i32 addrspace(1)* nocapture %arg, i32 %a) {
bb:
  %x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %y = tail call i32 @llvm.amdgcn.workitem.id.y()
  %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x
  %v = load i32, i32 addrspace(1)* %gep, align 4
  %cmp = icmp ugt i32 %x, %y
  %ext = zext i1 %cmp to i32
  %adde = add i32 %v, %ext
  %add2 = add i32 %adde, %a
  store i32 %add2, i32 addrspace(1)* %gep, align 4
  ret void
}

; adde_add: stores (v + a) + zext(x >u y), i.e. the plain add of the kernel
; argument %a comes first and the carry-style add second.
; The expectations match the opposite operand order: one v_addc with two
; register addends and the compare result as carry-in, and no v_cndmask or
; further v_add after it. The gfx902 run expects v_addc_co_u32.
; GCN-LABEL: {{^}}adde_add:
; GCN: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN: v_addc_u32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CC]]
; GCN-NOT: v_cndmask
; GCN-NOT: v_add

; GFX9-LABEL: {{^}}adde_add:
; GFX9: v_addc_co_u32_e{{32|64}} v{{[0-9]+}}, vcc
define amdgpu_kernel void @adde_add(i32 addrspace(1)* nocapture %arg, i32 %a) {
bb:
  %x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %y = tail call i32 @llvm.amdgcn.workitem.id.y()
  %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x
  %v = load i32, i32 addrspace(1)* %gep, align 4
  %cmp = icmp ugt i32 %x, %y
  %ext = zext i1 %cmp to i32
  %add = add i32 %v, %a
  %adde = add i32 %add, %ext
  store i32 %adde, i32 addrspace(1)* %gep, align 4
  ret void
}

; sub_sube: stores (v + sext(x >u y)) - a, i.e. the borrow-style add comes
; first and the subtraction of the kernel argument %a second.
; The default amdgcn run expects one v_subb taking two register operands plus
; the compare result as borrow-in; the -NOT lines reject a v_cndmask or a
; further v_sub after that v_subb. The gfx902 run expects v_subb_co_u32.
; GCN-LABEL: {{^}}sub_sube:
; GCN: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN: v_subb_u32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CC]]
; GCN-NOT: v_cndmask
; GCN-NOT: v_sub

; GFX9-LABEL: {{^}}sub_sube:
; GFX9: v_subb_co_u32_e{{32|64}} v{{[0-9]+}}, vcc
define amdgpu_kernel void @sub_sube(i32 addrspace(1)* nocapture %arg, i32 %a) {
bb:
  %x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %y = tail call i32 @llvm.amdgcn.workitem.id.y()
  %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x
  %v = load i32, i32 addrspace(1)* %gep, align 4
  %cmp = icmp ugt i32 %x, %y
  %ext = sext i1 %cmp to i32
  %adde = add i32 %v, %ext
  %sub = sub i32 %adde, %a
  store i32 %sub, i32 addrspace(1)* %gep, align 4
  ret void
}

; sube_sub: stores (v - a) + sext(x >u y), i.e. the plain subtraction of the
; kernel argument %a comes first and the borrow-style add second.
; The expectations match the opposite operand order: one v_subb with two
; register operands and the compare result as borrow-in, and no v_cndmask or
; further v_sub after it. The gfx902 run expects v_subb_co_u32.
; GCN-LABEL: {{^}}sube_sub:
; GCN: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN: v_subb_u32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CC]]
; GCN-NOT: v_cndmask
; GCN-NOT: v_sub

; GFX9-LABEL: {{^}}sube_sub:
; GFX9: v_subb_co_u32_e{{32|64}} v{{[0-9]+}}, vcc
define amdgpu_kernel void @sube_sub(i32 addrspace(1)* nocapture %arg, i32 %a) {
bb:
  %x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %y = tail call i32 @llvm.amdgcn.workitem.id.y()
  %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x
  %v = load i32, i32 addrspace(1)* %gep, align 4
  %cmp = icmp ugt i32 %x, %y
  %ext = sext i1 %cmp to i32
  %sub = sub i32 %v, %a
  %adde = add i32 %sub, %ext
  store i32 %adde, i32 addrspace(1)* %gep, align 4
  ret void
}

; zext_flclass: stores v + zext(class(x, 608)), where the i1 comes from the
; llvm.amdgcn.class.f32 intrinsic instead of an integer compare.
; The default amdgcn run expects the v_cmp_class_f32 result to feed a v_addc
; with a literal 0 addend as its carry-in, with no v_cndmask following.
; The gfx902 run expects v_addc_co_u32.
; GCN-LABEL: {{^}}zext_flclass:
; GCN: v_cmp_class_f32_e{{32|64}} [[CC:[^,]+]],
; GCN: v_addc_u32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, 0, v{{[0-9]+}}, [[CC]]
; GCN-NOT: v_cndmask

; GFX9-LABEL: {{^}}zext_flclass:
; GFX9: v_addc_co_u32_e{{32|64}} v{{[0-9]+}}, vcc
define amdgpu_kernel void @zext_flclass(i32 addrspace(1)* nocapture %arg, float %x) {
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %id
  %v = load i32, i32 addrspace(1)* %gep, align 4
  %cmp = tail call zeroext i1 @llvm.amdgcn.class.f32(float %x, i32 608)
  %ext = zext i1 %cmp to i32
  %add = add i32 %v, %ext
  store i32 %add, i32 addrspace(1)* %gep, align 4
  ret void
}

; sext_flclass: stores v + sext(class(x, 608)), the sign-extended counterpart
; of the zext class test.
; The default amdgcn run expects v_cmp_class_f32 to write vcc and a
; v_subbrev with a literal 0 operand to consume it, with no v_cndmask
; following. The gfx902 run expects v_subbrev_co_u32.
; GCN-LABEL: {{^}}sext_flclass:
; GCN: v_cmp_class_f32_e32 vcc,
; GCN: v_subbrev_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
; GCN-NOT: v_cndmask

; GFX9-LABEL: {{^}}sext_flclass:
; GFX9: v_subbrev_co_u32_e32 v{{[0-9]+}}, vcc
define amdgpu_kernel void @sext_flclass(i32 addrspace(1)* nocapture %arg, float %x) {
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %id
  %v = load i32, i32 addrspace(1)* %gep, align 4
  %cmp = tail call zeroext i1 @llvm.amdgcn.class.f32(float %x, i32 608)
  %ext = sext i1 %cmp to i32
  %add = add i32 %v, %ext
  store i32 %add, i32 addrspace(1)* %gep, align 4
  ret void
}

; add_and: stores v + zext((x >u y) & (x >u 1)), so the extended i1 is the
; AND of two compares rather than a single compare.
; The default amdgcn run expects the s_and_b64 result to feed a v_addc with
; a literal 0 addend as its carry-in, with no v_cndmask following.
; The gfx902 run expects v_addc_co_u32.
; GCN-LABEL: {{^}}add_and:
; GCN: s_and_b64 [[CC:[^,]+]],
; GCN: v_addc_u32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, 0, v{{[0-9]+}}, [[CC]]
; GCN-NOT: v_cndmask

; GFX9-LABEL: {{^}}add_and:
; GFX9: v_addc_co_u32_e{{32|64}} v{{[0-9]+}}, vcc
define amdgpu_kernel void @add_and(i32 addrspace(1)* nocapture %arg) {
bb:
  %x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %y = tail call i32 @llvm.amdgcn.workitem.id.y()
  %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x
  %v = load i32, i32 addrspace(1)* %gep, align 4
  %cmp1 = icmp ugt i32 %x, %y
  %cmp2 = icmp ugt i32 %x, 1
  %cmp = and i1 %cmp1, %cmp2
  %ext = zext i1 %cmp to i32
  %add = add i32 %v, %ext
  store i32 %add, i32 addrspace(1)* %gep, align 4
  ret void
}

; Intrinsics called by the test functions in this file; all share attribute
; group #0.
declare i1 @llvm.amdgcn.class.f32(float, i32) #0

declare i32 @llvm.amdgcn.workitem.id.x() #0

declare i32 @llvm.amdgcn.workitem.id.y() #0

attributes #0 = { nounwind readnone speculatable }