; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -amdgpu-codegenprepare-widen-constant-loads=0 -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s
; RUN: llc -amdgpu-codegenprepare-widen-constant-loads=0 -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s

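; Uniform sub-dword loads from the constant address space should be widened
; to a full s_load_dword, keeping the add/or on the SALU.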
define amdgpu_kernel void @widen_i16_constant_load(i16 addrspace(4)* %arg) {
; SI-LABEL: widen_i16_constant_load:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s5, 0
; SI-NEXT: s_mov_b32 s4, 0
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_load_dword s0, s[0:1], 0x0
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_addk_i32 s0, 0x3e7
; SI-NEXT: s_or_b32 s0, s0, 4
; SI-NEXT: v_mov_b32_e32 v0, s0
; SI-NEXT: buffer_store_short v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: widen_i16_constant_load:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: v_mov_b32_e32 v0, 0
; VI-NEXT: v_mov_b32_e32 v1, 0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_load_dword s0, s[0:1], 0x0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_and_b32 s0, s0, 0xffff
; VI-NEXT: s_addk_i32 s0, 0x3e7
; VI-NEXT: s_or_b32 s0, s0, 4
; VI-NEXT: v_mov_b32_e32 v2, s0
; VI-NEXT: flat_store_short v[0:1], v2
; VI-NEXT: s_endpgm
  %load = load i16, i16 addrspace(4)* %arg, align 4
  %add = add i16 %load, 999
  %or = or i16 %add, 4
  store i16 %or, i16 addrspace(1)* null
  ret void
}

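; Same widening with the result zero-extended to i32; the low 16 bits are
; masked with 0xffff before the arithmetic.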
define amdgpu_kernel void @widen_i16_constant_load_zext_i32(i16 addrspace(4)* %arg) {
; SI-LABEL: widen_i16_constant_load_zext_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s5, 0
; SI-NEXT: s_mov_b32 s4, 0
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_load_dword s0, s[0:1], 0x0
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_and_b32 s0, s0, 0xffff
; SI-NEXT: s_addk_i32 s0, 0x3e7
; SI-NEXT: s_or_b32 s0, s0, 4
; SI-NEXT: v_mov_b32_e32 v0, s0
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: widen_i16_constant_load_zext_i32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: v_mov_b32_e32 v0, 0
; VI-NEXT: v_mov_b32_e32 v1, 0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_load_dword s0, s[0:1], 0x0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_and_b32 s0, s0, 0xffff
; VI-NEXT: s_addk_i32 s0, 0x3e7
; VI-NEXT: s_or_b32 s0, s0, 4
; VI-NEXT: v_mov_b32_e32 v2, s0
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_endpgm
  %load = load i16, i16 addrspace(4)* %arg, align 4
  %ext = zext i16 %load to i32
  %add = add i32 %ext, 999
  %or = or i32 %add, 4
  store i32 %or, i32 addrspace(1)* null
  ret void
}

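; Sign-extended variant: the widened dword is extended with s_sext_i32_i16
; instead of a mask.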
define amdgpu_kernel void @widen_i16_constant_load_sext_i32(i16 addrspace(4)* %arg) {
; SI-LABEL: widen_i16_constant_load_sext_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s5, 0
; SI-NEXT: s_mov_b32 s4, 0
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_load_dword s0, s[0:1], 0x0
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_sext_i32_i16 s0, s0
; SI-NEXT: s_addk_i32 s0, 0x3e7
; SI-NEXT: s_or_b32 s0, s0, 4
; SI-NEXT: v_mov_b32_e32 v0, s0
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: widen_i16_constant_load_sext_i32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: v_mov_b32_e32 v0, 0
; VI-NEXT: v_mov_b32_e32 v1, 0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_load_dword s0, s[0:1], 0x0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_sext_i32_i16 s0, s0
; VI-NEXT: s_addk_i32 s0, 0x3e7
; VI-NEXT: s_or_b32 s0, s0, 4
; VI-NEXT: v_mov_b32_e32 v2, s0
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_endpgm
  %load = load i16, i16 addrspace(4)* %arg, align 4
  %ext = sext i16 %load to i32
  %add = add i32 %ext, 999
  %or = or i32 %add, 4
  store i32 %or, i32 addrspace(1)* null
  ret void
}

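; A non-byte-sized i17 still fits in the widened dword load; the store is
; split into a short plus the remaining bit, extracted with s_bfe_u32.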
define amdgpu_kernel void @widen_i17_constant_load(i17 addrspace(4)* %arg) {
; SI-LABEL: widen_i17_constant_load:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s5, 0
; SI-NEXT: s_mov_b32 s4, 0
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_load_dword s0, s[0:1], 0x0
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_add_i32 s0, s0, 34
; SI-NEXT: s_or_b32 s0, s0, 4
; SI-NEXT: v_mov_b32_e32 v0, s0
; SI-NEXT: s_bfe_u32 s0, s0, 0x10010
; SI-NEXT: buffer_store_short v0, off, s[4:7], 0
; SI-NEXT: s_mov_b32 s4, 2
; SI-NEXT: s_waitcnt expcnt(0)
; SI-NEXT: v_mov_b32_e32 v0, s0
; SI-NEXT: buffer_store_byte v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: widen_i17_constant_load:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: v_mov_b32_e32 v0, 0
; VI-NEXT: v_mov_b32_e32 v2, 2
; VI-NEXT: v_mov_b32_e32 v1, 0
; VI-NEXT: v_mov_b32_e32 v3, 0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_load_dword s0, s[0:1], 0x0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_add_i32 s0, s0, 34
; VI-NEXT: s_or_b32 s0, s0, 4
; VI-NEXT: v_mov_b32_e32 v4, s0
; VI-NEXT: s_bfe_u32 s0, s0, 0x10010
; VI-NEXT: v_mov_b32_e32 v5, s0
; VI-NEXT: flat_store_short v[0:1], v4
; VI-NEXT: flat_store_byte v[2:3], v5
; VI-NEXT: s_endpgm
  %load = load i17, i17 addrspace(4)* %arg, align 4
  %add = add i17 %load, 34
  %or = or i17 %add, 4
  store i17 %or, i17 addrspace(1)* null
  ret void
}

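; f16 loads are widened as well; SI round-trips through f32 for the fadd,
; while VI adds the half directly with v_add_f16.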
define amdgpu_kernel void @widen_f16_constant_load(half addrspace(4)* %arg) {
; SI-LABEL: widen_f16_constant_load:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_load_dword s0, s[0:1], 0x0
; SI-NEXT: s_mov_b32 s1, 0
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_cvt_f32_f16_e32 v0, s0
; SI-NEXT: s_mov_b32 s0, 0
; SI-NEXT: v_add_f32_e32 v0, 4.0, v0
; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: widen_f16_constant_load:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: v_mov_b32_e32 v0, 0
; VI-NEXT: v_mov_b32_e32 v1, 0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_load_dword s0, s[0:1], 0x0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_add_f16_e64 v2, s0, 4.0
; VI-NEXT: flat_store_short v[0:1], v2
; VI-NEXT: s_endpgm
  %load = load half, half addrspace(4)* %arg, align 4
  %add = fadd half %load, 4.0
  store half %add, half addrspace(1)* null
  ret void
}

; FIXME: valu usage on VI
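; <2 x i8> load widened to a dword; the two lanes are adjusted with separate
; constants and repacked into a single 16-bit store.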
define amdgpu_kernel void @widen_v2i8_constant_load(<2 x i8> addrspace(4)* %arg) {
; SI-LABEL: widen_v2i8_constant_load:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s5, 0
; SI-NEXT: s_mov_b32 s4, 0
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_load_dword s0, s[0:1], 0x0
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_and_b32 s1, s0, 0xff00
; SI-NEXT: s_and_b32 s0, s0, 0xffff
; SI-NEXT: s_add_i32 s0, s0, 12
; SI-NEXT: s_or_b32 s0, s0, 4
; SI-NEXT: s_addk_i32 s1, 0x2c00
; SI-NEXT: s_and_b32 s0, s0, 0xff
; SI-NEXT: s_or_b32 s0, s0, s1
; SI-NEXT: s_or_b32 s0, s0, 0x300
; SI-NEXT: v_mov_b32_e32 v0, s0
; SI-NEXT: buffer_store_short v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: widen_v2i8_constant_load:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: v_mov_b32_e32 v0, 44
; VI-NEXT: v_mov_b32_e32 v1, 3
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_load_dword s0, s[0:1], 0x0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_and_b32 s1, s0, 0xffff
; VI-NEXT: v_mov_b32_e32 v2, s0
; VI-NEXT: s_add_i32 s1, s1, 12
; VI-NEXT: v_add_u32_sdwa v0, vcc, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; VI-NEXT: s_or_b32 s0, s1, 4
; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-NEXT: s_and_b32 s0, s0, 0xff
; VI-NEXT: v_or_b32_e32 v2, s0, v0
; VI-NEXT: v_mov_b32_e32 v0, 0
; VI-NEXT: v_mov_b32_e32 v1, 0
; VI-NEXT: flat_store_short v[0:1], v2
; VI-NEXT: s_endpgm
  %load = load <2 x i8>, <2 x i8> addrspace(4)* %arg, align 4
  %add = add <2 x i8> %load, <i8 12, i8 44>
  %or = or <2 x i8> %add, <i8 4, i8 3>
  store <2 x i8> %or, <2 x i8> addrspace(1)* null
  ret void
}

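; Negative test: the address depends on the workitem ID, so the load is
; divergent and must stay a 16-bit VMEM load rather than being widened.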
define amdgpu_kernel void @no_widen_i16_constant_divergent_load(i16 addrspace(4)* %arg) {
; SI-LABEL: no_widen_i16_constant_divergent_load:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s2, 0
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; SI-NEXT: v_mov_b32_e32 v1, 0
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_load_ushort v0, v[0:1], s[0:3], 0 addr64
; SI-NEXT: s_mov_b32 s1, 0
; SI-NEXT: s_mov_b32 s0, 0
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: v_add_i32_e32 v0, vcc, 0x3e7, v0
; SI-NEXT: v_or_b32_e32 v0, 4, v0
; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: no_widen_i16_constant_divergent_load:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: v_lshlrev_b32_e32 v2, 1, v0
; VI-NEXT: v_mov_b32_e32 v0, 0
; VI-NEXT: v_mov_b32_e32 v1, 0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v3, s1
; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2
; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
; VI-NEXT: flat_load_ushort v2, v[2:3]
; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; VI-NEXT: v_add_u16_e32 v2, 0x3e7, v2
; VI-NEXT: v_or_b32_e32 v2, 4, v2
; VI-NEXT: flat_store_short v[0:1], v2
; VI-NEXT: s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = zext i32 %tid to i64
  %gep.arg = getelementptr inbounds i16, i16 addrspace(4)* %arg, i64 %tid.ext
  %load = load i16, i16 addrspace(4)* %gep.arg, align 4
  %add = add i16 %load, 999
  %or = or i16 %add, 4
  store i16 %or, i16 addrspace(1)* null
  ret void
}

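; i1 load widened to a dword; only bit 0 survives the s_and before the byte
; store.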
define amdgpu_kernel void @widen_i1_constant_load(i1 addrspace(4)* %arg) {
; SI-LABEL: widen_i1_constant_load:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s5, 0
; SI-NEXT: s_mov_b32 s4, 0
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_load_dword s0, s[0:1], 0x0
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_and_b32 s0, s0, 1
; SI-NEXT: v_mov_b32_e32 v0, s0
; SI-NEXT: buffer_store_byte v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: widen_i1_constant_load:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: v_mov_b32_e32 v0, 0
; VI-NEXT: v_mov_b32_e32 v1, 0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_load_dword s0, s[0:1], 0x0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_and_b32 s0, s0, 1
; VI-NEXT: v_mov_b32_e32 v2, s0
; VI-NEXT: flat_store_byte v[0:1], v2
; VI-NEXT: s_endpgm
  %load = load i1, i1 addrspace(4)* %arg, align 4
  %and = and i1 %load, true
  store i1 %and, i1 addrspace(1)* null
  ret void
}

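; The widened load feeding a zext and i32 arithmetic, stored as a full dword.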
define amdgpu_kernel void @widen_i16_zextload_i64_constant_load(i16 addrspace(4)* %arg) {
; SI-LABEL: widen_i16_zextload_i64_constant_load:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s5, 0
; SI-NEXT: s_mov_b32 s4, 0
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_load_dword s0, s[0:1], 0x0
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_and_b32 s0, s0, 0xffff
; SI-NEXT: s_addk_i32 s0, 0x3e7
; SI-NEXT: s_or_b32 s0, s0, 4
; SI-NEXT: v_mov_b32_e32 v0, s0
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: widen_i16_zextload_i64_constant_load:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: v_mov_b32_e32 v0, 0
; VI-NEXT: v_mov_b32_e32 v1, 0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_load_dword s0, s[0:1], 0x0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_and_b32 s0, s0, 0xffff
; VI-NEXT: s_addk_i32 s0, 0x3e7
; VI-NEXT: s_or_b32 s0, s0, 4
; VI-NEXT: v_mov_b32_e32 v2, s0
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_endpgm
  %load = load i16, i16 addrspace(4)* %arg, align 4
  %zext = zext i16 %load to i32
  %add = add i32 %zext, 999
  %or = or i32 %add, 4
  store i32 %or, i32 addrspace(1)* null
  ret void
}

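; i1 zero-extended to i64: the masked bit feeds a 64-bit add done with
; s_add_u32/s_addc_u32.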
define amdgpu_kernel void @widen_i1_zext_to_i64_constant_load(i1 addrspace(4)* %arg) {
; SI-LABEL: widen_i1_zext_to_i64_constant_load:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s5, 0
; SI-NEXT: s_mov_b32 s4, 0
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_load_dword s0, s[0:1], 0x0
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_and_b32 s0, s0, 1
; SI-NEXT: s_add_u32 s0, s0, 0x3e7
; SI-NEXT: s_addc_u32 s1, 0, 0
; SI-NEXT: v_mov_b32_e32 v0, s0
; SI-NEXT: v_mov_b32_e32 v1, s1
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: widen_i1_zext_to_i64_constant_load:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: v_mov_b32_e32 v0, 0
; VI-NEXT: v_mov_b32_e32 v1, 0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_load_dword s0, s[0:1], 0x0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_and_b32 s0, s0, 1
; VI-NEXT: s_add_u32 s0, s0, 0x3e7
; VI-NEXT: s_addc_u32 s1, 0, 0
; VI-NEXT: v_mov_b32_e32 v3, s1
; VI-NEXT: v_mov_b32_e32 v2, s0
; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
; VI-NEXT: s_endpgm
  %load = load i1, i1 addrspace(4)* %arg, align 4
  %zext = zext i1 %load to i64
  %add = add i64 %zext, 999
  store i64 %add, i64 addrspace(1)* null
  ret void
}

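; 32-bit constant address space (addrspace(6)): the pointer is a single dword
; and gets a zero high half before the widened load.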
define amdgpu_kernel void @widen_i16_constant32_load(i16 addrspace(6)* %arg) {
; SI-LABEL: widen_i16_constant32_load:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s0, s[0:1], 0x9
; SI-NEXT: s_mov_b32 s1, 0
; SI-NEXT: s_mov_b32 s5, 0
; SI-NEXT: s_mov_b32 s4, 0
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_load_dword s0, s[0:1], 0x0
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_addk_i32 s0, 0x3e7
; SI-NEXT: s_or_b32 s0, s0, 4
; SI-NEXT: v_mov_b32_e32 v0, s0
; SI-NEXT: buffer_store_short v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: widen_i16_constant32_load:
; VI: ; %bb.0:
; VI-NEXT: s_load_dword s0, s[0:1], 0x24
; VI-NEXT: s_mov_b32 s1, 0
; VI-NEXT: v_mov_b32_e32 v0, 0
; VI-NEXT: v_mov_b32_e32 v1, 0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_load_dword s0, s[0:1], 0x0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_and_b32 s0, s0, 0xffff
; VI-NEXT: s_addk_i32 s0, 0x3e7
; VI-NEXT: s_or_b32 s0, s0, 4
; VI-NEXT: v_mov_b32_e32 v2, s0
; VI-NEXT: flat_store_short v[0:1], v2
; VI-NEXT: s_endpgm
  %load = load i16, i16 addrspace(6)* %arg, align 4
  %add = add i16 %load, 999
  %or = or i16 %add, 4
  store i16 %or, i16 addrspace(1)* null
  ret void
}

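; Global-space loads can also be widened when marked !invariant.load.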
define amdgpu_kernel void @widen_i16_global_invariant_load(i16 addrspace(1)* %arg) {
; SI-LABEL: widen_i16_global_invariant_load:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s5, 0
; SI-NEXT: s_mov_b32 s4, 0
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_load_dword s0, s[0:1], 0x0
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_addk_i32 s0, 0x3e7
; SI-NEXT: s_or_b32 s0, s0, 1
; SI-NEXT: v_mov_b32_e32 v0, s0
; SI-NEXT: buffer_store_short v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: widen_i16_global_invariant_load:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: v_mov_b32_e32 v0, 0
; VI-NEXT: v_mov_b32_e32 v1, 0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_load_dword s0, s[0:1], 0x0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_and_b32 s0, s0, 0xffff
; VI-NEXT: s_addk_i32 s0, 0x3e7
; VI-NEXT: s_or_b32 s0, s0, 1
; VI-NEXT: v_mov_b32_e32 v2, s0
; VI-NEXT: flat_store_short v[0:1], v2
; VI-NEXT: s_endpgm
  %load = load i16, i16 addrspace(1)* %arg, align 4, !invariant.load !0
  %add = add i16 %load, 999
  %or = or i16 %add, 1
  store i16 %or, i16 addrspace(1)* null
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x()

!0 = !{}