; RUN: llc -O0 -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck %s

; FIXME: Merge into indirect-addressing-si.ll

; Make sure that TwoAddressInstructions keeps src0 as subregister sub0
; of the tied implicit use and def of the super register.

; CHECK-LABEL: {{^}}insert_wo_offset:
; CHECK: s_load_dword [[IN:s[0-9]+]]
; CHECK: s_mov_b32 m0, [[IN]]
; CHECK: v_movreld_b32_e32 v[[ELT0:[0-9]+]]
; CHECK: buffer_store_dwordx4
; CHECK: buffer_store_dwordx4
; CHECK: buffer_store_dwordx4
; CHECK: buffer_store_dwordx4
; Kernel under test: inserts float 17.0 into a constant <16 x float> vector
; at the runtime index %in (a kernel argument, so the lane is not known at
; compile time) and stores the resulting vector through %out.
define amdgpu_kernel void @insert_wo_offset(<16 x float> addrspace(1)* %out, i32 %in) {
entry:
  %ins = insertelement <16 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>, float 17.0, i32 %in
  store <16 x float> %ins, <16 x float> addrspace(1)* %out
  ret void
}

; Make sure we don't hit use of undefined register errors when expanding an
; extract with undef index.

; CHECK-LABEL: {{^}}extract_adjacent_blocks:
; CHECK: s_load_dword [[ARG:s[0-9]+]]
; CHECK: s_cmp_lg_u32
; CHECK: s_cbranch_scc1 [[BB4:BB[0-9]+_[0-9]+]]

; CHECK: buffer_load_dwordx4

; CHECK: s_branch [[ENDBB:BB[0-9]+_[0-9]+]]

; CHECK: [[BB4]]:
; CHECK: buffer_load_dwordx4

; CHECK: [[ENDBB]]:
; CHECK: buffer_store_dword
; CHECK: s_endpgm

; Kernel under test: branches on whether %arg is zero. Each arm (%bb1, %bb4)
; does a volatile <4 x float> load from an undef address and extracts an
; element at an undef index; the two extracted floats are merged by a phi in
; %bb7 and written out with a volatile store to an undef address.
; NOTE(review): attribute group #0 is referenced here but its definition is
; not visible in this portion of the file -- confirm it exists elsewhere.
define amdgpu_kernel void @extract_adjacent_blocks(i32 %arg) #0 {
bb:
  %tmp = icmp eq i32 %arg, 0
  br i1 %tmp, label %bb1, label %bb4

bb1:
  %tmp2 = load volatile <4 x float>, <4 x float> addrspace(1)* undef
  %tmp3 = extractelement <4 x float> %tmp2, i32 undef
  ; The side-effecting inline asm uses the loaded vector so that this block
  ; is not optimized out.
  call void asm sideeffect "; reg use $0", "v"(<4 x float> %tmp2) #0
  br label %bb7

bb4:
  %tmp5 = load volatile <4 x float>, <4 x float> addrspace(1)* undef
  %tmp6 = extractelement <4 x float> %tmp5, i32 undef
  ; Same as in %bb1: keep this block from being optimized out.
  call void asm sideeffect "; reg use $0", "v"(<4 x float> %tmp5) #0
  br label %bb7

bb7:
  ; Merge the element extracted on whichever path was taken.
  %tmp8 = phi float [ %tmp3, %bb1 ], [ %tmp6, %bb4 ]
  store volatile float %tmp8, float addrspace(1)* undef
  ret void
}