; RUN: llc -march=amdgcn -mcpu=fiji -amdgpu-sdwa-peephole=0 < %s | FileCheck --check-prefix=GCN --check-prefix=VI %s
; RUN: llc -march=amdgcn -mcpu=fiji < %s | FileCheck --check-prefix=GCN --check-prefix=VI-SDWA %s
; RUN: llc -march=amdgcn -mcpu=bonaire < %s | FileCheck --check-prefix=GCN --check-prefix=CI %s

; The kernel below adds the work-item id to %x, shifts the sum right by 8,
; masks it with 255, and uses the result as an i32 element index into %arg.
; The expectations that follow pin how that shift/mask/index sequence is
; selected for each llc invocation of this test
;   - fiji with the SDWA peephole disabled expects a v_bfe_u32 (offset 8,
;     width 8) followed by a left shift by 2,
;   - fiji with default options expects the shift amount 2 in a VGPR and an
;     SDWA left shift whose src1 selects BYTE_1,
;   - bonaire expects a right shift by 6 followed by a mask with 0x3fc.
; In every configuration the computed low address register must feed the load.
; GCN-LABEL: {{^}}bfe_combine8:
; VI: v_bfe_u32 v[[BFE:[0-9]+]], v{{[0-9]+}}, 8, 8
; VI: v_lshlrev_b32_e32 v[[ADDRBASE:[0-9]+]], 2, v[[BFE]]
; VI-SDWA: v_mov_b32_e32 v[[SHIFT:[0-9]+]], 2
; VI-SDWA: v_lshlrev_b32_sdwa v[[ADDRBASE:[0-9]+]], v[[SHIFT]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; CI: v_lshrrev_b32_e32 v[[SHR:[0-9]+]], 6, v{{[0-9]+}}
; CI: v_and_b32_e32 v[[ADDRLO:[0-9]+]], 0x3fc, v[[SHR]]
; VI: v_add_u32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADDRBASE]]
; VI-SDWA: v_add_u32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADDRBASE]]
; GCN: load_dword v{{[0-9]+}}, v{{\[}}[[ADDRLO]]:
define amdgpu_kernel void @bfe_combine8(i32 addrspace(1)* nocapture %arg, i32 %x) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x() #2
  %idx = add i32 %x, %id
  ; Second byte of %idx, i.e. (%idx >> 8) & 0xff.
  %srl = lshr i32 %idx, 8
  %and = and i32 %srl, 255
  ; Index i32 elements of %arg with the extracted byte.
  %ptr = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %and
  %val = load i32, i32 addrspace(1)* %ptr, align 4
  ; Store the loaded value to the first element of %arg.
  store i32 %val, i32 addrspace(1)* %arg, align 4
  ret void
}

; The kernel below adds the work-item id to %x, shifts the sum right by 1,
; masks it with 2147450880 (0x7fff8000), and uses the result as an i32 element
; index into %arg. The masked value equals the upper 16 bits of the sum
; shifted left by 15. The expectations that follow pin the selected code for
; each llc invocation of this test
;   - fiji with the SDWA peephole disabled expects a v_bfe_u32 (offset 16,
;     width 16) followed by a 32-bit left shift,
;   - fiji with default options expects the shift amount 15 in a VGPR, an SDWA
;     left shift whose src1 selects WORD_1, and a 64-bit left shift by 2,
;   - bonaire expects a right shift by 1, a mask with 0x7fff8000, and a 64-bit
;     left shift by 2.
; In every configuration the computed low address register must feed the load.
; GCN-LABEL: {{^}}bfe_combine16:
; VI: v_bfe_u32 v[[BFE:[0-9]+]], v{{[0-9]+}}, 16, 16
; VI: v_lshlrev_b32_e32 v[[ADDRBASE:[0-9]+]], {{[^,]+}}, v[[BFE]]
; VI-SDWA: v_mov_b32_e32 v[[SHIFT:[0-9]+]], 15
; VI-SDWA: v_lshlrev_b32_sdwa v[[ADDRBASE1:[0-9]+]], v[[SHIFT]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; VI-SDWA: v_lshlrev_b64 v{{\[}}[[ADDRBASE:[0-9]+]]:{{[^\]+}}], 2, v{{\[}}[[ADDRBASE1]]:{{[^\]+}}]
; VI-SDWA: v_add_u32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADDRBASE]]
; CI: v_lshrrev_b32_e32 v[[SHR:[0-9]+]], 1, v{{[0-9]+}}
; CI: v_and_b32_e32 v[[AND:[0-9]+]], 0x7fff8000, v[[SHR]]
; CI: v_lshl_b64 v{{\[}}[[ADDRLO:[0-9]+]]:{{[^\]+}}], v{{\[}}[[AND]]:{{[^\]+}}], 2
; VI: v_add_u32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADDRBASE]]
; GCN: load_dword v{{[0-9]+}}, v{{\[}}[[ADDRLO]]:
define amdgpu_kernel void @bfe_combine16(i32 addrspace(1)* nocapture %arg, i32 %x) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x() #2
  %idx = add i32 %x, %id
  ; (%idx >> 1) & 0x7fff8000 keeps bits 31..16 of %idx, placed at bits 30..15.
  %srl = lshr i32 %idx, 1
  %and = and i32 %srl, 2147450880
  ; Index i32 elements of %arg with the masked value.
  %ptr = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %and
  %val = load i32, i32 addrspace(1)* %ptr, align 4
  ; Store the loaded value to the first element of %arg.
  store i32 %val, i32 addrspace(1)* %arg, align 4
  ret void
}

; Declaration of the intrinsic that both kernels call to obtain %id.
declare i32 @llvm.amdgcn.workitem.id.x() #1