; Source blob: 260aac8d159d4dcc39de22c6c1916788c56d24d1
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,CIVI %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,CIVI %s

; Both shl operands are <2 x i16> kernel arguments passed by value.
; GCN-LABEL: {{^}}s_shl_v2i16:
; GFX9: s_load_dword [[LHS:s[0-9]+]]
; GFX9: s_load_dword [[RHS:s[0-9]+]]
; GFX9: v_mov_b32_e32 [[VLHS:v[0-9]+]], [[LHS]]
; GFX9: v_pk_lshlrev_b16 [[RESULT:v[0-9]+]], [[RHS]], [[VLHS]]

; VI: s_load_dword s
; VI: s_load_dword s
; VI: s_lshr_b32
; VI: s_lshr_b32
; VI: s_and_b32
; VI: s_and_b32
; NOTE(review): the next two checks used an 'SI' prefix that no RUN line
; enables (and spelled the mnemonic s_and_B32), so they never ran. They are
; assumed to describe the tonga output - confirm by running the test.
; VI: s_and_b32
; VI: s_or_b32

; CI-DAG: v_lshlrev_b32_e32
; CI-DAG: v_and_b32_e32 v{{[0-9]+}}, 0xffff, v{{[0-9]+}}
; CI-DAG: v_lshlrev_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; CI-DAG: v_lshlrev_b32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}}
; CI: v_or_b32_e32
define amdgpu_kernel void @s_shl_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %lhs, <2 x i16> %rhs) #0 {
  %result = shl <2 x i16> %lhs, %rhs
  store <2 x i16> %result, <2 x i16> addrspace(1)* %out
  ret void
}

; Both shl operands are loaded from adjacent <2 x i16> elements of %in,
; starting at the element indexed by the workitem id.
; GCN-LABEL: {{^}}v_shl_v2i16:
; GCN: {{buffer|flat|global}}_load_dword [[LHS:v[0-9]+]]
; GCN: {{buffer|flat|global}}_load_dword [[RHS:v[0-9]+]]
; GFX9: v_pk_lshlrev_b16 [[RESULT:v[0-9]+]], [[RHS]], [[LHS]]

; VI: v_lshlrev_b16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI: v_lshlrev_b16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}

; CI: s_mov_b32 [[MASK:s[0-9]+]], 0xffff{{$}}
; CI: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, [[LHS]]
; CI: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}}
; CI: v_lshlrev_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; CI: v_lshl_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; CI: v_lshlrev_b32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}}
; CI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}}
; CI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @v_shl_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext
  %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext
  %b_ptr = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %in.gep, i32 1
  %a = load <2 x i16>, <2 x i16> addrspace(1)* %in.gep
  %b = load <2 x i16>, <2 x i16> addrspace(1)* %b_ptr
  %result = shl <2 x i16> %a, %b
  store <2 x i16> %result, <2 x i16> addrspace(1)* %out.gep
  ret void
}

; The shifted value is loaded from %in; the shift amount is the by-value
; kernel argument %sgpr.
; GCN-LABEL: {{^}}shl_v_s_v2i16:
; GFX9: s_load_dword [[RHS:s[0-9]+]]
; GFX9: {{buffer|flat|global}}_load_dword [[LHS:v[0-9]+]]
; GFX9: v_pk_lshlrev_b16 [[RESULT:v[0-9]+]], [[RHS]], [[LHS]]
define amdgpu_kernel void @shl_v_s_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in, <2 x i16> %sgpr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext
  %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext
  %vgpr = load <2 x i16>, <2 x i16> addrspace(1)* %in.gep
  %result = shl <2 x i16> %vgpr, %sgpr
  store <2 x i16> %result, <2 x i16> addrspace(1)* %out.gep
  ret void
}

; The shifted value is the by-value kernel argument %sgpr; the shift amount
; is loaded from %in.
; GCN-LABEL: {{^}}shl_s_v_v2i16:
; GFX9: s_load_dword [[LHS:s[0-9]+]]
; GFX9: {{buffer|flat|global}}_load_dword [[RHS:v[0-9]+]]
; GFX9: v_pk_lshlrev_b16 [[RESULT:v[0-9]+]], [[RHS]], [[LHS]]
define amdgpu_kernel void @shl_s_v_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in, <2 x i16> %sgpr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext
  %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext
  %vgpr = load <2 x i16>, <2 x i16> addrspace(1)* %in.gep
  %result = shl <2 x i16> %sgpr, %vgpr
  store <2 x i16> %result, <2 x i16> addrspace(1)* %out.gep
  ret void
}

; The constant vector <8, 8> is the value being shifted; the shift amount is
; loaded from %in.
; GCN-LABEL: {{^}}shl_imm_v_v2i16:
; GCN: {{buffer|flat|global}}_load_dword [[RHS:v[0-9]+]]
; GFX9: v_pk_lshlrev_b16 [[RESULT:v[0-9]+]], [[RHS]], 8
define amdgpu_kernel void @shl_imm_v_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext
  %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext
  %vgpr = load <2 x i16>, <2 x i16> addrspace(1)* %in.gep
  %result = shl <2 x i16> <i16 8, i16 8>, %vgpr
  store <2 x i16> %result, <2 x i16> addrspace(1)* %out.gep
  ret void
}

; The value loaded from %in is shifted left by the constant vector <8, 8>.
; GCN-LABEL: {{^}}shl_v_imm_v2i16:
; GCN: {{buffer|flat|global}}_load_dword [[LHS:v[0-9]+]]
; GFX9: v_pk_lshlrev_b16 [[RESULT:v[0-9]+]], 8, [[LHS]]
define amdgpu_kernel void @shl_v_imm_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext
  %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext
  %vgpr = load <2 x i16>, <2 x i16> addrspace(1)* %in.gep
  %result = shl <2 x i16> %vgpr, <i16 8, i16 8>
  store <2 x i16> %result, <2 x i16> addrspace(1)* %out.gep
  ret void
}

; <4 x i16> variant: both shl operands are loaded from adjacent elements of
; %in. The GFX9 checks expect two packed 16-bit shifts.
; GCN-LABEL: {{^}}v_shl_v4i16:
; GCN: {{buffer|flat|global}}_load_dwordx2
; GCN: {{buffer|flat|global}}_load_dwordx2
; GFX9: v_pk_lshlrev_b16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GFX9: v_pk_lshlrev_b16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: {{buffer|flat|global}}_store_dwordx2
define amdgpu_kernel void @v_shl_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in.gep = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(1)* %in, i64 %tid.ext
  %out.gep = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(1)* %out, i64 %tid.ext
  %b_ptr = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %in.gep, i32 1
  %a = load <4 x i16>, <4 x i16> addrspace(1)* %in.gep
  %b = load <4 x i16>, <4 x i16> addrspace(1)* %b_ptr
  %result = shl <4 x i16> %a, %b
  store <4 x i16> %result, <4 x i16> addrspace(1)* %out.gep
  ret void
}

; <4 x i16> variant: the value loaded from %in is shifted left by a constant
; vector of 8s. The GFX9 checks expect two packed 16-bit shifts by 8.
; GCN-LABEL: {{^}}shl_v_imm_v4i16:
; GCN: {{buffer|flat|global}}_load_dwordx2
; GFX9: v_pk_lshlrev_b16 v{{[0-9]+}}, 8, v{{[0-9]+}}
; GFX9: v_pk_lshlrev_b16 v{{[0-9]+}}, 8, v{{[0-9]+}}
; GCN: {{buffer|flat|global}}_store_dwordx2
define amdgpu_kernel void @shl_v_imm_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in.gep = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(1)* %in, i64 %tid.ext
  %out.gep = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(1)* %out, i64 %tid.ext
  %vgpr = load <4 x i16>, <4 x i16> addrspace(1)* %in.gep
  %result = shl <4 x i16> %vgpr, <i16 8, i16 8, i16 8, i16 8>
  store <4 x i16> %result, <4 x i16> addrspace(1)* %out.gep
  ret void
}

; Intrinsic called by the kernels in this file to obtain a per-workitem index.
declare i32 @llvm.amdgcn.workitem.id.x() #1

; #0 is attached to every kernel definition; #1 to the intrinsic declaration.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }