blob: 65307ca6fa94fed1c985bcf5c4585cc7cb0ab514 [file] [log] [blame]
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s

; Test the combine that reduces the width of a 64-bit shift to 32 bits when
; the result is truncated to 16 bits.

; lshr by 16 followed by trunc to i16 keeps bits [16, 32) of %x. Those bits all
; lie in the low 32-bit half, so the checks expect a single 32-bit shift on v0
; and no 64-bit shift.
; GCN-LABEL: {{^}}trunc_srl_i64_16_to_i16:
; GCN: s_waitcnt
; GCN-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GCN-NEXT: s_setpc_b64
define i16 @trunc_srl_i64_16_to_i16(i64 %x) {
  %shift = lshr i64 %x, 16
  %trunc = trunc i64 %shift to i16
  ret i16 %trunc
}

; Negative case: lshr by 17 followed by trunc to i16 keeps bits [17, 33) of %x,
; which straddle the 32-bit boundary. The checks therefore expect the full
; 64-bit shift on v[0:1] to remain.
; GCN-LABEL: {{^}}trunc_srl_i64_17_to_i16:
; GCN: s_waitcnt
; GCN-NEXT: v_lshrrev_b64 v[0:1], 17, v[0:1]
; GCN-NEXT: s_setpc_b64
define i16 @trunc_srl_i64_17_to_i16(i64 %x) {
  %shift = lshr i64 %x, 17
  %trunc = trunc i64 %shift to i16
  ret i16 %trunc
}

; Non-power-of-two types: lshr i55 by 15 followed by trunc to i15 keeps bits
; [15, 30) of %x, all inside the low 32 bits, so a 32-bit shift by 15 is
; expected. The add of 4 consumes the truncated value and is checked as a
; 16-bit add.
; NOTE(review): the function name says "16" but the IR and the checks shift by
; 15. The name is kept as-is because the label check matches on it.
; GCN-LABEL: {{^}}trunc_srl_i55_16_to_i15:
; GCN: s_waitcnt
; GCN-NEXT: v_lshrrev_b32_e32 v0, 15, v0
; GCN-NEXT: v_add_u16_e32 v0, 4, v0
; GCN-NEXT: s_setpc_b64
define i15 @trunc_srl_i55_16_to_i15(i55 %x) {
  %shift = lshr i55 %x, 15
  %trunc = trunc i55 %shift to i15
  %add = add i15 %trunc, 4
  ret i15 %add
}

; ashr by 16 followed by trunc to i16 keeps bits [16, 32) of %x. None of the
; sign-fill bits survive the truncation, so the checks expect the same 32-bit
; logical shift on v0 as in the lshr variant.
; GCN-LABEL: {{^}}trunc_sra_i64_16_to_i16:
; GCN: s_waitcnt
; GCN-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GCN-NEXT: s_setpc_b64
define i16 @trunc_sra_i64_16_to_i16(i64 %x) {
  %shift = ashr i64 %x, 16
  %trunc = trunc i64 %shift to i16
  ret i16 %trunc
}

; Negative case: ashr by 17 followed by trunc to i16 keeps bits [17, 33) of %x,
; which straddle the 32-bit boundary, so a 64-bit shift on v[0:1] is still
; expected. The sign-fill bits are discarded by the truncation, which is why
; the checks expect a logical (not arithmetic) 64-bit shift.
; GCN-LABEL: {{^}}trunc_sra_i64_17_to_i16:
; GCN: s_waitcnt
; GCN-NEXT: v_lshrrev_b64 v[0:1], 17, v[0:1]
; GCN-NEXT: s_setpc_b64
define i16 @trunc_sra_i64_17_to_i16(i64 %x) {
  %shift = ashr i64 %x, 17
  %trunc = trunc i64 %shift to i16
  ret i16 %trunc
}

; shl by 16 leaves the low 16 bits zero, and trunc to i16 keeps only those
; bits, so the checks expect the whole sequence to fold to a constant 0 in v0.
; GCN-LABEL: {{^}}trunc_shl_i64_16_to_i16:
; GCN: s_waitcnt
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: s_setpc_b64
define i16 @trunc_shl_i64_16_to_i16(i64 %x) {
  %shift = shl i64 %x, 16
  %trunc = trunc i64 %shift to i16
  ret i16 %trunc
}

; shl by 17 leaves the low 17 bits zero, and trunc to i16 keeps only the low
; 16 bits, so the checks expect the result to fold to a constant 0 in v0.
; GCN-LABEL: {{^}}trunc_shl_i64_17_to_i16:
; GCN: s_waitcnt
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: s_setpc_b64
define i16 @trunc_shl_i64_17_to_i16(i64 %x) {
  %shift = shl i64 %x, 17
  %trunc = trunc i64 %shift to i16
  ret i16 %trunc
}

; Vector variant: each i64 lane is shifted right by 16 and truncated to i16.
; The checks expect only 32-bit operations: a 32-bit shift of v0, then a
; v_and_or_b32 that combines v2 masked with 0xffff0000 and the shifted v0 into
; the packed result.
; GCN-LABEL: {{^}}trunc_srl_v2i64_16_to_v2i16:
; GCN: s_waitcnt
; GCN-DAG: v_lshrrev_b32_e32 v0, 16, v0
; GCN-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0xffff0000
; GCN: v_and_or_b32 v0, v2, [[MASK]], v0
; GCN-NEXT: s_setpc_b64
define <2 x i16> @trunc_srl_v2i64_16_to_v2i16(<2 x i64> %x) {
  %shift = lshr <2 x i64> %x, <i64 16, i64 16>
  %trunc = trunc <2 x i64> %shift to <2 x i16>
  ret <2 x i16> %trunc
}

; Kernel variant of the lshr-by-16 case. The checks expect a single dword
; load, a 32-bit scalar shift and a scalar or, with the result then copied to
; a vector register for the 16-bit store. The or with 4 gives the truncated
; value a user. The store address is undef; the checks accept any address
; register pair.
; GCN-LABEL: {{^}}s_trunc_srl_i64_16_to_i16:
; GCN: s_load_dword [[VAL:s[0-9]+]]
; GCN: s_lshr_b32 [[VAL_SHIFT:s[0-9]+]], [[VAL]], 16
; GCN: s_or_b32 [[RESULT:s[0-9]+]], [[VAL_SHIFT]], 4
; GCN: v_mov_b32_e32 [[V_RESULT:v[0-9]+]], [[RESULT]]
; GCN: global_store_short v{{\[[0-9]+:[0-9]+\]}}, [[V_RESULT]]
define amdgpu_kernel void @s_trunc_srl_i64_16_to_i16(i64 %x) {
  %shift = lshr i64 %x, 16
  %trunc = trunc i64 %shift to i16
  %or = or i16 %trunc, 4
  store i16 %or, i16 addrspace(1)* undef
  ret void
}