; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s

; Test combine to reduce the width of a 64-bit shift to 32-bit if
; truncated to 16-bit.
; Shift by exactly 16: (trunc i16 (lshr i64 x, 16)) keeps only bits [31:16]
; of x, which all live in the low dword, so the combine narrows the 64-bit
; shift to a single 32-bit shift of v0 (confirmed by the v_lshrrev_b32 below).
; GCN-LABEL: {{^}}trunc_srl_i64_16_to_i16:
; GCN: s_waitcnt
; GCN-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GCN-NEXT: s_setpc_b64
define i16 @trunc_srl_i64_16_to_i16(i64 %x) {
  %shift = lshr i64 %x, 16
  %trunc = trunc i64 %shift to i16
  ret i16 %trunc
}
| 15 | |
; Shift by 17: the truncated result is bits [32:17] of x, and bit 32 lives in
; the high dword, so the shift cannot be narrowed — the full 64-bit
; v_lshrrev_b64 is kept.
; GCN-LABEL: {{^}}trunc_srl_i64_17_to_i16:
; GCN: s_waitcnt
; GCN-NEXT: v_lshrrev_b64 v[0:1], 17, v[0:1]
; GCN-NEXT: s_setpc_b64
define i16 @trunc_srl_i64_17_to_i16(i64 %x) {
  %shift = lshr i64 %x, 17
  %trunc = trunc i64 %shift to i16
  ret i16 %trunc
}
| 25 | |
; Non-power-of-2 types: i55 shifted by 15 and truncated to i15 keeps bits
; [29:15], all within the low 32 bits, so a 32-bit shift suffices. The add
; gives the truncated value a non-trivial i15 use.
; NOTE(review): the function name says "_16_" but the shift amount is 15 —
; presumably a naming slip carried over from the i64 tests; confirm intent
; before renaming (the GCN-LABEL would need to change in lockstep).
; GCN-LABEL: {{^}}trunc_srl_i55_16_to_i15:
; GCN: s_waitcnt
; GCN-NEXT: v_lshrrev_b32_e32 v0, 15, v0
; GCN-NEXT: v_add_u16_e32 v0, 4, v0
; GCN-NEXT: s_setpc_b64
define i15 @trunc_srl_i55_16_to_i15(i55 %x) {
  %shift = lshr i55 %x, 15
  %trunc = trunc i55 %shift to i15
  %add = add i15 %trunc, 4
  ret i15 %add
}
| 37 | |
; Arithmetic shift by 16: the sign bits shifted in from the top never reach
; bits [31:16], which is all the truncation keeps, so the ashr is lowered as
; a plain logical 32-bit shift (v_lshrrev_b32, not an ashr form).
; GCN-LABEL: {{^}}trunc_sra_i64_16_to_i16:
; GCN: s_waitcnt
; GCN-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GCN-NEXT: s_setpc_b64
define i16 @trunc_sra_i64_16_to_i16(i64 %x) {
  %shift = ashr i64 %x, 16
  %trunc = trunc i64 %shift to i16
  ret i16 %trunc
}
| 47 | |
; Arithmetic shift by 17: bits [32:17] cross the dword boundary, so a 64-bit
; shift is kept — but since the kept bits are untouched by sign extension it
; is still emitted as the logical v_lshrrev_b64 rather than an ashr.
; GCN-LABEL: {{^}}trunc_sra_i64_17_to_i16:
; GCN: s_waitcnt
; GCN-NEXT: v_lshrrev_b64 v[0:1], 17, v[0:1]
; GCN-NEXT: s_setpc_b64
define i16 @trunc_sra_i64_17_to_i16(i64 %x) {
  %shift = ashr i64 %x, 17
  %trunc = trunc i64 %shift to i16
  ret i16 %trunc
}
| 57 | |
; Left shift by 16 then truncate to i16: every kept bit is one of the zeros
; shifted in, so the whole expression folds to the constant 0 (v_mov 0).
; GCN-LABEL: {{^}}trunc_shl_i64_16_to_i16:
; GCN: s_waitcnt
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: s_setpc_b64
define i16 @trunc_shl_i64_16_to_i16(i64 %x) {
  %shift = shl i64 %x, 16
  %trunc = trunc i64 %shift to i16
  ret i16 %trunc
}
| 67 | |
; Same as above with a shift amount past the kept width: still folds to 0.
; GCN-LABEL: {{^}}trunc_shl_i64_17_to_i16:
; GCN: s_waitcnt
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: s_setpc_b64
define i16 @trunc_shl_i64_17_to_i16(i64 %x) {
  %shift = shl i64 %x, 17
  %trunc = trunc i64 %shift to i16
  ret i16 %trunc
}
| 77 | |
; Vector case: each i64 lane is shifted with a narrowed 32-bit shift; the
; two i16 results are then packed into one VGPR — lane 0's bits land in the
; low half of v0 and lane 1's (already sitting at [31:16] of v2) are merged
; in via v_and_or_b32 with the 0xffff0000 mask.
; GCN-LABEL: {{^}}trunc_srl_v2i64_16_to_v2i16:
; GCN: s_waitcnt
; GCN-DAG: v_lshrrev_b32_e32 v0, 16, v0
; GCN-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0xffff0000
; GCN: v_and_or_b32 v0, v2, [[MASK]], v0
; GCN-NEXT: s_setpc_b64
define <2 x i16> @trunc_srl_v2i64_16_to_v2i16(<2 x i64> %x) {
  %shift = lshr <2 x i64> %x, <i64 16, i64 16>
  %trunc = trunc <2 x i64> %shift to <2 x i16>
  ret <2 x i16> %trunc
}
| 89 | |
; Scalar (SGPR) path via a kernel argument: the narrowing also shrinks the
; memory access — only one dword of the 64-bit argument is loaded
; (s_load_dword), then shifted with s_lshr_b32. The "or 4" keeps a real i16
; use of the truncated value; the store to an undef global pointer just
; anchors the result.
; GCN-LABEL: {{^}}s_trunc_srl_i64_16_to_i16:
; GCN: s_load_dword [[VAL:s[0-9]+]]
; GCN: s_lshr_b32 [[VAL_SHIFT:s[0-9]+]], [[VAL]], 16
; GCN: s_or_b32 [[RESULT:s[0-9]+]], [[VAL_SHIFT]], 4
; GCN: v_mov_b32_e32 [[V_RESULT:v[0-9]+]], [[RESULT]]
; GCN: global_store_short v{{\[[0-9]+:[0-9]+\]}}, [[V_RESULT]]
define amdgpu_kernel void @s_trunc_srl_i64_16_to_i16(i64 %x) {
  %shift = lshr i64 %x, 16
  %trunc = trunc i64 %shift to i16
  %add = or i16 %trunc, 4
  store i16 %add, i16 addrspace(1)* undef
  ret void
}
Matt Arsenault | 74fd760 | 2018-05-09 20:52:54 +0000 | [diff] [blame^] | 103 | |
; Variable shift amount, provably <= 15 via the mask: the highest kept bit is
; amt+15 <= 30, inside the low dword, so the combine narrows to a 32-bit
; shift even with a non-constant amount.
; GCN-LABEL: {{^}}trunc_srl_i64_var_mask15_to_i16:
; GCN: s_waitcnt
; GCN-NEXT: v_and_b32_e32 v1, 15, v2
; GCN-NEXT: v_lshrrev_b32_e32 v0, v1, v0
; GCN-NEXT: s_setpc_b64
define i16 @trunc_srl_i64_var_mask15_to_i16(i64 %x, i64 %amt) {
  %amt.masked = and i64 %amt, 15
  %shift = lshr i64 %x, %amt.masked
  %trunc = trunc i64 %shift to i16
  ret i16 %trunc
}
| 115 | |
; Masking with 16 restricts the amount to {0, 16}; the current combine does
; not narrow this case and the 64-bit shift survives.
; NOTE(review): amt = 16 would keep bits [31:16], still entirely in the low
; dword, so this looks narrowable in principle — presumably the combine only
; reasons about the maximum amount via a contiguous low-bit mask; confirm
; against the DAG combine before treating this CHECK as optimal output.
; GCN-LABEL: {{^}}trunc_srl_i64_var_mask16_to_i16:
; GCN: s_waitcnt
; GCN-NEXT: v_and_b32_e32 v2, 16, v2
; GCN-NEXT: v_lshrrev_b64 v[0:1], v2, v[0:1]
; GCN-NEXT: s_setpc_b64
define i16 @trunc_srl_i64_var_mask16_to_i16(i64 %x, i64 %amt) {
  %amt.masked = and i64 %amt, 16
  %shift = lshr i64 %x, %amt.masked
  %trunc = trunc i64 %shift to i16
  ret i16 %trunc
}
| 127 | |
; Amount can be as large as 31: at amt = 31 the kept bits [46:31] straddle
; the dword boundary, so the 64-bit shift must be (and is) preserved.
; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_and_b32_e32 v2, 31, v2
; GCN-NEXT: v_lshrrev_b64 v[0:1], v2, v[0:1]
; GCN-NEXT: s_setpc_b64 s[30:31]
define i16 @trunc_srl_i64_var_mask31_to_i16(i64 %x, i64 %amt) {
  %amt.masked = and i64 %amt, 31
  %shift = lshr i64 %x, %amt.masked
  %trunc = trunc i64 %shift to i16
  ret i16 %trunc
}