Tom Stellard | 115a615 | 2016-11-10 16:02:37 +0000 | [diff] [blame] | 1 | ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI %s |
Matt Arsenault | 7aad8fd | 2017-01-24 22:02:15 +0000 | [diff] [blame] | 2 | ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s |
Tom Stellard | 046039e | 2013-06-03 17:40:03 +0000 | [diff] [blame] | 3 | |
Matt Arsenault | 7e8de01 | 2016-04-22 22:59:16 +0000 | [diff] [blame] | 4 | ; GCN-LABEL: {{^}}s_sext_i1_to_i32: |
| 5 | ; GCN: v_cndmask_b32_e64 |
| 6 | ; GCN: s_endpgm |
; Uniform (SGPR) operands: compare two kernel-argument i32s and sign-extend
; the i1 result to i32 (true -> -1, false -> 0) before storing to global memory.
define amdgpu_kernel void @s_sext_i1_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %cmp = icmp eq i32 %a, %b
  %sext = sext i1 %cmp to i32          ; expected to select as v_cndmask_b32_e64 0/-1
  store i32 %sext, i32 addrspace(1)* %out, align 4
  ret void
}
Tom Stellard | 046039e | 2013-06-03 17:40:03 +0000 | [diff] [blame] | 13 | |
Matt Arsenault | 7e8de01 | 2016-04-22 22:59:16 +0000 | [diff] [blame] | 14 | ; GCN-LABEL: {{^}}test_s_sext_i32_to_i64: |
| 15 | ; GCN: s_ashr_i32 |
; GCN: s_endpgm
; Sign-extend a computed i32 (a*b + c) to i64; the CHECK lines expect the
; high 32 bits to be produced with a scalar shift (s_ashr_i32).
define amdgpu_kernel void @test_s_sext_i32_to_i64(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) nounwind {
entry:
  %mul = mul i32 %a, %b
  %add = add i32 %mul, %c
  %sext = sext i32 %add to i64
  store i64 %sext, i64 addrspace(1)* %out, align 8
  ret void
}
| 25 | |
Matt Arsenault | 7e8de01 | 2016-04-22 22:59:16 +0000 | [diff] [blame] | 26 | ; GCN-LABEL: {{^}}s_sext_i1_to_i64: |
| 27 | ; GCN: v_cndmask_b32_e64 v[[LOREG:[0-9]+]], 0, -1, vcc |
| 28 | ; GCN: v_mov_b32_e32 v[[HIREG:[0-9]+]], v[[LOREG]] |
| 29 | ; GCN: buffer_store_dwordx2 v{{\[}}[[LOREG]]:[[HIREG]]{{\]}} |
| 30 | ; GCN: s_endpgm |
; i1 -> i64 sign extension: both 32-bit halves of the result are identical
; (all zeros or all ones), so per the CHECKs the low half comes from a
; v_cndmask and the high half is just a copy of it.
define amdgpu_kernel void @s_sext_i1_to_i64(i64 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %cmp = icmp eq i32 %a, %b
  %sext = sext i1 %cmp to i64
  store i64 %sext, i64 addrspace(1)* %out, align 8
  ret void
}
| 37 | |
Matt Arsenault | 7e8de01 | 2016-04-22 22:59:16 +0000 | [diff] [blame] | 38 | ; GCN-LABEL: {{^}}s_sext_i32_to_i64: |
| 39 | ; GCN: s_ashr_i32 |
| 40 | ; GCN: s_endpgm |
; Plain i32 -> i64 sign extension of a uniform kernel argument; the high
; half is expected from a scalar arithmetic shift (s_ashr_i32).
define amdgpu_kernel void @s_sext_i32_to_i64(i64 addrspace(1)* %out, i32 %a) nounwind {
  %sext = sext i32 %a to i64
  store i64 %sext, i64 addrspace(1)* %out, align 8
  ret void
}
| 46 | |
Matt Arsenault | 7e8de01 | 2016-04-22 22:59:16 +0000 | [diff] [blame] | 47 | ; GCN-LABEL: {{^}}v_sext_i32_to_i64: |
| 48 | ; GCN: v_ashr |
| 49 | ; GCN: s_endpgm |
; Same i32 -> i64 extension but the source is loaded from global memory,
; so per the CHECK a vector shift (v_ashr) is expected instead of a scalar one.
define amdgpu_kernel void @v_sext_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %val = load i32, i32 addrspace(1)* %in, align 4
  %sext = sext i32 %val to i64
  store i64 %sext, i64 addrspace(1)* %out, align 8
  ret void
}
| 56 | |
Matt Arsenault | 7e8de01 | 2016-04-22 22:59:16 +0000 | [diff] [blame] | 57 | ; GCN-LABEL: {{^}}s_sext_i16_to_i64: |
Tom Stellard | 115a615 | 2016-11-10 16:02:37 +0000 | [diff] [blame] | 58 | ; GCN: s_bfe_i64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x100000 |
; i16 -> i64 sign extension of a uniform argument; the CHECK expects a single
; s_bfe_i64 (offset 0, width 16 encoded as 0x100000) rather than two shifts.
define amdgpu_kernel void @s_sext_i16_to_i64(i64 addrspace(1)* %out, i16 %a) nounwind {
  %sext = sext i16 %a to i64
  store i64 %sext, i64 addrspace(1)* %out, align 8
  ret void
}
Matt Arsenault | 7e8de01 | 2016-04-22 22:59:16 +0000 | [diff] [blame] | 64 | |
Tom Stellard | 115a615 | 2016-11-10 16:02:37 +0000 | [diff] [blame] | 65 | ; GCN-LABEL: {{^}}s_sext_i1_to_i16: |
| 66 | ; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1 |
| 67 | ; GCN-NEXT: buffer_store_short [[RESULT]] |
; i1 -> i16 sign extension; here the sign_extend is turned into a select
; (v_cndmask 0/-1) early, before selection — contrast with the
; *_with_and variants below which keep the sign_extend node alive longer.
define amdgpu_kernel void @s_sext_i1_to_i16(i16 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %cmp = icmp eq i32 %a, %b
  %sext = sext i1 %cmp to i16
  store i16 %sext, i16 addrspace(1)* %out
  ret void
}
| 74 | |
; The purpose of this test is to make sure the i16 = sign_extend i1 node
; makes it all the way through the legalizer/optimizer to make sure
; we select this correctly. In the s_sext_i1_to_i16, the sign_extend node
; is optimized to a select very early.
| 79 | ; GCN-LABEL: {{^}}s_sext_i1_to_i16_with_and: |
| 80 | ; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1 |
| 81 | ; GCN-NEXT: buffer_store_short [[RESULT]] |
; The `and` of two compares keeps an i16 = sign_extend i1 node alive through
; legalization (see the comment above); all inputs are uniform kernel arguments.
define amdgpu_kernel void @s_sext_i1_to_i16_with_and(i16 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
  %cmp0 = icmp eq i32 %a, %b
  %cmp1 = icmp eq i32 %c, %d
  %cmp = and i1 %cmp0, %cmp1
  %sext = sext i1 %cmp to i16
  store i16 %sext, i16 addrspace(1)* %out
  ret void
}
| 90 | |
| 91 | ; GCN-LABEL: {{^}}v_sext_i1_to_i16_with_and: |
| 92 | ; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1 |
| 93 | ; GCN-NEXT: buffer_store_short [[RESULT]] |
; Divergent variant: one compare operand is the workitem id, making the i1
; (and thus the sign_extend) non-uniform; still expected to select as a
; single v_cndmask 0/-1 feeding the short store.
define amdgpu_kernel void @v_sext_i1_to_i16_with_and(i16 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) nounwind {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %cmp0 = icmp eq i32 %a, %tid
  %cmp1 = icmp eq i32 %b, %c
  %cmp = and i1 %cmp0, %cmp1
  %sext = sext i1 %cmp to i16
  store i16 %sext, i16 addrspace(1)* %out
  ret void
}
| 103 | |
Matt Arsenault | 7e8de01 | 2016-04-22 22:59:16 +0000 | [diff] [blame] | 104 | ; GCN-LABEL: {{^}}s_sext_v4i8_to_v4i32: |
| 105 | ; GCN: s_load_dword [[VAL:s[0-9]+]] |
Matt Arsenault | 7e8de01 | 2016-04-22 22:59:16 +0000 | [diff] [blame] | 106 | ; GCN-DAG: s_bfe_i32 [[EXT2:s[0-9]+]], [[VAL]], 0x80010 |
| 107 | ; GCN-DAG: s_ashr_i32 [[EXT3:s[0-9]+]], [[VAL]], 24 |
Tom Stellard | 115a615 | 2016-11-10 16:02:37 +0000 | [diff] [blame] | 108 | ; SI-DAG: s_bfe_i32 [[EXT1:s[0-9]+]], [[VAL]], 0x80008 |
| 109 | ; GCN-DAG: s_sext_i32_i8 [[EXT0:s[0-9]+]], [[VAL]] |
| 110 | |
; FIXME: We end up with a v_bfe instruction, because the i16 srl
; gets selected to a v_lshrrev_b16 instruction, so the input to
; the bfe is a vector register. To fix this we need to be able to
; optimize:
| 115 | ; t29: i16 = truncate t10 |
| 116 | ; t55: i16 = srl t29, Constant:i32<8> |
| 117 | ; t63: i32 = any_extend t55 |
| 118 | ; t64: i32 = sign_extend_inreg t63, ValueType:ch:i8 |
| 119 | |
| 120 | ; VI-DAG: v_bfe_i32 [[VEXT1:v[0-9]+]], v{{[0-9]+}}, 0, 8 |
Matt Arsenault | 7e8de01 | 2016-04-22 22:59:16 +0000 | [diff] [blame] | 121 | |
| 122 | ; GCN-DAG: v_mov_b32_e32 [[VEXT0:v[0-9]+]], [[EXT0]] |
Tom Stellard | 115a615 | 2016-11-10 16:02:37 +0000 | [diff] [blame] | 123 | ; SI-DAG: v_mov_b32_e32 [[VEXT1:v[0-9]+]], [[EXT1]] |
Matt Arsenault | 7e8de01 | 2016-04-22 22:59:16 +0000 | [diff] [blame] | 124 | ; GCN-DAG: v_mov_b32_e32 [[VEXT2:v[0-9]+]], [[EXT2]] |
| 125 | ; GCN-DAG: v_mov_b32_e32 [[VEXT3:v[0-9]+]], [[EXT3]] |
| 126 | |
| 127 | ; GCN-DAG: buffer_store_dword [[VEXT0]] |
| 128 | ; GCN-DAG: buffer_store_dword [[VEXT1]] |
| 129 | ; GCN-DAG: buffer_store_dword [[VEXT2]] |
| 130 | ; GCN-DAG: buffer_store_dword [[VEXT3]] |
| 131 | |
| 132 | ; GCN: s_endpgm |
; Unpack a uniform i32 into four sign-extended bytes (bitcast to <4 x i8>,
; then sext each lane to i32). The volatile stores to the same address keep
; all four lanes live and ordered so each extraction is individually checked.
define amdgpu_kernel void @s_sext_v4i8_to_v4i32(i32 addrspace(1)* %out, i32 %a) nounwind {
  %cast = bitcast i32 %a to <4 x i8>
  %ext = sext <4 x i8> %cast to <4 x i32>
  %elt0 = extractelement <4 x i32> %ext, i32 0
  %elt1 = extractelement <4 x i32> %ext, i32 1
  %elt2 = extractelement <4 x i32> %ext, i32 2
  %elt3 = extractelement <4 x i32> %ext, i32 3
  store volatile i32 %elt0, i32 addrspace(1)* %out
  store volatile i32 %elt1, i32 addrspace(1)* %out
  store volatile i32 %elt2, i32 addrspace(1)* %out
  store volatile i32 %elt3, i32 addrspace(1)* %out
  ret void
}
| 146 | |
| 147 | ; GCN-LABEL: {{^}}v_sext_v4i8_to_v4i32: |
| 148 | ; GCN: buffer_load_dword [[VAL:v[0-9]+]] |
Tom Stellard | 115a615 | 2016-11-10 16:02:37 +0000 | [diff] [blame] | 149 | ; FIXME: need to optimize same sequence as above test to avoid |
| 150 | ; this shift. |
| 151 | ; VI-DAG: v_lshrrev_b16_e32 [[SH16:v[0-9]+]], 8, [[VAL]] |
Matt Arsenault | 7e8de01 | 2016-04-22 22:59:16 +0000 | [diff] [blame] | 152 | ; GCN-DAG: v_ashrrev_i32_e32 [[EXT3:v[0-9]+]], 24, [[VAL]] |
Tom Stellard | 115a615 | 2016-11-10 16:02:37 +0000 | [diff] [blame] | 153 | ; VI-DAG: v_bfe_i32 [[EXT0:v[0-9]+]], [[VAL]], 0, 8 |
| 154 | ; VI-DAG: v_bfe_i32 [[EXT2:v[0-9]+]], [[VAL]], 16, 8 |
| 155 | ; VI-DAG: v_bfe_i32 [[EXT1:v[0-9]+]], [[SH16]], 0, 8 |
| 156 | |
| 157 | ; SI-DAG: v_bfe_i32 [[EXT2:v[0-9]+]], [[VAL]], 16, 8 |
| 158 | ; SI-DAG: v_bfe_i32 [[EXT1:v[0-9]+]], [[VAL]], 8, 8 |
| 159 | ; SI: v_bfe_i32 [[EXT0:v[0-9]+]], [[VAL]], 0, 8 |
Matt Arsenault | 7e8de01 | 2016-04-22 22:59:16 +0000 | [diff] [blame] | 160 | |
| 161 | ; GCN: buffer_store_dword [[EXT0]] |
| 162 | ; GCN: buffer_store_dword [[EXT1]] |
| 163 | ; GCN: buffer_store_dword [[EXT2]] |
| 164 | ; GCN: buffer_store_dword [[EXT3]] |
; Divergent version of the byte-unpack test: the i32 is loaded from global
; memory, so the lane extractions select to VALU bfe/ashr forms (with the
; VI-only v_lshrrev_b16 quirk noted in the FIXME above).
define amdgpu_kernel void @v_sext_v4i8_to_v4i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %a = load i32, i32 addrspace(1)* %in
  %cast = bitcast i32 %a to <4 x i8>
  %ext = sext <4 x i8> %cast to <4 x i32>
  %elt0 = extractelement <4 x i32> %ext, i32 0
  %elt1 = extractelement <4 x i32> %ext, i32 1
  %elt2 = extractelement <4 x i32> %ext, i32 2
  %elt3 = extractelement <4 x i32> %ext, i32 3
  store volatile i32 %elt0, i32 addrspace(1)* %out
  store volatile i32 %elt1, i32 addrspace(1)* %out
  store volatile i32 %elt2, i32 addrspace(1)* %out
  store volatile i32 %elt3, i32 addrspace(1)* %out
  ret void
}
| 179 | |
| 180 | ; FIXME: s_bfe_i64 |
| 181 | ; GCN-LABEL: {{^}}s_sext_v4i16_to_v4i32: |
| 182 | ; GCN-DAG: s_ashr_i64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 48 |
| 183 | ; GCN-DAG: s_ashr_i32 s{{[0-9]+}}, s{{[0-9]+}}, 16 |
| 184 | ; GCN-DAG: s_sext_i32_i16 |
| 185 | ; GCN-DAG: s_sext_i32_i16 |
| 186 | ; GCN: s_endpgm |
; Unpack a uniform i64 into four sign-extended i16 halves (bitcast to
; <4 x i16>, sext each lane to i32). Volatile stores keep all four lanes
; live; the FIXME above notes an s_bfe_i64 would be preferable for one lane.
define amdgpu_kernel void @s_sext_v4i16_to_v4i32(i32 addrspace(1)* %out, i64 %a) nounwind {
  %cast = bitcast i64 %a to <4 x i16>
  %ext = sext <4 x i16> %cast to <4 x i32>
  %elt0 = extractelement <4 x i32> %ext, i32 0
  %elt1 = extractelement <4 x i32> %ext, i32 1
  %elt2 = extractelement <4 x i32> %ext, i32 2
  %elt3 = extractelement <4 x i32> %ext, i32 3
  store volatile i32 %elt0, i32 addrspace(1)* %out
  store volatile i32 %elt1, i32 addrspace(1)* %out
  store volatile i32 %elt2, i32 addrspace(1)* %out
  store volatile i32 %elt3, i32 addrspace(1)* %out
  ret void
}
| 200 | |
| 201 | ; GCN-LABEL: {{^}}v_sext_v4i16_to_v4i32: |
| 202 | ; SI-DAG: v_ashr_i64 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, 48 |
| 203 | ; VI-DAG: v_ashrrev_i64 v{{\[[0-9]+:[0-9]+\]}}, 48, v{{\[[0-9]+:[0-9]+\]}} |
| 204 | ; GCN-DAG: v_ashrrev_i32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}} |
| 205 | ; GCN-DAG: v_ashrrev_i32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}} |
| 206 | ; GCN-DAG: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 16 |
| 207 | ; GCN-DAG: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 16 |
| 208 | ; GCN: s_endpgm |
; Divergent version of the i16-lane unpack: the i64 is loaded from global
; memory, so vector shifts/bfes are expected; note the SI vs VI difference
; in the 64-bit shift mnemonic (v_ashr_i64 vs v_ashrrev_i64) in the CHECKs.
define amdgpu_kernel void @v_sext_v4i16_to_v4i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
  %a = load i64, i64 addrspace(1)* %in
  %cast = bitcast i64 %a to <4 x i16>
  %ext = sext <4 x i16> %cast to <4 x i32>
  %elt0 = extractelement <4 x i32> %ext, i32 0
  %elt1 = extractelement <4 x i32> %ext, i32 1
  %elt2 = extractelement <4 x i32> %ext, i32 2
  %elt3 = extractelement <4 x i32> %ext, i32 3
  store volatile i32 %elt0, i32 addrspace(1)* %out
  store volatile i32 %elt1, i32 addrspace(1)* %out
  store volatile i32 %elt2, i32 addrspace(1)* %out
  store volatile i32 %elt3, i32 addrspace(1)* %out
  ret void
}
Tom Stellard | d23de36 | 2016-11-15 21:25:56 +0000 | [diff] [blame] | 223 | |
| 224 | declare i32 @llvm.amdgcn.workitem.id.x() #1 |
| 225 | |
| 226 | attributes #1 = { nounwind readnone } |