; Run llc twice, once with only -march=amdgcn and once with -mcpu=tonga and
; -mattr=-flat-for-global; both outputs are checked with the GCN prefix.
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; The bitcast should be pushed through the constant vector so the vectors can
; be broken down and the shared components can be CSEd

; Stores two <8 x float> constants, each produced by bitcasting an <8 x i32>
; constant; the two differ only in the last element (8 vs. 9). Four dwordx4
; stores are expected, and no v_mov_b32 may appear between the second and
; third or between the third and fourth of them. The %vec argument is unused.
; GCN-LABEL: {{^}}store_bitcast_constant_v8i32_to_v8f32:
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
; GCN-NOT: v_mov_b32
; GCN: buffer_store_dwordx4
; GCN-NOT: v_mov_b32
; GCN: buffer_store_dwordx4
define amdgpu_kernel void @store_bitcast_constant_v8i32_to_v8f32(<8 x float> addrspace(1)* %out, <8 x i32> %vec) {
  %vec0.bc = bitcast <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8> to <8 x float>
  store volatile <8 x float> %vec0.bc, <8 x float> addrspace(1)* %out

  ; Same constant as above except for the final element.
  %vec1.bc = bitcast <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 9> to <8 x float>
  store volatile <8 x float> %vec1.bc, <8 x float> addrspace(1)* %out
  ret void
}

; Stores two <8 x float> values, each produced by bitcasting a <4 x i64>
; constant; the two constants differ only in the last element (8 vs. 9).
; Four dwordx4 stores are expected, and no v_mov_b32 may appear between the
; second and third or between the third and fourth of them. The %vec argument
; is unused.
; GCN-LABEL: {{^}}store_bitcast_constant_v4i64_to_v8f32:
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
; GCN-NOT: v_mov_b32
; GCN: buffer_store_dwordx4
; GCN-NOT: v_mov_b32
; GCN: buffer_store_dwordx4
define amdgpu_kernel void @store_bitcast_constant_v4i64_to_v8f32(<8 x float> addrspace(1)* %out, <4 x i64> %vec) {
  %vec0.bc = bitcast <4 x i64> <i64 7, i64 7, i64 7, i64 8> to <8 x float>
  store volatile <8 x float> %vec0.bc, <8 x float> addrspace(1)* %out

  ; Same constant as above except for the final element.
  %vec1.bc = bitcast <4 x i64> <i64 7, i64 7, i64 7, i64 9> to <8 x float>
  store volatile <8 x float> %vec1.bc, <8 x float> addrspace(1)* %out
  ret void
}

; Stores two <4 x double> values, each produced by bitcasting a <4 x i64>
; constant; the two constants differ only in the last element (8 vs. 9).
; Four dwordx4 stores are expected, and no v_mov_b32 may appear between the
; second and third or between the third and fourth of them. The %vec argument
; is unused.
; GCN-LABEL: {{^}}store_bitcast_constant_v4i64_to_v4f64:
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
; GCN-NOT: v_mov_b32
; GCN: buffer_store_dwordx4
; GCN-NOT: v_mov_b32
; GCN: buffer_store_dwordx4
define amdgpu_kernel void @store_bitcast_constant_v4i64_to_v4f64(<4 x double> addrspace(1)* %out, <4 x i64> %vec) {
  %vec0.bc = bitcast <4 x i64> <i64 7, i64 7, i64 7, i64 8> to <4 x double>
  store volatile <4 x double> %vec0.bc, <4 x double> addrspace(1)* %out

  ; Same constant as above except for the final element.
  %vec1.bc = bitcast <4 x i64> <i64 7, i64 7, i64 7, i64 9> to <4 x double>
  store volatile <4 x double> %vec1.bc, <4 x double> addrspace(1)* %out
  ret void
}

; Stores two <8 x float> values, each produced by bitcasting a <16 x i16>
; constant; the two constants differ only in the last element (8 vs. 9).
; Four dwordx4 stores are expected, and no v_mov_b32 may appear between the
; second and third or between the third and fourth of them. The %vec argument
; is unused.
; NOTE(review): the function name says v8i32_to_v16i16, but the body casts
; <16 x i16> to <8 x float>. The name is left unchanged so the label check
; and any external references keep matching.
; GCN-LABEL: {{^}}store_bitcast_constant_v8i32_to_v16i16:
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
; GCN-NOT: v_mov_b32
; GCN: buffer_store_dwordx4
; GCN-NOT: v_mov_b32
; GCN: buffer_store_dwordx4
define amdgpu_kernel void @store_bitcast_constant_v8i32_to_v16i16(<8 x float> addrspace(1)* %out, <16 x i16> %vec) {
  %vec0.bc = bitcast <16 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 8> to <8 x float>
  store volatile <8 x float> %vec0.bc, <8 x float> addrspace(1)* %out

  ; Same constant as above except for the final element.
  %vec1.bc = bitcast <16 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 9> to <8 x float>
  store volatile <8 x float> %vec1.bc, <8 x float> addrspace(1)* %out
  ret void
}

; Bitcasts the i64 result of llvm.amdgcn.icmp.i64 to <2 x i32> and stores it.
; Per the test name, the call result is expected to be lowered to undef
; (presumably because the third operand is not a constant -- TODO confirm).
; The check requires that nothing matching store_dword is emitted for this
; kernel.
; GCN-LABEL: {{^}}store_value_lowered_to_undef_bitcast_source:
; GCN-NOT: store_dword
define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source(<2 x i32> addrspace(1)* %out, i64 %a, i64 %b, i32 %c) #0 {
  %undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 %c) #1
  %bc = bitcast i64 %undef to <2 x i32>
  store volatile <2 x i32> %bc, <2 x i32> addrspace(1)* %out
  ret void
}

; Bitcasts the i64 result of llvm.amdgcn.icmp.i64 to <2 x i32>, extracts
; element 1 and stores that i32. Per the test name, the call result is
; expected to be lowered to undef (presumably because the third operand is
; not a constant -- TODO confirm). The check requires that nothing matching
; store_dword is emitted for this kernel.
; GCN-LABEL: {{^}}store_value_lowered_to_undef_bitcast_source_extractelt:
; GCN-NOT: store_dword
define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source_extractelt(i32 addrspace(1)* %out, i64 %a, i64 %b, i32 %c) #0 {
  %undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 %c) #1
  %bc = bitcast i64 %undef to <2 x i32>
  %elt1 = extractelement <2 x i32> %bc, i32 1
  store volatile i32 %elt1, i32 addrspace(1)* %out
  ret void
}

; Intrinsic called by the two store_value_lowered_to_undef_* kernels.
declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32) #1

; #0 is attached to the two store_value_lowered_to_undef_* kernels; #1 is
; attached to the intrinsic declaration and to its call sites.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone convergent }