blob: cf95f74afb84d887fd79ce108a3c4b76479307c3 [file] [log] [blame]
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; The bitcast of a constant vector should be pushed through to the vector's
; elements so the vectors can be broken down and the components shared
; between them can be CSEd.

; Two constant <8 x i32> vectors that differ only in their last element
; (8 vs. 9) are each bitcast to <8 x float> and written to %out with a
; volatile store. The checks expect four dwordx4 buffer stores and require
; that no v_mov_b32 appears between the second and third stores or between
; the third and fourth stores. The %vec argument is not used by the body.
; GCN-LABEL: {{^}}store_bitcast_constant_v8i32_to_v8f32:
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
; GCN-NOT: v_mov_b32
; GCN: buffer_store_dwordx4
; GCN-NOT: v_mov_b32
; GCN: buffer_store_dwordx4
define amdgpu_kernel void @store_bitcast_constant_v8i32_to_v8f32(<8 x float> addrspace(1)* %out, <8 x i32> %vec) {
  ; First constant: seven 7s followed by 8.
  %vec0.bc = bitcast <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8> to <8 x float>
  store volatile <8 x float> %vec0.bc, <8 x float> addrspace(1)* %out

  ; Second constant: identical except for the last element (9).
  %vec1.bc = bitcast <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 9> to <8 x float>
  store volatile <8 x float> %vec1.bc, <8 x float> addrspace(1)* %out
  ret void
}

; Two constant <4 x i64> vectors that differ only in their last element
; (8 vs. 9) are each bitcast to <8 x float> and written to %out with a
; volatile store. The checks expect four dwordx4 buffer stores and require
; that no v_mov_b32 appears between the second and third stores or between
; the third and fourth stores. The %vec argument is not used by the body.
; GCN-LABEL: {{^}}store_bitcast_constant_v4i64_to_v8f32:
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
; GCN-NOT: v_mov_b32
; GCN: buffer_store_dwordx4
; GCN-NOT: v_mov_b32
; GCN: buffer_store_dwordx4
define amdgpu_kernel void @store_bitcast_constant_v4i64_to_v8f32(<8 x float> addrspace(1)* %out, <4 x i64> %vec) {
  ; First constant: three 7s followed by 8.
  %vec0.bc = bitcast <4 x i64> <i64 7, i64 7, i64 7, i64 8> to <8 x float>
  store volatile <8 x float> %vec0.bc, <8 x float> addrspace(1)* %out

  ; Second constant: identical except for the last element (9).
  %vec1.bc = bitcast <4 x i64> <i64 7, i64 7, i64 7, i64 9> to <8 x float>
  store volatile <8 x float> %vec1.bc, <8 x float> addrspace(1)* %out
  ret void
}

; Two constant <4 x i64> vectors that differ only in their last element
; (8 vs. 9) are each bitcast to <4 x double> and written to %out with a
; volatile store. The checks expect four dwordx4 buffer stores and require
; that no v_mov_b32 appears between the second and third stores or between
; the third and fourth stores. The %vec argument is not used by the body.
; GCN-LABEL: {{^}}store_bitcast_constant_v4i64_to_v4f64:
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
; GCN-NOT: v_mov_b32
; GCN: buffer_store_dwordx4
; GCN-NOT: v_mov_b32
; GCN: buffer_store_dwordx4
define amdgpu_kernel void @store_bitcast_constant_v4i64_to_v4f64(<4 x double> addrspace(1)* %out, <4 x i64> %vec) {
  ; First constant: three 7s followed by 8.
  %vec0.bc = bitcast <4 x i64> <i64 7, i64 7, i64 7, i64 8> to <4 x double>
  store volatile <4 x double> %vec0.bc, <4 x double> addrspace(1)* %out

  ; Second constant: identical except for the last element (9).
  %vec1.bc = bitcast <4 x i64> <i64 7, i64 7, i64 7, i64 9> to <4 x double>
  store volatile <4 x double> %vec1.bc, <4 x double> addrspace(1)* %out
  ret void
}

; Two constant <16 x i16> vectors that differ only in their last element
; (8 vs. 9) are each bitcast to <8 x float> and written to %out with a
; volatile store. The checks expect four dwordx4 buffer stores and require
; that no v_mov_b32 appears between the second and third stores or between
; the third and fourth stores. The %vec argument is not used by the body.
; NOTE(review): despite the function name, the bitcast source type here is
; <16 x i16> and the destination type is <8 x float>; the name is kept
; because the label check below depends on it.
; GCN-LABEL: {{^}}store_bitcast_constant_v8i32_to_v16i16:
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
; GCN-NOT: v_mov_b32
; GCN: buffer_store_dwordx4
; GCN-NOT: v_mov_b32
; GCN: buffer_store_dwordx4
define amdgpu_kernel void @store_bitcast_constant_v8i32_to_v16i16(<8 x float> addrspace(1)* %out, <16 x i16> %vec) {
  ; First constant: fifteen 7s followed by 8.
  %vec0.bc = bitcast <16 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 8> to <8 x float>
  store volatile <8 x float> %vec0.bc, <8 x float> addrspace(1)* %out

  ; Second constant: identical except for the last element (9).
  %vec1.bc = bitcast <16 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 9> to <8 x float>
  store volatile <8 x float> %vec1.bc, <8 x float> addrspace(1)* %out
  ret void
}

; The i64 result of llvm.amdgcn.icmp.i64, called with the kernel argument %c
; (not a constant) as its third operand, is bitcast to <2 x i32> and written
; to %out with a volatile store. The check requires that no instruction
; whose text contains "store_dword" is emitted for this function.
; NOTE(review): the value name %undef and the test name suggest the call
; result is expected to be lowered to undef - confirm against the backend.
; GCN-LABEL: {{^}}store_value_lowered_to_undef_bitcast_source:
; GCN-NOT: store_dword
define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source(<2 x i32> addrspace(1)* %out, i64 %a, i64 %b, i32 %c) #0 {
  %undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 %c) #1
  %bc = bitcast i64 %undef to <2 x i32>
  store volatile <2 x i32> %bc, <2 x i32> addrspace(1)* %out
  ret void
}

; The i64 result of llvm.amdgcn.icmp.i64, called with the kernel argument %c
; (not a constant) as its third operand, is bitcast to <2 x i32>; element 1
; of that vector is then extracted and written to %out with a volatile
; store. The check requires that no instruction whose text contains
; "store_dword" is emitted for this function.
; NOTE(review): the value name %undef and the test name suggest the call
; result is expected to be lowered to undef - confirm against the backend.
; GCN-LABEL: {{^}}store_value_lowered_to_undef_bitcast_source_extractelt:
; GCN-NOT: store_dword
define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source_extractelt(i32 addrspace(1)* %out, i64 %a, i64 %b, i32 %c) #0 {
  %undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 %c) #1
  %bc = bitcast i64 %undef to <2 x i32>
  %elt1 = extractelement <2 x i32> %bc, i32 1
  store volatile i32 %elt1, i32 addrspace(1)* %out
  ret void
}

; Intrinsic called by the two store_value_lowered_to_undef_* kernels in this
; file: takes two i64 operands and one i32 operand and returns an i64.
declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32) #1

; #0 is attached to the two kernels that call the intrinsic; #1 is attached
; to the intrinsic declaration and to each of its call sites.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone convergent }