; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 %s | FileCheck -check-prefixes=GFX9,GCN %s
; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=fiji %s | FileCheck -check-prefixes=VI,GCN %s

; Single-source shuffle with an all-zero mask: both result lanes take
; element 0 of %vec. The gfx900 run expects a reported cost of 0.
; NOTE(review): nothing is checked for this function in the fiji run;
; confirm whether an expectation should be added there.
; GFX9: estimated cost of 0 for {{.*}} shufflevector <2 x i16> %vec, <2 x i16> undef, <2 x i32> zeroinitializer
define amdgpu_kernel void @shufflevector_00_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr) {
  %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr
  %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <2 x i32> zeroinitializer
  store <2 x i16> %shuf, <2 x i16> addrspace(1)* %out
  ret void
}

; Single-source shuffle with mask <0, 1>: the result lanes are elements
; 0 and 1 of %vec in their original order. The gfx900 run expects a
; reported cost of 0.
; NOTE(review): nothing is checked for this function in the fiji run;
; confirm whether an expectation should be added there.
; GFX9: estimated cost of 0 for {{.*}} shufflevector <2 x i16> %vec, <2 x i16> undef, <2 x i32> <i32 0, i32 1>
define amdgpu_kernel void @shufflevector_01_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr) {
  %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr
  %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <2 x i32> <i32 0, i32 1>
  store <2 x i16> %shuf, <2 x i16> addrspace(1)* %out
  ret void
}

; Single-source shuffle with mask <1, 0>: the two elements of %vec are
; exchanged. The gfx900 run expects a reported cost of 0.
; NOTE(review): nothing is checked for this function in the fiji run;
; confirm whether an expectation should be added there.
; GFX9: estimated cost of 0 for {{.*}} shufflevector <2 x i16> %vec, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
define amdgpu_kernel void @shufflevector_10_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr) {
  %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr
  %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
  store <2 x i16> %shuf, <2 x i16> addrspace(1)* %out
  ret void
}

; Single-source shuffle with mask <1, 1>: both result lanes take
; element 1 of %vec. The gfx900 run expects a reported cost of 0.
; NOTE(review): nothing is checked for this function in the fiji run;
; confirm whether an expectation should be added there.
; GFX9: estimated cost of 0 for {{.*}} shufflevector <2 x i16> %vec, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
define amdgpu_kernel void @shufflevector_11_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr) {
  %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr
  %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
  store <2 x i16> %shuf, <2 x i16> addrspace(1)* %out
  ret void
}

; Two-source shuffle with mask <0, 2>: lane 0 comes from element 0 of
; %vec0 and lane 1 from element 0 of %vec1 (mask indices 2 and 3 address
; the second operand). The expectation below uses the prefix enabled by
; both run lines, so a reported cost of 2 is required on gfx900 and fiji.
; GCN: estimated cost of 2 for {{.*}} shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 0, i32 2>
define amdgpu_kernel void @shufflevector_02_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr0, <2 x i16> addrspace(1)* %vaddr1) {
  %vec0 = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr0
  %vec1 = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr1
  %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 0, i32 2>
  store <2 x i16> %shuf, <2 x i16> addrspace(1)* %out
  ret void
}