; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=+half-rate-64-ops < %s | FileCheck -check-prefix=FASTF64 -check-prefix=ALL %s
; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefix=SLOWF64 -check-prefix=ALL %s

; Check that a scalar f32 fsub costs a single full-rate instruction on
; both fast- and slow-f64 subtargets.
; ALL: 'fsub_f32'
; ALL: estimated cost of 1 for {{.*}} fsub float
define amdgpu_kernel void @fsub_f32(float addrspace(1)* %out, float addrspace(1)* %vaddr, float %b) #0 {
  %vec = load float, float addrspace(1)* %vaddr
  ; %sub renamed from %add: the value is the fsub result. CHECK lines only
  ; match the instruction text after a {{.*}} wildcard, so the name is free.
  %sub = fsub float %vec, %b
  store float %sub, float addrspace(1)* %out
  ret void
}
| 12 | |
; A <2 x float> fsub is scalarized: cost = 2 x the scalar f32 cost.
; ALL: 'fsub_v2f32'
; ALL: estimated cost of 2 for {{.*}} fsub <2 x float>
define amdgpu_kernel void @fsub_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %vaddr, <2 x float> %b) #0 {
  %vec = load <2 x float>, <2 x float> addrspace(1)* %vaddr
  ; %sub renamed from %add: the value is an fsub result, not an add.
  %sub = fsub <2 x float> %vec, %b
  store <2 x float> %sub, <2 x float> addrspace(1)* %out
  ret void
}
| 21 | |
; A <3 x float> fsub is scalarized: cost = 3 x the scalar f32 cost.
; ALL: 'fsub_v3f32'
; ALL: estimated cost of 3 for {{.*}} fsub <3 x float>
define amdgpu_kernel void @fsub_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %vaddr, <3 x float> %b) #0 {
  %vec = load <3 x float>, <3 x float> addrspace(1)* %vaddr
  ; %sub renamed from %add: the value is an fsub result, not an add.
  %sub = fsub <3 x float> %vec, %b
  store <3 x float> %sub, <3 x float> addrspace(1)* %out
  ret void
}
| 30 | |
; Scalar f64 fsub cost depends on the subtarget's f64 rate:
; half-rate-64-ops (FASTF64) -> 2, quarter rate (SLOWF64) -> 3.
; ALL: 'fsub_f64'
; FASTF64: estimated cost of 2 for {{.*}} fsub double
; SLOWF64: estimated cost of 3 for {{.*}} fsub double
define amdgpu_kernel void @fsub_f64(double addrspace(1)* %out, double addrspace(1)* %vaddr, double %b) #0 {
  %vec = load double, double addrspace(1)* %vaddr
  ; %sub renamed from %add: the value is an fsub result, not an add.
  %sub = fsub double %vec, %b
  store double %sub, double addrspace(1)* %out
  ret void
}
| 40 | |
; <2 x double> fsub: 2 x the scalar f64 cost on each subtarget.
; ALL: 'fsub_v2f64'
; FASTF64: estimated cost of 4 for {{.*}} fsub <2 x double>
; SLOWF64: estimated cost of 6 for {{.*}} fsub <2 x double>
define amdgpu_kernel void @fsub_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %vaddr, <2 x double> %b) #0 {
  %vec = load <2 x double>, <2 x double> addrspace(1)* %vaddr
  ; %sub renamed from %add: the value is an fsub result, not an add.
  %sub = fsub <2 x double> %vec, %b
  store <2 x double> %sub, <2 x double> addrspace(1)* %out
  ret void
}
| 50 | |
; <3 x double> fsub: 3 x the scalar f64 cost on each subtarget.
; ALL: 'fsub_v3f64'
; FASTF64: estimated cost of 6 for {{.*}} fsub <3 x double>
; SLOWF64: estimated cost of 9 for {{.*}} fsub <3 x double>
define amdgpu_kernel void @fsub_v3f64(<3 x double> addrspace(1)* %out, <3 x double> addrspace(1)* %vaddr, <3 x double> %b) #0 {
  %vec = load <3 x double>, <3 x double> addrspace(1)* %vaddr
  ; %sub renamed from %add: the value is an fsub result, not an add.
  %sub = fsub <3 x double> %vec, %b
  store <3 x double> %sub, <3 x double> addrspace(1)* %out
  ret void
}
| 60 | |
; Scalar f16 fsub costs a single instruction on all subtargets tested.
; ALL: 'fsub_f16'
; ALL: estimated cost of 1 for {{.*}} fsub half
define amdgpu_kernel void @fsub_f16(half addrspace(1)* %out, half addrspace(1)* %vaddr, half %b) #0 {
  %vec = load half, half addrspace(1)* %vaddr
  ; %sub renamed from %add: the value is an fsub result, not an add.
  %sub = fsub half %vec, %b
  store half %sub, half addrspace(1)* %out
  ret void
}
| 69 | |
; <2 x half> fsub: 2 x the scalar f16 cost.
; ALL: 'fsub_v2f16'
; ALL: estimated cost of 2 for {{.*}} fsub <2 x half>
define amdgpu_kernel void @fsub_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %vaddr, <2 x half> %b) #0 {
  %vec = load <2 x half>, <2 x half> addrspace(1)* %vaddr
  ; %sub renamed from %add: the value is an fsub result, not an add.
  %sub = fsub <2 x half> %vec, %b
  store <2 x half> %sub, <2 x half> addrspace(1)* %out
  ret void
}
| 78 | |
; <4 x half> fsub: 4 x the scalar f16 cost.
; ALL: 'fsub_v4f16'
; ALL: estimated cost of 4 for {{.*}} fsub <4 x half>
define amdgpu_kernel void @fsub_v4f16(<4 x half> addrspace(1)* %out, <4 x half> addrspace(1)* %vaddr, <4 x half> %b) #0 {
  %vec = load <4 x half>, <4 x half> addrspace(1)* %vaddr
  ; %sub renamed from %add: the value is an fsub result, not an add.
  %sub = fsub <4 x half> %vec, %b
  store <4 x half> %sub, <4 x half> addrspace(1)* %out
  ret void
}