; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX906
| 2 | |
; Intrinsic exercised by the kernels in this file. Operands, in order:
; a (i32), b (i32), c (i32), clamp (i1).
declare i32 @llvm.amdgcn.sdot4(i32, i32, i32, i1)
Matt Arsenault | 0084adc | 2018-04-30 19:08:16 +0000 | [diff] [blame] | 4 | |
; Clamp operand set to true: the matched dot4 instruction must end with the
; "clamp" modifier.
; GCN-LABEL: {{^}}test_llvm_amdgcn_sdot4_clamp
; GFX906: v_dot4_i32_i8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
define amdgpu_kernel void @test_llvm_amdgcn_sdot4_clamp(
    i32 addrspace(1)* %r,
    <4 x i8> addrspace(1)* %a,
    <4 x i8> addrspace(1)* %b,
    i32 addrspace(1)* %c) {
bb:
  ; Load both <4 x i8> sources and reinterpret each one as a single i32.
  %lhs.vec = load <4 x i8>, <4 x i8> addrspace(1)* %a
  %rhs.vec = load <4 x i8>, <4 x i8> addrspace(1)* %b
  %lhs.packed = bitcast <4 x i8> %lhs.vec to i32
  %rhs.packed = bitcast <4 x i8> %rhs.vec to i32
  ; Third intrinsic operand is read from %c; the result is written to %r.
  %acc = load i32, i32 addrspace(1)* %c
  %dot = call i32 @llvm.amdgcn.sdot4(i32 %lhs.packed, i32 %rhs.packed, i32 %acc, i1 true)
  store i32 %dot, i32 addrspace(1)* %r
  ret void
}
| 22 | |
; Clamp operand set to false: the matched dot4 instruction must end right
; after its last register operand, with no trailing modifier.
; GCN-LABEL: {{^}}test_llvm_amdgcn_sdot4_no_clamp
; GFX906: v_dot4_i32_i8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
define amdgpu_kernel void @test_llvm_amdgcn_sdot4_no_clamp(
    i32 addrspace(1)* %r,
    <4 x i8> addrspace(1)* %a,
    <4 x i8> addrspace(1)* %b,
    i32 addrspace(1)* %c) {
bb:
  ; Load both <4 x i8> sources and reinterpret each one as a single i32.
  %lhs.vec = load <4 x i8>, <4 x i8> addrspace(1)* %a
  %rhs.vec = load <4 x i8>, <4 x i8> addrspace(1)* %b
  %lhs.packed = bitcast <4 x i8> %lhs.vec to i32
  %rhs.packed = bitcast <4 x i8> %rhs.vec to i32
  ; Third intrinsic operand is read from %c; the result is written to %r.
  %acc = load i32, i32 addrspace(1)* %c
  %dot = call i32 @llvm.amdgcn.sdot4(i32 %lhs.packed, i32 %rhs.packed, i32 %acc, i1 false)
  store i32 %dot, i32 addrspace(1)* %r
  ret void
}