; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s

; Floating-point DS intrinsics exercised by this test. Each takes a pointer in
; addrspace(3), a float operand, and three trailing operands (i32, i32, i1),
; and returns a float.
; NOTE(review): the trailing operands are presumably ordering, scope and a
; volatile flag -- confirm against the intrinsic definitions.
declare float @llvm.amdgcn.ds.fadd(float addrspace(3)* nocapture, float, i32, i32, i1)
declare float @llvm.amdgcn.ds.fmin(float addrspace(3)* nocapture, float, i32, i32, i1)
declare float @llvm.amdgcn.ds.fmax(float addrspace(3)* nocapture, float, i32, i32, i1)

; Selection of llvm.amdgcn.ds.fadd on addrspace(3) pointers.
;   * %a1 addresses (idx + 4) << 3 and its result is consumed by %a3, so the
;     returning instruction form is expected, with the constant folded into
;     offset:32 (4 << 3).
;   * %a2 addresses (idx + 4) << 4 and its result is never used, so the
;     non-returning form is expected, with offset:64 (4 << 4).
;   * %a3 operates on %ptrf and takes the first result as its value operand;
;     the checks expect s_waitcnt lgkmcnt(1) before that instruction.
; 0x42280000 is the IEEE-754 single-precision bit pattern of 42.0.
; The tonga run expects a write to m0; the gfx900 run expects m0 not to appear.
; GCN-LABEL: {{^}}lds_ds_fadd:
; VI-DAG: s_mov_b32 m0
; GFX9-NOT: m0
; GCN-DAG: v_mov_b32_e32 [[V0:v[0-9]+]], 0x42280000
; GCN: ds_add_rtn_f32 [[V2:v[0-9]+]], [[V1:v[0-9]+]], [[V0]] offset:32
; GCN: ds_add_f32 [[V3:v[0-9]+]], [[V0]] offset:64
; GCN: s_waitcnt lgkmcnt(1)
; GCN: ds_add_rtn_f32 {{v[0-9]+}}, {{v[0-9]+}}, [[V2]]
define amdgpu_kernel void @lds_ds_fadd(float addrspace(1)* %out, float addrspace(3)* %ptrf, i32 %idx) {
  ; Both addresses derive from idx + 4, so the constant part can be carried as
  ; an instruction offset after the shifts.
  %idx.add = add nuw i32 %idx, 4
  %shl0 = shl i32 %idx.add, 3
  %shl1 = shl i32 %idx.add, 4
  %ptr0 = inttoptr i32 %shl0 to float addrspace(3)*
  %ptr1 = inttoptr i32 %shl1 to float addrspace(3)*
  ; %a1 is used below; %a2 is intentionally left unused.
  %a1 = call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %ptr0, float 4.2e+1, i32 0, i32 0, i1 false)
  %a2 = call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %ptr1, float 4.2e+1, i32 0, i32 0, i1 false)
  ; The value operand is the result of the first call.
  %a3 = call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %ptrf, float %a1, i32 0, i32 0, i1 false)
  store float %a3, float addrspace(1)* %out
  ret void
}

; Selection of llvm.amdgcn.ds.fmin on addrspace(3) pointers.
;   * %a1 addresses (idx + 4) << 3 and its result is consumed by %a3, so the
;     returning instruction form is expected, with the constant folded into
;     offset:32 (4 << 3).
;   * %a2 addresses (idx + 4) << 4 and its result is never used, so the
;     non-returning form is expected, with offset:64 (4 << 4).
;   * %a3 operates on %ptrf and takes the first result as its value operand;
;     the checks expect s_waitcnt lgkmcnt(1) before that instruction.
; 0x42280000 is the IEEE-754 single-precision bit pattern of 42.0.
; The tonga run expects a write to m0; the gfx900 run expects m0 not to appear.
; GCN-LABEL: {{^}}lds_ds_fmin:
; VI-DAG: s_mov_b32 m0
; GFX9-NOT: m0
; GCN-DAG: v_mov_b32_e32 [[V0:v[0-9]+]], 0x42280000
; GCN: ds_min_rtn_f32 [[V2:v[0-9]+]], [[V1:v[0-9]+]], [[V0]] offset:32
; GCN: ds_min_f32 [[V3:v[0-9]+]], [[V0]] offset:64
; GCN: s_waitcnt lgkmcnt(1)
; GCN: ds_min_rtn_f32 {{v[0-9]+}}, {{v[0-9]+}}, [[V2]]
define amdgpu_kernel void @lds_ds_fmin(float addrspace(1)* %out, float addrspace(3)* %ptrf, i32 %idx) {
  ; Both addresses derive from idx + 4, so the constant part can be carried as
  ; an instruction offset after the shifts.
  %idx.add = add nuw i32 %idx, 4
  %shl0 = shl i32 %idx.add, 3
  %shl1 = shl i32 %idx.add, 4
  %ptr0 = inttoptr i32 %shl0 to float addrspace(3)*
  %ptr1 = inttoptr i32 %shl1 to float addrspace(3)*
  ; %a1 is used below; %a2 is intentionally left unused.
  %a1 = call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %ptr0, float 4.2e+1, i32 0, i32 0, i1 false)
  %a2 = call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %ptr1, float 4.2e+1, i32 0, i32 0, i1 false)
  ; The value operand is the result of the first call.
  %a3 = call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %ptrf, float %a1, i32 0, i32 0, i1 false)
  store float %a3, float addrspace(1)* %out
  ret void
}

; Selection of llvm.amdgcn.ds.fmax on addrspace(3) pointers.
;   * %a1 addresses (idx + 4) << 3 and its result is consumed by %a3, so the
;     returning instruction form is expected, with the constant folded into
;     offset:32 (4 << 3).
;   * %a2 addresses (idx + 4) << 4 and its result is never used, so the
;     non-returning form is expected, with offset:64 (4 << 4).
;   * %a3 operates on %ptrf and takes the first result as its value operand;
;     the checks expect s_waitcnt lgkmcnt(1) before that instruction.
; 0x42280000 is the IEEE-754 single-precision bit pattern of 42.0.
; The tonga run expects a write to m0; the gfx900 run expects m0 not to appear.
; GCN-LABEL: {{^}}lds_ds_fmax:
; VI-DAG: s_mov_b32 m0
; GFX9-NOT: m0
; GCN-DAG: v_mov_b32_e32 [[V0:v[0-9]+]], 0x42280000
; GCN: ds_max_rtn_f32 [[V2:v[0-9]+]], [[V1:v[0-9]+]], [[V0]] offset:32
; GCN: ds_max_f32 [[V3:v[0-9]+]], [[V0]] offset:64
; GCN: s_waitcnt lgkmcnt(1)
; GCN: ds_max_rtn_f32 {{v[0-9]+}}, {{v[0-9]+}}, [[V2]]
define amdgpu_kernel void @lds_ds_fmax(float addrspace(1)* %out, float addrspace(3)* %ptrf, i32 %idx) {
  ; Both addresses derive from idx + 4, so the constant part can be carried as
  ; an instruction offset after the shifts.
  %idx.add = add nuw i32 %idx, 4
  %shl0 = shl i32 %idx.add, 3
  %shl1 = shl i32 %idx.add, 4
  %ptr0 = inttoptr i32 %shl0 to float addrspace(3)*
  %ptr1 = inttoptr i32 %shl1 to float addrspace(3)*
  ; %a1 is used below; %a2 is intentionally left unused.
  %a1 = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr0, float 4.2e+1, i32 0, i32 0, i1 false)
  %a2 = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr1, float 4.2e+1, i32 0, i32 0, i1 false)
  ; The value operand is the result of the first call.
  %a3 = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptrf, float %a1, i32 0, i32 0, i1 false)
  store float %a3, float addrspace(1)* %out
  ret void
}