Matt Arsenault | 7aad8fd | 2017-01-24 22:02:15 +0000 | [diff] [blame] | 1 | ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=VI -check-prefix=VI-OPT %s |
| 2 | ; RUN: llc -O0 -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=VI -check-prefix=VI-NOOPT %s |
Tom Stellard | 4409051 | 2016-02-13 02:09:49 +0000 | [diff] [blame] | 3 | |
Tom Stellard | 331f981 | 2016-03-14 17:05:56 +0000 | [diff] [blame] | 4 | ; FIXME: The register allocator / scheduler should be able to avoid these hazards. |
| 5 | |
; dpp_test: smallest case. One llvm.amdgcn.mov.dpp.i32 call is applied to the
; kernel argument %in and the result is stored to %out.
; The directives below expect the argument to be copied from an SGPR into a
; VGPR, then wait states (a single "s_nop 1" in the optimized run, two
; "s_nop 0" in the -O0 run), then the v_mov_b32_dpp itself. The optimized run
; reuses v0 as source and destination; the -O0 run reads v1 and writes v0.
; Both runs also pin the exact machine encoding of the DPP move.
Tom Stellard | 4409051 | 2016-02-13 02:09:49 +0000 | [diff] [blame] | 6 | ; VI-LABEL: {{^}}dpp_test: |
Tom Stellard | 331f981 | 2016-03-14 17:05:56 +0000 | [diff] [blame] | 7 | ; VI: v_mov_b32_e32 v0, s{{[0-9]+}} |
Connor Abbott | 79f3ade | 2017-08-07 19:10:56 +0000 | [diff] [blame] | 8 | ; VI-NOOPT: v_mov_b32_e32 v1, s{{[0-9]+}} |
Mark Searles | 72da47d | 2018-07-16 10:02:41 +0000 | [diff] [blame] | 9 | ; VI-OPT: s_nop 1 |
| 10 | ; VI-NOOPT: s_nop 0 |
| 11 | ; VI-NOOPT: s_nop 0 |
Connor Abbott | 79f3ade | 2017-08-07 19:10:56 +0000 | [diff] [blame] | 12 | ; VI-OPT: v_mov_b32_dpp v0, v0 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x01,0x08,0x11] |
| 13 | ; VI-NOOPT: v_mov_b32_dpp v0, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x01,0x01,0x08,0x11] |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 14 | define amdgpu_kernel void @dpp_test(i32 addrspace(1)* %out, i32 %in) { |
; Intrinsic operands after the source value are the immediates 1, 1, 1, true.
; The directives above show them printed as quad_perm:[1,0,0,0], row_mask:0x1,
; bank_mask:0x1 and bound_ctrl:0 respectively.
Sam Kolton | dfa29f7 | 2016-03-09 12:29:31 +0000 | [diff] [blame] | 15 | %tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in, i32 1, i32 1, i32 1, i1 1) #0 |
Tom Stellard | 4409051 | 2016-02-13 02:09:49 +0000 | [diff] [blame] | 16 | store i32 %tmp0, i32 addrspace(1)* %out |
| 17 | ret void |
| 18 | } |
| 19 | |
; dpp_wait_states: two back-to-back llvm.amdgcn.mov.dpp.i32 calls, where the
; second consumes the result of the first. The directives expect wait states
; before EACH v_mov_b32_dpp: a single "s_nop 1" in the optimized run, and two
; "s_nop 0" in the -O0 run.
; VGPR0 / VGPR1 are FileCheck variables bound to whichever VGPRs receive the
; SGPR copies; later directives require the DPP moves to reuse those registers.
; Every DPP directive spells out the full operand list, including the trailing
; "bound_ctrl:0", so a change in the printed bound_ctrl operand is caught in
; both runs.
Tom Stellard | 331f981 | 2016-03-14 17:05:56 +0000 | [diff] [blame] | 20 | ; VI-LABEL: {{^}}dpp_wait_states: |
Connor Abbott | 79f3ade | 2017-08-07 19:10:56 +0000 | [diff] [blame] | 21 | ; VI-NOOPT: v_mov_b32_e32 [[VGPR1:v[0-9]+]], s{{[0-9]+}} |
Tom Stellard | 331f981 | 2016-03-14 17:05:56 +0000 | [diff] [blame] | 22 | ; VI: v_mov_b32_e32 [[VGPR0:v[0-9]+]], s{{[0-9]+}} |
Mark Searles | 72da47d | 2018-07-16 10:02:41 +0000 | [diff] [blame] | 23 | ; VI-OPT: s_nop 1 |
| 24 | ; VI-NOOPT: s_nop 0 |
| 25 | ; VI-NOOPT: s_nop 0 |
Connor Abbott | 79f3ade | 2017-08-07 19:10:56 +0000 | [diff] [blame] | 26 | ; VI-OPT: v_mov_b32_dpp [[VGPR0]], [[VGPR0]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0 |
Mark Searles | 72da47d | 2018-07-16 10:02:41 +0000 | [diff] [blame] | 27 | ; VI-NOOPT: v_mov_b32_dpp [[VGPR1]], [[VGPR0]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0 |
| 28 | ; VI-OPT: s_nop 1 |
| 29 | ; VI-NOOPT: s_nop 0 |
| 30 | ; VI-NOOPT: s_nop 0 |
Connor Abbott | 79f3ade | 2017-08-07 19:10:56 +0000 | [diff] [blame] | 31 | ; VI-OPT: v_mov_b32_dpp v{{[0-9]+}}, [[VGPR0]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0 |
| 32 | ; VI-NOOPT: v_mov_b32_dpp v{{[0-9]+}}, [[VGPR1]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0 |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 33 | define amdgpu_kernel void @dpp_wait_states(i32 addrspace(1)* %out, i32 %in) { |
; First DPP move reads the kernel argument; the second reads the first one's
; result, creating the dependent pair that the directives above inspect.
Tom Stellard | 331f981 | 2016-03-14 17:05:56 +0000 | [diff] [blame] | 34 | %tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in, i32 1, i32 1, i32 1, i1 1) #0 |
| 35 | %tmp1 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %tmp0, i32 1, i32 1, i32 1, i1 1) #0 |
| 36 | store i32 %tmp1, i32 addrspace(1)* %out |
| 37 | ret void |
| 38 | } |
| 39 | |
; dpp_first_in_bb: the value fed to the first DPP move is a phi, so it is
; produced in the predecessor blocks (%if / %else) and consumed in %endif.
; Three DPP moves are chained in %endif, each reading the previous result.
; The directives anchor on the "%endif" block comment in the llc output, then
; expect wait states ahead of the DPP moves (s_mov_b32 pairs and "s_nop 1" in
; the optimized run; s_waitcnt immediately followed by "s_nop 0" in the -O0
; run).
; NOTE(review): judging by the test name, the intent is presumably to cover the
; hazard when a DPP move is the first relevant instruction of a basic block;
; confirm against the hazard recognizer implementation.
| 40 | ; VI-LABEL: {{^}}dpp_first_in_bb: |
Tom Stellard | a27007e | 2016-05-02 16:23:09 +0000 | [diff] [blame] | 41 | ; VI: ; %endif |
| 42 | ; VI-OPT: s_mov_b32 |
| 43 | ; VI-OPT: s_mov_b32 |
Mark Searles | 72da47d | 2018-07-16 10:02:41 +0000 | [diff] [blame] | 44 | ; VI-NOOPT: s_waitcnt |
| 45 | ; VI-NOOPT-NEXT: s_nop 0 |
Sam Kolton | a74cd52 | 2016-03-18 15:35:51 +0000 | [diff] [blame] | 46 | ; VI: v_mov_b32_dpp [[VGPR0:v[0-9]+]], v{{[0-9]+}} quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0 |
Mark Searles | 72da47d | 2018-07-16 10:02:41 +0000 | [diff] [blame] | 47 | ; VI-OPT: s_nop 1 |
Sam Kolton | a74cd52 | 2016-03-18 15:35:51 +0000 | [diff] [blame] | 48 | ; VI: v_mov_b32_dpp [[VGPR1:v[0-9]+]], [[VGPR0]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0 |
Mark Searles | 72da47d | 2018-07-16 10:02:41 +0000 | [diff] [blame] | 49 | ; VI-OPT: s_nop 1 |
| 50 | ; VI-NOOPT: s_nop 0 |
| 51 | ; VI-NOOPT: s_nop 0 |
Sam Kolton | a74cd52 | 2016-03-18 15:35:51 +0000 | [diff] [blame] | 52 | ; VI: v_mov_b32_dpp v{{[0-9]+}}, [[VGPR1]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0 |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 53 | define amdgpu_kernel void @dpp_first_in_bb(float addrspace(1)* %out, float addrspace(1)* %in, float %cond, float %a, float %b) { |
; Branch on %cond == 0.0 (ordered compare) so the DPP source comes from a phi.
Tom Stellard | 331f981 | 2016-03-14 17:05:56 +0000 | [diff] [blame] | 54 | %cmp = fcmp oeq float %cond, 0.0 |
| 55 | br i1 %cmp, label %if, label %else |
| 56 | |
; Taken when %cond == 0.0: value is %a + *%out.
| 57 | if: |
| 58 | %out_val = load float, float addrspace(1)* %out |
| 59 | %if_val = fadd float %a, %out_val |
| 60 | br label %endif |
| 61 | |
; Taken otherwise: value is %b + *%in.
| 62 | else: |
| 63 | %in_val = load float, float addrspace(1)* %in |
| 64 | %else_val = fadd float %b, %in_val |
| 65 | br label %endif |
| 66 | |
; Join block. Its label name must stay "endif": a directive above matches the
; "%endif" comment that llc prints for this block.
| 67 | endif: |
| 68 | %val = phi float [%if_val, %if], [%else_val, %else] |
; The intrinsic is declared on i32, so the float is bitcast in and back out.
| 69 | %val_i32 = bitcast float %val to i32 |
| 70 | %tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %val_i32, i32 1, i32 1, i32 1, i1 1) #0 |
| 71 | %tmp1 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %tmp0, i32 1, i32 1, i32 1, i1 1) #0 |
| 72 | %tmp2 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %tmp1, i32 1, i32 1, i32 1, i1 1) #0 |
| 73 | %tmp_float = bitcast i32 %tmp2 to float |
| 74 | store float %tmp_float, float addrspace(1)* %out |
| 75 | ret void |
| 76 | } |
| 77 | |
; Declaration of the DPP move intrinsic called by the kernels in this file.
; It takes an i32 source value, three i32 operands and one i1 operand, and
; returns i32.
; NOTE(review): the trailing operands are presumably dpp_ctrl, row_mask,
; bank_mask and bound_ctrl, in that order; confirm against the AMDGPU
; intrinsic definitions.
Sam Kolton | dfa29f7 | 2016-03-09 12:29:31 +0000 | [diff] [blame] | 78 | declare i32 @llvm.amdgcn.mov.dpp.i32(i32, i32, i32, i32, i1) #0 |
Tom Stellard | 4409051 | 2016-02-13 02:09:49 +0000 | [diff] [blame] | 79 | |
; Attribute group #0 is applied to the intrinsic declaration and to every call
; site in this file.
| 80 | attributes #0 = { nounwind readnone convergent } |
Connor Abbott | 82267a5 | 2017-08-03 20:22:30 +0000 | [diff] [blame] | 81 | |