Matt Arsenault | 9ab1fa6 | 2017-10-04 22:59:12 +0000 | [diff] [blame] | 1 | ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -amdgpu-s-branch-bits=5 < %s | FileCheck -check-prefix=GCN %s |
| 2 | |
| 3 | ; Restrict maximum branch to between +15 and -16 dwords |
| 4 | |
| 5 | ; Instructions inside a bundle were collectively counted as |
| 6 | ; 0 bytes. Make sure this is accounted for when estimating branch |
| 7 | ; distances. |
| 8 | |
| 9 | ; Bundle used for address in call sequence: 20 bytes |
| 10 | ; s_getpc_b64 |
| 11 | ; s_add_u32 |
| 12 | ; s_addc_u32 |
| 13 | |
| 14 | ; plus additional overhead |
| 15 | ; s_swappc_b64 |
| 16 | ; and some register copies |
| 17 | |
; External function called from @bundle_size. Only the declaration is visible
; here; the checks on the call site expect its address to be formed with
; s_getpc_b64 / s_add_u32 / s_addc_u32 using func@ relocations.
| 18 | declare void @func() #0 |
| 19 | |
| 20 | ; GCN-LABEL: {{^}}bundle_size: |
| 21 | ; GCN: s_cbranch_scc0 [[BB_EXPANSION:BB[0-9]+_[0-9]+]] |
| 22 | ; GCN: s_getpc_b64 |
| 23 | ; GCN-NEXT: s_add_u32 |
| 24 | ; GCN-NEXT: s_addc_u32 |
| 25 | ; GCN-NEXT: s_setpc_b64 |
| 26 | |
| 27 | ; GCN: {{^}}[[BB_EXPANSION]]: |
| 28 | ; GCN: s_getpc_b64 |
| 29 | ; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, func@ |
| 30 | ; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, func@ |
| 31 | ; GCN: s_swappc_b64 |
; Kernel with a conditional branch in %bb that must jump over %bb2 to reach
; %bb3. %bb2 contains a call to @func followed by five v_nop_e64 instructions
; of inline asm. The RUN line limits branch offsets with
; -amdgpu-s-branch-bits=5, and the checks before this function expect the
; branch over %bb2 to be expanded into an
; s_getpc_b64 / s_add_u32 / s_addc_u32 / s_setpc_b64 sequence.
| 32 | define amdgpu_kernel void @bundle_size(i32 addrspace(1)* %arg, i32 %cnd) #0 { |
| 33 | bb: |
; Go straight to %bb3 when %cnd is zero, otherwise fall into %bb2.
| 34 | %cmp = icmp eq i32 %cnd, 0 |
| 35 | br i1 %cmp, label %bb3, label %bb2 ; +8 dword branch |
| 36 | |
| 37 | bb2: |
; The call is expected to be emitted as the getpc/add/addc address sequence
; followed by s_swappc_b64 (see the checks before the function).
| 38 | call void @func() |
; Five v_nop_e64 instructions; presumably padding that makes %bb2 long enough
; to matter for the branch-distance estimate -- confirm against the pass.
; The asm string spans several lines and must stay exactly as written.
| 39 | call void asm sideeffect |
| 40 | "v_nop_e64 |
| 41 | v_nop_e64 |
| 42 | v_nop_e64 |
| 43 | v_nop_e64 |
| 44 | v_nop_e64", ""() #0 |
| 45 | br label %bb3 |
| 46 | |
| 47 | bb3: |
; Join block: volatile store of %cnd through %arg, then return.
| 48 | store volatile i32 %cnd, i32 addrspace(1)* %arg |
| 49 | ret void |
| 50 | } |
| 51 | |
; Attribute group #0 is applied to @func, @bundle_size and the inline-asm call.
| 52 | attributes #0 = { nounwind } |
; NOTE(review): #1 is not referenced anywhere in the visible file -- confirm
; whether it can be dropped.
| 53 | attributes #1 = { nounwind readnone } |