Matt Arsenault | 70b9282 | 2017-11-12 23:53:44 +0000 | [diff] [blame] | 1 | ; RUN: llc -O0 -mtriple=amdgcn--amdhsa -march=amdgcn -amdgpu-spill-sgpr-to-vgpr=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=VMEM -check-prefix=GCN %s |
| 2 | ; RUN: llc -O0 -mtriple=amdgcn--amdhsa -march=amdgcn -amdgpu-spill-sgpr-to-vgpr=1 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=VGPR -check-prefix=GCN %s |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 3 | |
| 4 | ; Verify registers used for tracking exec mask changes when all |
| 5 | ; registers are spilled at the end of the block. The SGPR spill |
| 6 | ; placement relative to the exec modifications are important. |
| 7 | |
| 8 | ; FIXME: This checks with SGPR to VGPR spilling disabled, but this may |
| 9 | ; not work correctly in cases where no workitems take a branch. |
| 10 | |
| 11 | |
| 12 | ; GCN-LABEL: {{^}}divergent_if_endif: |
Matt Arsenault | e0bf7d0 | 2017-02-21 19:12:08 +0000 | [diff] [blame] | 13 | ; VGPR: workitem_private_segment_byte_size = 12{{$}} |
| 14 | |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 15 | |
Francis Visoiu Mistrih | 25528d6 | 2017-12-04 17:18:51 +0000 | [diff] [blame] | 16 | ; GCN: {{^}}; %bb.0: |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 17 | ; GCN: s_mov_b32 m0, -1 |
| 18 | ; GCN: ds_read_b32 [[LOAD0:v[0-9]+]] |
| 19 | |
Matt Arsenault | 3d46319 | 2016-11-01 22:55:07 +0000 | [diff] [blame] | 20 | ; GCN: v_cmp_eq_u32_e64 [[CMP0:s\[[0-9]+:[0-9]\]]], s{{[0-9]+}}, v0 |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 21 | ; GCN: s_mov_b64 s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, exec |
| 22 | ; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}, [[CMP0]] |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 23 | |
| 24 | ; Spill saved exec |
| 25 | ; VGPR: v_writelane_b32 [[SPILL_VGPR:v[0-9]+]], s[[SAVEEXEC_LO]], [[SAVEEXEC_LO_LANE:[0-9]+]] |
| 26 | ; VGPR: v_writelane_b32 [[SPILL_VGPR]], s[[SAVEEXEC_HI]], [[SAVEEXEC_HI_LANE:[0-9]+]] |
| 27 | |
| 28 | |
| 29 | ; VMEM: v_mov_b32_e32 v[[V_SAVEEXEC_LO:[0-9]+]], s[[SAVEEXEC_LO]] |
Matt Arsenault | 707780b | 2017-02-22 21:05:25 +0000 | [diff] [blame] | 30 | ; VMEM: buffer_store_dword v[[V_SAVEEXEC_LO]], off, s[0:3], s7 offset:4 ; 4-byte Folded Spill |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 31 | ; VMEM: v_mov_b32_e32 v[[V_SAVEEXEC_HI:[0-9]+]], s[[SAVEEXEC_HI]] |
Matt Arsenault | 707780b | 2017-02-22 21:05:25 +0000 | [diff] [blame] | 32 | ; VMEM: buffer_store_dword v[[V_SAVEEXEC_HI]], off, s[0:3], s7 offset:8 ; 4-byte Folded Spill |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 33 | |
| 34 | ; Spill load |
Matt Arsenault | 707780b | 2017-02-22 21:05:25 +0000 | [diff] [blame] | 35 | ; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s7 offset:[[LOAD0_OFFSET:[0-9]+]] ; 4-byte Folded Spill |
Matt Arsenault | e0bf7d0 | 2017-02-21 19:12:08 +0000 | [diff] [blame] | 36 | |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 37 | ; GCN: s_mov_b64 exec, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}} |
| 38 | |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 39 | ; GCN: mask branch [[ENDIF:BB[0-9]+_[0-9]+]] |
| 40 | |
| 41 | ; GCN: {{^}}BB{{[0-9]+}}_1: ; %if |
| 42 | ; GCN: s_mov_b32 m0, -1 |
| 43 | ; GCN: ds_read_b32 [[LOAD1:v[0-9]+]] |
Mark Searles | 70359ac | 2017-06-02 14:19:25 +0000 | [diff] [blame] | 44 | ; GCN: s_waitcnt lgkmcnt(0) |
Matt Arsenault | 707780b | 2017-02-22 21:05:25 +0000 | [diff] [blame] | 45 | ; GCN: buffer_load_dword [[RELOAD_LOAD0:v[0-9]+]], off, s[0:3], s7 offset:[[LOAD0_OFFSET]] ; 4-byte Folded Reload |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 46 | |
| 47 | ; Spill val register |
| 48 | ; GCN: v_add_i32_e32 [[VAL:v[0-9]+]], vcc, [[LOAD1]], [[RELOAD_LOAD0]] |
Matt Arsenault | 253640e | 2016-10-13 13:10:00 +0000 | [diff] [blame] | 49 | ; GCN: buffer_store_dword [[VAL]], off, s[0:3], s7 offset:[[VAL_OFFSET:[0-9]+]] ; 4-byte Folded Spill |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 50 | |
| 51 | ; VMEM: [[ENDIF]]: |
| 52 | ; Reload and restore exec mask |
Mark Searles | 70359ac | 2017-06-02 14:19:25 +0000 | [diff] [blame] | 53 | ; VGPR: s_waitcnt lgkmcnt(0) |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 54 | ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_LO_LANE]] |
| 55 | ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_HI_LANE]] |
| 56 | |
| 57 | |
| 58 | |
Matt Arsenault | 707780b | 2017-02-22 21:05:25 +0000 | [diff] [blame] | 59 | ; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_LO:[0-9]+]], off, s[0:3], s7 offset:4 ; 4-byte Folded Reload |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 60 | ; VMEM: s_waitcnt vmcnt(0) |
| 61 | ; VMEM: v_readfirstlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], v[[V_RELOAD_SAVEEXEC_LO]] |
| 62 | |
Matt Arsenault | 707780b | 2017-02-22 21:05:25 +0000 | [diff] [blame] | 63 | ; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_HI:[0-9]+]], off, s[0:3], s7 offset:8 ; 4-byte Folded Reload |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 64 | ; VMEM: s_waitcnt vmcnt(0) |
| 65 | ; VMEM: v_readfirstlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], v[[V_RELOAD_SAVEEXEC_HI]] |
| 66 | |
| 67 | ; GCN: s_or_b64 exec, exec, s{{\[}}[[S_RELOAD_SAVEEXEC_LO]]:[[S_RELOAD_SAVEEXEC_HI]]{{\]}} |
| 68 | |
| 69 | ; Restore val |
Matt Arsenault | 253640e | 2016-10-13 13:10:00 +0000 | [diff] [blame] | 70 | ; GCN: buffer_load_dword [[RELOAD_VAL:v[0-9]+]], off, s[0:3], s7 offset:[[VAL_OFFSET]] ; 4-byte Folded Reload |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 71 | |
| 72 | ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RELOAD_VAL]] |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 73 | define amdgpu_kernel void @divergent_if_endif(i32 addrspace(1)* %out) #0 { |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 74 | entry: |
| 75 | %tid = call i32 @llvm.amdgcn.workitem.id.x() |
| 76 | %load0 = load volatile i32, i32 addrspace(3)* undef |
| 77 | %cmp0 = icmp eq i32 %tid, 0 |
| 78 | br i1 %cmp0, label %if, label %endif |
| 79 | |
| 80 | if: |
| 81 | %load1 = load volatile i32, i32 addrspace(3)* undef |
| 82 | %val = add i32 %load0, %load1 |
| 83 | br label %endif |
| 84 | |
| 85 | endif: |
| 86 | %tmp4 = phi i32 [ %val, %if ], [ 0, %entry ] |
| 87 | store i32 %tmp4, i32 addrspace(1)* %out |
| 88 | ret void |
| 89 | } |
| 90 | |
| 91 | ; GCN-LABEL: {{^}}divergent_loop: |
Alexander Timofeev | c1425c9d | 2017-12-01 11:56:34 +0000 | [diff] [blame] | 92 | ; VGPR: workitem_private_segment_byte_size = 12{{$}} |
Matt Arsenault | e0bf7d0 | 2017-02-21 19:12:08 +0000 | [diff] [blame] | 93 | |
Francis Visoiu Mistrih | 25528d6 | 2017-12-04 17:18:51 +0000 | [diff] [blame] | 94 | ; GCN: {{^}}; %bb.0: |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 95 | |
| 96 | ; GCN: s_mov_b32 m0, -1 |
| 97 | ; GCN: ds_read_b32 [[LOAD0:v[0-9]+]] |
| 98 | |
Matt Arsenault | 3d46319 | 2016-11-01 22:55:07 +0000 | [diff] [blame] | 99 | ; GCN: v_cmp_eq_u32_e64 [[CMP0:s\[[0-9]+:[0-9]\]]], s{{[0-9]+}}, v0 |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 100 | |
| 101 | ; GCN: s_mov_b64 s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, exec |
| 102 | ; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, [[CMP0]] |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 103 | |
Matt Arsenault | 3d46319 | 2016-11-01 22:55:07 +0000 | [diff] [blame] | 104 | ; Spill load |
Matt Arsenault | 707780b | 2017-02-22 21:05:25 +0000 | [diff] [blame] | 105 | ; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s7 offset:4 ; 4-byte Folded Spill |
Matt Arsenault | 3d46319 | 2016-11-01 22:55:07 +0000 | [diff] [blame] | 106 | |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 107 | ; Spill saved exec |
| 108 | ; VGPR: v_writelane_b32 [[SPILL_VGPR:v[0-9]+]], s[[SAVEEXEC_LO]], [[SAVEEXEC_LO_LANE:[0-9]+]] |
| 109 | ; VGPR: v_writelane_b32 [[SPILL_VGPR]], s[[SAVEEXEC_HI]], [[SAVEEXEC_HI_LANE:[0-9]+]] |
| 110 | |
| 111 | |
| 112 | ; VMEM: v_mov_b32_e32 v[[V_SAVEEXEC_LO:[0-9]+]], s[[SAVEEXEC_LO]] |
Matt Arsenault | 707780b | 2017-02-22 21:05:25 +0000 | [diff] [blame] | 113 | ; VMEM: buffer_store_dword v[[V_SAVEEXEC_LO]], off, s[0:3], s7 offset:20 ; 4-byte Folded Spill |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 114 | ; VMEM: v_mov_b32_e32 v[[V_SAVEEXEC_HI:[0-9]+]], s[[SAVEEXEC_HI]] |
Matt Arsenault | 707780b | 2017-02-22 21:05:25 +0000 | [diff] [blame] | 115 | ; VMEM: buffer_store_dword v[[V_SAVEEXEC_HI]], off, s[0:3], s7 offset:24 ; 4-byte Folded Spill |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 116 | |
| 117 | ; GCN: s_mov_b64 exec, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}} |
| 118 | |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 119 | ; GCN-NEXT: ; mask branch [[END:BB[0-9]+_[0-9]+]] |
| 120 | ; GCN-NEXT: s_cbranch_execz [[END]] |
| 121 | |
| 122 | |
| 123 | ; GCN: [[LOOP:BB[0-9]+_[0-9]+]]: |
Matt Arsenault | 707780b | 2017-02-22 21:05:25 +0000 | [diff] [blame] | 124 | ; GCN: buffer_load_dword v[[VAL_LOOP_RELOAD:[0-9]+]], off, s[0:3], s7 offset:4 ; 4-byte Folded Reload |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 125 | ; GCN: v_subrev_i32_e32 [[VAL_LOOP:v[0-9]+]], vcc, v{{[0-9]+}}, v[[VAL_LOOP_RELOAD]] |
Alexander Timofeev | c1425c9d | 2017-12-01 11:56:34 +0000 | [diff] [blame] | 126 | ; GCN: s_cmp_lg_u32 s{{[0-9]+}}, s{{[0-9]+}} |
Matt Arsenault | 253640e | 2016-10-13 13:10:00 +0000 | [diff] [blame] | 127 | ; GCN: buffer_store_dword [[VAL_LOOP]], off, s[0:3], s7 offset:[[VAL_SUB_OFFSET:[0-9]+]] ; 4-byte Folded Spill |
Alexander Timofeev | c1425c9d | 2017-12-01 11:56:34 +0000 | [diff] [blame] | 128 | ; GCN-NEXT: s_cbranch_scc1 [[LOOP]] |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 129 | |
| 130 | |
| 131 | ; GCN: [[END]]: |
| 132 | ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_LO_LANE]] |
| 133 | ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_HI_LANE]] |
| 134 | |
Matt Arsenault | 707780b | 2017-02-22 21:05:25 +0000 | [diff] [blame] | 135 | ; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_LO:[0-9]+]], off, s[0:3], s7 offset:20 ; 4-byte Folded Reload |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 136 | ; VMEM: s_waitcnt vmcnt(0) |
| 137 | ; VMEM: v_readfirstlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], v[[V_RELOAD_SAVEEXEC_LO]] |
| 138 | |
Matt Arsenault | 707780b | 2017-02-22 21:05:25 +0000 | [diff] [blame] | 139 | ; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_HI:[0-9]+]], off, s[0:3], s7 offset:24 ; 4-byte Folded Reload |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 140 | ; VMEM: s_waitcnt vmcnt(0) |
| 141 | ; VMEM: v_readfirstlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], v[[V_RELOAD_SAVEEXEC_HI]] |
| 142 | |
| 143 | ; GCN: s_or_b64 exec, exec, s{{\[}}[[S_RELOAD_SAVEEXEC_LO]]:[[S_RELOAD_SAVEEXEC_HI]]{{\]}} |
Matt Arsenault | 253640e | 2016-10-13 13:10:00 +0000 | [diff] [blame] | 144 | ; GCN: buffer_load_dword v[[VAL_END:[0-9]+]], off, s[0:3], s7 offset:[[VAL_SUB_OFFSET]] ; 4-byte Folded Reload |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 145 | |
| 146 | ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[VAL_END]] |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 147 | define amdgpu_kernel void @divergent_loop(i32 addrspace(1)* %out) #0 { |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 148 | entry: |
| 149 | %tid = call i32 @llvm.amdgcn.workitem.id.x() |
| 150 | %load0 = load volatile i32, i32 addrspace(3)* undef |
| 151 | %cmp0 = icmp eq i32 %tid, 0 |
| 152 | br i1 %cmp0, label %loop, label %end |
| 153 | |
| 154 | loop: |
| 155 | %i = phi i32 [ %i.inc, %loop ], [ 0, %entry ] |
| 156 | %val = phi i32 [ %val.sub, %loop ], [ %load0, %entry ] |
| 157 | %load1 = load volatile i32, i32 addrspace(3)* undef |
| 158 | %i.inc = add i32 %i, 1 |
| 159 | %val.sub = sub i32 %val, %load1 |
| 160 | %cmp1 = icmp ne i32 %i, 256 |
| 161 | br i1 %cmp1, label %loop, label %end |
| 162 | |
| 163 | end: |
| 164 | %tmp4 = phi i32 [ %val.sub, %loop ], [ 0, %entry ] |
| 165 | store i32 %tmp4, i32 addrspace(1)* %out |
| 166 | ret void |
| 167 | } |
| 168 | |
| 169 | ; GCN-LABEL: {{^}}divergent_if_else_endif: |
Francis Visoiu Mistrih | 25528d6 | 2017-12-04 17:18:51 +0000 | [diff] [blame] | 170 | ; GCN: {{^}}; %bb.0: |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 171 | |
| 172 | ; GCN: s_mov_b32 m0, -1 |
Matt Arsenault | 70b9282 | 2017-11-12 23:53:44 +0000 | [diff] [blame] | 173 | ; GCN: ds_read_b32 [[LOAD0:v[0-9]+]] |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 174 | |
Matt Arsenault | 5d8eb25 | 2016-09-30 01:50:20 +0000 | [diff] [blame] | 175 | ; GCN: v_cmp_ne_u32_e64 [[CMP0:s\[[0-9]+:[0-9]\]]], v0, |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 176 | |
| 177 | ; GCN: s_mov_b64 s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, exec |
| 178 | ; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, [[CMP0]] |
| 179 | ; GCN: s_xor_b64 s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}}, s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}} |
| 180 | |
| 181 | ; Spill load |
Matt Arsenault | 707780b | 2017-02-22 21:05:25 +0000 | [diff] [blame] | 182 | ; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s7 offset:4 ; 4-byte Folded Spill |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 183 | |
| 184 | ; Spill saved exec |
| 185 | ; VGPR: v_writelane_b32 [[SPILL_VGPR:v[0-9]+]], s[[SAVEEXEC_LO]], [[SAVEEXEC_LO_LANE:[0-9]+]] |
| 186 | ; VGPR: v_writelane_b32 [[SPILL_VGPR]], s[[SAVEEXEC_HI]], [[SAVEEXEC_HI_LANE:[0-9]+]] |
| 187 | |
| 188 | ; VMEM: v_mov_b32_e32 v[[V_SAVEEXEC_LO:[0-9]+]], s[[SAVEEXEC_LO]] |
Marek Olsak | 79c0587 | 2016-11-25 17:37:09 +0000 | [diff] [blame] | 189 | ; VMEM: buffer_store_dword v[[V_SAVEEXEC_LO]], off, s[0:3], s7 offset:[[SAVEEXEC_LO_OFFSET:[0-9]+]] ; 4-byte Folded Spill |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 190 | ; VMEM: v_mov_b32_e32 v[[V_SAVEEXEC_HI:[0-9]+]], s[[SAVEEXEC_HI]] |
Marek Olsak | 79c0587 | 2016-11-25 17:37:09 +0000 | [diff] [blame] | 191 | ; VMEM: buffer_store_dword v[[V_SAVEEXEC_HI]], off, s[0:3], s7 offset:[[SAVEEXEC_HI_OFFSET:[0-9]+]] ; 4-byte Folded Spill |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 192 | |
| 193 | ; GCN: s_mov_b64 exec, [[CMP0]] |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 194 | |
| 195 | ; FIXME: It makes no sense to put this skip here |
| 196 | ; GCN-NEXT: ; mask branch [[FLOW:BB[0-9]+_[0-9]+]] |
| 197 | ; GCN: s_cbranch_execz [[FLOW]] |
| 198 | ; GCN-NEXT: s_branch [[ELSE:BB[0-9]+_[0-9]+]] |
| 199 | |
| 200 | ; GCN: [[FLOW]]: ; %Flow |
| 201 | ; VGPR: v_readlane_b32 s[[FLOW_S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_LO_LANE]] |
| 202 | ; VGPR: v_readlane_b32 s[[FLOW_S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_HI_LANE]] |
| 203 | |
| 204 | |
Matt Arsenault | 253640e | 2016-10-13 13:10:00 +0000 | [diff] [blame] | 205 | ; VMEM: buffer_load_dword v[[FLOW_V_RELOAD_SAVEEXEC_LO:[0-9]+]], off, s[0:3], s7 offset:[[SAVEEXEC_LO_OFFSET]] |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 206 | ; VMEM: s_waitcnt vmcnt(0) |
| 207 | ; VMEM: v_readfirstlane_b32 s[[FLOW_S_RELOAD_SAVEEXEC_LO:[0-9]+]], v[[FLOW_V_RELOAD_SAVEEXEC_LO]] |
| 208 | |
Marek Olsak | 79c0587 | 2016-11-25 17:37:09 +0000 | [diff] [blame] | 209 | ; VMEM: buffer_load_dword v[[FLOW_V_RELOAD_SAVEEXEC_HI:[0-9]+]], off, s[0:3], s7 offset:[[SAVEEXEC_HI_OFFSET]] ; 4-byte Folded Reload |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 210 | ; VMEM: s_waitcnt vmcnt(0) |
| 211 | ; VMEM: v_readfirstlane_b32 s[[FLOW_S_RELOAD_SAVEEXEC_HI:[0-9]+]], v[[FLOW_V_RELOAD_SAVEEXEC_HI]] |
| 212 | |
| 213 | ; GCN: s_or_saveexec_b64 s{{\[}}[[FLOW_S_RELOAD_SAVEEXEC_LO]]:[[FLOW_S_RELOAD_SAVEEXEC_HI]]{{\]}}, s{{\[}}[[FLOW_S_RELOAD_SAVEEXEC_LO]]:[[FLOW_S_RELOAD_SAVEEXEC_HI]]{{\]}} |
| 214 | |
| 215 | ; Regular spill value restored after exec modification |
Matt Arsenault | 253640e | 2016-10-13 13:10:00 +0000 | [diff] [blame] | 216 | ; GCN: buffer_load_dword [[FLOW_VAL:v[0-9]+]], off, s[0:3], s7 offset:[[FLOW_VAL_OFFSET:[0-9]+]] ; 4-byte Folded Reload |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 217 | |
| 218 | |
| 219 | ; Spill saved exec |
| 220 | ; VGPR: v_writelane_b32 [[SPILL_VGPR]], s[[FLOW_S_RELOAD_SAVEEXEC_LO]], [[FLOW_SAVEEXEC_LO_LANE:[0-9]+]] |
| 221 | ; VGPR: v_writelane_b32 [[SPILL_VGPR]], s[[FLOW_S_RELOAD_SAVEEXEC_HI]], [[FLOW_SAVEEXEC_HI_LANE:[0-9]+]] |
| 222 | |
| 223 | |
| 224 | ; VMEM: v_mov_b32_e32 v[[FLOW_V_SAVEEXEC_LO:[0-9]+]], s[[FLOW_S_RELOAD_SAVEEXEC_LO]] |
Marek Olsak | 79c0587 | 2016-11-25 17:37:09 +0000 | [diff] [blame] | 225 | ; VMEM: buffer_store_dword v[[FLOW_V_SAVEEXEC_LO]], off, s[0:3], s7 offset:[[FLOW_SAVEEXEC_LO_OFFSET:[0-9]+]] ; 4-byte Folded Spill |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 226 | ; VMEM: v_mov_b32_e32 v[[FLOW_V_SAVEEXEC_HI:[0-9]+]], s[[FLOW_S_RELOAD_SAVEEXEC_HI]] |
Marek Olsak | 79c0587 | 2016-11-25 17:37:09 +0000 | [diff] [blame] | 227 | ; VMEM: buffer_store_dword v[[FLOW_V_SAVEEXEC_HI]], off, s[0:3], s7 offset:[[FLOW_SAVEEXEC_HI_OFFSET:[0-9]+]] ; 4-byte Folded Spill |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 228 | |
Matt Arsenault | 253640e | 2016-10-13 13:10:00 +0000 | [diff] [blame] | 229 | ; GCN: buffer_store_dword [[FLOW_VAL]], off, s[0:3], s7 offset:[[RESULT_OFFSET:[0-9]+]] ; 4-byte Folded Spill |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 230 | ; GCN: s_xor_b64 exec, exec, s{{\[}}[[FLOW_S_RELOAD_SAVEEXEC_LO]]:[[FLOW_S_RELOAD_SAVEEXEC_HI]]{{\]}} |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 231 | ; GCN-NEXT: ; mask branch [[ENDIF:BB[0-9]+_[0-9]+]] |
| 232 | ; GCN-NEXT: s_cbranch_execz [[ENDIF]] |
| 233 | |
| 234 | |
| 235 | ; GCN: BB{{[0-9]+}}_2: ; %if |
| 236 | ; GCN: ds_read_b32 |
Matt Arsenault | 707780b | 2017-02-22 21:05:25 +0000 | [diff] [blame] | 237 | ; GCN: buffer_load_dword v[[LOAD0_RELOAD:[0-9]+]], off, s[0:3], s7 offset:4 ; 4-byte Folded Reload |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 238 | ; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, v{{[0-9]+}}, v[[LOAD0_RELOAD]] |
Matt Arsenault | 253640e | 2016-10-13 13:10:00 +0000 | [diff] [blame] | 239 | ; GCN: buffer_store_dword [[ADD]], off, s[0:3], s7 offset:[[RESULT_OFFSET]] ; 4-byte Folded Spill |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 240 | ; GCN-NEXT: s_branch [[ENDIF:BB[0-9]+_[0-9]+]] |
| 241 | |
| 242 | ; GCN: [[ELSE]]: ; %else |
Matt Arsenault | 707780b | 2017-02-22 21:05:25 +0000 | [diff] [blame] | 243 | ; GCN: buffer_load_dword v[[LOAD0_RELOAD:[0-9]+]], off, s[0:3], s7 offset:4 ; 4-byte Folded Reload |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 244 | ; GCN: v_subrev_i32_e32 [[SUB:v[0-9]+]], vcc, v{{[0-9]+}}, v[[LOAD0_RELOAD]] |
Matt Arsenault | 253640e | 2016-10-13 13:10:00 +0000 | [diff] [blame] | 245 | ; GCN: buffer_store_dword [[ADD]], off, s[0:3], s7 offset:[[FLOW_RESULT_OFFSET:[0-9]+]] ; 4-byte Folded Spill |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 246 | ; GCN-NEXT: s_branch [[FLOW]] |
| 247 | |
| 248 | ; GCN: [[ENDIF]]: |
| 249 | ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[FLOW_SAVEEXEC_LO_LANE]] |
| 250 | ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[FLOW_SAVEEXEC_HI_LANE]] |
| 251 | |
| 252 | |
Marek Olsak | 79c0587 | 2016-11-25 17:37:09 +0000 | [diff] [blame] | 253 | ; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_LO:[0-9]+]], off, s[0:3], s7 offset:[[FLOW_SAVEEXEC_LO_OFFSET]] ; 4-byte Folded Reload |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 254 | ; VMEM: s_waitcnt vmcnt(0) |
| 255 | ; VMEM: v_readfirstlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], v[[V_RELOAD_SAVEEXEC_LO]] |
| 256 | |
Marek Olsak | 79c0587 | 2016-11-25 17:37:09 +0000 | [diff] [blame] | 257 | ; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_HI:[0-9]+]], off, s[0:3], s7 offset:[[FLOW_SAVEEXEC_HI_OFFSET]] ; 4-byte Folded Reload |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 258 | ; VMEM: s_waitcnt vmcnt(0) |
| 259 | ; VMEM: v_readfirstlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], v[[V_RELOAD_SAVEEXEC_HI]] |
| 260 | |
| 261 | ; GCN: s_or_b64 exec, exec, s{{\[}}[[S_RELOAD_SAVEEXEC_LO]]:[[S_RELOAD_SAVEEXEC_HI]]{{\]}} |
| 262 | |
Matt Arsenault | 253640e | 2016-10-13 13:10:00 +0000 | [diff] [blame] | 263 | ; GCN: buffer_load_dword v[[RESULT:[0-9]+]], off, s[0:3], s7 offset:[[RESULT_OFFSET]] ; 4-byte Folded Reload |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 264 | ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RESULT]] |
Matt Arsenault | 3dbeefa | 2017-03-21 21:39:51 +0000 | [diff] [blame] | 265 | define amdgpu_kernel void @divergent_if_else_endif(i32 addrspace(1)* %out) #0 { |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 266 | entry: |
| 267 | %tid = call i32 @llvm.amdgcn.workitem.id.x() |
| 268 | %load0 = load volatile i32, i32 addrspace(3)* undef |
| 269 | %cmp0 = icmp eq i32 %tid, 0 |
| 270 | br i1 %cmp0, label %if, label %else |
| 271 | |
| 272 | if: |
| 273 | %load1 = load volatile i32, i32 addrspace(3)* undef |
| 274 | %val0 = add i32 %load0, %load1 |
| 275 | br label %endif |
| 276 | |
| 277 | else: |
| 278 | %load2 = load volatile i32, i32 addrspace(3)* undef |
| 279 | %val1 = sub i32 %load0, %load2 |
| 280 | br label %endif |
| 281 | |
| 282 | endif: |
| 283 | %result = phi i32 [ %val0, %if ], [ %val1, %else ] |
| 284 | store i32 %result, i32 addrspace(1)* %out |
| 285 | ret void |
| 286 | } |
| 287 | |
| 288 | declare i32 @llvm.amdgcn.workitem.id.x() #1 |
| 289 | |
| 290 | attributes #0 = { nounwind } |
| 291 | attributes #1 = { nounwind readnone } |