; RUN: llc -O0 -mtriple=amdgcn--amdhsa -march=amdgcn -amdgpu-spill-sgpr-to-vgpr=0 -verify-machineinstrs < %s | FileCheck -check-prefix=VMEM -check-prefix=GCN %s
; RUN: llc -O0 -mtriple=amdgcn--amdhsa -march=amdgcn -amdgpu-spill-sgpr-to-vgpr=1 -verify-machineinstrs < %s | FileCheck -check-prefix=VGPR -check-prefix=GCN %s
| 3 | |
| 4 | ; Verify registers used for tracking exec mask changes when all |
| 5 | ; registers are spilled at the end of the block. The SGPR spill |
| 6 | ; placement relative to the exec modifications are important. |
| 7 | |
| 8 | ; FIXME: This checks with SGPR to VGPR spilling disabled, but this may |
| 9 | ; not work correctly in cases where no workitems take a branch. |
| 10 | |
| 11 | |
| 12 | ; GCN-LABEL: {{^}}divergent_if_endif: |
| 13 | |
| 14 | ; GCN: {{^}}; BB#0: |
| 15 | ; GCN: s_mov_b32 m0, -1 |
| 16 | ; GCN: ds_read_b32 [[LOAD0:v[0-9]+]] |
| 17 | |
Matt Arsenault | 3d46319 | 2016-11-01 22:55:07 +0000 | [diff] [blame^] | 18 | ; GCN: v_cmp_eq_u32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, v0 |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 19 | ; GCN: s_mov_b64 s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, exec |
| 20 | ; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}, [[CMP0]] |
| 21 | ; GCN: s_xor_b64 s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}}, s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}} |
| 22 | |
| 23 | ; Spill saved exec |
| 24 | ; VGPR: v_writelane_b32 [[SPILL_VGPR:v[0-9]+]], s[[SAVEEXEC_LO]], [[SAVEEXEC_LO_LANE:[0-9]+]] |
| 25 | ; VGPR: v_writelane_b32 [[SPILL_VGPR]], s[[SAVEEXEC_HI]], [[SAVEEXEC_HI_LANE:[0-9]+]] |
| 26 | |
| 27 | |
| 28 | ; VMEM: v_mov_b32_e32 v[[V_SAVEEXEC_LO:[0-9]+]], s[[SAVEEXEC_LO]] |
Matt Arsenault | 253640e | 2016-10-13 13:10:00 +0000 | [diff] [blame] | 29 | ; VMEM: buffer_store_dword v[[V_SAVEEXEC_LO]], off, s[0:3], s7 ; 8-byte Folded Spill |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 30 | ; VMEM: v_mov_b32_e32 v[[V_SAVEEXEC_HI:[0-9]+]], s[[SAVEEXEC_HI]] |
Matt Arsenault | 253640e | 2016-10-13 13:10:00 +0000 | [diff] [blame] | 31 | ; VMEM: buffer_store_dword v[[V_SAVEEXEC_HI]], off, s[0:3], s7 offset:4 ; 8-byte Folded Spill |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 32 | |
| 33 | ; Spill load |
Matt Arsenault | 253640e | 2016-10-13 13:10:00 +0000 | [diff] [blame] | 34 | ; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s7 offset:[[LOAD0_OFFSET:[0-9]+]] ; 4-byte Folded Spill |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 35 | ; GCN: s_mov_b64 exec, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}} |
| 36 | |
| 37 | ; GCN: s_waitcnt vmcnt(0) expcnt(0) |
| 38 | ; GCN: mask branch [[ENDIF:BB[0-9]+_[0-9]+]] |
| 39 | |
| 40 | ; GCN: {{^}}BB{{[0-9]+}}_1: ; %if |
| 41 | ; GCN: s_mov_b32 m0, -1 |
| 42 | ; GCN: ds_read_b32 [[LOAD1:v[0-9]+]] |
Matt Arsenault | 253640e | 2016-10-13 13:10:00 +0000 | [diff] [blame] | 43 | ; GCN: buffer_load_dword [[RELOAD_LOAD0:v[0-9]+]], off, s[0:3], s7 offset:[[LOAD0_OFFSET]] ; 4-byte Folded Reload |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 44 | ; GCN: s_waitcnt vmcnt(0) |
| 45 | |
| 46 | ; Spill val register |
| 47 | ; GCN: v_add_i32_e32 [[VAL:v[0-9]+]], vcc, [[LOAD1]], [[RELOAD_LOAD0]] |
Matt Arsenault | 253640e | 2016-10-13 13:10:00 +0000 | [diff] [blame] | 48 | ; GCN: buffer_store_dword [[VAL]], off, s[0:3], s7 offset:[[VAL_OFFSET:[0-9]+]] ; 4-byte Folded Spill |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 49 | ; GCN: s_waitcnt vmcnt(0) |
| 50 | |
| 51 | ; VMEM: [[ENDIF]]: |
| 52 | ; Reload and restore exec mask |
| 53 | ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_LO_LANE]] |
| 54 | ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_HI_LANE]] |
| 55 | |
| 56 | |
| 57 | |
Matt Arsenault | 253640e | 2016-10-13 13:10:00 +0000 | [diff] [blame] | 58 | ; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_LO:[0-9]+]], off, s[0:3], s7 ; 8-byte Folded Reload |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 59 | ; VMEM: s_waitcnt vmcnt(0) |
| 60 | ; VMEM: v_readfirstlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], v[[V_RELOAD_SAVEEXEC_LO]] |
| 61 | |
Matt Arsenault | 253640e | 2016-10-13 13:10:00 +0000 | [diff] [blame] | 62 | ; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_HI:[0-9]+]], off, s[0:3], s7 offset:4 ; 8-byte Folded Reload |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 63 | ; VMEM: s_waitcnt vmcnt(0) |
| 64 | ; VMEM: v_readfirstlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], v[[V_RELOAD_SAVEEXEC_HI]] |
| 65 | |
| 66 | ; GCN: s_or_b64 exec, exec, s{{\[}}[[S_RELOAD_SAVEEXEC_LO]]:[[S_RELOAD_SAVEEXEC_HI]]{{\]}} |
| 67 | |
| 68 | ; Restore val |
Matt Arsenault | 253640e | 2016-10-13 13:10:00 +0000 | [diff] [blame] | 69 | ; GCN: buffer_load_dword [[RELOAD_VAL:v[0-9]+]], off, s[0:3], s7 offset:[[VAL_OFFSET]] ; 4-byte Folded Reload |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 70 | |
| 71 | ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RELOAD_VAL]] |
| 72 | define void @divergent_if_endif(i32 addrspace(1)* %out) #0 { |
| 73 | entry: |
| 74 | %tid = call i32 @llvm.amdgcn.workitem.id.x() |
| 75 | %load0 = load volatile i32, i32 addrspace(3)* undef |
| 76 | %cmp0 = icmp eq i32 %tid, 0 |
| 77 | br i1 %cmp0, label %if, label %endif |
| 78 | |
| 79 | if: |
| 80 | %load1 = load volatile i32, i32 addrspace(3)* undef |
| 81 | %val = add i32 %load0, %load1 |
| 82 | br label %endif |
| 83 | |
| 84 | endif: |
| 85 | %tmp4 = phi i32 [ %val, %if ], [ 0, %entry ] |
| 86 | store i32 %tmp4, i32 addrspace(1)* %out |
| 87 | ret void |
| 88 | } |
| 89 | |
| 90 | ; GCN-LABEL: {{^}}divergent_loop: |
| 91 | ; GCN: {{^}}; BB#0: |
| 92 | |
| 93 | ; GCN: s_mov_b32 m0, -1 |
| 94 | ; GCN: ds_read_b32 [[LOAD0:v[0-9]+]] |
| 95 | |
Matt Arsenault | 3d46319 | 2016-11-01 22:55:07 +0000 | [diff] [blame^] | 96 | ; GCN: v_cmp_eq_u32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, v0 |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 97 | |
| 98 | ; GCN: s_mov_b64 s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, exec |
| 99 | ; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}, [[CMP0]] |
| 100 | ; GCN: s_xor_b64 s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}}, s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}} |
| 101 | |
Matt Arsenault | 3d46319 | 2016-11-01 22:55:07 +0000 | [diff] [blame^] | 102 | ; Spill load |
| 103 | ; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s7 ; 4-byte Folded Spill |
| 104 | |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 105 | ; Spill saved exec |
| 106 | ; VGPR: v_writelane_b32 [[SPILL_VGPR:v[0-9]+]], s[[SAVEEXEC_LO]], [[SAVEEXEC_LO_LANE:[0-9]+]] |
| 107 | ; VGPR: v_writelane_b32 [[SPILL_VGPR]], s[[SAVEEXEC_HI]], [[SAVEEXEC_HI_LANE:[0-9]+]] |
| 108 | |
| 109 | |
| 110 | ; VMEM: v_mov_b32_e32 v[[V_SAVEEXEC_LO:[0-9]+]], s[[SAVEEXEC_LO]] |
Matt Arsenault | 3d46319 | 2016-11-01 22:55:07 +0000 | [diff] [blame^] | 111 | ; VMEM: buffer_store_dword v[[V_SAVEEXEC_LO]], off, s[0:3], s7 offset:12 ; 8-byte Folded Spill |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 112 | ; VMEM: v_mov_b32_e32 v[[V_SAVEEXEC_HI:[0-9]+]], s[[SAVEEXEC_HI]] |
Matt Arsenault | 3d46319 | 2016-11-01 22:55:07 +0000 | [diff] [blame^] | 113 | ; VMEM: buffer_store_dword v[[V_SAVEEXEC_HI]], off, s[0:3], s7 offset:16 ; 8-byte Folded Spill |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 114 | |
| 115 | ; GCN: s_mov_b64 exec, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}} |
| 116 | |
| 117 | ; GCN: s_waitcnt vmcnt(0) expcnt(0) |
| 118 | ; GCN-NEXT: ; mask branch [[END:BB[0-9]+_[0-9]+]] |
| 119 | ; GCN-NEXT: s_cbranch_execz [[END]] |
| 120 | |
| 121 | |
| 122 | ; GCN: [[LOOP:BB[0-9]+_[0-9]+]]: |
Matt Arsenault | 3d46319 | 2016-11-01 22:55:07 +0000 | [diff] [blame^] | 123 | ; GCN: buffer_load_dword v[[VAL_LOOP_RELOAD:[0-9]+]], off, s[0:3], s7 ; 4-byte Folded Reload |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 124 | ; GCN: v_subrev_i32_e32 [[VAL_LOOP:v[0-9]+]], vcc, v{{[0-9]+}}, v[[VAL_LOOP_RELOAD]] |
Matt Arsenault | 5d8eb25 | 2016-09-30 01:50:20 +0000 | [diff] [blame] | 125 | ; GCN: v_cmp_ne_u32_e32 vcc, |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 126 | ; GCN: s_and_b64 vcc, exec, vcc |
Matt Arsenault | 253640e | 2016-10-13 13:10:00 +0000 | [diff] [blame] | 127 | ; GCN: buffer_store_dword [[VAL_LOOP]], off, s[0:3], s7 offset:[[VAL_SUB_OFFSET:[0-9]+]] ; 4-byte Folded Spill |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 128 | ; GCN: s_waitcnt vmcnt(0) expcnt(0) |
| 129 | ; GCN-NEXT: s_cbranch_vccnz [[LOOP]] |
| 130 | |
| 131 | |
| 132 | ; GCN: [[END]]: |
| 133 | ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_LO_LANE]] |
| 134 | ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_HI_LANE]] |
| 135 | |
Matt Arsenault | 3d46319 | 2016-11-01 22:55:07 +0000 | [diff] [blame^] | 136 | ; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_LO:[0-9]+]], off, s[0:3], s7 offset:12 ; 8-byte Folded Reload |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 137 | ; VMEM: s_waitcnt vmcnt(0) |
| 138 | ; VMEM: v_readfirstlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], v[[V_RELOAD_SAVEEXEC_LO]] |
| 139 | |
Matt Arsenault | 3d46319 | 2016-11-01 22:55:07 +0000 | [diff] [blame^] | 140 | ; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_HI:[0-9]+]], off, s[0:3], s7 offset:16 ; 8-byte Folded Reload |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 141 | ; VMEM: s_waitcnt vmcnt(0) |
| 142 | ; VMEM: v_readfirstlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], v[[V_RELOAD_SAVEEXEC_HI]] |
| 143 | |
| 144 | ; GCN: s_or_b64 exec, exec, s{{\[}}[[S_RELOAD_SAVEEXEC_LO]]:[[S_RELOAD_SAVEEXEC_HI]]{{\]}} |
Matt Arsenault | 253640e | 2016-10-13 13:10:00 +0000 | [diff] [blame] | 145 | ; GCN: buffer_load_dword v[[VAL_END:[0-9]+]], off, s[0:3], s7 offset:[[VAL_SUB_OFFSET]] ; 4-byte Folded Reload |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 146 | |
| 147 | ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[VAL_END]] |
| 148 | define void @divergent_loop(i32 addrspace(1)* %out) #0 { |
| 149 | entry: |
| 150 | %tid = call i32 @llvm.amdgcn.workitem.id.x() |
| 151 | %load0 = load volatile i32, i32 addrspace(3)* undef |
| 152 | %cmp0 = icmp eq i32 %tid, 0 |
| 153 | br i1 %cmp0, label %loop, label %end |
| 154 | |
| 155 | loop: |
| 156 | %i = phi i32 [ %i.inc, %loop ], [ 0, %entry ] |
| 157 | %val = phi i32 [ %val.sub, %loop ], [ %load0, %entry ] |
| 158 | %load1 = load volatile i32, i32 addrspace(3)* undef |
| 159 | %i.inc = add i32 %i, 1 |
| 160 | %val.sub = sub i32 %val, %load1 |
| 161 | %cmp1 = icmp ne i32 %i, 256 |
| 162 | br i1 %cmp1, label %loop, label %end |
| 163 | |
| 164 | end: |
| 165 | %tmp4 = phi i32 [ %val.sub, %loop ], [ 0, %entry ] |
| 166 | store i32 %tmp4, i32 addrspace(1)* %out |
| 167 | ret void |
| 168 | } |
| 169 | |
| 170 | ; GCN-LABEL: {{^}}divergent_if_else_endif: |
| 171 | ; GCN: {{^}}; BB#0: |
| 172 | |
| 173 | ; GCN: s_mov_b32 m0, -1 |
| 174 | ; VMEM: ds_read_b32 [[LOAD0:v[0-9]+]] |
| 175 | |
Matt Arsenault | 5d8eb25 | 2016-09-30 01:50:20 +0000 | [diff] [blame] | 176 | ; GCN: v_cmp_ne_u32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], v0, |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 177 | |
| 178 | ; GCN: s_mov_b64 s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, exec |
| 179 | ; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}, [[CMP0]] |
| 180 | ; GCN: s_xor_b64 s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}}, s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}} |
| 181 | |
| 182 | ; Spill load |
Matt Arsenault | 253640e | 2016-10-13 13:10:00 +0000 | [diff] [blame] | 183 | ; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s7 ; 4-byte Folded Spill |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 184 | |
| 185 | ; Spill saved exec |
| 186 | ; VGPR: v_writelane_b32 [[SPILL_VGPR:v[0-9]+]], s[[SAVEEXEC_LO]], [[SAVEEXEC_LO_LANE:[0-9]+]] |
| 187 | ; VGPR: v_writelane_b32 [[SPILL_VGPR]], s[[SAVEEXEC_HI]], [[SAVEEXEC_HI_LANE:[0-9]+]] |
| 188 | |
| 189 | ; VMEM: v_mov_b32_e32 v[[V_SAVEEXEC_LO:[0-9]+]], s[[SAVEEXEC_LO]] |
Matt Arsenault | 253640e | 2016-10-13 13:10:00 +0000 | [diff] [blame] | 190 | ; VMEM: buffer_store_dword v[[V_SAVEEXEC_LO]], off, s[0:3], s7 offset:[[SAVEEXEC_LO_OFFSET:[0-9]+]] ; 8-byte Folded Spill |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 191 | ; VMEM: v_mov_b32_e32 v[[V_SAVEEXEC_HI:[0-9]+]], s[[SAVEEXEC_HI]] |
Matt Arsenault | 253640e | 2016-10-13 13:10:00 +0000 | [diff] [blame] | 192 | ; VMEM: buffer_store_dword v[[V_SAVEEXEC_HI]], off, s[0:3], s7 offset:[[SAVEEXEC_HI_OFFSET:[0-9]+]] ; 8-byte Folded Spill |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 193 | |
| 194 | ; GCN: s_mov_b64 exec, [[CMP0]] |
| 195 | ; GCN: s_waitcnt vmcnt(0) expcnt(0) |
| 196 | |
| 197 | ; FIXME: It makes no sense to put this skip here |
| 198 | ; GCN-NEXT: ; mask branch [[FLOW:BB[0-9]+_[0-9]+]] |
| 199 | ; GCN: s_cbranch_execz [[FLOW]] |
| 200 | ; GCN-NEXT: s_branch [[ELSE:BB[0-9]+_[0-9]+]] |
| 201 | |
| 202 | ; GCN: [[FLOW]]: ; %Flow |
| 203 | ; VGPR: v_readlane_b32 s[[FLOW_S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_LO_LANE]] |
| 204 | ; VGPR: v_readlane_b32 s[[FLOW_S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_HI_LANE]] |
| 205 | |
| 206 | |
Matt Arsenault | 253640e | 2016-10-13 13:10:00 +0000 | [diff] [blame] | 207 | ; VMEM: buffer_load_dword v[[FLOW_V_RELOAD_SAVEEXEC_LO:[0-9]+]], off, s[0:3], s7 offset:[[SAVEEXEC_LO_OFFSET]] |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 208 | ; VMEM: s_waitcnt vmcnt(0) |
| 209 | ; VMEM: v_readfirstlane_b32 s[[FLOW_S_RELOAD_SAVEEXEC_LO:[0-9]+]], v[[FLOW_V_RELOAD_SAVEEXEC_LO]] |
| 210 | |
Matt Arsenault | 253640e | 2016-10-13 13:10:00 +0000 | [diff] [blame] | 211 | ; VMEM: buffer_load_dword v[[FLOW_V_RELOAD_SAVEEXEC_HI:[0-9]+]], off, s[0:3], s7 offset:[[SAVEEXEC_HI_OFFSET]] ; 8-byte Folded Reload |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 212 | ; VMEM: s_waitcnt vmcnt(0) |
| 213 | ; VMEM: v_readfirstlane_b32 s[[FLOW_S_RELOAD_SAVEEXEC_HI:[0-9]+]], v[[FLOW_V_RELOAD_SAVEEXEC_HI]] |
| 214 | |
| 215 | ; GCN: s_or_saveexec_b64 s{{\[}}[[FLOW_S_RELOAD_SAVEEXEC_LO]]:[[FLOW_S_RELOAD_SAVEEXEC_HI]]{{\]}}, s{{\[}}[[FLOW_S_RELOAD_SAVEEXEC_LO]]:[[FLOW_S_RELOAD_SAVEEXEC_HI]]{{\]}} |
| 216 | |
| 217 | ; Regular spill value restored after exec modification |
Matt Arsenault | 253640e | 2016-10-13 13:10:00 +0000 | [diff] [blame] | 218 | ; GCN: buffer_load_dword [[FLOW_VAL:v[0-9]+]], off, s[0:3], s7 offset:[[FLOW_VAL_OFFSET:[0-9]+]] ; 4-byte Folded Reload |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 219 | |
| 220 | |
| 221 | ; Spill saved exec |
| 222 | ; VGPR: v_writelane_b32 [[SPILL_VGPR]], s[[FLOW_S_RELOAD_SAVEEXEC_LO]], [[FLOW_SAVEEXEC_LO_LANE:[0-9]+]] |
| 223 | ; VGPR: v_writelane_b32 [[SPILL_VGPR]], s[[FLOW_S_RELOAD_SAVEEXEC_HI]], [[FLOW_SAVEEXEC_HI_LANE:[0-9]+]] |
| 224 | |
| 225 | |
| 226 | ; VMEM: v_mov_b32_e32 v[[FLOW_V_SAVEEXEC_LO:[0-9]+]], s[[FLOW_S_RELOAD_SAVEEXEC_LO]] |
Matt Arsenault | 253640e | 2016-10-13 13:10:00 +0000 | [diff] [blame] | 227 | ; VMEM: buffer_store_dword v[[FLOW_V_SAVEEXEC_LO]], off, s[0:3], s7 offset:[[FLOW_SAVEEXEC_LO_OFFSET:[0-9]+]] ; 8-byte Folded Spill |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 228 | ; VMEM: v_mov_b32_e32 v[[FLOW_V_SAVEEXEC_HI:[0-9]+]], s[[FLOW_S_RELOAD_SAVEEXEC_HI]] |
Matt Arsenault | 253640e | 2016-10-13 13:10:00 +0000 | [diff] [blame] | 229 | ; VMEM: buffer_store_dword v[[FLOW_V_SAVEEXEC_HI]], off, s[0:3], s7 offset:[[FLOW_SAVEEXEC_HI_OFFSET:[0-9]+]] ; 8-byte Folded Spill |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 230 | |
Matt Arsenault | 253640e | 2016-10-13 13:10:00 +0000 | [diff] [blame] | 231 | ; GCN: buffer_store_dword [[FLOW_VAL]], off, s[0:3], s7 offset:[[RESULT_OFFSET:[0-9]+]] ; 4-byte Folded Spill |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 232 | ; GCN: s_xor_b64 exec, exec, s{{\[}}[[FLOW_S_RELOAD_SAVEEXEC_LO]]:[[FLOW_S_RELOAD_SAVEEXEC_HI]]{{\]}} |
| 233 | ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| 234 | ; GCN-NEXT: ; mask branch [[ENDIF:BB[0-9]+_[0-9]+]] |
| 235 | ; GCN-NEXT: s_cbranch_execz [[ENDIF]] |
| 236 | |
| 237 | |
| 238 | ; GCN: BB{{[0-9]+}}_2: ; %if |
| 239 | ; GCN: ds_read_b32 |
Matt Arsenault | 253640e | 2016-10-13 13:10:00 +0000 | [diff] [blame] | 240 | ; GCN: buffer_load_dword v[[LOAD0_RELOAD:[0-9]+]], off, s[0:3], s7 ; 4-byte Folded Reload |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 241 | ; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, v{{[0-9]+}}, v[[LOAD0_RELOAD]] |
Matt Arsenault | 253640e | 2016-10-13 13:10:00 +0000 | [diff] [blame] | 242 | ; GCN: buffer_store_dword [[ADD]], off, s[0:3], s7 offset:[[RESULT_OFFSET]] ; 4-byte Folded Spill |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 243 | ; GCN: s_waitcnt vmcnt(0) expcnt(0) |
| 244 | ; GCN-NEXT: s_branch [[ENDIF]] |
| 245 | |
| 246 | ; GCN: [[ELSE]]: ; %else |
Matt Arsenault | 253640e | 2016-10-13 13:10:00 +0000 | [diff] [blame] | 247 | ; GCN: buffer_load_dword v[[LOAD0_RELOAD:[0-9]+]], off, s[0:3], s7 ; 4-byte Folded Reload |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 248 | ; GCN: v_subrev_i32_e32 [[SUB:v[0-9]+]], vcc, v{{[0-9]+}}, v[[LOAD0_RELOAD]] |
Matt Arsenault | 253640e | 2016-10-13 13:10:00 +0000 | [diff] [blame] | 249 | ; GCN: buffer_store_dword [[SUB]], off, s[0:3], s7 offset:[[FLOW_RESULT_OFFSET:[0-9]+]] ; 4-byte Folded Spill |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 250 | ; GCN: s_waitcnt vmcnt(0) expcnt(0) |
| 251 | ; GCN-NEXT: s_branch [[FLOW]] |
| 252 | |
| 253 | ; GCN: [[ENDIF]]: |
| 254 | ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[FLOW_SAVEEXEC_LO_LANE]] |
| 255 | ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[FLOW_SAVEEXEC_HI_LANE]] |
| 256 | |
| 257 | |
Matt Arsenault | 253640e | 2016-10-13 13:10:00 +0000 | [diff] [blame] | 258 | ; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_LO:[0-9]+]], off, s[0:3], s7 offset:[[FLOW_SAVEEXEC_LO_OFFSET]] ; 8-byte Folded Reload |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 259 | ; VMEM: s_waitcnt vmcnt(0) |
| 260 | ; VMEM: v_readfirstlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], v[[V_RELOAD_SAVEEXEC_LO]] |
| 261 | |
Matt Arsenault | 253640e | 2016-10-13 13:10:00 +0000 | [diff] [blame] | 262 | ; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_HI:[0-9]+]], off, s[0:3], s7 offset:[[FLOW_SAVEEXEC_HI_OFFSET]] ; 8-byte Folded Reload |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 263 | ; VMEM: s_waitcnt vmcnt(0) |
| 264 | ; VMEM: v_readfirstlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], v[[V_RELOAD_SAVEEXEC_HI]] |
| 265 | |
| 266 | ; GCN: s_or_b64 exec, exec, s{{\[}}[[S_RELOAD_SAVEEXEC_LO]]:[[S_RELOAD_SAVEEXEC_HI]]{{\]}} |
| 267 | |
Matt Arsenault | 253640e | 2016-10-13 13:10:00 +0000 | [diff] [blame] | 268 | ; GCN: buffer_load_dword v[[RESULT:[0-9]+]], off, s[0:3], s7 offset:[[RESULT_OFFSET]] ; 4-byte Folded Reload |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 269 | ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RESULT]] |
| 270 | define void @divergent_if_else_endif(i32 addrspace(1)* %out) #0 { |
| 271 | entry: |
| 272 | %tid = call i32 @llvm.amdgcn.workitem.id.x() |
| 273 | %load0 = load volatile i32, i32 addrspace(3)* undef |
| 274 | %cmp0 = icmp eq i32 %tid, 0 |
| 275 | br i1 %cmp0, label %if, label %else |
| 276 | |
| 277 | if: |
| 278 | %load1 = load volatile i32, i32 addrspace(3)* undef |
| 279 | %val0 = add i32 %load0, %load1 |
| 280 | br label %endif |
| 281 | |
| 282 | else: |
| 283 | %load2 = load volatile i32, i32 addrspace(3)* undef |
| 284 | %val1 = sub i32 %load0, %load2 |
| 285 | br label %endif |
| 286 | |
| 287 | endif: |
| 288 | %result = phi i32 [ %val0, %if ], [ %val1, %else ] |
| 289 | store i32 %result, i32 addrspace(1)* %out |
| 290 | ret void |
| 291 | } |
| 292 | |
| 293 | declare i32 @llvm.amdgcn.workitem.id.x() #1 |
| 294 | |
| 295 | attributes #0 = { nounwind } |
| 296 | attributes #1 = { nounwind readnone } |