Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 1 | ; RUN: llc -O0 -mtriple=amdgcn--amdhsa -march=amdgcn -amdgpu-spill-sgpr-to-vgpr=0 -verify-machineinstrs < %s | FileCheck -check-prefix=VMEM -check-prefix=GCN %s |
| 2 | ; RUN: llc -O0 -mtriple=amdgcn--amdhsa -march=amdgcn -amdgpu-spill-sgpr-to-vgpr=1 -verify-machineinstrs < %s | FileCheck -check-prefix=VGPR -check-prefix=GCN %s |
| 3 | |
; Verify the registers used for tracking exec mask changes when all
; registers are spilled at the end of the block. The placement of the
; SGPR spills relative to the exec modifications is important.
| 7 | |
| 8 | ; FIXME: This checks with SGPR to VGPR spilling disabled, but this may |
| 9 | ; not work correctly in cases where no workitems take a branch. |
| 10 | |
| 11 | |
| 12 | ; GCN-LABEL: {{^}}divergent_if_endif: |
Matt Arsenault | e0bf7d0 | 2017-02-21 19:12:08 +0000 | [diff] [blame] | 13 | ; VGPR: workitem_private_segment_byte_size = 12{{$}} |
| 14 | |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 15 | |
| 16 | ; GCN: {{^}}; BB#0: |
| 17 | ; GCN: s_mov_b32 m0, -1 |
| 18 | ; GCN: ds_read_b32 [[LOAD0:v[0-9]+]] |
| 19 | |
; GCN: v_cmp_eq_u32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, v0
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 21 | ; GCN: s_mov_b64 s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, exec |
| 22 | ; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}, [[CMP0]] |
| 23 | ; GCN: s_xor_b64 s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}}, s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}} |
| 24 | |
| 25 | ; Spill saved exec |
| 26 | ; VGPR: v_writelane_b32 [[SPILL_VGPR:v[0-9]+]], s[[SAVEEXEC_LO]], [[SAVEEXEC_LO_LANE:[0-9]+]] |
| 27 | ; VGPR: v_writelane_b32 [[SPILL_VGPR]], s[[SAVEEXEC_HI]], [[SAVEEXEC_HI_LANE:[0-9]+]] |
| 28 | |
| 29 | |
| 30 | ; VMEM: v_mov_b32_e32 v[[V_SAVEEXEC_LO:[0-9]+]], s[[SAVEEXEC_LO]] |
Matt Arsenault | 707780b | 2017-02-22 21:05:25 +0000 | [diff] [blame] | 31 | ; VMEM: buffer_store_dword v[[V_SAVEEXEC_LO]], off, s[0:3], s7 offset:4 ; 4-byte Folded Spill |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 32 | ; VMEM: v_mov_b32_e32 v[[V_SAVEEXEC_HI:[0-9]+]], s[[SAVEEXEC_HI]] |
Matt Arsenault | 707780b | 2017-02-22 21:05:25 +0000 | [diff] [blame] | 33 | ; VMEM: buffer_store_dword v[[V_SAVEEXEC_HI]], off, s[0:3], s7 offset:8 ; 4-byte Folded Spill |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 34 | |
| 35 | ; Spill load |
Matt Arsenault | 707780b | 2017-02-22 21:05:25 +0000 | [diff] [blame] | 36 | ; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s7 offset:[[LOAD0_OFFSET:[0-9]+]] ; 4-byte Folded Spill |
Matt Arsenault | e0bf7d0 | 2017-02-21 19:12:08 +0000 | [diff] [blame] | 37 | |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 38 | ; GCN: s_mov_b64 exec, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}} |
| 39 | |
| 40 | ; GCN: s_waitcnt vmcnt(0) expcnt(0) |
| 41 | ; GCN: mask branch [[ENDIF:BB[0-9]+_[0-9]+]] |
| 42 | |
| 43 | ; GCN: {{^}}BB{{[0-9]+}}_1: ; %if |
| 44 | ; GCN: s_mov_b32 m0, -1 |
| 45 | ; GCN: ds_read_b32 [[LOAD1:v[0-9]+]] |
Matt Arsenault | 707780b | 2017-02-22 21:05:25 +0000 | [diff] [blame] | 46 | ; GCN: buffer_load_dword [[RELOAD_LOAD0:v[0-9]+]], off, s[0:3], s7 offset:[[LOAD0_OFFSET]] ; 4-byte Folded Reload |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 47 | ; GCN: s_waitcnt vmcnt(0) |
| 48 | |
| 49 | ; Spill val register |
| 50 | ; GCN: v_add_i32_e32 [[VAL:v[0-9]+]], vcc, [[LOAD1]], [[RELOAD_LOAD0]] |
Matt Arsenault | 253640e | 2016-10-13 13:10:00 +0000 | [diff] [blame] | 51 | ; GCN: buffer_store_dword [[VAL]], off, s[0:3], s7 offset:[[VAL_OFFSET:[0-9]+]] ; 4-byte Folded Spill |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 52 | ; GCN: s_waitcnt vmcnt(0) |
| 53 | |
| 54 | ; VMEM: [[ENDIF]]: |
| 55 | ; Reload and restore exec mask |
| 56 | ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_LO_LANE]] |
| 57 | ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_HI_LANE]] |
| 58 | |
| 59 | |
| 60 | |
Matt Arsenault | 707780b | 2017-02-22 21:05:25 +0000 | [diff] [blame] | 61 | ; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_LO:[0-9]+]], off, s[0:3], s7 offset:4 ; 4-byte Folded Reload |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 62 | ; VMEM: s_waitcnt vmcnt(0) |
| 63 | ; VMEM: v_readfirstlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], v[[V_RELOAD_SAVEEXEC_LO]] |
| 64 | |
Matt Arsenault | 707780b | 2017-02-22 21:05:25 +0000 | [diff] [blame] | 65 | ; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_HI:[0-9]+]], off, s[0:3], s7 offset:8 ; 4-byte Folded Reload |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 66 | ; VMEM: s_waitcnt vmcnt(0) |
| 67 | ; VMEM: v_readfirstlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], v[[V_RELOAD_SAVEEXEC_HI]] |
| 68 | |
| 69 | ; GCN: s_or_b64 exec, exec, s{{\[}}[[S_RELOAD_SAVEEXEC_LO]]:[[S_RELOAD_SAVEEXEC_HI]]{{\]}} |
| 70 | |
| 71 | ; Restore val |
Matt Arsenault | 253640e | 2016-10-13 13:10:00 +0000 | [diff] [blame] | 72 | ; GCN: buffer_load_dword [[RELOAD_VAL:v[0-9]+]], off, s[0:3], s7 offset:[[VAL_OFFSET]] ; 4-byte Folded Reload |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 73 | |
| 74 | ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RELOAD_VAL]] |
| 75 | define void @divergent_if_endif(i32 addrspace(1)* %out) #0 { |
| 76 | entry: |
| 77 | %tid = call i32 @llvm.amdgcn.workitem.id.x() |
| 78 | %load0 = load volatile i32, i32 addrspace(3)* undef |
| 79 | %cmp0 = icmp eq i32 %tid, 0 |
| 80 | br i1 %cmp0, label %if, label %endif |
| 81 | |
| 82 | if: |
| 83 | %load1 = load volatile i32, i32 addrspace(3)* undef |
| 84 | %val = add i32 %load0, %load1 |
| 85 | br label %endif |
| 86 | |
| 87 | endif: |
| 88 | %tmp4 = phi i32 [ %val, %if ], [ 0, %entry ] |
| 89 | store i32 %tmp4, i32 addrspace(1)* %out |
| 90 | ret void |
| 91 | } |
| 92 | |
| 93 | ; GCN-LABEL: {{^}}divergent_loop: |
Matt Arsenault | e0bf7d0 | 2017-02-21 19:12:08 +0000 | [diff] [blame] | 94 | ; VGPR: workitem_private_segment_byte_size = 16{{$}} |
| 95 | |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 96 | ; GCN: {{^}}; BB#0: |
| 97 | |
| 98 | ; GCN: s_mov_b32 m0, -1 |
| 99 | ; GCN: ds_read_b32 [[LOAD0:v[0-9]+]] |
| 100 | |
; GCN: v_cmp_eq_u32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, v0
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 102 | |
| 103 | ; GCN: s_mov_b64 s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, exec |
; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}, [[CMP0]]
| 105 | ; GCN: s_xor_b64 s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}}, s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}} |
| 106 | |
Matt Arsenault | 3d46319 | 2016-11-01 22:55:07 +0000 | [diff] [blame] | 107 | ; Spill load |
Matt Arsenault | 707780b | 2017-02-22 21:05:25 +0000 | [diff] [blame] | 108 | ; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s7 offset:4 ; 4-byte Folded Spill |
Matt Arsenault | 3d46319 | 2016-11-01 22:55:07 +0000 | [diff] [blame] | 109 | |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 110 | ; Spill saved exec |
| 111 | ; VGPR: v_writelane_b32 [[SPILL_VGPR:v[0-9]+]], s[[SAVEEXEC_LO]], [[SAVEEXEC_LO_LANE:[0-9]+]] |
| 112 | ; VGPR: v_writelane_b32 [[SPILL_VGPR]], s[[SAVEEXEC_HI]], [[SAVEEXEC_HI_LANE:[0-9]+]] |
| 113 | |
| 114 | |
| 115 | ; VMEM: v_mov_b32_e32 v[[V_SAVEEXEC_LO:[0-9]+]], s[[SAVEEXEC_LO]] |
Matt Arsenault | 707780b | 2017-02-22 21:05:25 +0000 | [diff] [blame] | 116 | ; VMEM: buffer_store_dword v[[V_SAVEEXEC_LO]], off, s[0:3], s7 offset:20 ; 4-byte Folded Spill |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 117 | ; VMEM: v_mov_b32_e32 v[[V_SAVEEXEC_HI:[0-9]+]], s[[SAVEEXEC_HI]] |
Matt Arsenault | 707780b | 2017-02-22 21:05:25 +0000 | [diff] [blame] | 118 | ; VMEM: buffer_store_dword v[[V_SAVEEXEC_HI]], off, s[0:3], s7 offset:24 ; 4-byte Folded Spill |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 119 | |
| 120 | ; GCN: s_mov_b64 exec, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}} |
| 121 | |
| 122 | ; GCN: s_waitcnt vmcnt(0) expcnt(0) |
| 123 | ; GCN-NEXT: ; mask branch [[END:BB[0-9]+_[0-9]+]] |
| 124 | ; GCN-NEXT: s_cbranch_execz [[END]] |
| 125 | |
| 126 | |
| 127 | ; GCN: [[LOOP:BB[0-9]+_[0-9]+]]: |
Matt Arsenault | 707780b | 2017-02-22 21:05:25 +0000 | [diff] [blame] | 128 | ; GCN: buffer_load_dword v[[VAL_LOOP_RELOAD:[0-9]+]], off, s[0:3], s7 offset:4 ; 4-byte Folded Reload |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 129 | ; GCN: v_subrev_i32_e32 [[VAL_LOOP:v[0-9]+]], vcc, v{{[0-9]+}}, v[[VAL_LOOP_RELOAD]] |
Matt Arsenault | 5d8eb25 | 2016-09-30 01:50:20 +0000 | [diff] [blame] | 130 | ; GCN: v_cmp_ne_u32_e32 vcc, |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 131 | ; GCN: s_and_b64 vcc, exec, vcc |
Matt Arsenault | 253640e | 2016-10-13 13:10:00 +0000 | [diff] [blame] | 132 | ; GCN: buffer_store_dword [[VAL_LOOP]], off, s[0:3], s7 offset:[[VAL_SUB_OFFSET:[0-9]+]] ; 4-byte Folded Spill |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 133 | ; GCN: s_waitcnt vmcnt(0) expcnt(0) |
| 134 | ; GCN-NEXT: s_cbranch_vccnz [[LOOP]] |
| 135 | |
| 136 | |
| 137 | ; GCN: [[END]]: |
| 138 | ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_LO_LANE]] |
| 139 | ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_HI_LANE]] |
| 140 | |
Matt Arsenault | 707780b | 2017-02-22 21:05:25 +0000 | [diff] [blame] | 141 | ; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_LO:[0-9]+]], off, s[0:3], s7 offset:20 ; 4-byte Folded Reload |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 142 | ; VMEM: s_waitcnt vmcnt(0) |
| 143 | ; VMEM: v_readfirstlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], v[[V_RELOAD_SAVEEXEC_LO]] |
| 144 | |
Matt Arsenault | 707780b | 2017-02-22 21:05:25 +0000 | [diff] [blame] | 145 | ; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_HI:[0-9]+]], off, s[0:3], s7 offset:24 ; 4-byte Folded Reload |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 146 | ; VMEM: s_waitcnt vmcnt(0) |
| 147 | ; VMEM: v_readfirstlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], v[[V_RELOAD_SAVEEXEC_HI]] |
| 148 | |
| 149 | ; GCN: s_or_b64 exec, exec, s{{\[}}[[S_RELOAD_SAVEEXEC_LO]]:[[S_RELOAD_SAVEEXEC_HI]]{{\]}} |
Matt Arsenault | 253640e | 2016-10-13 13:10:00 +0000 | [diff] [blame] | 150 | ; GCN: buffer_load_dword v[[VAL_END:[0-9]+]], off, s[0:3], s7 offset:[[VAL_SUB_OFFSET]] ; 4-byte Folded Reload |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 151 | |
| 152 | ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[VAL_END]] |
| 153 | define void @divergent_loop(i32 addrspace(1)* %out) #0 { |
| 154 | entry: |
| 155 | %tid = call i32 @llvm.amdgcn.workitem.id.x() |
| 156 | %load0 = load volatile i32, i32 addrspace(3)* undef |
| 157 | %cmp0 = icmp eq i32 %tid, 0 |
| 158 | br i1 %cmp0, label %loop, label %end |
| 159 | |
| 160 | loop: |
| 161 | %i = phi i32 [ %i.inc, %loop ], [ 0, %entry ] |
| 162 | %val = phi i32 [ %val.sub, %loop ], [ %load0, %entry ] |
| 163 | %load1 = load volatile i32, i32 addrspace(3)* undef |
| 164 | %i.inc = add i32 %i, 1 |
| 165 | %val.sub = sub i32 %val, %load1 |
| 166 | %cmp1 = icmp ne i32 %i, 256 |
| 167 | br i1 %cmp1, label %loop, label %end |
| 168 | |
| 169 | end: |
| 170 | %tmp4 = phi i32 [ %val.sub, %loop ], [ 0, %entry ] |
| 171 | store i32 %tmp4, i32 addrspace(1)* %out |
| 172 | ret void |
| 173 | } |
| 174 | |
| 175 | ; GCN-LABEL: {{^}}divergent_if_else_endif: |
| 176 | ; GCN: {{^}}; BB#0: |
| 177 | |
| 178 | ; GCN: s_mov_b32 m0, -1 |
| 179 | ; VMEM: ds_read_b32 [[LOAD0:v[0-9]+]] |
| 180 | |
; GCN: v_cmp_ne_u32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], v0,
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 182 | |
| 183 | ; GCN: s_mov_b64 s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, exec |
; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}, [[CMP0]]
| 185 | ; GCN: s_xor_b64 s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}}, s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}} |
| 186 | |
| 187 | ; Spill load |
Matt Arsenault | 707780b | 2017-02-22 21:05:25 +0000 | [diff] [blame] | 188 | ; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s7 offset:4 ; 4-byte Folded Spill |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 189 | |
| 190 | ; Spill saved exec |
| 191 | ; VGPR: v_writelane_b32 [[SPILL_VGPR:v[0-9]+]], s[[SAVEEXEC_LO]], [[SAVEEXEC_LO_LANE:[0-9]+]] |
| 192 | ; VGPR: v_writelane_b32 [[SPILL_VGPR]], s[[SAVEEXEC_HI]], [[SAVEEXEC_HI_LANE:[0-9]+]] |
| 193 | |
| 194 | ; VMEM: v_mov_b32_e32 v[[V_SAVEEXEC_LO:[0-9]+]], s[[SAVEEXEC_LO]] |
Marek Olsak | 79c0587 | 2016-11-25 17:37:09 +0000 | [diff] [blame] | 195 | ; VMEM: buffer_store_dword v[[V_SAVEEXEC_LO]], off, s[0:3], s7 offset:[[SAVEEXEC_LO_OFFSET:[0-9]+]] ; 4-byte Folded Spill |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 196 | ; VMEM: v_mov_b32_e32 v[[V_SAVEEXEC_HI:[0-9]+]], s[[SAVEEXEC_HI]] |
Marek Olsak | 79c0587 | 2016-11-25 17:37:09 +0000 | [diff] [blame] | 197 | ; VMEM: buffer_store_dword v[[V_SAVEEXEC_HI]], off, s[0:3], s7 offset:[[SAVEEXEC_HI_OFFSET:[0-9]+]] ; 4-byte Folded Spill |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 198 | |
| 199 | ; GCN: s_mov_b64 exec, [[CMP0]] |
| 200 | ; GCN: s_waitcnt vmcnt(0) expcnt(0) |
| 201 | |
| 202 | ; FIXME: It makes no sense to put this skip here |
| 203 | ; GCN-NEXT: ; mask branch [[FLOW:BB[0-9]+_[0-9]+]] |
| 204 | ; GCN: s_cbranch_execz [[FLOW]] |
| 205 | ; GCN-NEXT: s_branch [[ELSE:BB[0-9]+_[0-9]+]] |
| 206 | |
| 207 | ; GCN: [[FLOW]]: ; %Flow |
| 208 | ; VGPR: v_readlane_b32 s[[FLOW_S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_LO_LANE]] |
| 209 | ; VGPR: v_readlane_b32 s[[FLOW_S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_HI_LANE]] |
| 210 | |
| 211 | |
Matt Arsenault | 253640e | 2016-10-13 13:10:00 +0000 | [diff] [blame] | 212 | ; VMEM: buffer_load_dword v[[FLOW_V_RELOAD_SAVEEXEC_LO:[0-9]+]], off, s[0:3], s7 offset:[[SAVEEXEC_LO_OFFSET]] |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 213 | ; VMEM: s_waitcnt vmcnt(0) |
| 214 | ; VMEM: v_readfirstlane_b32 s[[FLOW_S_RELOAD_SAVEEXEC_LO:[0-9]+]], v[[FLOW_V_RELOAD_SAVEEXEC_LO]] |
| 215 | |
Marek Olsak | 79c0587 | 2016-11-25 17:37:09 +0000 | [diff] [blame] | 216 | ; VMEM: buffer_load_dword v[[FLOW_V_RELOAD_SAVEEXEC_HI:[0-9]+]], off, s[0:3], s7 offset:[[SAVEEXEC_HI_OFFSET]] ; 4-byte Folded Reload |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 217 | ; VMEM: s_waitcnt vmcnt(0) |
| 218 | ; VMEM: v_readfirstlane_b32 s[[FLOW_S_RELOAD_SAVEEXEC_HI:[0-9]+]], v[[FLOW_V_RELOAD_SAVEEXEC_HI]] |
| 219 | |
| 220 | ; GCN: s_or_saveexec_b64 s{{\[}}[[FLOW_S_RELOAD_SAVEEXEC_LO]]:[[FLOW_S_RELOAD_SAVEEXEC_HI]]{{\]}}, s{{\[}}[[FLOW_S_RELOAD_SAVEEXEC_LO]]:[[FLOW_S_RELOAD_SAVEEXEC_HI]]{{\]}} |
| 221 | |
| 222 | ; Regular spill value restored after exec modification |
Matt Arsenault | 253640e | 2016-10-13 13:10:00 +0000 | [diff] [blame] | 223 | ; GCN: buffer_load_dword [[FLOW_VAL:v[0-9]+]], off, s[0:3], s7 offset:[[FLOW_VAL_OFFSET:[0-9]+]] ; 4-byte Folded Reload |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 224 | |
| 225 | |
| 226 | ; Spill saved exec |
| 227 | ; VGPR: v_writelane_b32 [[SPILL_VGPR]], s[[FLOW_S_RELOAD_SAVEEXEC_LO]], [[FLOW_SAVEEXEC_LO_LANE:[0-9]+]] |
| 228 | ; VGPR: v_writelane_b32 [[SPILL_VGPR]], s[[FLOW_S_RELOAD_SAVEEXEC_HI]], [[FLOW_SAVEEXEC_HI_LANE:[0-9]+]] |
| 229 | |
| 230 | |
| 231 | ; VMEM: v_mov_b32_e32 v[[FLOW_V_SAVEEXEC_LO:[0-9]+]], s[[FLOW_S_RELOAD_SAVEEXEC_LO]] |
Marek Olsak | 79c0587 | 2016-11-25 17:37:09 +0000 | [diff] [blame] | 232 | ; VMEM: buffer_store_dword v[[FLOW_V_SAVEEXEC_LO]], off, s[0:3], s7 offset:[[FLOW_SAVEEXEC_LO_OFFSET:[0-9]+]] ; 4-byte Folded Spill |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 233 | ; VMEM: v_mov_b32_e32 v[[FLOW_V_SAVEEXEC_HI:[0-9]+]], s[[FLOW_S_RELOAD_SAVEEXEC_HI]] |
Marek Olsak | 79c0587 | 2016-11-25 17:37:09 +0000 | [diff] [blame] | 234 | ; VMEM: buffer_store_dword v[[FLOW_V_SAVEEXEC_HI]], off, s[0:3], s7 offset:[[FLOW_SAVEEXEC_HI_OFFSET:[0-9]+]] ; 4-byte Folded Spill |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 235 | |
Matt Arsenault | 253640e | 2016-10-13 13:10:00 +0000 | [diff] [blame] | 236 | ; GCN: buffer_store_dword [[FLOW_VAL]], off, s[0:3], s7 offset:[[RESULT_OFFSET:[0-9]+]] ; 4-byte Folded Spill |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 237 | ; GCN: s_xor_b64 exec, exec, s{{\[}}[[FLOW_S_RELOAD_SAVEEXEC_LO]]:[[FLOW_S_RELOAD_SAVEEXEC_HI]]{{\]}} |
| 238 | ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
| 239 | ; GCN-NEXT: ; mask branch [[ENDIF:BB[0-9]+_[0-9]+]] |
| 240 | ; GCN-NEXT: s_cbranch_execz [[ENDIF]] |
| 241 | |
| 242 | |
| 243 | ; GCN: BB{{[0-9]+}}_2: ; %if |
| 244 | ; GCN: ds_read_b32 |
Matt Arsenault | 707780b | 2017-02-22 21:05:25 +0000 | [diff] [blame] | 245 | ; GCN: buffer_load_dword v[[LOAD0_RELOAD:[0-9]+]], off, s[0:3], s7 offset:4 ; 4-byte Folded Reload |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 246 | ; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, v{{[0-9]+}}, v[[LOAD0_RELOAD]] |
Matt Arsenault | 253640e | 2016-10-13 13:10:00 +0000 | [diff] [blame] | 247 | ; GCN: buffer_store_dword [[ADD]], off, s[0:3], s7 offset:[[RESULT_OFFSET]] ; 4-byte Folded Spill |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 248 | ; GCN: s_waitcnt vmcnt(0) expcnt(0) |
| 249 | ; GCN-NEXT: s_branch [[ENDIF:BB[0-9]+_[0-9]+]] |
| 250 | |
| 251 | ; GCN: [[ELSE]]: ; %else |
Matt Arsenault | 707780b | 2017-02-22 21:05:25 +0000 | [diff] [blame] | 252 | ; GCN: buffer_load_dword v[[LOAD0_RELOAD:[0-9]+]], off, s[0:3], s7 offset:4 ; 4-byte Folded Reload |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 253 | ; GCN: v_subrev_i32_e32 [[SUB:v[0-9]+]], vcc, v{{[0-9]+}}, v[[LOAD0_RELOAD]] |
; GCN: buffer_store_dword [[SUB]], off, s[0:3], s7 offset:[[FLOW_RESULT_OFFSET:[0-9]+]] ; 4-byte Folded Spill
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 255 | ; GCN: s_waitcnt vmcnt(0) expcnt(0) |
| 256 | ; GCN-NEXT: s_branch [[FLOW]] |
| 257 | |
| 258 | ; GCN: [[ENDIF]]: |
| 259 | ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[FLOW_SAVEEXEC_LO_LANE]] |
| 260 | ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[FLOW_SAVEEXEC_HI_LANE]] |
| 261 | |
| 262 | |
Marek Olsak | 79c0587 | 2016-11-25 17:37:09 +0000 | [diff] [blame] | 263 | ; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_LO:[0-9]+]], off, s[0:3], s7 offset:[[FLOW_SAVEEXEC_LO_OFFSET]] ; 4-byte Folded Reload |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 264 | ; VMEM: s_waitcnt vmcnt(0) |
| 265 | ; VMEM: v_readfirstlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], v[[V_RELOAD_SAVEEXEC_LO]] |
| 266 | |
Marek Olsak | 79c0587 | 2016-11-25 17:37:09 +0000 | [diff] [blame] | 267 | ; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_HI:[0-9]+]], off, s[0:3], s7 offset:[[FLOW_SAVEEXEC_HI_OFFSET]] ; 4-byte Folded Reload |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 268 | ; VMEM: s_waitcnt vmcnt(0) |
| 269 | ; VMEM: v_readfirstlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], v[[V_RELOAD_SAVEEXEC_HI]] |
| 270 | |
| 271 | ; GCN: s_or_b64 exec, exec, s{{\[}}[[S_RELOAD_SAVEEXEC_LO]]:[[S_RELOAD_SAVEEXEC_HI]]{{\]}} |
| 272 | |
Matt Arsenault | 253640e | 2016-10-13 13:10:00 +0000 | [diff] [blame] | 273 | ; GCN: buffer_load_dword v[[RESULT:[0-9]+]], off, s[0:3], s7 offset:[[RESULT_OFFSET]] ; 4-byte Folded Reload |
Matt Arsenault | e674075 | 2016-09-29 01:44:16 +0000 | [diff] [blame] | 274 | ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RESULT]] |
| 275 | define void @divergent_if_else_endif(i32 addrspace(1)* %out) #0 { |
| 276 | entry: |
| 277 | %tid = call i32 @llvm.amdgcn.workitem.id.x() |
| 278 | %load0 = load volatile i32, i32 addrspace(3)* undef |
| 279 | %cmp0 = icmp eq i32 %tid, 0 |
| 280 | br i1 %cmp0, label %if, label %else |
| 281 | |
| 282 | if: |
| 283 | %load1 = load volatile i32, i32 addrspace(3)* undef |
| 284 | %val0 = add i32 %load0, %load1 |
| 285 | br label %endif |
| 286 | |
| 287 | else: |
| 288 | %load2 = load volatile i32, i32 addrspace(3)* undef |
| 289 | %val1 = sub i32 %load0, %load2 |
| 290 | br label %endif |
| 291 | |
| 292 | endif: |
| 293 | %result = phi i32 [ %val0, %if ], [ %val1, %else ] |
| 294 | store i32 %result, i32 addrspace(1)* %out |
| 295 | ret void |
| 296 | } |
| 297 | |
| 298 | declare i32 @llvm.amdgcn.workitem.id.x() #1 |
| 299 | |
| 300 | attributes #0 = { nounwind } |
| 301 | attributes #1 = { nounwind readnone } |