//=- AArch64InstrAtomics.td - AArch64 Atomic codegen support -*- tablegen -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// AArch64 Atomic operand code-gen constructs.
//
//===----------------------------------------------------------------------===//

//===----------------------------------
// Atomic fences
//===----------------------------------
let AddedComplexity = 15, Size = 0 in
def CompilerBarrier : Pseudo<(outs), (ins i32imm:$ordering),
                             [(atomic_fence imm:$ordering, 0)]>, Sched<[]>;
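// Ordering value 4 is AtomicOrdering::Acquire: an acquire-only fence only has
// to order loads, so it should be satisfiable with "dmb ishld" (barrier option
// 0x9); anything stronger falls through to a full "dmb ish" (option 0xb).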
def : Pat<(atomic_fence (i64 4), (imm)), (DMB (i32 0x9))>;
def : Pat<(atomic_fence (imm), (imm)), (DMB (i32 0xb))>;

//===----------------------------------
// Atomic loads
//===----------------------------------

// When they're actually atomic, only one addressing mode (GPR64sp) is
// supported, but when they're relaxed and anything can be used, all the
// standard modes would be valid and may give efficiency gains.
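// For example, "load atomic i32, i32* %p acquire, align 4" is expected to
// select to LDARW, while a monotonic load of the same type is free to use the
// register-offset, scaled-immediate or unscaled-immediate LDR forms below.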

// An atomic load operation that actually needs acquire semantics.
class acquiring_load<PatFrag base>
  : PatFrag<(ops node:$ptr), (base node:$ptr)> {
  let IsAtomic = 1;
  let IsAtomicOrderingAcquireOrStronger = 1;
}

// An atomic load operation that does not need either acquire or release
// semantics.
class relaxed_load<PatFrag base>
  : PatFrag<(ops node:$ptr), (base node:$ptr)> {
  let IsAtomic = 1;
  let IsAtomicOrderingAcquireOrStronger = 0;
}

// 8-bit loads
def : Pat<(acquiring_load<atomic_load_8> GPR64sp:$ptr), (LDARB GPR64sp:$ptr)>;
def : Pat<(relaxed_load<atomic_load_8> (ro_Windexed8 GPR64sp:$Rn, GPR32:$Rm,
                                                     ro_Wextend8:$offset)),
          (LDRBBroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend8:$offset)>;
def : Pat<(relaxed_load<atomic_load_8> (ro_Xindexed8 GPR64sp:$Rn, GPR64:$Rm,
                                                     ro_Xextend8:$offset)),
          (LDRBBroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend8:$offset)>;
def : Pat<(relaxed_load<atomic_load_8> (am_indexed8 GPR64sp:$Rn,
                                                    uimm12s1:$offset)),
          (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
def : Pat<(relaxed_load<atomic_load_8>
            (am_unscaled8 GPR64sp:$Rn, simm9:$offset)),
          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;

// 16-bit loads
def : Pat<(acquiring_load<atomic_load_16> GPR64sp:$ptr), (LDARH GPR64sp:$ptr)>;
def : Pat<(relaxed_load<atomic_load_16> (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
                                                       ro_Wextend16:$extend)),
          (LDRHHroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend)>;
def : Pat<(relaxed_load<atomic_load_16> (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
                                                       ro_Xextend16:$extend)),
          (LDRHHroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend)>;
def : Pat<(relaxed_load<atomic_load_16> (am_indexed16 GPR64sp:$Rn,
                                                      uimm12s2:$offset)),
          (LDRHHui GPR64sp:$Rn, uimm12s2:$offset)>;
def : Pat<(relaxed_load<atomic_load_16>
            (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
          (LDURHHi GPR64sp:$Rn, simm9:$offset)>;

// 32-bit loads
def : Pat<(acquiring_load<atomic_load_32> GPR64sp:$ptr), (LDARW GPR64sp:$ptr)>;
def : Pat<(relaxed_load<atomic_load_32> (ro_Windexed32 GPR64sp:$Rn, GPR32:$Rm,
                                                       ro_Wextend32:$extend)),
          (LDRWroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend32:$extend)>;
def : Pat<(relaxed_load<atomic_load_32> (ro_Xindexed32 GPR64sp:$Rn, GPR64:$Rm,
                                                       ro_Xextend32:$extend)),
          (LDRWroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend32:$extend)>;
def : Pat<(relaxed_load<atomic_load_32> (am_indexed32 GPR64sp:$Rn,
                                                      uimm12s4:$offset)),
          (LDRWui GPR64sp:$Rn, uimm12s4:$offset)>;
def : Pat<(relaxed_load<atomic_load_32>
            (am_unscaled32 GPR64sp:$Rn, simm9:$offset)),
          (LDURWi GPR64sp:$Rn, simm9:$offset)>;

// 64-bit loads
def : Pat<(acquiring_load<atomic_load_64> GPR64sp:$ptr), (LDARX GPR64sp:$ptr)>;
def : Pat<(relaxed_load<atomic_load_64> (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
                                                       ro_Wextend64:$extend)),
          (LDRXroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>;
def : Pat<(relaxed_load<atomic_load_64> (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
                                                       ro_Xextend64:$extend)),
          (LDRXroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>;
def : Pat<(relaxed_load<atomic_load_64> (am_indexed64 GPR64sp:$Rn,
                                                      uimm12s8:$offset)),
          (LDRXui GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat<(relaxed_load<atomic_load_64>
            (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
          (LDURXi GPR64sp:$Rn, simm9:$offset)>;

//===----------------------------------
// Atomic stores
//===----------------------------------

// When they're actually atomic, only one addressing mode (GPR64sp) is
// supported, but when they're relaxed and anything can be used, all the
// standard modes would be valid and may give efficiency gains.
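// For example, "store atomic i32 %v, i32* %p release, align 4" is expected to
// select to STLRW, while a monotonic store may use any of the normal STR
// addressing modes below.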

// A store operation that actually needs release semantics.
class releasing_store<PatFrag base>
  : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val)> {
  let IsAtomic = 1;
  let IsAtomicOrderingReleaseOrStronger = 1;
}

// An atomic store operation that doesn't need release semantics: a
// naturally-aligned plain store is already single-copy atomic on AArch64.
class relaxed_store<PatFrag base>
  : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val)> {
  let IsAtomic = 1;
  let IsAtomicOrderingReleaseOrStronger = 0;
}

// 8-bit stores
def : Pat<(releasing_store<atomic_store_8> GPR64sp:$ptr, GPR32:$val),
          (STLRB GPR32:$val, GPR64sp:$ptr)>;
def : Pat<(relaxed_store<atomic_store_8>
            (ro_Windexed8 GPR64sp:$Rn, GPR32:$Rm, ro_Wextend8:$extend),
            GPR32:$val),
          (STRBBroW GPR32:$val, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend8:$extend)>;
def : Pat<(relaxed_store<atomic_store_8>
            (ro_Xindexed8 GPR64sp:$Rn, GPR64:$Rm, ro_Xextend8:$extend),
            GPR32:$val),
          (STRBBroX GPR32:$val, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend8:$extend)>;
def : Pat<(relaxed_store<atomic_store_8>
            (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset), GPR32:$val),
          (STRBBui GPR32:$val, GPR64sp:$Rn, uimm12s1:$offset)>;
def : Pat<(relaxed_store<atomic_store_8>
            (am_unscaled8 GPR64sp:$Rn, simm9:$offset), GPR32:$val),
          (STURBBi GPR32:$val, GPR64sp:$Rn, simm9:$offset)>;

// 16-bit stores
def : Pat<(releasing_store<atomic_store_16> GPR64sp:$ptr, GPR32:$val),
          (STLRH GPR32:$val, GPR64sp:$ptr)>;
def : Pat<(relaxed_store<atomic_store_16> (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
                                                         ro_Wextend16:$extend),
                                          GPR32:$val),
          (STRHHroW GPR32:$val, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend)>;
def : Pat<(relaxed_store<atomic_store_16> (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
                                                         ro_Xextend16:$extend),
                                          GPR32:$val),
          (STRHHroX GPR32:$val, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend)>;
def : Pat<(relaxed_store<atomic_store_16>
            (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset), GPR32:$val),
          (STRHHui GPR32:$val, GPR64sp:$Rn, uimm12s2:$offset)>;
def : Pat<(relaxed_store<atomic_store_16>
            (am_unscaled16 GPR64sp:$Rn, simm9:$offset), GPR32:$val),
          (STURHHi GPR32:$val, GPR64sp:$Rn, simm9:$offset)>;

// 32-bit stores
def : Pat<(releasing_store<atomic_store_32> GPR64sp:$ptr, GPR32:$val),
          (STLRW GPR32:$val, GPR64sp:$ptr)>;
def : Pat<(relaxed_store<atomic_store_32> (ro_Windexed32 GPR64sp:$Rn, GPR32:$Rm,
                                                         ro_Wextend32:$extend),
                                          GPR32:$val),
          (STRWroW GPR32:$val, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend32:$extend)>;
def : Pat<(relaxed_store<atomic_store_32> (ro_Xindexed32 GPR64sp:$Rn, GPR64:$Rm,
                                                         ro_Xextend32:$extend),
                                          GPR32:$val),
          (STRWroX GPR32:$val, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend32:$extend)>;
def : Pat<(relaxed_store<atomic_store_32>
            (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset), GPR32:$val),
          (STRWui GPR32:$val, GPR64sp:$Rn, uimm12s4:$offset)>;
def : Pat<(relaxed_store<atomic_store_32>
            (am_unscaled32 GPR64sp:$Rn, simm9:$offset), GPR32:$val),
          (STURWi GPR32:$val, GPR64sp:$Rn, simm9:$offset)>;

// 64-bit stores
def : Pat<(releasing_store<atomic_store_64> GPR64sp:$ptr, GPR64:$val),
          (STLRX GPR64:$val, GPR64sp:$ptr)>;
def : Pat<(relaxed_store<atomic_store_64> (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
                                                         ro_Wextend64:$extend),
                                          GPR64:$val),
          (STRXroW GPR64:$val, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>;
def : Pat<(relaxed_store<atomic_store_64> (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
                                                         ro_Xextend64:$extend),
                                          GPR64:$val),
          (STRXroX GPR64:$val, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>;
def : Pat<(relaxed_store<atomic_store_64>
            (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset), GPR64:$val),
          (STRXui GPR64:$val, GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat<(relaxed_store<atomic_store_64>
            (am_unscaled64 GPR64sp:$Rn, simm9:$offset), GPR64:$val),
          (STURXi GPR64:$val, GPR64sp:$Rn, simm9:$offset)>;

//===----------------------------------
// Low-level exclusive operations
//===----------------------------------

// Load-exclusives.

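// The llvm.aarch64.ldxr intrinsic always produces an i64 result; the width of
// the underlying access is recorded as the memory VT on the MemIntrinsicSDNode,
// which is what the fragments below test.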
def ldxr_1 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{
  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
}]>;

def ldxr_2 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{
  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
}]>;

def ldxr_4 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{
  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
}]>;

def ldxr_8 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{
  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
}]>;

def : Pat<(ldxr_1 GPR64sp:$addr),
          (SUBREG_TO_REG (i64 0), (LDXRB GPR64sp:$addr), sub_32)>;
def : Pat<(ldxr_2 GPR64sp:$addr),
          (SUBREG_TO_REG (i64 0), (LDXRH GPR64sp:$addr), sub_32)>;
def : Pat<(ldxr_4 GPR64sp:$addr),
          (SUBREG_TO_REG (i64 0), (LDXRW GPR64sp:$addr), sub_32)>;
def : Pat<(ldxr_8 GPR64sp:$addr), (LDXRX GPR64sp:$addr)>;

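// LDXRB/LDXRH/LDXRW zero-extend into the full 64-bit result, so masking that
// result back down to the loaded width is redundant and is folded away here.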
def : Pat<(and (ldxr_1 GPR64sp:$addr), 0xff),
          (SUBREG_TO_REG (i64 0), (LDXRB GPR64sp:$addr), sub_32)>;
def : Pat<(and (ldxr_2 GPR64sp:$addr), 0xffff),
          (SUBREG_TO_REG (i64 0), (LDXRH GPR64sp:$addr), sub_32)>;
def : Pat<(and (ldxr_4 GPR64sp:$addr), 0xffffffff),
          (SUBREG_TO_REG (i64 0), (LDXRW GPR64sp:$addr), sub_32)>;

// Load-acquire-exclusives.

def ldaxr_1 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{
  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
}]>;

def ldaxr_2 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{
  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
}]>;

def ldaxr_4 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{
  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
}]>;

def ldaxr_8 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{
  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
}]>;

def : Pat<(ldaxr_1 GPR64sp:$addr),
          (SUBREG_TO_REG (i64 0), (LDAXRB GPR64sp:$addr), sub_32)>;
def : Pat<(ldaxr_2 GPR64sp:$addr),
          (SUBREG_TO_REG (i64 0), (LDAXRH GPR64sp:$addr), sub_32)>;
def : Pat<(ldaxr_4 GPR64sp:$addr),
          (SUBREG_TO_REG (i64 0), (LDAXRW GPR64sp:$addr), sub_32)>;
def : Pat<(ldaxr_8 GPR64sp:$addr), (LDAXRX GPR64sp:$addr)>;

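// As with LDXR above, a mask that only keeps the loaded bits of the
// zero-extended result is redundant and is folded away.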
def : Pat<(and (ldaxr_1 GPR64sp:$addr), 0xff),
          (SUBREG_TO_REG (i64 0), (LDAXRB GPR64sp:$addr), sub_32)>;
def : Pat<(and (ldaxr_2 GPR64sp:$addr), 0xffff),
          (SUBREG_TO_REG (i64 0), (LDAXRH GPR64sp:$addr), sub_32)>;
def : Pat<(and (ldaxr_4 GPR64sp:$addr), 0xffffffff),
          (SUBREG_TO_REG (i64 0), (LDAXRW GPR64sp:$addr), sub_32)>;

// Store-exclusives.

def stxr_1 : PatFrag<(ops node:$val, node:$ptr),
                     (int_aarch64_stxr node:$val, node:$ptr), [{
  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
}]>;

def stxr_2 : PatFrag<(ops node:$val, node:$ptr),
                     (int_aarch64_stxr node:$val, node:$ptr), [{
  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
}]>;

def stxr_4 : PatFrag<(ops node:$val, node:$ptr),
                     (int_aarch64_stxr node:$val, node:$ptr), [{
  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
}]>;

def stxr_8 : PatFrag<(ops node:$val, node:$ptr),
                     (int_aarch64_stxr node:$val, node:$ptr), [{
  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
}]>;


def : Pat<(stxr_1 GPR64:$val, GPR64sp:$addr),
          (STXRB (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>;
def : Pat<(stxr_2 GPR64:$val, GPR64sp:$addr),
          (STXRH (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>;
def : Pat<(stxr_4 GPR64:$val, GPR64sp:$addr),
          (STXRW (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>;
def : Pat<(stxr_8 GPR64:$val, GPR64sp:$addr),
          (STXRX GPR64:$val, GPR64sp:$addr)>;

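// The llvm.aarch64.stxr intrinsic takes an i64 value operand, so narrow stores
// typically arrive wrapped in a zext and/or a mask; when the kept bits already
// fit in the access width, the extension is dropped and the W register is used
// directly.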
def : Pat<(stxr_1 (zext (and GPR32:$val, 0xff)), GPR64sp:$addr),
          (STXRB GPR32:$val, GPR64sp:$addr)>;
def : Pat<(stxr_2 (zext (and GPR32:$val, 0xffff)), GPR64sp:$addr),
          (STXRH GPR32:$val, GPR64sp:$addr)>;
def : Pat<(stxr_4 (zext GPR32:$val), GPR64sp:$addr),
          (STXRW GPR32:$val, GPR64sp:$addr)>;

def : Pat<(stxr_1 (and GPR64:$val, 0xff), GPR64sp:$addr),
          (STXRB (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>;
def : Pat<(stxr_2 (and GPR64:$val, 0xffff), GPR64sp:$addr),
          (STXRH (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>;
def : Pat<(stxr_4 (and GPR64:$val, 0xffffffff), GPR64sp:$addr),
          (STXRW (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>;

// Store-release-exclusives.

def stlxr_1 : PatFrag<(ops node:$val, node:$ptr),
                      (int_aarch64_stlxr node:$val, node:$ptr), [{
  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
}]>;

def stlxr_2 : PatFrag<(ops node:$val, node:$ptr),
                      (int_aarch64_stlxr node:$val, node:$ptr), [{
  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
}]>;

def stlxr_4 : PatFrag<(ops node:$val, node:$ptr),
                      (int_aarch64_stlxr node:$val, node:$ptr), [{
  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
}]>;

def stlxr_8 : PatFrag<(ops node:$val, node:$ptr),
                      (int_aarch64_stlxr node:$val, node:$ptr), [{
  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
}]>;


def : Pat<(stlxr_1 GPR64:$val, GPR64sp:$addr),
          (STLXRB (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>;
def : Pat<(stlxr_2 GPR64:$val, GPR64sp:$addr),
          (STLXRH (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>;
def : Pat<(stlxr_4 GPR64:$val, GPR64sp:$addr),
          (STLXRW (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>;
def : Pat<(stlxr_8 GPR64:$val, GPR64sp:$addr),
          (STLXRX GPR64:$val, GPR64sp:$addr)>;

def : Pat<(stlxr_1 (zext (and GPR32:$val, 0xff)), GPR64sp:$addr),
          (STLXRB GPR32:$val, GPR64sp:$addr)>;
def : Pat<(stlxr_2 (zext (and GPR32:$val, 0xffff)), GPR64sp:$addr),
          (STLXRH GPR32:$val, GPR64sp:$addr)>;
def : Pat<(stlxr_4 (zext GPR32:$val), GPR64sp:$addr),
          (STLXRW GPR32:$val, GPR64sp:$addr)>;

def : Pat<(stlxr_1 (and GPR64:$val, 0xff), GPR64sp:$addr),
          (STLXRB (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>;
def : Pat<(stlxr_2 (and GPR64:$val, 0xffff), GPR64sp:$addr),
          (STLXRH (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>;
def : Pat<(stlxr_4 (and GPR64:$val, 0xffffffff), GPR64sp:$addr),
          (STLXRW (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>;


// And clear exclusive.

def : Pat<(int_aarch64_clrex), (CLREX 0xf)>;

//===----------------------------------
// Atomic cmpxchg for -O0
//===----------------------------------

// The fast register allocator used during -O0 inserts spills to cover any
// VRegs live across basic block boundaries. When this happens between an LDXR
// and an STXR it can clear the exclusive monitor, causing all cmpxchg attempts
// to fail.

// Unfortunately, this means we have to provide an alternative (expanded
// post-regalloc) path for -O0 compilations. Fortunately this path can be
// significantly more naive than the standard expansion: we conservatively
// assume seq_cst, a strong cmpxchg, and omit the clrex on failure.
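
// At -O0 these pseudos survive until after register allocation and are then
// expanded (see AArch64ExpandPseudoInsts.cpp) into roughly the following loop,
// shown here for the 32-bit case:
//
//   .Lloadcmp:
//     ldaxr   wDest, [xAddr]
//     cmp     wDest, wDesired
//     b.ne    .Ldone
//     stlxr   wScratch, wNew, [xAddr]
//     cbnz    wScratch, .Lloadcmp
//   .Ldone:
//
// The 128-bit variant is expanded the same way but with an LDAXP/STLXP pair.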

let Constraints = "@earlyclobber $Rd,@earlyclobber $scratch",
    mayLoad = 1, mayStore = 1 in {
def CMP_SWAP_8 : Pseudo<(outs GPR32:$Rd, GPR32:$scratch),
                        (ins GPR64:$addr, GPR32:$desired, GPR32:$new), []>,
                 Sched<[WriteAtomic]>;

def CMP_SWAP_16 : Pseudo<(outs GPR32:$Rd, GPR32:$scratch),
                         (ins GPR64:$addr, GPR32:$desired, GPR32:$new), []>,
                  Sched<[WriteAtomic]>;

def CMP_SWAP_32 : Pseudo<(outs GPR32:$Rd, GPR32:$scratch),
                         (ins GPR64:$addr, GPR32:$desired, GPR32:$new), []>,
                  Sched<[WriteAtomic]>;

def CMP_SWAP_64 : Pseudo<(outs GPR64:$Rd, GPR32:$scratch),
                         (ins GPR64:$addr, GPR64:$desired, GPR64:$new), []>,
                  Sched<[WriteAtomic]>;
}

let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi,@earlyclobber $scratch",
    mayLoad = 1, mayStore = 1 in
def CMP_SWAP_128 : Pseudo<(outs GPR64:$RdLo, GPR64:$RdHi, GPR32:$scratch),
                          (ins GPR64:$addr, GPR64:$desiredLo, GPR64:$desiredHi,
                               GPR64:$newLo, GPR64:$newHi), []>,
                   Sched<[WriteAtomic]>;

// v8.1 Atomic instructions:
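// With LSE these operations select to single instructions; for example, a
// seq_cst 32-bit atomic fetch-and-add should come out as
// "ldaddal wRs, wRt, [xAddr]" rather than an LDAXR/STLXR retry loop.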
let Predicates = [HasLSE] in {
  defm : LDOPregister_patterns<"LDADD", "atomic_load_add">;
  defm : LDOPregister_patterns<"LDSET", "atomic_load_or">;
  defm : LDOPregister_patterns<"LDEOR", "atomic_load_xor">;
  defm : LDOPregister_patterns<"LDCLR", "atomic_load_clr">;
  defm : LDOPregister_patterns<"LDSMAX", "atomic_load_max">;
  defm : LDOPregister_patterns<"LDSMIN", "atomic_load_min">;
  defm : LDOPregister_patterns<"LDUMAX", "atomic_load_umax">;
  defm : LDOPregister_patterns<"LDUMIN", "atomic_load_umin">;
  defm : LDOPregister_patterns<"SWP", "atomic_swap">;
  defm : CASregister_patterns<"CAS", "atomic_cmp_swap">;

  // These two patterns are only needed for GlobalISel; SelectionDAG already
  // converts an atomic load-sub into a negate plus an atomic load-add, and
  // likewise an atomic load-and into a bitwise-not plus an atomic load-clr.
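  // Concretely, an atomic load-sub of x should become an LDADD of
  // (SUB {W,X}ZR, x), and an atomic load-and of x an LDCLR of (ORN {W,X}ZR, x),
  // since LDCLR clears exactly the bits that are set in its operand.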
  defm : LDOPregister_patterns_mod<"LDADD", "atomic_load_sub", "SUB">;
  defm : LDOPregister_patterns_mod<"LDCLR", "atomic_load_and", "ORN">;
}
