[WebAssembly] Support for binary atomic RMW instructions

Summary:
This adds support for binary atomic read-modify-write instructions:
add, sub, and, or, xor, and xchg.

This does not yet support translating the LLVM IR atomicrmw operations
(nand, max, min, umax, and umin) that have no direct counterpart among
the wasm instructions.

Reviewers: dschuff

Subscribers: sbc100, jgravelle-google, sunfish, llvm-commits

Differential Revision: https://reviews.llvm.org/D49088

llvm-svn: 336615
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
index 8575601..af4ebd5 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
@@ -128,6 +128,30 @@
   case WebAssembly::ATOMIC_STORE8_I32_S:
   case WebAssembly::ATOMIC_STORE8_I64:
   case WebAssembly::ATOMIC_STORE8_I64_S:
+  case WebAssembly::ATOMIC_RMW8_U_ADD_I32:
+  case WebAssembly::ATOMIC_RMW8_U_ADD_I32_S:
+  case WebAssembly::ATOMIC_RMW8_U_ADD_I64:
+  case WebAssembly::ATOMIC_RMW8_U_ADD_I64_S:
+  case WebAssembly::ATOMIC_RMW8_U_SUB_I32:
+  case WebAssembly::ATOMIC_RMW8_U_SUB_I32_S:
+  case WebAssembly::ATOMIC_RMW8_U_SUB_I64:
+  case WebAssembly::ATOMIC_RMW8_U_SUB_I64_S:
+  case WebAssembly::ATOMIC_RMW8_U_AND_I32:
+  case WebAssembly::ATOMIC_RMW8_U_AND_I32_S:
+  case WebAssembly::ATOMIC_RMW8_U_AND_I64:
+  case WebAssembly::ATOMIC_RMW8_U_AND_I64_S:
+  case WebAssembly::ATOMIC_RMW8_U_OR_I32:
+  case WebAssembly::ATOMIC_RMW8_U_OR_I32_S:
+  case WebAssembly::ATOMIC_RMW8_U_OR_I64:
+  case WebAssembly::ATOMIC_RMW8_U_OR_I64_S:
+  case WebAssembly::ATOMIC_RMW8_U_XOR_I32:
+  case WebAssembly::ATOMIC_RMW8_U_XOR_I32_S:
+  case WebAssembly::ATOMIC_RMW8_U_XOR_I64:
+  case WebAssembly::ATOMIC_RMW8_U_XOR_I64_S:
+  case WebAssembly::ATOMIC_RMW8_U_XCHG_I32:
+  case WebAssembly::ATOMIC_RMW8_U_XCHG_I32_S:
+  case WebAssembly::ATOMIC_RMW8_U_XCHG_I64:
+  case WebAssembly::ATOMIC_RMW8_U_XCHG_I64_S:
     return 0;
   case WebAssembly::LOAD16_S_I32:
   case WebAssembly::LOAD16_S_I32_S:
@@ -149,6 +173,30 @@
   case WebAssembly::ATOMIC_STORE16_I32_S:
   case WebAssembly::ATOMIC_STORE16_I64:
   case WebAssembly::ATOMIC_STORE16_I64_S:
+  case WebAssembly::ATOMIC_RMW16_U_ADD_I32:
+  case WebAssembly::ATOMIC_RMW16_U_ADD_I32_S:
+  case WebAssembly::ATOMIC_RMW16_U_ADD_I64:
+  case WebAssembly::ATOMIC_RMW16_U_ADD_I64_S:
+  case WebAssembly::ATOMIC_RMW16_U_SUB_I32:
+  case WebAssembly::ATOMIC_RMW16_U_SUB_I32_S:
+  case WebAssembly::ATOMIC_RMW16_U_SUB_I64:
+  case WebAssembly::ATOMIC_RMW16_U_SUB_I64_S:
+  case WebAssembly::ATOMIC_RMW16_U_AND_I32:
+  case WebAssembly::ATOMIC_RMW16_U_AND_I32_S:
+  case WebAssembly::ATOMIC_RMW16_U_AND_I64:
+  case WebAssembly::ATOMIC_RMW16_U_AND_I64_S:
+  case WebAssembly::ATOMIC_RMW16_U_OR_I32:
+  case WebAssembly::ATOMIC_RMW16_U_OR_I32_S:
+  case WebAssembly::ATOMIC_RMW16_U_OR_I64:
+  case WebAssembly::ATOMIC_RMW16_U_OR_I64_S:
+  case WebAssembly::ATOMIC_RMW16_U_XOR_I32:
+  case WebAssembly::ATOMIC_RMW16_U_XOR_I32_S:
+  case WebAssembly::ATOMIC_RMW16_U_XOR_I64:
+  case WebAssembly::ATOMIC_RMW16_U_XOR_I64_S:
+  case WebAssembly::ATOMIC_RMW16_U_XCHG_I32:
+  case WebAssembly::ATOMIC_RMW16_U_XCHG_I32_S:
+  case WebAssembly::ATOMIC_RMW16_U_XCHG_I64:
+  case WebAssembly::ATOMIC_RMW16_U_XCHG_I64_S:
     return 1;
   case WebAssembly::LOAD_I32:
   case WebAssembly::LOAD_I32_S:
@@ -172,6 +220,30 @@
   case WebAssembly::ATOMIC_STORE_I32_S:
   case WebAssembly::ATOMIC_STORE32_I64:
   case WebAssembly::ATOMIC_STORE32_I64_S:
+  case WebAssembly::ATOMIC_RMW_ADD_I32:
+  case WebAssembly::ATOMIC_RMW_ADD_I32_S:
+  case WebAssembly::ATOMIC_RMW32_U_ADD_I64:
+  case WebAssembly::ATOMIC_RMW32_U_ADD_I64_S:
+  case WebAssembly::ATOMIC_RMW_SUB_I32:
+  case WebAssembly::ATOMIC_RMW_SUB_I32_S:
+  case WebAssembly::ATOMIC_RMW32_U_SUB_I64:
+  case WebAssembly::ATOMIC_RMW32_U_SUB_I64_S:
+  case WebAssembly::ATOMIC_RMW_AND_I32:
+  case WebAssembly::ATOMIC_RMW_AND_I32_S:
+  case WebAssembly::ATOMIC_RMW32_U_AND_I64:
+  case WebAssembly::ATOMIC_RMW32_U_AND_I64_S:
+  case WebAssembly::ATOMIC_RMW_OR_I32:
+  case WebAssembly::ATOMIC_RMW_OR_I32_S:
+  case WebAssembly::ATOMIC_RMW32_U_OR_I64:
+  case WebAssembly::ATOMIC_RMW32_U_OR_I64_S:
+  case WebAssembly::ATOMIC_RMW_XOR_I32:
+  case WebAssembly::ATOMIC_RMW_XOR_I32_S:
+  case WebAssembly::ATOMIC_RMW32_U_XOR_I64:
+  case WebAssembly::ATOMIC_RMW32_U_XOR_I64_S:
+  case WebAssembly::ATOMIC_RMW_XCHG_I32:
+  case WebAssembly::ATOMIC_RMW_XCHG_I32_S:
+  case WebAssembly::ATOMIC_RMW32_U_XCHG_I64:
+  case WebAssembly::ATOMIC_RMW32_U_XCHG_I64_S:
     return 2;
   case WebAssembly::LOAD_I64:
   case WebAssembly::LOAD_I64_S:
@@ -185,6 +257,18 @@
   case WebAssembly::ATOMIC_LOAD_I64_S:
   case WebAssembly::ATOMIC_STORE_I64:
   case WebAssembly::ATOMIC_STORE_I64_S:
+  case WebAssembly::ATOMIC_RMW_ADD_I64:
+  case WebAssembly::ATOMIC_RMW_ADD_I64_S:
+  case WebAssembly::ATOMIC_RMW_SUB_I64:
+  case WebAssembly::ATOMIC_RMW_SUB_I64_S:
+  case WebAssembly::ATOMIC_RMW_AND_I64:
+  case WebAssembly::ATOMIC_RMW_AND_I64_S:
+  case WebAssembly::ATOMIC_RMW_OR_I64:
+  case WebAssembly::ATOMIC_RMW_OR_I64_S:
+  case WebAssembly::ATOMIC_RMW_XOR_I64:
+  case WebAssembly::ATOMIC_RMW_XOR_I64_S:
+  case WebAssembly::ATOMIC_RMW_XCHG_I64:
+  case WebAssembly::ATOMIC_RMW_XCHG_I64_S:
     return 3;
   default:
     llvm_unreachable("Only loads and stores have p2align values");
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
index cac651d..d879932 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
@@ -330,16 +330,328 @@
 } // Predicates = [HasAtomics]
 
 //===----------------------------------------------------------------------===//
-// Low-level exclusive operations
+// Atomic binary read-modify-writes
 //===----------------------------------------------------------------------===//
 
-// TODO: add exclusive operations here...
+let Defs = [ARGUMENTS] in {
 
-// Load-exclusives.
+multiclass WebAssemblyBinRMW<WebAssemblyRegClass rc, string Name, int Opcode> {
+  defm "" : I<(outs rc:$dst),
+              (ins P2Align:$p2align, offset32_op:$off, I32:$addr, rc:$val),
+              (outs), (ins P2Align:$p2align, offset32_op:$off), [],
+              !strconcat(Name, "\t$dst, ${off}(${addr})${p2align}, $val"),
+              !strconcat(Name, "\t${off}, ${p2align}"), Opcode>;
+}
 
-// Store-exclusives.
+defm ATOMIC_RMW_ADD_I32 : WebAssemblyBinRMW<I32, "i32.atomic.rmw.add", 0xfe1e>;
+defm ATOMIC_RMW_ADD_I64 : WebAssemblyBinRMW<I64, "i64.atomic.rmw.add", 0xfe1f>;
+defm ATOMIC_RMW8_U_ADD_I32 :
+  WebAssemblyBinRMW<I32, "i32.atomic.rmw8_u.add", 0xfe20>;
+defm ATOMIC_RMW16_U_ADD_I32 :
+  WebAssemblyBinRMW<I32, "i32.atomic.rmw16_u.add", 0xfe21>;
+defm ATOMIC_RMW8_U_ADD_I64 :
+  WebAssemblyBinRMW<I64, "i64.atomic.rmw8_u.add", 0xfe22>;
+defm ATOMIC_RMW16_U_ADD_I64 :
+  WebAssemblyBinRMW<I64, "i64.atomic.rmw16_u.add", 0xfe23>;
+defm ATOMIC_RMW32_U_ADD_I64 :
+  WebAssemblyBinRMW<I64, "i64.atomic.rmw32_u.add", 0xfe24>;
 
-// Store-release-exclusives.
+defm ATOMIC_RMW_SUB_I32 : WebAssemblyBinRMW<I32, "i32.atomic.rmw.sub", 0xfe25>;
+defm ATOMIC_RMW_SUB_I64 : WebAssemblyBinRMW<I64, "i64.atomic.rmw.sub", 0xfe26>;
+defm ATOMIC_RMW8_U_SUB_I32 :
+  WebAssemblyBinRMW<I32, "i32.atomic.rmw8_u.sub", 0xfe27>;
+defm ATOMIC_RMW16_U_SUB_I32 :
+  WebAssemblyBinRMW<I32, "i32.atomic.rmw16_u.sub", 0xfe28>;
+defm ATOMIC_RMW8_U_SUB_I64 :
+  WebAssemblyBinRMW<I64, "i64.atomic.rmw8_u.sub", 0xfe29>;
+defm ATOMIC_RMW16_U_SUB_I64 :
+  WebAssemblyBinRMW<I64, "i64.atomic.rmw16_u.sub", 0xfe2a>;
+defm ATOMIC_RMW32_U_SUB_I64 :
+  WebAssemblyBinRMW<I64, "i64.atomic.rmw32_u.sub", 0xfe2b>;
 
-// And clear exclusive.
+defm ATOMIC_RMW_AND_I32 : WebAssemblyBinRMW<I32, "i32.atomic.rmw.and", 0xfe2c>;
+defm ATOMIC_RMW_AND_I64 : WebAssemblyBinRMW<I64, "i64.atomic.rmw.and", 0xfe2d>;
+defm ATOMIC_RMW8_U_AND_I32 :
+  WebAssemblyBinRMW<I32, "i32.atomic.rmw8_u.and", 0xfe2e>;
+defm ATOMIC_RMW16_U_AND_I32 :
+  WebAssemblyBinRMW<I32, "i32.atomic.rmw16_u.and", 0xfe2f>;
+defm ATOMIC_RMW8_U_AND_I64 :
+  WebAssemblyBinRMW<I64, "i64.atomic.rmw8_u.and", 0xfe30>;
+defm ATOMIC_RMW16_U_AND_I64 :
+  WebAssemblyBinRMW<I64, "i64.atomic.rmw16_u.and", 0xfe31>;
+defm ATOMIC_RMW32_U_AND_I64 :
+  WebAssemblyBinRMW<I64, "i64.atomic.rmw32_u.and", 0xfe32>;
 
+defm ATOMIC_RMW_OR_I32 : WebAssemblyBinRMW<I32, "i32.atomic.rmw.or", 0xfe33>;
+defm ATOMIC_RMW_OR_I64 : WebAssemblyBinRMW<I64, "i64.atomic.rmw.or", 0xfe34>;
+defm ATOMIC_RMW8_U_OR_I32 :
+  WebAssemblyBinRMW<I32, "i32.atomic.rmw8_u.or", 0xfe35>;
+defm ATOMIC_RMW16_U_OR_I32 :
+  WebAssemblyBinRMW<I32, "i32.atomic.rmw16_u.or", 0xfe36>;
+defm ATOMIC_RMW8_U_OR_I64 :
+  WebAssemblyBinRMW<I64, "i64.atomic.rmw8_u.or", 0xfe37>;
+defm ATOMIC_RMW16_U_OR_I64 :
+  WebAssemblyBinRMW<I64, "i64.atomic.rmw16_u.or", 0xfe38>;
+defm ATOMIC_RMW32_U_OR_I64 :
+  WebAssemblyBinRMW<I64, "i64.atomic.rmw32_u.or", 0xfe39>;
+
+defm ATOMIC_RMW_XOR_I32 : WebAssemblyBinRMW<I32, "i32.atomic.rmw.xor", 0xfe3a>;
+defm ATOMIC_RMW_XOR_I64 : WebAssemblyBinRMW<I64, "i64.atomic.rmw.xor", 0xfe3b>;
+defm ATOMIC_RMW8_U_XOR_I32 :
+  WebAssemblyBinRMW<I32, "i32.atomic.rmw8_u.xor", 0xfe3c>;
+defm ATOMIC_RMW16_U_XOR_I32 :
+  WebAssemblyBinRMW<I32, "i32.atomic.rmw16_u.xor", 0xfe3d>;
+defm ATOMIC_RMW8_U_XOR_I64 :
+  WebAssemblyBinRMW<I64, "i64.atomic.rmw8_u.xor", 0xfe3e>;
+defm ATOMIC_RMW16_U_XOR_I64 :
+  WebAssemblyBinRMW<I64, "i64.atomic.rmw16_u.xor", 0xfe3f>;
+defm ATOMIC_RMW32_U_XOR_I64 :
+  WebAssemblyBinRMW<I64, "i64.atomic.rmw32_u.xor", 0xfe40>;
+
+defm ATOMIC_RMW_XCHG_I32 :
+  WebAssemblyBinRMW<I32, "i32.atomic.rmw.xchg", 0xfe41>;
+defm ATOMIC_RMW_XCHG_I64 :
+  WebAssemblyBinRMW<I64, "i64.atomic.rmw.xchg", 0xfe42>;
+defm ATOMIC_RMW8_U_XCHG_I32 :
+  WebAssemblyBinRMW<I32, "i32.atomic.rmw8_u.xchg", 0xfe43>;
+defm ATOMIC_RMW16_U_XCHG_I32 :
+  WebAssemblyBinRMW<I32, "i32.atomic.rmw16_u.xchg", 0xfe44>;
+defm ATOMIC_RMW8_U_XCHG_I64 :
+  WebAssemblyBinRMW<I64, "i64.atomic.rmw8_u.xchg", 0xfe45>;
+defm ATOMIC_RMW16_U_XCHG_I64 :
+  WebAssemblyBinRMW<I64, "i64.atomic.rmw16_u.xchg", 0xfe46>;
+defm ATOMIC_RMW32_U_XCHG_I64 :
+  WebAssemblyBinRMW<I64, "i64.atomic.rmw32_u.xchg", 0xfe47>;
+}
+
+// Select binary RMWs with no constant offset.
+class BinRMWPatNoOffset<ValueType ty, PatFrag kind, NI inst> :
+  Pat<(ty (kind I32:$addr, ty:$val)), (inst 0, 0, I32:$addr, ty:$val)>;
+
+// Select binary RMWs with a constant offset.
+
+// Pattern with address + immediate offset
+class BinRMWPatImmOff<ValueType ty, PatFrag kind, PatFrag operand, NI inst> :
+  Pat<(ty (kind (operand I32:$addr, imm:$off), ty:$val)),
+      (inst 0, imm:$off, I32:$addr, ty:$val)>;
+
+class BinRMWPatGlobalAddr<ValueType ty, PatFrag kind, NI inst> :
+  Pat<(ty (kind (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)),
+                ty:$val)),
+      (inst 0, tglobaladdr:$off, I32:$addr, ty:$val)>;
+
+class BinRMWPatExternalSym<ValueType ty, PatFrag kind, NI inst> :
+  Pat<(ty (kind (add I32:$addr, (WebAssemblywrapper texternalsym:$off)),
+                ty:$val)),
+      (inst 0, texternalsym:$off, I32:$addr, ty:$val)>;
+
+// Select binary RMWs whose address is just a constant offset (base of 0).
+class BinRMWPatOffsetOnly<ValueType ty, PatFrag kind, NI inst> :
+  Pat<(ty (kind imm:$off, ty:$val)),
+      (inst 0, imm:$off, (CONST_I32 0), ty:$val)>;
+
+class BinRMWPatGlobalAddrOffOnly<ValueType ty, PatFrag kind, NI inst> :
+  Pat<(ty (kind (WebAssemblywrapper tglobaladdr:$off), ty:$val)),
+      (inst 0, tglobaladdr:$off, (CONST_I32 0), ty:$val)>;
+
+class BinRMWPatExternSymOffOnly<ValueType ty, PatFrag kind, NI inst> :
+  Pat<(ty (kind (WebAssemblywrapper texternalsym:$off), ty:$val)),
+      (inst 0, texternalsym:$off, (CONST_I32 0), ty:$val)>;
+
+// Patterns for various addressing modes.
+multiclass BinRMWPattern<PatFrag rmw_32, PatFrag rmw_64, NI inst_32,
+                         NI inst_64> {
+  def : BinRMWPatNoOffset<i32, rmw_32, inst_32>;
+  def : BinRMWPatNoOffset<i64, rmw_64, inst_64>;
+
+  def : BinRMWPatImmOff<i32, rmw_32, regPlusImm, inst_32>;
+  def : BinRMWPatImmOff<i64, rmw_64, regPlusImm, inst_64>;
+  def : BinRMWPatImmOff<i32, rmw_32, or_is_add, inst_32>;
+  def : BinRMWPatImmOff<i64, rmw_64, or_is_add, inst_64>;
+
+  def : BinRMWPatGlobalAddr<i32, rmw_32, inst_32>;
+  def : BinRMWPatGlobalAddr<i64, rmw_64, inst_64>;
+
+  def : BinRMWPatExternalSym<i32, rmw_32, inst_32>;
+  def : BinRMWPatExternalSym<i64, rmw_64, inst_64>;
+
+  def : BinRMWPatOffsetOnly<i32, rmw_32, inst_32>;
+  def : BinRMWPatOffsetOnly<i64, rmw_64, inst_64>;
+
+  def : BinRMWPatGlobalAddrOffOnly<i32, rmw_32, inst_32>;
+  def : BinRMWPatGlobalAddrOffOnly<i64, rmw_64, inst_64>;
+
+  def : BinRMWPatExternSymOffOnly<i32, rmw_32, inst_32>;
+  def : BinRMWPatExternSymOffOnly<i64, rmw_64, inst_64>;
+}
+
+let Predicates = [HasAtomics] in {
+defm : BinRMWPattern<atomic_load_add_32, atomic_load_add_64, ATOMIC_RMW_ADD_I32,
+                     ATOMIC_RMW_ADD_I64>;
+defm : BinRMWPattern<atomic_load_sub_32, atomic_load_sub_64, ATOMIC_RMW_SUB_I32,
+                     ATOMIC_RMW_SUB_I64>;
+defm : BinRMWPattern<atomic_load_and_32, atomic_load_and_64, ATOMIC_RMW_AND_I32,
+                     ATOMIC_RMW_AND_I64>;
+defm : BinRMWPattern<atomic_load_or_32, atomic_load_or_64, ATOMIC_RMW_OR_I32,
+                     ATOMIC_RMW_OR_I64>;
+defm : BinRMWPattern<atomic_load_xor_32, atomic_load_xor_64, ATOMIC_RMW_XOR_I32,
+                     ATOMIC_RMW_XOR_I64>;
+defm : BinRMWPattern<atomic_swap_32, atomic_swap_64, ATOMIC_RMW_XCHG_I32,
+                     ATOMIC_RMW_XCHG_I64>;
+} // Predicates = [HasAtomics]
+
+// Truncating & zero-extending binary RMW patterns.
+// These combine the truncating store patterns and the zero-extending load
+// patterns above.
+class zext_bin_rmw_8_32<PatFrag kind> :
+  PatFrag<(ops node:$addr, node:$val),
+          (and (i32 (kind node:$addr, node:$val)), 255)>;
+class zext_bin_rmw_16_32<PatFrag kind> :
+  PatFrag<(ops node:$addr, node:$val),
+          (and (i32 (kind node:$addr, node:$val)), 65535)>;
+class zext_bin_rmw_8_64<PatFrag kind> :
+  PatFrag<(ops node:$addr, node:$val),
+    (and (i64 (anyext (i32 (kind node:$addr,
+                                 (i32 (trunc (i64 node:$val))))))), 255)>;
+class zext_bin_rmw_16_64<PatFrag kind> :
+  PatFrag<(ops node:$addr, node:$val),
+    (and (i64 (anyext (i32 (kind node:$addr,
+                                 (i32 (trunc (i64 node:$val))))))), 65535)>;
+class zext_bin_rmw_32_64<PatFrag kind> :
+  PatFrag<(ops node:$addr, node:$val),
+          (zext (i32 (kind node:$addr, (i32 (trunc (i64 node:$val))))))>;
+
+// Truncating & sign-extending binary RMW patterns.
+// These are combined patterns of truncating store patterns and sign-extending
+// load patterns above. We match subword RMWs (for 32-bit) and anyext RMWs (for
+// 64-bit) and select a zext RMW; the next instruction will be sext_inreg which
+// is selected by itself.
+class sext_bin_rmw_8_32<PatFrag kind> :
+  PatFrag<(ops node:$addr, node:$val), (kind node:$addr, node:$val)>;
+class sext_bin_rmw_16_32<PatFrag kind> : sext_bin_rmw_8_32<kind>;
+class sext_bin_rmw_8_64<PatFrag kind> :
+  PatFrag<(ops node:$addr, node:$val),
+          (anyext (i32 (kind node:$addr, (i32 (trunc (i64 node:$val))))))>;
+class sext_bin_rmw_16_64<PatFrag kind> : sext_bin_rmw_8_64<kind>;
+// 32->64 sext RMW gets selected as i32.atomic.rmw.***, i64.extend_s/i32
+
+// Patterns for various addressing modes for truncating-extending binary RMWs.
+multiclass BinRMWTruncExtPattern<
+  PatFrag rmw_8, PatFrag rmw_16, PatFrag rmw_32, PatFrag rmw_64,
+  NI inst8_32, NI inst16_32, NI inst8_64, NI inst16_64, NI inst32_64> {
+  // Truncating-extending binary RMWs with no constant offset
+  def : BinRMWPatNoOffset<i32, zext_bin_rmw_8_32<rmw_8>, inst8_32>;
+  def : BinRMWPatNoOffset<i32, zext_bin_rmw_16_32<rmw_16>, inst16_32>;
+  def : BinRMWPatNoOffset<i64, zext_bin_rmw_8_64<rmw_8>, inst8_64>;
+  def : BinRMWPatNoOffset<i64, zext_bin_rmw_16_64<rmw_16>, inst16_64>;
+  def : BinRMWPatNoOffset<i64, zext_bin_rmw_32_64<rmw_32>, inst32_64>;
+
+  def : BinRMWPatNoOffset<i32, sext_bin_rmw_8_32<rmw_8>, inst8_32>;
+  def : BinRMWPatNoOffset<i32, sext_bin_rmw_16_32<rmw_16>, inst16_32>;
+  def : BinRMWPatNoOffset<i64, sext_bin_rmw_8_64<rmw_8>, inst8_64>;
+  def : BinRMWPatNoOffset<i64, sext_bin_rmw_16_64<rmw_16>, inst16_64>;
+
+  // Truncating-extending binary RMWs with a constant offset
+  def : BinRMWPatImmOff<i32, zext_bin_rmw_8_32<rmw_8>, regPlusImm, inst8_32>;
+  def : BinRMWPatImmOff<i32, zext_bin_rmw_16_32<rmw_16>, regPlusImm, inst16_32>;
+  def : BinRMWPatImmOff<i64, zext_bin_rmw_8_64<rmw_8>, regPlusImm, inst8_64>;
+  def : BinRMWPatImmOff<i64, zext_bin_rmw_16_64<rmw_16>, regPlusImm, inst16_64>;
+  def : BinRMWPatImmOff<i64, zext_bin_rmw_32_64<rmw_32>, regPlusImm, inst32_64>;
+  def : BinRMWPatImmOff<i32, zext_bin_rmw_8_32<rmw_8>, or_is_add, inst8_32>;
+  def : BinRMWPatImmOff<i32, zext_bin_rmw_16_32<rmw_16>, or_is_add, inst16_32>;
+  def : BinRMWPatImmOff<i64, zext_bin_rmw_8_64<rmw_8>, or_is_add, inst8_64>;
+  def : BinRMWPatImmOff<i64, zext_bin_rmw_16_64<rmw_16>, or_is_add, inst16_64>;
+  def : BinRMWPatImmOff<i64, zext_bin_rmw_32_64<rmw_32>, or_is_add, inst32_64>;
+
+  def : BinRMWPatImmOff<i32, sext_bin_rmw_8_32<rmw_8>, regPlusImm, inst8_32>;
+  def : BinRMWPatImmOff<i32, sext_bin_rmw_16_32<rmw_16>, regPlusImm, inst16_32>;
+  def : BinRMWPatImmOff<i64, sext_bin_rmw_8_64<rmw_8>, regPlusImm, inst8_64>;
+  def : BinRMWPatImmOff<i64, sext_bin_rmw_16_64<rmw_16>, regPlusImm, inst16_64>;
+  def : BinRMWPatImmOff<i32, sext_bin_rmw_8_32<rmw_8>, or_is_add, inst8_32>;
+  def : BinRMWPatImmOff<i32, sext_bin_rmw_16_32<rmw_16>, or_is_add, inst16_32>;
+  def : BinRMWPatImmOff<i64, sext_bin_rmw_8_64<rmw_8>, or_is_add, inst8_64>;
+  def : BinRMWPatImmOff<i64, sext_bin_rmw_16_64<rmw_16>, or_is_add, inst16_64>;
+
+  def : BinRMWPatGlobalAddr<i32, zext_bin_rmw_8_32<rmw_8>, inst8_32>;
+  def : BinRMWPatGlobalAddr<i32, zext_bin_rmw_16_32<rmw_16>, inst16_32>;
+  def : BinRMWPatGlobalAddr<i64, zext_bin_rmw_8_64<rmw_8>, inst8_64>;
+  def : BinRMWPatGlobalAddr<i64, zext_bin_rmw_16_64<rmw_16>, inst16_64>;
+  def : BinRMWPatGlobalAddr<i64, zext_bin_rmw_32_64<rmw_32>, inst32_64>;
+
+  def : BinRMWPatGlobalAddr<i32, sext_bin_rmw_8_32<rmw_8>, inst8_32>;
+  def : BinRMWPatGlobalAddr<i32, sext_bin_rmw_16_32<rmw_16>, inst16_32>;
+  def : BinRMWPatGlobalAddr<i64, sext_bin_rmw_8_64<rmw_8>, inst8_64>;
+  def : BinRMWPatGlobalAddr<i64, sext_bin_rmw_16_64<rmw_16>, inst16_64>;
+
+  def : BinRMWPatExternalSym<i32, zext_bin_rmw_8_32<rmw_8>, inst8_32>;
+  def : BinRMWPatExternalSym<i32, zext_bin_rmw_16_32<rmw_16>, inst16_32>;
+  def : BinRMWPatExternalSym<i64, zext_bin_rmw_8_64<rmw_8>, inst8_64>;
+  def : BinRMWPatExternalSym<i64, zext_bin_rmw_16_64<rmw_16>, inst16_64>;
+  def : BinRMWPatExternalSym<i64, zext_bin_rmw_32_64<rmw_32>, inst32_64>;
+
+  def : BinRMWPatExternalSym<i32, sext_bin_rmw_8_32<rmw_8>, inst8_32>;
+  def : BinRMWPatExternalSym<i32, sext_bin_rmw_16_32<rmw_16>, inst16_32>;
+  def : BinRMWPatExternalSym<i64, sext_bin_rmw_8_64<rmw_8>, inst8_64>;
+  def : BinRMWPatExternalSym<i64, sext_bin_rmw_16_64<rmw_16>, inst16_64>;
+
+  // Truncating-extending binary RMWs with just a constant offset
+  def : BinRMWPatOffsetOnly<i32, zext_bin_rmw_8_32<rmw_8>, inst8_32>;
+  def : BinRMWPatOffsetOnly<i32, zext_bin_rmw_16_32<rmw_16>, inst16_32>;
+  def : BinRMWPatOffsetOnly<i64, zext_bin_rmw_8_64<rmw_8>, inst8_64>;
+  def : BinRMWPatOffsetOnly<i64, zext_bin_rmw_16_64<rmw_16>, inst16_64>;
+  def : BinRMWPatOffsetOnly<i64, zext_bin_rmw_32_64<rmw_32>, inst32_64>;
+
+  def : BinRMWPatOffsetOnly<i32, sext_bin_rmw_8_32<rmw_8>, inst8_32>;
+  def : BinRMWPatOffsetOnly<i32, sext_bin_rmw_16_32<rmw_16>, inst16_32>;
+  def : BinRMWPatOffsetOnly<i64, sext_bin_rmw_8_64<rmw_8>, inst8_64>;
+  def : BinRMWPatOffsetOnly<i64, sext_bin_rmw_16_64<rmw_16>, inst16_64>;
+
+  def : BinRMWPatGlobalAddrOffOnly<i32, zext_bin_rmw_8_32<rmw_8>, inst8_32>;
+  def : BinRMWPatGlobalAddrOffOnly<i32, zext_bin_rmw_16_32<rmw_16>, inst16_32>;
+  def : BinRMWPatGlobalAddrOffOnly<i64, zext_bin_rmw_8_64<rmw_8>, inst8_64>;
+  def : BinRMWPatGlobalAddrOffOnly<i64, zext_bin_rmw_16_64<rmw_16>, inst16_64>;
+  def : BinRMWPatGlobalAddrOffOnly<i64, zext_bin_rmw_32_64<rmw_32>, inst32_64>;
+
+  def : BinRMWPatGlobalAddrOffOnly<i32, sext_bin_rmw_8_32<rmw_8>, inst8_32>;
+  def : BinRMWPatGlobalAddrOffOnly<i32, sext_bin_rmw_16_32<rmw_16>, inst16_32>;
+  def : BinRMWPatGlobalAddrOffOnly<i64, sext_bin_rmw_8_64<rmw_8>, inst8_64>;
+  def : BinRMWPatGlobalAddrOffOnly<i64, sext_bin_rmw_16_64<rmw_16>, inst16_64>;
+
+  def : BinRMWPatExternSymOffOnly<i32, zext_bin_rmw_8_32<rmw_8>, inst8_32>;
+  def : BinRMWPatExternSymOffOnly<i32, zext_bin_rmw_16_32<rmw_16>, inst16_32>;
+  def : BinRMWPatExternSymOffOnly<i64, zext_bin_rmw_8_64<rmw_8>, inst8_64>;
+  def : BinRMWPatExternSymOffOnly<i64, zext_bin_rmw_16_64<rmw_16>, inst16_64>;
+  def : BinRMWPatExternSymOffOnly<i64, zext_bin_rmw_32_64<rmw_32>, inst32_64>;
+
+  def : BinRMWPatExternSymOffOnly<i32, sext_bin_rmw_8_32<rmw_8>, inst8_32>;
+  def : BinRMWPatExternSymOffOnly<i32, sext_bin_rmw_16_32<rmw_16>, inst16_32>;
+  def : BinRMWPatExternSymOffOnly<i64, sext_bin_rmw_8_64<rmw_8>, inst8_64>;
+  def : BinRMWPatExternSymOffOnly<i64, sext_bin_rmw_16_64<rmw_16>, inst16_64>;
+}
+
+let Predicates = [HasAtomics] in {
+defm : BinRMWTruncExtPattern<
+  atomic_load_add_8, atomic_load_add_16, atomic_load_add_32, atomic_load_add_64,
+  ATOMIC_RMW8_U_ADD_I32, ATOMIC_RMW16_U_ADD_I32,
+  ATOMIC_RMW8_U_ADD_I64, ATOMIC_RMW16_U_ADD_I64, ATOMIC_RMW32_U_ADD_I64>;
+defm : BinRMWTruncExtPattern<
+  atomic_load_sub_8, atomic_load_sub_16, atomic_load_sub_32, atomic_load_sub_64,
+  ATOMIC_RMW8_U_SUB_I32, ATOMIC_RMW16_U_SUB_I32,
+  ATOMIC_RMW8_U_SUB_I64, ATOMIC_RMW16_U_SUB_I64, ATOMIC_RMW32_U_SUB_I64>;
+defm : BinRMWTruncExtPattern<
+  atomic_load_and_8, atomic_load_and_16, atomic_load_and_32, atomic_load_and_64,
+  ATOMIC_RMW8_U_AND_I32, ATOMIC_RMW16_U_AND_I32,
+  ATOMIC_RMW8_U_AND_I64, ATOMIC_RMW16_U_AND_I64, ATOMIC_RMW32_U_AND_I64>;
+defm : BinRMWTruncExtPattern<
+  atomic_load_or_8, atomic_load_or_16, atomic_load_or_32, atomic_load_or_64,
+  ATOMIC_RMW8_U_OR_I32, ATOMIC_RMW16_U_OR_I32,
+  ATOMIC_RMW8_U_OR_I64, ATOMIC_RMW16_U_OR_I64, ATOMIC_RMW32_U_OR_I64>;
+defm : BinRMWTruncExtPattern<
+  atomic_load_xor_8, atomic_load_xor_16, atomic_load_xor_32, atomic_load_xor_64,
+  ATOMIC_RMW8_U_XOR_I32, ATOMIC_RMW16_U_XOR_I32,
+  ATOMIC_RMW8_U_XOR_I64, ATOMIC_RMW16_U_XOR_I64, ATOMIC_RMW32_U_XOR_I64>;
+defm : BinRMWTruncExtPattern<
+  atomic_swap_8, atomic_swap_16, atomic_swap_32, atomic_swap_64,
+  ATOMIC_RMW8_U_XCHG_I32, ATOMIC_RMW16_U_XCHG_I32,
+  ATOMIC_RMW8_U_XCHG_I64, ATOMIC_RMW16_U_XCHG_I64, ATOMIC_RMW32_U_XCHG_I64>;
+} // Predicates = [HasAtomics]
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp b/llvm/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp
index 6f0ae89..1422199 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp
@@ -107,6 +107,48 @@
       case WebAssembly::ATOMIC_LOAD8_U_I64:
       case WebAssembly::ATOMIC_LOAD16_U_I64:
       case WebAssembly::ATOMIC_LOAD32_U_I64:
+      case WebAssembly::ATOMIC_RMW8_U_ADD_I32:
+      case WebAssembly::ATOMIC_RMW8_U_ADD_I64:
+      case WebAssembly::ATOMIC_RMW8_U_SUB_I32:
+      case WebAssembly::ATOMIC_RMW8_U_SUB_I64:
+      case WebAssembly::ATOMIC_RMW8_U_AND_I32:
+      case WebAssembly::ATOMIC_RMW8_U_AND_I64:
+      case WebAssembly::ATOMIC_RMW8_U_OR_I32:
+      case WebAssembly::ATOMIC_RMW8_U_OR_I64:
+      case WebAssembly::ATOMIC_RMW8_U_XOR_I32:
+      case WebAssembly::ATOMIC_RMW8_U_XOR_I64:
+      case WebAssembly::ATOMIC_RMW8_U_XCHG_I32:
+      case WebAssembly::ATOMIC_RMW8_U_XCHG_I64:
+      case WebAssembly::ATOMIC_RMW16_U_ADD_I32:
+      case WebAssembly::ATOMIC_RMW16_U_ADD_I64:
+      case WebAssembly::ATOMIC_RMW16_U_SUB_I32:
+      case WebAssembly::ATOMIC_RMW16_U_SUB_I64:
+      case WebAssembly::ATOMIC_RMW16_U_AND_I32:
+      case WebAssembly::ATOMIC_RMW16_U_AND_I64:
+      case WebAssembly::ATOMIC_RMW16_U_OR_I32:
+      case WebAssembly::ATOMIC_RMW16_U_OR_I64:
+      case WebAssembly::ATOMIC_RMW16_U_XOR_I32:
+      case WebAssembly::ATOMIC_RMW16_U_XOR_I64:
+      case WebAssembly::ATOMIC_RMW16_U_XCHG_I32:
+      case WebAssembly::ATOMIC_RMW16_U_XCHG_I64:
+      case WebAssembly::ATOMIC_RMW_ADD_I32:
+      case WebAssembly::ATOMIC_RMW32_U_ADD_I64:
+      case WebAssembly::ATOMIC_RMW_SUB_I32:
+      case WebAssembly::ATOMIC_RMW32_U_SUB_I64:
+      case WebAssembly::ATOMIC_RMW_AND_I32:
+      case WebAssembly::ATOMIC_RMW32_U_AND_I64:
+      case WebAssembly::ATOMIC_RMW_OR_I32:
+      case WebAssembly::ATOMIC_RMW32_U_OR_I64:
+      case WebAssembly::ATOMIC_RMW_XOR_I32:
+      case WebAssembly::ATOMIC_RMW32_U_XOR_I64:
+      case WebAssembly::ATOMIC_RMW_XCHG_I32:
+      case WebAssembly::ATOMIC_RMW32_U_XCHG_I64:
+      case WebAssembly::ATOMIC_RMW_ADD_I64:
+      case WebAssembly::ATOMIC_RMW_SUB_I64:
+      case WebAssembly::ATOMIC_RMW_AND_I64:
+      case WebAssembly::ATOMIC_RMW_OR_I64:
+      case WebAssembly::ATOMIC_RMW_XOR_I64:
+      case WebAssembly::ATOMIC_RMW_XCHG_I64:
         RewriteP2Align(MI, WebAssembly::LoadP2AlignOperandNo);
         break;
       case WebAssembly::STORE_I32: