[WebAssembly] Reorder load/store operands to match binary encoding.

The p2align operand of a load/store is encoded before the offset
operand; reorder the MachineInstr operands accordingly.

llvm-svn: 285044
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
index a282f86..4c9e30e 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
@@ -56,611 +56,613 @@
 let Defs = [ARGUMENTS] in {
 
 // Basic load.
-def LOAD_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr,
-                                   P2Align:$p2align), [],
-                 "i32.load\t$dst, ${off}(${addr})${p2align}", 0x28>;
-def LOAD_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr,
-                                   P2Align:$p2align), [],
-                 "i64.load\t$dst, ${off}(${addr})${p2align}", 0x29>;
-def LOAD_F32 : I<(outs F32:$dst), (ins i32imm:$off, I32:$addr,
-                                   P2Align:$p2align), [],
-                 "f32.load\t$dst, ${off}(${addr})${p2align}", 0x2a>;
-def LOAD_F64 : I<(outs F64:$dst), (ins i32imm:$off, I32:$addr,
-                                   P2Align:$p2align), [],
-                 "f64.load\t$dst, ${off}(${addr})${p2align}", 0x2b>;
+// FIXME: The asmstrings still print the offset before p2align. When we can
+// break syntax compatibility, reorder them to match the binary encoding.
+def LOAD_I32 : I<(outs I32:$dst),
+                 (ins P2Align:$p2align, i32imm:$off, I32:$addr),
+                 [], "i32.load\t$dst, ${off}(${addr})${p2align}", 0x28>;
+def LOAD_I64 : I<(outs I64:$dst),
+                 (ins P2Align:$p2align, i32imm:$off, I32:$addr),
+                 [], "i64.load\t$dst, ${off}(${addr})${p2align}", 0x29>;
+def LOAD_F32 : I<(outs F32:$dst),
+                 (ins P2Align:$p2align, i32imm:$off, I32:$addr),
+                 [], "f32.load\t$dst, ${off}(${addr})${p2align}", 0x2a>;
+def LOAD_F64 : I<(outs F64:$dst),
+                 (ins P2Align:$p2align, i32imm:$off, I32:$addr),
+                 [], "f64.load\t$dst, ${off}(${addr})${p2align}", 0x2b>;
 
 } // Defs = [ARGUMENTS]
 
 // Select loads with no constant offset.
-def : Pat<(i32 (load I32:$addr)), (LOAD_I32 0, $addr, 0)>;
-def : Pat<(i64 (load I32:$addr)), (LOAD_I64 0, $addr, 0)>;
-def : Pat<(f32 (load I32:$addr)), (LOAD_F32 0, $addr, 0)>;
-def : Pat<(f64 (load I32:$addr)), (LOAD_F64 0, $addr, 0)>;
+def : Pat<(i32 (load I32:$addr)), (LOAD_I32 0, 0, $addr)>;
+def : Pat<(i64 (load I32:$addr)), (LOAD_I64 0, 0, $addr)>;
+def : Pat<(f32 (load I32:$addr)), (LOAD_F32 0, 0, $addr)>;
+def : Pat<(f64 (load I32:$addr)), (LOAD_F64 0, 0, $addr)>;
 
 // Select loads with a constant offset.
 def : Pat<(i32 (load (regPlusImm I32:$addr, imm:$off))),
-          (LOAD_I32 imm:$off, $addr, 0)>;
+          (LOAD_I32 0, imm:$off, $addr)>;
 def : Pat<(i64 (load (regPlusImm I32:$addr, imm:$off))),
-          (LOAD_I64 imm:$off, $addr, 0)>;
+          (LOAD_I64 0, imm:$off, $addr)>;
 def : Pat<(f32 (load (regPlusImm I32:$addr, imm:$off))),
-          (LOAD_F32 imm:$off, $addr, 0)>;
+          (LOAD_F32 0, imm:$off, $addr)>;
 def : Pat<(f64 (load (regPlusImm I32:$addr, imm:$off))),
-          (LOAD_F64 imm:$off, $addr, 0)>;
+          (LOAD_F64 0, imm:$off, $addr)>;
 def : Pat<(i32 (load (or_is_add I32:$addr, imm:$off))),
-          (LOAD_I32 imm:$off, $addr, 0)>;
+          (LOAD_I32 0, imm:$off, $addr)>;
 def : Pat<(i64 (load (or_is_add I32:$addr, imm:$off))),
-          (LOAD_I64 imm:$off, $addr, 0)>;
+          (LOAD_I64 0, imm:$off, $addr)>;
 def : Pat<(f32 (load (or_is_add I32:$addr, imm:$off))),
-          (LOAD_F32 imm:$off, $addr, 0)>;
+          (LOAD_F32 0, imm:$off, $addr)>;
 def : Pat<(f64 (load (or_is_add I32:$addr, imm:$off))),
-          (LOAD_F64 imm:$off, $addr, 0)>;
+          (LOAD_F64 0, imm:$off, $addr)>;
 def : Pat<(i32 (load (regPlusGA I32:$addr,
                                 (WebAssemblywrapper tglobaladdr:$off)))),
-          (LOAD_I32 tglobaladdr:$off, $addr, 0)>;
+          (LOAD_I32 0, tglobaladdr:$off, $addr)>;
 def : Pat<(i64 (load (regPlusGA I32:$addr,
                                 (WebAssemblywrapper tglobaladdr:$off)))),
-          (LOAD_I64 tglobaladdr:$off, $addr, 0)>;
+          (LOAD_I64 0, tglobaladdr:$off, $addr)>;
 def : Pat<(f32 (load (regPlusGA I32:$addr,
                                 (WebAssemblywrapper tglobaladdr:$off)))),
-          (LOAD_F32 tglobaladdr:$off, $addr, 0)>;
+          (LOAD_F32 0, tglobaladdr:$off, $addr)>;
 def : Pat<(f64 (load (regPlusGA I32:$addr,
                                 (WebAssemblywrapper tglobaladdr:$off)))),
-          (LOAD_F64 tglobaladdr:$off, $addr, 0)>;
+          (LOAD_F64 0, tglobaladdr:$off, $addr)>;
 def : Pat<(i32 (load (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))),
-          (LOAD_I32 texternalsym:$off, $addr, 0)>;
+          (LOAD_I32 0, texternalsym:$off, $addr)>;
 def : Pat<(i64 (load (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))),
-          (LOAD_I64 texternalsym:$off, $addr, 0)>;
+          (LOAD_I64 0, texternalsym:$off, $addr)>;
 def : Pat<(f32 (load (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))),
-          (LOAD_F32 texternalsym:$off, $addr, 0)>;
+          (LOAD_F32 0, texternalsym:$off, $addr)>;
 def : Pat<(f64 (load (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))),
-          (LOAD_F64 texternalsym:$off, $addr, 0)>;
+          (LOAD_F64 0, texternalsym:$off, $addr)>;
 
 // Select loads with just a constant offset.
-def : Pat<(i32 (load imm:$off)), (LOAD_I32 imm:$off, (CONST_I32 0), 0)>;
-def : Pat<(i64 (load imm:$off)), (LOAD_I64 imm:$off, (CONST_I32 0), 0)>;
-def : Pat<(f32 (load imm:$off)), (LOAD_F32 imm:$off, (CONST_I32 0), 0)>;
-def : Pat<(f64 (load imm:$off)), (LOAD_F64 imm:$off, (CONST_I32 0), 0)>;
+def : Pat<(i32 (load imm:$off)), (LOAD_I32 0, imm:$off, (CONST_I32 0))>;
+def : Pat<(i64 (load imm:$off)), (LOAD_I64 0, imm:$off, (CONST_I32 0))>;
+def : Pat<(f32 (load imm:$off)), (LOAD_F32 0, imm:$off, (CONST_I32 0))>;
+def : Pat<(f64 (load imm:$off)), (LOAD_F64 0, imm:$off, (CONST_I32 0))>;
 def : Pat<(i32 (load (WebAssemblywrapper tglobaladdr:$off))),
-          (LOAD_I32 tglobaladdr:$off, (CONST_I32 0), 0)>;
+          (LOAD_I32 0, tglobaladdr:$off, (CONST_I32 0))>;
 def : Pat<(i64 (load (WebAssemblywrapper tglobaladdr:$off))),
-          (LOAD_I64 tglobaladdr:$off, (CONST_I32 0), 0)>;
+          (LOAD_I64 0, tglobaladdr:$off, (CONST_I32 0))>;
 def : Pat<(f32 (load (WebAssemblywrapper tglobaladdr:$off))),
-          (LOAD_F32 tglobaladdr:$off, (CONST_I32 0), 0)>;
+          (LOAD_F32 0, tglobaladdr:$off, (CONST_I32 0))>;
 def : Pat<(f64 (load (WebAssemblywrapper tglobaladdr:$off))),
-          (LOAD_F64 tglobaladdr:$off, (CONST_I32 0), 0)>;
+          (LOAD_F64 0, tglobaladdr:$off, (CONST_I32 0))>;
 def : Pat<(i32 (load (WebAssemblywrapper texternalsym:$off))),
-          (LOAD_I32 texternalsym:$off, (CONST_I32 0), 0)>;
+          (LOAD_I32 0, texternalsym:$off, (CONST_I32 0))>;
 def : Pat<(i64 (load (WebAssemblywrapper texternalsym:$off))),
-          (LOAD_I64 texternalsym:$off, (CONST_I32 0), 0)>;
+          (LOAD_I64 0, texternalsym:$off, (CONST_I32 0))>;
 def : Pat<(f32 (load (WebAssemblywrapper texternalsym:$off))),
-          (LOAD_F32 texternalsym:$off, (CONST_I32 0), 0)>;
+          (LOAD_F32 0, texternalsym:$off, (CONST_I32 0))>;
 def : Pat<(f64 (load (WebAssemblywrapper texternalsym:$off))),
-          (LOAD_F64 texternalsym:$off, (CONST_I32 0), 0)>;
+          (LOAD_F64 0, texternalsym:$off, (CONST_I32 0))>;
 
 let Defs = [ARGUMENTS] in {
 
 // Extending load.
-def LOAD8_S_I32  : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr,
-                                       P2Align:$p2align), [],
-                     "i32.load8_s\t$dst, ${off}(${addr})${p2align}", 0x2c>;
-def LOAD8_U_I32  : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr,
-                                       P2Align:$p2align), [],
-                     "i32.load8_u\t$dst, ${off}(${addr})${p2align}", 0x2d>;
-def LOAD16_S_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr,
-                                       P2Align:$p2align), [],
-                     "i32.load16_s\t$dst, ${off}(${addr})${p2align}", 0x2e>;
-def LOAD16_U_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr,
-                                       P2Align:$p2align), [],
-                     "i32.load16_u\t$dst, ${off}(${addr})${p2align}", 0x2f>;
-def LOAD8_S_I64  : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr,
-                                       P2Align:$p2align), [],
-                     "i64.load8_s\t$dst, ${off}(${addr})${p2align}", 0x30>;
-def LOAD8_U_I64  : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr,
-                                       P2Align:$p2align), [],
-                     "i64.load8_u\t$dst, ${off}(${addr})${p2align}", 0x31>;
-def LOAD16_S_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr,
-                                       P2Align:$p2align), [],
-                     "i64.load16_s\t$dst, ${off}(${addr})${p2align}", 0x32>;
-def LOAD16_U_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr,
-                                       P2Align:$p2align), [],
-                     "i64.load16_u\t$dst, ${off}(${addr})${p2align}", 0x33>;
-def LOAD32_S_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr,
-                                       P2Align:$p2align), [],
-                     "i64.load32_s\t$dst, ${off}(${addr})${p2align}", 0x34>;
-def LOAD32_U_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr,
-                                       P2Align:$p2align), [],
-                     "i64.load32_u\t$dst, ${off}(${addr})${p2align}", 0x35>;
+def LOAD8_S_I32  : I<(outs I32:$dst),
+                     (ins P2Align:$p2align, i32imm:$off, I32:$addr),
+                     [], "i32.load8_s\t$dst, ${off}(${addr})${p2align}", 0x2c>;
+def LOAD8_U_I32  : I<(outs I32:$dst),
+                     (ins P2Align:$p2align, i32imm:$off, I32:$addr),
+                     [], "i32.load8_u\t$dst, ${off}(${addr})${p2align}", 0x2d>;
+def LOAD16_S_I32 : I<(outs I32:$dst),
+                     (ins P2Align:$p2align, i32imm:$off, I32:$addr),
+                     [], "i32.load16_s\t$dst, ${off}(${addr})${p2align}", 0x2e>;
+def LOAD16_U_I32 : I<(outs I32:$dst),
+                     (ins P2Align:$p2align, i32imm:$off, I32:$addr),
+                     [], "i32.load16_u\t$dst, ${off}(${addr})${p2align}", 0x2f>;
+def LOAD8_S_I64  : I<(outs I64:$dst),
+                     (ins P2Align:$p2align, i32imm:$off, I32:$addr),
+                     [], "i64.load8_s\t$dst, ${off}(${addr})${p2align}", 0x30>;
+def LOAD8_U_I64  : I<(outs I64:$dst),
+                     (ins P2Align:$p2align, i32imm:$off, I32:$addr),
+                     [], "i64.load8_u\t$dst, ${off}(${addr})${p2align}", 0x31>;
+def LOAD16_S_I64 : I<(outs I64:$dst),
+                     (ins P2Align:$p2align, i32imm:$off, I32:$addr),
+                     [], "i64.load16_s\t$dst, ${off}(${addr})${p2align}", 0x32>;
+def LOAD16_U_I64 : I<(outs I64:$dst),
+                     (ins P2Align:$p2align, i32imm:$off, I32:$addr),
+                     [], "i64.load16_u\t$dst, ${off}(${addr})${p2align}", 0x33>;
+def LOAD32_S_I64 : I<(outs I64:$dst),
+                     (ins P2Align:$p2align, i32imm:$off, I32:$addr),
+                     [], "i64.load32_s\t$dst, ${off}(${addr})${p2align}", 0x34>;
+def LOAD32_U_I64 : I<(outs I64:$dst),
+                     (ins P2Align:$p2align, i32imm:$off, I32:$addr),
+                     [], "i64.load32_u\t$dst, ${off}(${addr})${p2align}", 0x35>;
 
 } // Defs = [ARGUMENTS]
 
 // Select extending loads with no constant offset.
-def : Pat<(i32 (sextloadi8 I32:$addr)), (LOAD8_S_I32 0, $addr, 0)>;
-def : Pat<(i32 (zextloadi8 I32:$addr)), (LOAD8_U_I32 0, $addr, 0)>;
-def : Pat<(i32 (sextloadi16 I32:$addr)), (LOAD16_S_I32 0, $addr, 0)>;
-def : Pat<(i32 (zextloadi16 I32:$addr)), (LOAD16_U_I32 0, $addr, 0)>;
-def : Pat<(i64 (sextloadi8 I32:$addr)), (LOAD8_S_I64 0, $addr, 0)>;
-def : Pat<(i64 (zextloadi8 I32:$addr)), (LOAD8_U_I64 0, $addr, 0)>;
-def : Pat<(i64 (sextloadi16 I32:$addr)), (LOAD16_S_I64 0, $addr, 0)>;
-def : Pat<(i64 (zextloadi16 I32:$addr)), (LOAD16_U_I64 0, $addr, 0)>;
-def : Pat<(i64 (sextloadi32 I32:$addr)), (LOAD32_S_I64 0, $addr, 0)>;
-def : Pat<(i64 (zextloadi32 I32:$addr)), (LOAD32_U_I64 0, $addr, 0)>;
+def : Pat<(i32 (sextloadi8 I32:$addr)), (LOAD8_S_I32 0, 0, $addr)>;
+def : Pat<(i32 (zextloadi8 I32:$addr)), (LOAD8_U_I32 0, 0, $addr)>;
+def : Pat<(i32 (sextloadi16 I32:$addr)), (LOAD16_S_I32 0, 0, $addr)>;
+def : Pat<(i32 (zextloadi16 I32:$addr)), (LOAD16_U_I32 0, 0, $addr)>;
+def : Pat<(i64 (sextloadi8 I32:$addr)), (LOAD8_S_I64 0, 0, $addr)>;
+def : Pat<(i64 (zextloadi8 I32:$addr)), (LOAD8_U_I64 0, 0, $addr)>;
+def : Pat<(i64 (sextloadi16 I32:$addr)), (LOAD16_S_I64 0, 0, $addr)>;
+def : Pat<(i64 (zextloadi16 I32:$addr)), (LOAD16_U_I64 0, 0, $addr)>;
+def : Pat<(i64 (sextloadi32 I32:$addr)), (LOAD32_S_I64 0, 0, $addr)>;
+def : Pat<(i64 (zextloadi32 I32:$addr)), (LOAD32_U_I64 0, 0, $addr)>;
 
 // Select extending loads with a constant offset.
 def : Pat<(i32 (sextloadi8 (regPlusImm I32:$addr, imm:$off))),
-          (LOAD8_S_I32 imm:$off, $addr, 0)>;
+          (LOAD8_S_I32 0, imm:$off, $addr)>;
 def : Pat<(i32 (zextloadi8 (regPlusImm I32:$addr, imm:$off))),
-          (LOAD8_U_I32 imm:$off, $addr, 0)>;
+          (LOAD8_U_I32 0, imm:$off, $addr)>;
 def : Pat<(i32 (sextloadi16 (regPlusImm I32:$addr, imm:$off))),
-          (LOAD16_S_I32 imm:$off, $addr, 0)>;
+          (LOAD16_S_I32 0, imm:$off, $addr)>;
 def : Pat<(i32 (zextloadi16 (regPlusImm I32:$addr, imm:$off))),
-          (LOAD16_U_I32 imm:$off, $addr, 0)>;
+          (LOAD16_U_I32 0, imm:$off, $addr)>;
 def : Pat<(i64 (sextloadi8 (regPlusImm I32:$addr, imm:$off))),
-          (LOAD8_S_I64 imm:$off, $addr, 0)>;
+          (LOAD8_S_I64 0, imm:$off, $addr)>;
 def : Pat<(i64 (zextloadi8 (regPlusImm I32:$addr, imm:$off))),
-          (LOAD8_U_I64 imm:$off, $addr, 0)>;
+          (LOAD8_U_I64 0, imm:$off, $addr)>;
 def : Pat<(i64 (sextloadi16 (regPlusImm I32:$addr, imm:$off))),
-          (LOAD16_S_I64 imm:$off, $addr, 0)>;
+          (LOAD16_S_I64 0, imm:$off, $addr)>;
 def : Pat<(i64 (zextloadi16 (regPlusImm I32:$addr, imm:$off))),
-          (LOAD16_U_I64 imm:$off, $addr, 0)>;
+          (LOAD16_U_I64 0, imm:$off, $addr)>;
 def : Pat<(i64 (sextloadi32 (regPlusImm I32:$addr, imm:$off))),
-          (LOAD32_S_I64 imm:$off, $addr, 0)>;
+          (LOAD32_S_I64 0, imm:$off, $addr)>;
 def : Pat<(i64 (zextloadi32 (regPlusImm I32:$addr, imm:$off))),
-          (LOAD32_U_I64 imm:$off, $addr, 0)>;
+          (LOAD32_U_I64 0, imm:$off, $addr)>;
 def : Pat<(i32 (sextloadi8 (or_is_add I32:$addr, imm:$off))),
-          (LOAD8_S_I32 imm:$off, $addr, 0)>;
+          (LOAD8_S_I32 0, imm:$off, $addr)>;
 def : Pat<(i32 (zextloadi8 (or_is_add I32:$addr, imm:$off))),
-          (LOAD8_U_I32 imm:$off, $addr, 0)>;
+          (LOAD8_U_I32 0, imm:$off, $addr)>;
 def : Pat<(i32 (sextloadi16 (or_is_add I32:$addr, imm:$off))),
-          (LOAD16_S_I32 imm:$off, $addr, 0)>;
+          (LOAD16_S_I32 0, imm:$off, $addr)>;
 def : Pat<(i32 (zextloadi16 (or_is_add I32:$addr, imm:$off))),
-          (LOAD16_U_I32 imm:$off, $addr, 0)>;
+          (LOAD16_U_I32 0, imm:$off, $addr)>;
 def : Pat<(i64 (sextloadi8 (or_is_add I32:$addr, imm:$off))),
-          (LOAD8_S_I64 imm:$off, $addr, 0)>;
+          (LOAD8_S_I64 0, imm:$off, $addr)>;
 def : Pat<(i64 (zextloadi8 (or_is_add I32:$addr, imm:$off))),
-          (LOAD8_U_I64 imm:$off, $addr, 0)>;
+          (LOAD8_U_I64 0, imm:$off, $addr)>;
 def : Pat<(i64 (sextloadi16 (or_is_add I32:$addr, imm:$off))),
-          (LOAD16_S_I64 imm:$off, $addr, 0)>;
+          (LOAD16_S_I64 0, imm:$off, $addr)>;
 def : Pat<(i64 (zextloadi16 (or_is_add I32:$addr, imm:$off))),
-          (LOAD16_U_I64 imm:$off, $addr, 0)>;
+          (LOAD16_U_I64 0, imm:$off, $addr)>;
 def : Pat<(i64 (sextloadi32 (or_is_add I32:$addr, imm:$off))),
-          (LOAD32_S_I64 imm:$off, $addr, 0)>;
+          (LOAD32_S_I64 0, imm:$off, $addr)>;
 def : Pat<(i64 (zextloadi32 (or_is_add I32:$addr, imm:$off))),
-          (LOAD32_U_I64 imm:$off, $addr, 0)>;
+          (LOAD32_U_I64 0, imm:$off, $addr)>;
 def : Pat<(i32 (sextloadi8 (regPlusGA I32:$addr,
                                       (WebAssemblywrapper tglobaladdr:$off)))),
-          (LOAD8_S_I32 tglobaladdr:$off, $addr, 0)>;
+          (LOAD8_S_I32 0, tglobaladdr:$off, $addr)>;
 def : Pat<(i32 (zextloadi8 (regPlusGA I32:$addr,
                                       (WebAssemblywrapper tglobaladdr:$off)))),
-          (LOAD8_U_I32 tglobaladdr:$off, $addr, 0)>;
+          (LOAD8_U_I32 0, tglobaladdr:$off, $addr)>;
 def : Pat<(i32 (sextloadi16 (regPlusGA I32:$addr,
                                        (WebAssemblywrapper tglobaladdr:$off)))),
-          (LOAD16_S_I32 tglobaladdr:$off, $addr, 0)>;
+          (LOAD16_S_I32 0, tglobaladdr:$off, $addr)>;
 def : Pat<(i32 (zextloadi16 (regPlusGA I32:$addr,
                                        (WebAssemblywrapper tglobaladdr:$off)))),
-          (LOAD16_U_I32 tglobaladdr:$off, $addr, 0)>;
+          (LOAD16_U_I32 0, tglobaladdr:$off, $addr)>;
 def : Pat<(i64 (sextloadi8 (regPlusGA I32:$addr,
                                       (WebAssemblywrapper tglobaladdr:$off)))),
-          (LOAD8_S_I64 tglobaladdr:$off, $addr, 0)>;
+          (LOAD8_S_I64 0, tglobaladdr:$off, $addr)>;
 def : Pat<(i64 (zextloadi8 (regPlusGA I32:$addr,
                                       (WebAssemblywrapper tglobaladdr:$off)))),
-          (LOAD8_U_I64 tglobaladdr:$off, $addr, 0)>;
+          (LOAD8_U_I64 0, tglobaladdr:$off, $addr)>;
 def : Pat<(i64 (sextloadi16 (regPlusGA I32:$addr,
                                        (WebAssemblywrapper tglobaladdr:$off)))),
-          (LOAD16_S_I64 tglobaladdr:$off, $addr, 0)>;
+          (LOAD16_S_I64 0, tglobaladdr:$off, $addr)>;
 def : Pat<(i64 (zextloadi16 (regPlusGA I32:$addr,
                                        (WebAssemblywrapper tglobaladdr:$off)))),
-          (LOAD16_U_I64 tglobaladdr:$off, $addr, 0)>;
+          (LOAD16_U_I64 0, tglobaladdr:$off, $addr)>;
 def : Pat<(i64 (sextloadi32 (regPlusGA I32:$addr,
                                        (WebAssemblywrapper tglobaladdr:$off)))),
-          (LOAD32_S_I64 tglobaladdr:$off, $addr, 0)>;
+          (LOAD32_S_I64 0, tglobaladdr:$off, $addr)>;
 def : Pat<(i64 (zextloadi32 (regPlusGA I32:$addr,
                                        (WebAssemblywrapper tglobaladdr:$off)))),
-          (LOAD32_U_I64 tglobaladdr:$off, $addr, 0)>;
+          (LOAD32_U_I64 0, tglobaladdr:$off, $addr)>;
 def : Pat<(i32 (sextloadi8 (add I32:$addr,
                                 (WebAssemblywrapper texternalsym:$off)))),
-          (LOAD8_S_I32 texternalsym:$off, $addr, 0)>;
+          (LOAD8_S_I32 0, texternalsym:$off, $addr)>;
 def : Pat<(i32 (zextloadi8 (add I32:$addr,
                                 (WebAssemblywrapper texternalsym:$off)))),
-          (LOAD8_U_I32 texternalsym:$off, $addr, 0)>;
+          (LOAD8_U_I32 0, texternalsym:$off, $addr)>;
 def : Pat<(i32 (sextloadi16 (add I32:$addr,
                                  (WebAssemblywrapper texternalsym:$off)))),
-          (LOAD16_S_I32 texternalsym:$off, $addr, 0)>;
+          (LOAD16_S_I32 0, texternalsym:$off, $addr)>;
 def : Pat<(i32 (zextloadi16 (add I32:$addr,
                                  (WebAssemblywrapper texternalsym:$off)))),
-          (LOAD16_U_I32 texternalsym:$off, $addr, 0)>;
+          (LOAD16_U_I32 0, texternalsym:$off, $addr)>;
 def : Pat<(i64 (sextloadi8 (add I32:$addr,
                                 (WebAssemblywrapper texternalsym:$off)))),
-          (LOAD8_S_I64 texternalsym:$off, $addr, 0)>;
+          (LOAD8_S_I64 0, texternalsym:$off, $addr)>;
 def : Pat<(i64 (zextloadi8 (add I32:$addr,
                                 (WebAssemblywrapper texternalsym:$off)))),
-          (LOAD8_U_I64 texternalsym:$off, $addr, 0)>;
+          (LOAD8_U_I64 0, texternalsym:$off, $addr)>;
 def : Pat<(i64 (sextloadi16 (add I32:$addr,
                                  (WebAssemblywrapper texternalsym:$off)))),
-          (LOAD16_S_I64 texternalsym:$off, $addr, 0)>;
+          (LOAD16_S_I64 0, texternalsym:$off, $addr)>;
 def : Pat<(i64 (zextloadi16 (add I32:$addr,
                                  (WebAssemblywrapper texternalsym:$off)))),
-          (LOAD16_U_I64 texternalsym:$off, $addr, 0)>;
+          (LOAD16_U_I64 0, texternalsym:$off, $addr)>;
 def : Pat<(i64 (sextloadi32 (add I32:$addr,
                                  (WebAssemblywrapper texternalsym:$off)))),
-          (LOAD32_S_I64 texternalsym:$off, $addr, 0)>;
+          (LOAD32_S_I64 0, texternalsym:$off, $addr)>;
 def : Pat<(i64 (zextloadi32 (add I32:$addr,
                                  (WebAssemblywrapper texternalsym:$off)))),
-          (LOAD32_U_I64 texternalsym:$off, $addr, 0)>;
+          (LOAD32_U_I64 0, texternalsym:$off, $addr)>;
 
 // Select extending loads with just a constant offset.
 def : Pat<(i32 (sextloadi8 imm:$off)),
-          (LOAD8_S_I32 imm:$off, (CONST_I32 0), 0)>;
+          (LOAD8_S_I32 0, imm:$off, (CONST_I32 0))>;
 def : Pat<(i32 (zextloadi8 imm:$off)),
-          (LOAD8_U_I32 imm:$off, (CONST_I32 0), 0)>;
+          (LOAD8_U_I32 0, imm:$off, (CONST_I32 0))>;
 def : Pat<(i32 (sextloadi16 imm:$off)),
-          (LOAD16_S_I32 imm:$off, (CONST_I32 0), 0)>;
+          (LOAD16_S_I32 0, imm:$off, (CONST_I32 0))>;
 def : Pat<(i32 (zextloadi16 imm:$off)),
-          (LOAD16_U_I32 imm:$off, (CONST_I32 0), 0)>;
+          (LOAD16_U_I32 0, imm:$off, (CONST_I32 0))>;
 def : Pat<(i64 (sextloadi8 imm:$off)),
-          (LOAD8_S_I64 imm:$off, (CONST_I32 0), 0)>;
+          (LOAD8_S_I64 0, imm:$off, (CONST_I32 0))>;
 def : Pat<(i64 (zextloadi8 imm:$off)),
-          (LOAD8_U_I64 imm:$off, (CONST_I32 0), 0)>;
+          (LOAD8_U_I64 0, imm:$off, (CONST_I32 0))>;
 def : Pat<(i64 (sextloadi16 imm:$off)),
-          (LOAD16_S_I64 imm:$off, (CONST_I32 0), 0)>;
+          (LOAD16_S_I64 0, imm:$off, (CONST_I32 0))>;
 def : Pat<(i64 (zextloadi16 imm:$off)),
-          (LOAD16_U_I64 imm:$off, (CONST_I32 0), 0)>;
+          (LOAD16_U_I64 0, imm:$off, (CONST_I32 0))>;
 def : Pat<(i64 (sextloadi32 imm:$off)),
-          (LOAD32_S_I64 imm:$off, (CONST_I32 0), 0)>;
+          (LOAD32_S_I64 0, imm:$off, (CONST_I32 0))>;
 def : Pat<(i64 (zextloadi32 imm:$off)),
-          (LOAD32_U_I64 imm:$off, (CONST_I32 0), 0)>;
+          (LOAD32_U_I64 0, imm:$off, (CONST_I32 0))>;
 def : Pat<(i32 (sextloadi8 (WebAssemblywrapper tglobaladdr:$off))),
-          (LOAD8_S_I32 tglobaladdr:$off, (CONST_I32 0), 0)>;
+          (LOAD8_S_I32 0, tglobaladdr:$off, (CONST_I32 0))>;
 def : Pat<(i32 (zextloadi8 (WebAssemblywrapper tglobaladdr:$off))),
-          (LOAD8_U_I32 tglobaladdr:$off, (CONST_I32 0), 0)>;
+          (LOAD8_U_I32 0, tglobaladdr:$off, (CONST_I32 0))>;
 def : Pat<(i32 (sextloadi16 (WebAssemblywrapper tglobaladdr:$off))),
-          (LOAD16_S_I32 tglobaladdr:$off, (CONST_I32 0), 0)>;
+          (LOAD16_S_I32 0, tglobaladdr:$off, (CONST_I32 0))>;
 def : Pat<(i32 (zextloadi16 (WebAssemblywrapper tglobaladdr:$off))),
-          (LOAD16_U_I32 tglobaladdr:$off, (CONST_I32 0), 0)>;
+          (LOAD16_U_I32 0, tglobaladdr:$off, (CONST_I32 0))>;
 def : Pat<(i64 (sextloadi8 (WebAssemblywrapper tglobaladdr:$off))),
-          (LOAD8_S_I64 tglobaladdr:$off, (CONST_I32 0), 0)>;
+          (LOAD8_S_I64 0, tglobaladdr:$off, (CONST_I32 0))>;
 def : Pat<(i64 (zextloadi8 (WebAssemblywrapper tglobaladdr:$off))),
-          (LOAD8_U_I64 tglobaladdr:$off, (CONST_I32 0), 0)>;
+          (LOAD8_U_I64 0, tglobaladdr:$off, (CONST_I32 0))>;
 def : Pat<(i64 (sextloadi16 (WebAssemblywrapper tglobaladdr:$off))),
-          (LOAD16_S_I64 tglobaladdr:$off, (CONST_I32 0), 0)>;
+          (LOAD16_S_I64 0, tglobaladdr:$off, (CONST_I32 0))>;
 def : Pat<(i64 (zextloadi16 (WebAssemblywrapper tglobaladdr:$off))),
-          (LOAD16_U_I64 tglobaladdr:$off, (CONST_I32 0), 0)>;
+          (LOAD16_U_I64 0, tglobaladdr:$off, (CONST_I32 0))>;
 def : Pat<(i64 (sextloadi32 (WebAssemblywrapper tglobaladdr:$off))),
-          (LOAD32_S_I64 tglobaladdr:$off, (CONST_I32 0), 0)>;
+          (LOAD32_S_I64 0, tglobaladdr:$off, (CONST_I32 0))>;
 def : Pat<(i64 (zextloadi32 (WebAssemblywrapper tglobaladdr:$off))),
-          (LOAD32_U_I64 tglobaladdr:$off, (CONST_I32 0), 0)>;
+          (LOAD32_U_I64 0, tglobaladdr:$off, (CONST_I32 0))>;
 def : Pat<(i32 (sextloadi8 (WebAssemblywrapper texternalsym:$off))),
-          (LOAD8_S_I32 texternalsym:$off, (CONST_I32 0), 0)>;
+          (LOAD8_S_I32 0, texternalsym:$off, (CONST_I32 0))>;
 def : Pat<(i32 (zextloadi8 (WebAssemblywrapper texternalsym:$off))),
-          (LOAD8_U_I32 texternalsym:$off, (CONST_I32 0), 0)>;
+          (LOAD8_U_I32 0, texternalsym:$off, (CONST_I32 0))>;
 def : Pat<(i32 (sextloadi16 (WebAssemblywrapper texternalsym:$off))),
-          (LOAD16_S_I32 texternalsym:$off, (CONST_I32 0), 0)>;
+          (LOAD16_S_I32 0, texternalsym:$off, (CONST_I32 0))>;
 def : Pat<(i32 (zextloadi16 (WebAssemblywrapper texternalsym:$off))),
-          (LOAD16_U_I32 texternalsym:$off, (CONST_I32 0), 0)>;
+          (LOAD16_U_I32 0, texternalsym:$off, (CONST_I32 0))>;
 def : Pat<(i64 (sextloadi8 (WebAssemblywrapper texternalsym:$off))),
-          (LOAD8_S_I64 texternalsym:$off, (CONST_I32 0), 0)>;
+          (LOAD8_S_I64 0, texternalsym:$off, (CONST_I32 0))>;
 def : Pat<(i64 (zextloadi8 (WebAssemblywrapper texternalsym:$off))),
-          (LOAD8_U_I64 texternalsym:$off, (CONST_I32 0), 0)>;
+          (LOAD8_U_I64 0, texternalsym:$off, (CONST_I32 0))>;
 def : Pat<(i64 (sextloadi16 (WebAssemblywrapper texternalsym:$off))),
-          (LOAD16_S_I64 texternalsym:$off, (CONST_I32 0), 0)>;
+          (LOAD16_S_I64 0, texternalsym:$off, (CONST_I32 0))>;
 def : Pat<(i64 (zextloadi16 (WebAssemblywrapper texternalsym:$off))),
-          (LOAD16_U_I64 texternalsym:$off, (CONST_I32 0), 0)>;
+          (LOAD16_U_I64 0, texternalsym:$off, (CONST_I32 0))>;
 def : Pat<(i64 (sextloadi32 (WebAssemblywrapper texternalsym:$off))),
-          (LOAD32_S_I64 texternalsym:$off, (CONST_I32 0), 0)>;
+          (LOAD32_S_I64 0, texternalsym:$off, (CONST_I32 0))>;
 def : Pat<(i64 (zextloadi32 (WebAssemblywrapper texternalsym:$off))),
-          (LOAD32_U_I64 texternalsym:$off, (CONST_I32 0), 0)>;
+          (LOAD32_U_I64 0, texternalsym:$off, (CONST_I32 0))>;
 
 // Resolve "don't care" extending loads to zero-extending loads. This is
 // somewhat arbitrary, but zero-extending is conceptually simpler.
 
 // Select "don't care" extending loads with no constant offset.
-def : Pat<(i32 (extloadi8 I32:$addr)),  (LOAD8_U_I32 0, $addr, 0)>;
-def : Pat<(i32 (extloadi16 I32:$addr)), (LOAD16_U_I32 0, $addr, 0)>;
-def : Pat<(i64 (extloadi8 I32:$addr)),  (LOAD8_U_I64 0, $addr, 0)>;
-def : Pat<(i64 (extloadi16 I32:$addr)), (LOAD16_U_I64 0, $addr, 0)>;
-def : Pat<(i64 (extloadi32 I32:$addr)), (LOAD32_U_I64 0, $addr, 0)>;
+def : Pat<(i32 (extloadi8 I32:$addr)),  (LOAD8_U_I32 0, 0, $addr)>;
+def : Pat<(i32 (extloadi16 I32:$addr)), (LOAD16_U_I32 0, 0, $addr)>;
+def : Pat<(i64 (extloadi8 I32:$addr)),  (LOAD8_U_I64 0, 0, $addr)>;
+def : Pat<(i64 (extloadi16 I32:$addr)), (LOAD16_U_I64 0, 0, $addr)>;
+def : Pat<(i64 (extloadi32 I32:$addr)), (LOAD32_U_I64 0, 0, $addr)>;
 
 // Select "don't care" extending loads with a constant offset.
 def : Pat<(i32 (extloadi8 (regPlusImm I32:$addr, imm:$off))),
-          (LOAD8_U_I32 imm:$off, $addr, 0)>;
+          (LOAD8_U_I32 0, imm:$off, $addr)>;
 def : Pat<(i32 (extloadi16 (regPlusImm I32:$addr, imm:$off))),
-          (LOAD16_U_I32 imm:$off, $addr, 0)>;
+          (LOAD16_U_I32 0, imm:$off, $addr)>;
 def : Pat<(i64 (extloadi8 (regPlusImm I32:$addr, imm:$off))),
-          (LOAD8_U_I64 imm:$off, $addr, 0)>;
+          (LOAD8_U_I64 0, imm:$off, $addr)>;
 def : Pat<(i64 (extloadi16 (regPlusImm I32:$addr, imm:$off))),
-          (LOAD16_U_I64 imm:$off, $addr, 0)>;
+          (LOAD16_U_I64 0, imm:$off, $addr)>;
 def : Pat<(i64 (extloadi32 (regPlusImm I32:$addr, imm:$off))),
-          (LOAD32_U_I64 imm:$off, $addr, 0)>;
+          (LOAD32_U_I64 0, imm:$off, $addr)>;
 def : Pat<(i32 (extloadi8 (or_is_add I32:$addr, imm:$off))),
-          (LOAD8_U_I32 imm:$off, $addr, 0)>;
+          (LOAD8_U_I32 0, imm:$off, $addr)>;
 def : Pat<(i32 (extloadi16 (or_is_add I32:$addr, imm:$off))),
-          (LOAD16_U_I32 imm:$off, $addr, 0)>;
+          (LOAD16_U_I32 0, imm:$off, $addr)>;
 def : Pat<(i64 (extloadi8 (or_is_add I32:$addr, imm:$off))),
-          (LOAD8_U_I64 imm:$off, $addr, 0)>;
+          (LOAD8_U_I64 0, imm:$off, $addr)>;
 def : Pat<(i64 (extloadi16 (or_is_add I32:$addr, imm:$off))),
-          (LOAD16_U_I64 imm:$off, $addr, 0)>;
+          (LOAD16_U_I64 0, imm:$off, $addr)>;
 def : Pat<(i64 (extloadi32 (or_is_add I32:$addr, imm:$off))),
-          (LOAD32_U_I64 imm:$off, $addr, 0)>;
+          (LOAD32_U_I64 0, imm:$off, $addr)>;
 def : Pat<(i32 (extloadi8 (regPlusGA I32:$addr,
                                      (WebAssemblywrapper tglobaladdr:$off)))),
-          (LOAD8_U_I32 tglobaladdr:$off, $addr, 0)>;
+          (LOAD8_U_I32 0, tglobaladdr:$off, $addr)>;
 def : Pat<(i32 (extloadi16 (regPlusGA I32:$addr,
                                       (WebAssemblywrapper tglobaladdr:$off)))),
-          (LOAD16_U_I32 tglobaladdr:$off, $addr, 0)>;
+          (LOAD16_U_I32 0, tglobaladdr:$off, $addr)>;
 def : Pat<(i64 (extloadi8 (regPlusGA I32:$addr,
                                      (WebAssemblywrapper tglobaladdr:$off)))),
-          (LOAD8_U_I64 tglobaladdr:$off, $addr, 0)>;
+          (LOAD8_U_I64 0, tglobaladdr:$off, $addr)>;
 def : Pat<(i64 (extloadi16 (regPlusGA I32:$addr,
                                       (WebAssemblywrapper tglobaladdr:$off)))),
-          (LOAD16_U_I64 tglobaladdr:$off, $addr, 0)>;
+          (LOAD16_U_I64 0, tglobaladdr:$off, $addr)>;
 def : Pat<(i64 (extloadi32 (regPlusGA I32:$addr,
                                       (WebAssemblywrapper tglobaladdr:$off)))),
-          (LOAD32_U_I64 tglobaladdr:$off, $addr, 0)>;
+          (LOAD32_U_I64 0, tglobaladdr:$off, $addr)>;
 def : Pat<(i32 (extloadi8 (add I32:$addr,
                                (WebAssemblywrapper texternalsym:$off)))),
-          (LOAD8_U_I32 texternalsym:$off, $addr, 0)>;
+          (LOAD8_U_I32 0, texternalsym:$off, $addr)>;
 def : Pat<(i32 (extloadi16 (add I32:$addr,
                                 (WebAssemblywrapper texternalsym:$off)))),
-          (LOAD16_U_I32 texternalsym:$off, $addr, 0)>;
+          (LOAD16_U_I32 0, texternalsym:$off, $addr)>;
 def : Pat<(i64 (extloadi8 (add I32:$addr,
                                (WebAssemblywrapper texternalsym:$off)))),
-          (LOAD8_U_I64 texternalsym:$off, $addr, 0)>;
+          (LOAD8_U_I64 0, texternalsym:$off, $addr)>;
 def : Pat<(i64 (extloadi16 (add I32:$addr,
                                 (WebAssemblywrapper texternalsym:$off)))),
-          (LOAD16_U_I64 texternalsym:$off, $addr, 0)>;
+          (LOAD16_U_I64 0, texternalsym:$off, $addr)>;
 def : Pat<(i64 (extloadi32 (add I32:$addr,
                                 (WebAssemblywrapper texternalsym:$off)))),
-          (LOAD32_U_I64 texternalsym:$off, $addr, 0)>;
+          (LOAD32_U_I64 0, texternalsym:$off, $addr)>;
 
 // Select "don't care" extending loads with just a constant offset.
 def : Pat<(i32 (extloadi8 imm:$off)),
-          (LOAD8_U_I32 imm:$off, (CONST_I32 0), 0)>;
+          (LOAD8_U_I32 0, imm:$off, (CONST_I32 0))>;
 def : Pat<(i32 (extloadi16 imm:$off)),
-          (LOAD16_U_I32 imm:$off, (CONST_I32 0), 0)>;
+          (LOAD16_U_I32 0, imm:$off, (CONST_I32 0))>;
 def : Pat<(i64 (extloadi8 imm:$off)),
-          (LOAD8_U_I64 imm:$off, (CONST_I32 0), 0)>;
+          (LOAD8_U_I64 0, imm:$off, (CONST_I32 0))>;
 def : Pat<(i64 (extloadi16 imm:$off)),
-          (LOAD16_U_I64 imm:$off, (CONST_I32 0), 0)>;
+          (LOAD16_U_I64 0, imm:$off, (CONST_I32 0))>;
 def : Pat<(i64 (extloadi32 imm:$off)),
-          (LOAD32_U_I64 imm:$off, (CONST_I32 0), 0)>;
+          (LOAD32_U_I64 0, imm:$off, (CONST_I32 0))>;
 def : Pat<(i32 (extloadi8 (WebAssemblywrapper tglobaladdr:$off))),
-          (LOAD8_U_I32 tglobaladdr:$off, (CONST_I32 0), 0)>;
+          (LOAD8_U_I32 0, tglobaladdr:$off, (CONST_I32 0))>;
 def : Pat<(i32 (extloadi16 (WebAssemblywrapper tglobaladdr:$off))),
-          (LOAD16_U_I32 tglobaladdr:$off, (CONST_I32 0), 0)>;
+          (LOAD16_U_I32 0, tglobaladdr:$off, (CONST_I32 0))>;
 def : Pat<(i64 (extloadi8 (WebAssemblywrapper tglobaladdr:$off))),
-          (LOAD8_U_I64 tglobaladdr:$off, (CONST_I32 0), 0)>;
+          (LOAD8_U_I64 0, tglobaladdr:$off, (CONST_I32 0))>;
 def : Pat<(i64 (extloadi16 (WebAssemblywrapper tglobaladdr:$off))),
-          (LOAD16_U_I64 tglobaladdr:$off, (CONST_I32 0), 0)>;
+          (LOAD16_U_I64 0, tglobaladdr:$off, (CONST_I32 0))>;
 def : Pat<(i64 (extloadi32 (WebAssemblywrapper tglobaladdr:$off))),
-          (LOAD32_U_I64 tglobaladdr:$off, (CONST_I32 0), 0)>;
+          (LOAD32_U_I64 0, tglobaladdr:$off, (CONST_I32 0))>;
 def : Pat<(i32 (extloadi8 (WebAssemblywrapper texternalsym:$off))),
-          (LOAD8_U_I32 texternalsym:$off, (CONST_I32 0), 0)>;
+          (LOAD8_U_I32 0, texternalsym:$off, (CONST_I32 0))>;
 def : Pat<(i32 (extloadi16 (WebAssemblywrapper texternalsym:$off))),
-          (LOAD16_U_I32 texternalsym:$off, (CONST_I32 0), 0)>;
+          (LOAD16_U_I32 0, texternalsym:$off, (CONST_I32 0))>;
 def : Pat<(i64 (extloadi8 (WebAssemblywrapper texternalsym:$off))),
-          (LOAD8_U_I64 texternalsym:$off, (CONST_I32 0), 0)>;
+          (LOAD8_U_I64 0, texternalsym:$off, (CONST_I32 0))>;
 def : Pat<(i64 (extloadi16 (WebAssemblywrapper texternalsym:$off))),
-          (LOAD16_U_I64 texternalsym:$off, (CONST_I32 0), 0)>;
+          (LOAD16_U_I64 0, texternalsym:$off, (CONST_I32 0))>;
 def : Pat<(i64 (extloadi32 (WebAssemblywrapper texternalsym:$off))),
-          (LOAD32_U_I64 tglobaladdr:$off, (CONST_I32 0), 0)>;
+          (LOAD32_U_I64 0, texternalsym:$off, (CONST_I32 0))>;
 
 let Defs = [ARGUMENTS] in {
 
 // Basic store.
 // Note: WebAssembly inverts SelectionDAG's usual operand order.
-def STORE_I32  : I<(outs), (ins i32imm:$off, I32:$addr,
-                            P2Align:$p2align, I32:$val), [],
+def STORE_I32  : I<(outs), (ins P2Align:$p2align, i32imm:$off, I32:$addr,
+                            I32:$val), [],
                    "i32.store\t${off}(${addr})${p2align}, $val", 0x36>;
-def STORE_I64  : I<(outs), (ins i32imm:$off, I32:$addr,
-                            P2Align:$p2align, I64:$val), [],
+def STORE_I64  : I<(outs), (ins P2Align:$p2align, i32imm:$off, I32:$addr,
+                            I64:$val), [],
                    "i64.store\t${off}(${addr})${p2align}, $val", 0x37>;
-def STORE_F32  : I<(outs), (ins i32imm:$off, I32:$addr,
-                            P2Align:$p2align, F32:$val), [],
+def STORE_F32  : I<(outs), (ins P2Align:$p2align, i32imm:$off, I32:$addr,
+                            F32:$val), [],
                    "f32.store\t${off}(${addr})${p2align}, $val", 0x38>;
-def STORE_F64  : I<(outs), (ins i32imm:$off, I32:$addr,
-                            P2Align:$p2align, F64:$val), [],
+def STORE_F64  : I<(outs), (ins P2Align:$p2align, i32imm:$off, I32:$addr,
+                            F64:$val), [],
                    "f64.store\t${off}(${addr})${p2align}, $val", 0x39>;
 
 } // Defs = [ARGUMENTS]
 
 // Select stores with no constant offset.
-def : Pat<(store I32:$val, I32:$addr), (STORE_I32 0, I32:$addr, 0, I32:$val)>;
-def : Pat<(store I64:$val, I32:$addr), (STORE_I64 0, I32:$addr, 0, I64:$val)>;
-def : Pat<(store F32:$val, I32:$addr), (STORE_F32 0, I32:$addr, 0, F32:$val)>;
-def : Pat<(store F64:$val, I32:$addr), (STORE_F64 0, I32:$addr, 0, F64:$val)>;
+def : Pat<(store I32:$val, I32:$addr), (STORE_I32 0, 0, I32:$addr, I32:$val)>;
+def : Pat<(store I64:$val, I32:$addr), (STORE_I64 0, 0, I32:$addr, I64:$val)>;
+def : Pat<(store F32:$val, I32:$addr), (STORE_F32 0, 0, I32:$addr, F32:$val)>;
+def : Pat<(store F64:$val, I32:$addr), (STORE_F64 0, 0, I32:$addr, F64:$val)>;
 
 // Select stores with a constant offset.
 def : Pat<(store I32:$val, (regPlusImm I32:$addr, imm:$off)),
-          (STORE_I32 imm:$off, I32:$addr, 0, I32:$val)>;
+          (STORE_I32 0, imm:$off, I32:$addr, I32:$val)>;
 def : Pat<(store I64:$val, (regPlusImm I32:$addr, imm:$off)),
-          (STORE_I64 imm:$off, I32:$addr, 0, I64:$val)>;
+          (STORE_I64 0, imm:$off, I32:$addr, I64:$val)>;
 def : Pat<(store F32:$val, (regPlusImm I32:$addr, imm:$off)),
-          (STORE_F32 imm:$off, I32:$addr, 0, F32:$val)>;
+          (STORE_F32 0, imm:$off, I32:$addr, F32:$val)>;
 def : Pat<(store F64:$val, (regPlusImm I32:$addr, imm:$off)),
-          (STORE_F64 imm:$off, I32:$addr, 0, F64:$val)>;
+          (STORE_F64 0, imm:$off, I32:$addr, F64:$val)>;
 def : Pat<(store I32:$val, (or_is_add I32:$addr, imm:$off)),
-          (STORE_I32 imm:$off, I32:$addr, 0, I32:$val)>;
+          (STORE_I32 0, imm:$off, I32:$addr, I32:$val)>;
 def : Pat<(store I64:$val, (or_is_add I32:$addr, imm:$off)),
-          (STORE_I64 imm:$off, I32:$addr, 0, I64:$val)>;
+          (STORE_I64 0, imm:$off, I32:$addr, I64:$val)>;
 def : Pat<(store F32:$val, (or_is_add I32:$addr, imm:$off)),
-          (STORE_F32 imm:$off, I32:$addr, 0, F32:$val)>;
+          (STORE_F32 0, imm:$off, I32:$addr, F32:$val)>;
 def : Pat<(store F64:$val, (or_is_add I32:$addr, imm:$off)),
-          (STORE_F64 imm:$off, I32:$addr, 0, F64:$val)>;
+          (STORE_F64 0, imm:$off, I32:$addr, F64:$val)>;
 def : Pat<(store I32:$val, (regPlusGA I32:$addr,
                                       (WebAssemblywrapper tglobaladdr:$off))),
-          (STORE_I32 tglobaladdr:$off, I32:$addr, 0, I32:$val)>;
+          (STORE_I32 0, tglobaladdr:$off, I32:$addr, I32:$val)>;
 def : Pat<(store I64:$val, (regPlusGA I32:$addr,
                                       (WebAssemblywrapper tglobaladdr:$off))),
-          (STORE_I64 tglobaladdr:$off, I32:$addr, 0, I64:$val)>;
+          (STORE_I64 0, tglobaladdr:$off, I32:$addr, I64:$val)>;
 def : Pat<(store F32:$val, (regPlusGA I32:$addr,
                                       (WebAssemblywrapper tglobaladdr:$off))),
-          (STORE_F32 tglobaladdr:$off, I32:$addr, 0, F32:$val)>;
+          (STORE_F32 0, tglobaladdr:$off, I32:$addr, F32:$val)>;
 def : Pat<(store F64:$val, (regPlusGA I32:$addr,
                                       (WebAssemblywrapper tglobaladdr:$off))),
-          (STORE_F64 tglobaladdr:$off, I32:$addr, 0, F64:$val)>;
+          (STORE_F64 0, tglobaladdr:$off, I32:$addr, F64:$val)>;
 def : Pat<(store I32:$val, (add I32:$addr,
                                 (WebAssemblywrapper texternalsym:$off))),
-          (STORE_I32 texternalsym:$off, I32:$addr, 0, I32:$val)>;
+          (STORE_I32 0, texternalsym:$off, I32:$addr, I32:$val)>;
 def : Pat<(store I64:$val, (add I32:$addr,
                                 (WebAssemblywrapper texternalsym:$off))),
-          (STORE_I64 texternalsym:$off, I32:$addr, 0, I64:$val)>;
+          (STORE_I64 0, texternalsym:$off, I32:$addr, I64:$val)>;
 def : Pat<(store F32:$val, (add I32:$addr,
                                 (WebAssemblywrapper texternalsym:$off))),
-          (STORE_F32 texternalsym:$off, I32:$addr, 0, F32:$val)>;
+          (STORE_F32 0, texternalsym:$off, I32:$addr, F32:$val)>;
 def : Pat<(store F64:$val, (add I32:$addr,
                                 (WebAssemblywrapper texternalsym:$off))),
-          (STORE_F64 texternalsym:$off, I32:$addr, 0, F64:$val)>;
+          (STORE_F64 0, texternalsym:$off, I32:$addr, F64:$val)>;
 
 // Select stores with just a constant offset.
 def : Pat<(store I32:$val, imm:$off),
-          (STORE_I32 imm:$off, (CONST_I32 0), 0, I32:$val)>;
+          (STORE_I32 0, imm:$off, (CONST_I32 0), I32:$val)>;
 def : Pat<(store I64:$val, imm:$off),
-          (STORE_I64 imm:$off, (CONST_I32 0), 0, I64:$val)>;
+          (STORE_I64 0, imm:$off, (CONST_I32 0), I64:$val)>;
 def : Pat<(store F32:$val, imm:$off),
-          (STORE_F32 imm:$off, (CONST_I32 0), 0, F32:$val)>;
+          (STORE_F32 0, imm:$off, (CONST_I32 0), F32:$val)>;
 def : Pat<(store F64:$val, imm:$off),
-          (STORE_F64 imm:$off, (CONST_I32 0), 0, F64:$val)>;
+          (STORE_F64 0, imm:$off, (CONST_I32 0), F64:$val)>;
 def : Pat<(store I32:$val, (WebAssemblywrapper tglobaladdr:$off)),
-          (STORE_I32 tglobaladdr:$off, (CONST_I32 0), 0, I32:$val)>;
+          (STORE_I32 0, tglobaladdr:$off, (CONST_I32 0), I32:$val)>;
 def : Pat<(store I64:$val, (WebAssemblywrapper tglobaladdr:$off)),
-          (STORE_I64 tglobaladdr:$off, (CONST_I32 0), 0, I64:$val)>;
+          (STORE_I64 0, tglobaladdr:$off, (CONST_I32 0), I64:$val)>;
 def : Pat<(store F32:$val, (WebAssemblywrapper tglobaladdr:$off)),
-          (STORE_F32 tglobaladdr:$off, (CONST_I32 0), 0, F32:$val)>;
+          (STORE_F32 0, tglobaladdr:$off, (CONST_I32 0), F32:$val)>;
 def : Pat<(store F64:$val, (WebAssemblywrapper tglobaladdr:$off)),
-          (STORE_F64 tglobaladdr:$off, (CONST_I32 0), 0, F64:$val)>;
+          (STORE_F64 0, tglobaladdr:$off, (CONST_I32 0), F64:$val)>;
 def : Pat<(store I32:$val, (WebAssemblywrapper texternalsym:$off)),
-          (STORE_I32 texternalsym:$off, (CONST_I32 0), 0, I32:$val)>;
+          (STORE_I32 0, texternalsym:$off, (CONST_I32 0), I32:$val)>;
 def : Pat<(store I64:$val, (WebAssemblywrapper texternalsym:$off)),
-          (STORE_I64 texternalsym:$off, (CONST_I32 0), 0, I64:$val)>;
+          (STORE_I64 0, texternalsym:$off, (CONST_I32 0), I64:$val)>;
 def : Pat<(store F32:$val, (WebAssemblywrapper texternalsym:$off)),
-          (STORE_F32 texternalsym:$off, (CONST_I32 0), 0, F32:$val)>;
+          (STORE_F32 0, texternalsym:$off, (CONST_I32 0), F32:$val)>;
 def : Pat<(store F64:$val, (WebAssemblywrapper texternalsym:$off)),
-          (STORE_F64 texternalsym:$off, (CONST_I32 0), 0, F64:$val)>;
+          (STORE_F64 0, texternalsym:$off, (CONST_I32 0), F64:$val)>;
 
 let Defs = [ARGUMENTS] in {
 
 // Truncating store.
-def STORE8_I32  : I<(outs), (ins i32imm:$off, I32:$addr,
-                             P2Align:$p2align, I32:$val), [],
+def STORE8_I32  : I<(outs), (ins P2Align:$p2align, i32imm:$off, I32:$addr,
+                             I32:$val), [],
                     "i32.store8\t${off}(${addr})${p2align}, $val", 0x3a>;
-def STORE16_I32 : I<(outs), (ins i32imm:$off, I32:$addr,
-                             P2Align:$p2align, I32:$val), [],
+def STORE16_I32 : I<(outs), (ins P2Align:$p2align, i32imm:$off, I32:$addr,
+                             I32:$val), [],
                     "i32.store16\t${off}(${addr})${p2align}, $val", 0x3b>;
-def STORE8_I64  : I<(outs), (ins i32imm:$off, I32:$addr,
-                             P2Align:$p2align, I64:$val), [],
+def STORE8_I64  : I<(outs), (ins P2Align:$p2align, i32imm:$off, I32:$addr,
+                             I64:$val), [],
                     "i64.store8\t${off}(${addr})${p2align}, $val", 0x3c>;
-def STORE16_I64 : I<(outs), (ins i32imm:$off, I32:$addr,
-                             P2Align:$p2align, I64:$val), [],
+def STORE16_I64 : I<(outs), (ins P2Align:$p2align, i32imm:$off, I32:$addr,
+                             I64:$val), [],
                     "i64.store16\t${off}(${addr})${p2align}, $val", 0x3d>;
-def STORE32_I64 : I<(outs), (ins i32imm:$off, I32:$addr,
-                             P2Align:$p2align, I64:$val), [],
+def STORE32_I64 : I<(outs), (ins P2Align:$p2align, i32imm:$off, I32:$addr,
+                             I64:$val), [],
                     "i64.store32\t${off}(${addr})${p2align}, $val", 0x3e>;
 
 } // Defs = [ARGUMENTS]
 
 // Select truncating stores with no constant offset.
 def : Pat<(truncstorei8 I32:$val, I32:$addr),
-          (STORE8_I32 0, I32:$addr, 0, I32:$val)>;
+          (STORE8_I32 0, 0, I32:$addr, I32:$val)>;
 def : Pat<(truncstorei16 I32:$val, I32:$addr),
-          (STORE16_I32 0, I32:$addr, 0, I32:$val)>;
+          (STORE16_I32 0, 0, I32:$addr, I32:$val)>;
 def : Pat<(truncstorei8 I64:$val, I32:$addr),
-          (STORE8_I64 0, I32:$addr, 0, I64:$val)>;
+          (STORE8_I64 0, 0, I32:$addr, I64:$val)>;
 def : Pat<(truncstorei16 I64:$val, I32:$addr),
-          (STORE16_I64 0, I32:$addr, 0, I64:$val)>;
+          (STORE16_I64 0, 0, I32:$addr, I64:$val)>;
 def : Pat<(truncstorei32 I64:$val, I32:$addr),
-          (STORE32_I64 0, I32:$addr, 0, I64:$val)>;
+          (STORE32_I64 0, 0, I32:$addr, I64:$val)>;
 
 // Select truncating stores with a constant offset.
 def : Pat<(truncstorei8 I32:$val, (regPlusImm I32:$addr, imm:$off)),
-          (STORE8_I32 imm:$off, I32:$addr, 0, I32:$val)>;
+          (STORE8_I32 0, imm:$off, I32:$addr, I32:$val)>;
 def : Pat<(truncstorei16 I32:$val, (regPlusImm I32:$addr, imm:$off)),
-          (STORE16_I32 imm:$off, I32:$addr, 0, I32:$val)>;
+          (STORE16_I32 0, imm:$off, I32:$addr, I32:$val)>;
 def : Pat<(truncstorei8 I64:$val, (regPlusImm I32:$addr, imm:$off)),
-          (STORE8_I64 imm:$off, I32:$addr, 0, I64:$val)>;
+          (STORE8_I64 0, imm:$off, I32:$addr, I64:$val)>;
 def : Pat<(truncstorei16 I64:$val, (regPlusImm I32:$addr, imm:$off)),
-          (STORE16_I64 imm:$off, I32:$addr, 0, I64:$val)>;
+          (STORE16_I64 0, imm:$off, I32:$addr, I64:$val)>;
 def : Pat<(truncstorei32 I64:$val, (regPlusImm I32:$addr, imm:$off)),
-          (STORE32_I64 imm:$off, I32:$addr, 0, I64:$val)>;
+          (STORE32_I64 0, imm:$off, I32:$addr, I64:$val)>;
 def : Pat<(truncstorei8 I32:$val, (or_is_add I32:$addr, imm:$off)),
-          (STORE8_I32 imm:$off, I32:$addr, 0, I32:$val)>;
+          (STORE8_I32 0, imm:$off, I32:$addr, I32:$val)>;
 def : Pat<(truncstorei16 I32:$val, (or_is_add I32:$addr, imm:$off)),
-          (STORE16_I32 imm:$off, I32:$addr, 0, I32:$val)>;
+          (STORE16_I32 0, imm:$off, I32:$addr, I32:$val)>;
 def : Pat<(truncstorei8 I64:$val, (or_is_add I32:$addr, imm:$off)),
-          (STORE8_I64 imm:$off, I32:$addr, 0, I64:$val)>;
+          (STORE8_I64 0, imm:$off, I32:$addr, I64:$val)>;
 def : Pat<(truncstorei16 I64:$val, (or_is_add I32:$addr, imm:$off)),
-          (STORE16_I64 imm:$off, I32:$addr, 0, I64:$val)>;
+          (STORE16_I64 0, imm:$off, I32:$addr, I64:$val)>;
 def : Pat<(truncstorei32 I64:$val, (or_is_add I32:$addr, imm:$off)),
-          (STORE32_I64 imm:$off, I32:$addr, 0, I64:$val)>;
+          (STORE32_I64 0, imm:$off, I32:$addr, I64:$val)>;
 def : Pat<(truncstorei8 I32:$val,
                         (regPlusGA I32:$addr,
                                    (WebAssemblywrapper tglobaladdr:$off))),
-          (STORE8_I32 tglobaladdr:$off, I32:$addr, 0, I32:$val)>;
+          (STORE8_I32 0, tglobaladdr:$off, I32:$addr, I32:$val)>;
 def : Pat<(truncstorei16 I32:$val,
                          (regPlusGA I32:$addr,
                                     (WebAssemblywrapper tglobaladdr:$off))),
-          (STORE16_I32 tglobaladdr:$off, I32:$addr, 0, I32:$val)>;
+          (STORE16_I32 0, tglobaladdr:$off, I32:$addr, I32:$val)>;
 def : Pat<(truncstorei8 I64:$val,
                         (regPlusGA I32:$addr,
                                    (WebAssemblywrapper tglobaladdr:$off))),
-          (STORE8_I64 tglobaladdr:$off, I32:$addr, 0, I64:$val)>;
+          (STORE8_I64 0, tglobaladdr:$off, I32:$addr, I64:$val)>;
 def : Pat<(truncstorei16 I64:$val,
                          (regPlusGA I32:$addr,
                                     (WebAssemblywrapper tglobaladdr:$off))),
-          (STORE16_I64 tglobaladdr:$off, I32:$addr, 0, I64:$val)>;
+          (STORE16_I64 0, tglobaladdr:$off, I32:$addr, I64:$val)>;
 def : Pat<(truncstorei32 I64:$val,
                          (regPlusGA I32:$addr,
                                     (WebAssemblywrapper tglobaladdr:$off))),
-          (STORE32_I64 tglobaladdr:$off, I32:$addr, 0, I64:$val)>;
+          (STORE32_I64 0, tglobaladdr:$off, I32:$addr, I64:$val)>;
 def : Pat<(truncstorei8 I32:$val, (add I32:$addr,
                                        (WebAssemblywrapper texternalsym:$off))),
-          (STORE8_I32 texternalsym:$off, I32:$addr, 0, I32:$val)>;
+          (STORE8_I32 0, texternalsym:$off, I32:$addr, I32:$val)>;
 def : Pat<(truncstorei16 I32:$val,
                          (add I32:$addr,
                               (WebAssemblywrapper texternalsym:$off))),
-          (STORE16_I32 texternalsym:$off, I32:$addr, 0, I32:$val)>;
+          (STORE16_I32 0, texternalsym:$off, I32:$addr, I32:$val)>;
 def : Pat<(truncstorei8 I64:$val,
                         (add I32:$addr,
                              (WebAssemblywrapper texternalsym:$off))),
-          (STORE8_I64 texternalsym:$off, I32:$addr, 0, I64:$val)>;
+          (STORE8_I64 0, texternalsym:$off, I32:$addr, I64:$val)>;
 def : Pat<(truncstorei16 I64:$val,
                          (add I32:$addr,
                               (WebAssemblywrapper texternalsym:$off))),
-          (STORE16_I64 texternalsym:$off, I32:$addr, 0, I64:$val)>;
+          (STORE16_I64 0, texternalsym:$off, I32:$addr, I64:$val)>;
 def : Pat<(truncstorei32 I64:$val,
                          (add I32:$addr,
                               (WebAssemblywrapper texternalsym:$off))),
-          (STORE32_I64 texternalsym:$off, I32:$addr, 0, I64:$val)>;
+          (STORE32_I64 0, texternalsym:$off, I32:$addr, I64:$val)>;
 
 // Select truncating stores with just a constant offset.
 def : Pat<(truncstorei8 I32:$val, imm:$off),
-          (STORE8_I32 imm:$off, (CONST_I32 0), 0, I32:$val)>;
+          (STORE8_I32 0, imm:$off, (CONST_I32 0), I32:$val)>;
 def : Pat<(truncstorei16 I32:$val, imm:$off),
-          (STORE16_I32 imm:$off, (CONST_I32 0), 0, I32:$val)>;
+          (STORE16_I32 0, imm:$off, (CONST_I32 0), I32:$val)>;
 def : Pat<(truncstorei8 I64:$val, imm:$off),
-          (STORE8_I64 imm:$off, (CONST_I32 0), 0, I64:$val)>;
+          (STORE8_I64 0, imm:$off, (CONST_I32 0), I64:$val)>;
 def : Pat<(truncstorei16 I64:$val, imm:$off),
-          (STORE16_I64 imm:$off, (CONST_I32 0), 0, I64:$val)>;
+          (STORE16_I64 0, imm:$off, (CONST_I32 0), I64:$val)>;
 def : Pat<(truncstorei32 I64:$val, imm:$off),
-          (STORE32_I64 imm:$off, (CONST_I32 0), 0, I64:$val)>;
+          (STORE32_I64 0, imm:$off, (CONST_I32 0), I64:$val)>;
 def : Pat<(truncstorei8 I32:$val, (WebAssemblywrapper tglobaladdr:$off)),
-          (STORE8_I32 tglobaladdr:$off, (CONST_I32 0), 0, I32:$val)>;
+          (STORE8_I32 0, tglobaladdr:$off, (CONST_I32 0), I32:$val)>;
 def : Pat<(truncstorei16 I32:$val, (WebAssemblywrapper tglobaladdr:$off)),
-          (STORE16_I32 tglobaladdr:$off, (CONST_I32 0), 0, I32:$val)>;
+          (STORE16_I32 0, tglobaladdr:$off, (CONST_I32 0), I32:$val)>;
 def : Pat<(truncstorei8 I64:$val, (WebAssemblywrapper tglobaladdr:$off)),
-          (STORE8_I64 tglobaladdr:$off, (CONST_I32 0), 0, I64:$val)>;
+          (STORE8_I64 0, tglobaladdr:$off, (CONST_I32 0), I64:$val)>;
 def : Pat<(truncstorei16 I64:$val, (WebAssemblywrapper tglobaladdr:$off)),
-          (STORE16_I64 tglobaladdr:$off, (CONST_I32 0), 0, I64:$val)>;
+          (STORE16_I64 0, tglobaladdr:$off, (CONST_I32 0), I64:$val)>;
 def : Pat<(truncstorei32 I64:$val, (WebAssemblywrapper tglobaladdr:$off)),
-          (STORE32_I64 tglobaladdr:$off, (CONST_I32 0), 0, I64:$val)>;
+          (STORE32_I64 0, tglobaladdr:$off, (CONST_I32 0), I64:$val)>;
 def : Pat<(truncstorei8 I32:$val, (WebAssemblywrapper texternalsym:$off)),
-          (STORE8_I32 texternalsym:$off, (CONST_I32 0), 0, I32:$val)>;
+          (STORE8_I32 0, texternalsym:$off, (CONST_I32 0), I32:$val)>;
 def : Pat<(truncstorei16 I32:$val, (WebAssemblywrapper texternalsym:$off)),
-          (STORE16_I32 texternalsym:$off, (CONST_I32 0), 0, I32:$val)>;
+          (STORE16_I32 0, texternalsym:$off, (CONST_I32 0), I32:$val)>;
 def : Pat<(truncstorei8 I64:$val, (WebAssemblywrapper texternalsym:$off)),
-          (STORE8_I64 texternalsym:$off, (CONST_I32 0), 0, I64:$val)>;
+          (STORE8_I64 0, texternalsym:$off, (CONST_I32 0), I64:$val)>;
 def : Pat<(truncstorei16 I64:$val, (WebAssemblywrapper texternalsym:$off)),
-          (STORE16_I64 texternalsym:$off, (CONST_I32 0), 0, I64:$val)>;
+          (STORE16_I64 0, texternalsym:$off, (CONST_I32 0), I64:$val)>;
 def : Pat<(truncstorei32 I64:$val, (WebAssemblywrapper texternalsym:$off)),
-          (STORE32_I64 texternalsym:$off, (CONST_I32 0), 0, I64:$val)>;
+          (STORE32_I64 0, texternalsym:$off, (CONST_I32 0), I64:$val)>;
 
 let Defs = [ARGUMENTS] in {