Merge V8 5.2.361.47  DO NOT MERGE

https://chromium.googlesource.com/v8/v8/+/5.2.361.47

FPIIM-449

Change-Id: Ibec421b85a9b88cb3a432ada642e469fe7e78346
(cherry picked from commit bcf72ee8e3b26f1d0726869c7ddb3921c68b09a8)
diff --git a/src/wasm/asm-wasm-builder.cc b/src/wasm/asm-wasm-builder.cc
index d16d3a8..325058c 100644
--- a/src/wasm/asm-wasm-builder.cc
+++ b/src/wasm/asm-wasm-builder.cc
@@ -11,6 +11,7 @@
 #include <math.h>
 
 #include "src/wasm/asm-wasm-builder.h"
+#include "src/wasm/switch-logic.h"
 #include "src/wasm/wasm-macro-gen.h"
 #include "src/wasm/wasm-opcodes.h"
 
@@ -30,6 +31,7 @@
     if (HasStackOverflow()) return; \
   } while (false)
 
+enum AsmScope { kModuleScope, kInitScope, kFuncScope, kExportScope };
 
 class AsmWasmBuilderImpl : public AstVisitor {
  public:
@@ -43,9 +45,7 @@
         global_variables_(HashMap::PointersMatch,
                           ZoneHashMap::kDefaultHashMapCapacity,
                           ZoneAllocationPolicy(zone)),
-        in_function_(false),
-        is_set_op_(false),
-        marking_exported(false),
+        scope_(kModuleScope),
         builder_(new (zone) WasmModuleBuilder(zone)),
         current_function_builder_(nullptr),
         literal_(literal),
@@ -55,20 +55,21 @@
         typer_(typer),
         cache_(TypeCache::Get()),
         breakable_blocks_(zone),
-        block_size_(0),
         init_function_index_(0),
         next_table_index_(0),
         function_tables_(HashMap::PointersMatch,
                          ZoneHashMap::kDefaultHashMapCapacity,
                          ZoneAllocationPolicy(zone)),
-        imported_function_table_(this) {
+        imported_function_table_(this),
+        bounds_(typer->bounds()) {
     InitializeAstVisitor(isolate);
   }
 
   void InitializeInitFunction() {
     init_function_index_ = builder_->AddFunction();
+    FunctionSig::Builder b(zone(), 0, 0);
     current_function_builder_ = builder_->FunctionAt(init_function_index_);
-    current_function_builder_->ReturnType(kAstStmt);
+    current_function_builder_->SetSignature(b.Build());
     builder_->MarkStartFunction(init_function_index_);
     current_function_builder_ = nullptr;
   }
@@ -81,13 +82,13 @@
   void VisitVariableDeclaration(VariableDeclaration* decl) {}
 
   void VisitFunctionDeclaration(FunctionDeclaration* decl) {
-    DCHECK(!in_function_);
+    DCHECK_EQ(kModuleScope, scope_);
     DCHECK_NULL(current_function_builder_);
-    uint16_t index = LookupOrInsertFunction(decl->proxy()->var());
+    uint32_t index = LookupOrInsertFunction(decl->proxy()->var());
     current_function_builder_ = builder_->FunctionAt(index);
-    in_function_ = true;
+    scope_ = kFuncScope;
     RECURSE(Visit(decl->fun()));
-    in_function_ = false;
+    scope_ = kModuleScope;
     current_function_builder_ = nullptr;
     local_variables_.Clear();
   }
@@ -99,6 +100,10 @@
   void VisitStatements(ZoneList<Statement*>* stmts) {
     for (int i = 0; i < stmts->length(); ++i) {
       Statement* stmt = stmts->at(i);
+      ExpressionStatement* e = stmt->AsExpressionStatement();
+      if (e != nullptr && e->expression()->IsUndefinedLiteral()) {
+        continue;
+      }
       RECURSE(Visit(stmt));
       if (stmt->IsJump()) break;
     }
@@ -115,12 +120,10 @@
         }
       }
     }
-    if (in_function_) {
+    if (scope_ == kFuncScope) {
       BlockVisitor visitor(this, stmt->AsBreakableStatement(), kExprBlock,
-                           false,
-                           static_cast<byte>(stmt->statements()->length()));
+                           false);
       RECURSE(VisitStatements(stmt->statements()));
-      DCHECK(block_size_ >= 0);
     } else {
       RECURSE(VisitStatements(stmt->statements()));
     }
@@ -128,25 +131,17 @@
 
   class BlockVisitor {
    private:
-    int prev_block_size_;
-    uint32_t index_;
     AsmWasmBuilderImpl* builder_;
 
    public:
     BlockVisitor(AsmWasmBuilderImpl* builder, BreakableStatement* stmt,
-                 WasmOpcode opcode, bool is_loop, int initial_block_size)
+                 WasmOpcode opcode, bool is_loop)
         : builder_(builder) {
       builder_->breakable_blocks_.push_back(std::make_pair(stmt, is_loop));
       builder_->current_function_builder_->Emit(opcode);
-      index_ =
-          builder_->current_function_builder_->EmitEditableVarIntImmediate();
-      prev_block_size_ = builder_->block_size_;
-      builder_->block_size_ = initial_block_size;
     }
     ~BlockVisitor() {
-      builder_->current_function_builder_->EditVarIntImmediate(
-          index_, builder_->block_size_);
-      builder_->block_size_ = prev_block_size_;
+      builder_->current_function_builder_->Emit(kExprEnd);
       builder_->breakable_blocks_.pop_back();
     }
   };
@@ -160,25 +155,24 @@
   void VisitEmptyParentheses(EmptyParentheses* paren) { UNREACHABLE(); }
 
   void VisitIfStatement(IfStatement* stmt) {
-    DCHECK(in_function_);
-    if (stmt->HasElseStatement()) {
-      current_function_builder_->Emit(kExprIfElse);
-    } else {
-      current_function_builder_->Emit(kExprIf);
-    }
+    DCHECK_EQ(kFuncScope, scope_);
     RECURSE(Visit(stmt->condition()));
+    current_function_builder_->Emit(kExprIf);
+    // WASM ifs come with implicit blocks for both arms.
+    breakable_blocks_.push_back(std::make_pair(nullptr, false));
     if (stmt->HasThenStatement()) {
       RECURSE(Visit(stmt->then_statement()));
-    } else {
-      current_function_builder_->Emit(kExprNop);
     }
     if (stmt->HasElseStatement()) {
+      current_function_builder_->Emit(kExprElse);
       RECURSE(Visit(stmt->else_statement()));
     }
+    current_function_builder_->Emit(kExprEnd);
+    breakable_blocks_.pop_back();
   }
 
   void VisitContinueStatement(ContinueStatement* stmt) {
-    DCHECK(in_function_);
+    DCHECK_EQ(kFuncScope, scope_);
     DCHECK_NOT_NULL(stmt->target());
     int i = static_cast<int>(breakable_blocks_.size()) - 1;
     int block_distance = 0;
@@ -194,12 +188,12 @@
       }
     }
     DCHECK(i >= 0);
-    current_function_builder_->EmitWithVarInt(kExprBr, block_distance);
-    current_function_builder_->Emit(kExprNop);
+    current_function_builder_->EmitWithU8(kExprBr, ARITY_0);
+    current_function_builder_->EmitVarInt(block_distance);
   }
 
   void VisitBreakStatement(BreakStatement* stmt) {
-    DCHECK(in_function_);
+    DCHECK_EQ(kFuncScope, scope_);
     DCHECK_NOT_NULL(stmt->target());
     int i = static_cast<int>(breakable_blocks_.size()) - 1;
     int block_distance = 0;
@@ -217,123 +211,191 @@
       }
     }
     DCHECK(i >= 0);
-    current_function_builder_->EmitWithVarInt(kExprBr, block_distance);
-    current_function_builder_->Emit(kExprNop);
+    current_function_builder_->EmitWithU8(kExprBr, ARITY_0);
+    current_function_builder_->EmitVarInt(block_distance);
   }
 
   void VisitReturnStatement(ReturnStatement* stmt) {
-    if (in_function_) {
-      current_function_builder_->Emit(kExprReturn);
+    if (scope_ == kModuleScope) {
+      scope_ = kExportScope;
+      RECURSE(Visit(stmt->expression()));
+      scope_ = kModuleScope;
+    } else if (scope_ == kFuncScope) {
+      RECURSE(Visit(stmt->expression()));
+      uint8_t arity =
+          TypeOf(stmt->expression()) == kAstStmt ? ARITY_0 : ARITY_1;
+      current_function_builder_->EmitWithU8(kExprReturn, arity);
     } else {
-      marking_exported = true;
-    }
-    RECURSE(Visit(stmt->expression()));
-    if (!in_function_) {
-      marking_exported = false;
+      UNREACHABLE();
     }
   }
 
   void VisitWithStatement(WithStatement* stmt) { UNREACHABLE(); }
 
-  void SetLocalTo(uint16_t index, int value) {
-    current_function_builder_->Emit(kExprSetLocal);
-    AddLeb128(index, true);
-    // TODO(bradnelson): variable size
-    byte code[] = {WASM_I32V(value)};
-    current_function_builder_->EmitCode(code, sizeof(code));
-    block_size_++;
-  }
+  void HandleCase(CaseNode* node,
+                  const ZoneMap<int, unsigned int>& case_to_block,
+                  VariableProxy* tag, int default_block, int if_depth) {
+    int prev_if_depth = if_depth;
+    if (node->left != nullptr) {
+      VisitVariableProxy(tag);
+      current_function_builder_->EmitI32Const(node->begin);
+      current_function_builder_->Emit(kExprI32LtS);
+      current_function_builder_->Emit(kExprIf);
+      if_depth++;
+      breakable_blocks_.push_back(std::make_pair(nullptr, false));
+      HandleCase(node->left, case_to_block, tag, default_block, if_depth);
+      current_function_builder_->Emit(kExprElse);
+    }
+    if (node->right != nullptr) {
+      VisitVariableProxy(tag);
+      current_function_builder_->EmitI32Const(node->end);
+      current_function_builder_->Emit(kExprI32GtS);
+      current_function_builder_->Emit(kExprIf);
+      if_depth++;
+      breakable_blocks_.push_back(std::make_pair(nullptr, false));
+      HandleCase(node->right, case_to_block, tag, default_block, if_depth);
+      current_function_builder_->Emit(kExprElse);
+    }
+    if (node->begin == node->end) {
+      VisitVariableProxy(tag);
+      current_function_builder_->EmitI32Const(node->begin);
+      current_function_builder_->Emit(kExprI32Eq);
+      current_function_builder_->Emit(kExprIf);
+      DCHECK(case_to_block.find(node->begin) != case_to_block.end());
+      current_function_builder_->EmitWithU8(kExprBr, ARITY_0);
+      current_function_builder_->EmitVarInt(1 + if_depth +
+                                            case_to_block.at(node->begin));
+      current_function_builder_->Emit(kExprEnd);
+    } else {
+      if (node->begin != 0) {
+        VisitVariableProxy(tag);
+        current_function_builder_->EmitI32Const(node->begin);
+        current_function_builder_->Emit(kExprI32Sub);
+      } else {
+        VisitVariableProxy(tag);
+      }
+      current_function_builder_->EmitWithU8(kExprBrTable, ARITY_0);
+      current_function_builder_->EmitVarInt(node->end - node->begin + 1);
+      for (int v = node->begin; v <= node->end; v++) {
+        if (case_to_block.find(v) != case_to_block.end()) {
+          byte break_code[] = {BR_TARGET(if_depth + case_to_block.at(v))};
+          current_function_builder_->EmitCode(break_code, sizeof(break_code));
+        } else {
+          byte break_code[] = {BR_TARGET(if_depth + default_block)};
+          current_function_builder_->EmitCode(break_code, sizeof(break_code));
+        }
+        if (v == kMaxInt) {
+          break;
+        }
+      }
+      byte break_code[] = {BR_TARGET(if_depth + default_block)};
+      current_function_builder_->EmitCode(break_code, sizeof(break_code));
+    }
 
-  void CompileCase(CaseClause* clause, uint16_t fall_through,
-                   VariableProxy* tag) {
-    Literal* label = clause->label()->AsLiteral();
-    DCHECK_NOT_NULL(label);
-    block_size_++;
-    current_function_builder_->Emit(kExprIf);
-    current_function_builder_->Emit(kExprI32Ior);
-    current_function_builder_->Emit(kExprI32Eq);
-    VisitVariableProxy(tag);
-    VisitLiteral(label);
-    current_function_builder_->Emit(kExprGetLocal);
-    AddLeb128(fall_through, true);
-    BlockVisitor visitor(this, nullptr, kExprBlock, false, 0);
-    SetLocalTo(fall_through, 1);
-    ZoneList<Statement*>* stmts = clause->statements();
-    block_size_ += stmts->length();
-    RECURSE(VisitStatements(stmts));
+    while (if_depth-- != prev_if_depth) {
+      breakable_blocks_.pop_back();
+      current_function_builder_->Emit(kExprEnd);
+    }
   }
 
   void VisitSwitchStatement(SwitchStatement* stmt) {
     VariableProxy* tag = stmt->tag()->AsVariableProxy();
     DCHECK_NOT_NULL(tag);
-    BlockVisitor visitor(this, stmt->AsBreakableStatement(), kExprBlock, false,
-                         0);
-    uint16_t fall_through = current_function_builder_->AddLocal(kAstI32);
-    SetLocalTo(fall_through, 0);
-
     ZoneList<CaseClause*>* clauses = stmt->cases();
-    for (int i = 0; i < clauses->length(); ++i) {
+    int case_count = clauses->length();
+    if (case_count == 0) {
+      return;
+    }
+    BlockVisitor visitor(this, stmt->AsBreakableStatement(), kExprBlock, false);
+    ZoneVector<BlockVisitor*> blocks(zone_);
+    ZoneVector<int32_t> cases(zone_);
+    ZoneMap<int, unsigned int> case_to_block(zone_);
+    bool has_default = false;
+    for (int i = case_count - 1; i >= 0; i--) {
       CaseClause* clause = clauses->at(i);
+      blocks.push_back(new BlockVisitor(this, nullptr, kExprBlock, false));
       if (!clause->is_default()) {
-        CompileCase(clause, fall_through, tag);
+        Literal* label = clause->label()->AsLiteral();
+        Handle<Object> value = label->value();
+        DCHECK(value->IsNumber() &&
+               bounds_->get(label).upper->Is(cache_.kAsmSigned));
+        int32_t label_value;
+        if (!value->ToInt32(&label_value)) {
+          UNREACHABLE();
+        }
+        case_to_block[label_value] = i;
+        cases.push_back(label_value);
       } else {
-        ZoneList<Statement*>* stmts = clause->statements();
-        block_size_ += stmts->length();
-        RECURSE(VisitStatements(stmts));
+        DCHECK_EQ(i, case_count - 1);
+        has_default = true;
       }
     }
+    if (!has_default || case_count > 1) {
+      int default_block = has_default ? case_count - 1 : case_count;
+      BlockVisitor switch_logic_block(this, nullptr, kExprBlock, false);
+      CaseNode* root = OrderCases(&cases, zone_);
+      HandleCase(root, case_to_block, tag, default_block, 0);
+      if (root->left != nullptr || root->right != nullptr ||
+          root->begin == root->end) {
+        current_function_builder_->EmitWithU8(kExprBr, ARITY_0);
+        current_function_builder_->EmitVarInt(default_block);
+      }
+    }
+    for (int i = 0; i < case_count; i++) {
+      CaseClause* clause = clauses->at(i);
+      RECURSE(VisitStatements(clause->statements()));
+      BlockVisitor* v = blocks.at(case_count - i - 1);
+      blocks.pop_back();
+      delete v;
+    }
   }
 
   void VisitCaseClause(CaseClause* clause) { UNREACHABLE(); }
 
   void VisitDoWhileStatement(DoWhileStatement* stmt) {
-    DCHECK(in_function_);
-    BlockVisitor visitor(this, stmt->AsBreakableStatement(), kExprLoop, true,
-                         2);
+    DCHECK_EQ(kFuncScope, scope_);
+    BlockVisitor visitor(this, stmt->AsBreakableStatement(), kExprLoop, true);
     RECURSE(Visit(stmt->body()));
-    current_function_builder_->Emit(kExprIf);
     RECURSE(Visit(stmt->cond()));
-    current_function_builder_->EmitWithVarInt(kExprBr, 0);
-    current_function_builder_->Emit(kExprNop);
+    current_function_builder_->Emit(kExprIf);
+    current_function_builder_->EmitWithU8U8(kExprBr, ARITY_0, 1);
+    current_function_builder_->Emit(kExprEnd);
   }
 
   void VisitWhileStatement(WhileStatement* stmt) {
-    DCHECK(in_function_);
-    BlockVisitor visitor(this, stmt->AsBreakableStatement(), kExprLoop, true,
-                         1);
-    current_function_builder_->Emit(kExprIf);
+    DCHECK_EQ(kFuncScope, scope_);
+    BlockVisitor visitor(this, stmt->AsBreakableStatement(), kExprLoop, true);
     RECURSE(Visit(stmt->cond()));
-    current_function_builder_->EmitWithVarInt(kExprBr, 0);
+    breakable_blocks_.push_back(std::make_pair(nullptr, false));
+    current_function_builder_->Emit(kExprIf);
     RECURSE(Visit(stmt->body()));
+    current_function_builder_->EmitWithU8U8(kExprBr, ARITY_0, 1);
+    current_function_builder_->Emit(kExprEnd);
+    breakable_blocks_.pop_back();
   }
 
   void VisitForStatement(ForStatement* stmt) {
-    DCHECK(in_function_);
+    DCHECK_EQ(kFuncScope, scope_);
     if (stmt->init() != nullptr) {
-      block_size_++;
       RECURSE(Visit(stmt->init()));
     }
-    BlockVisitor visitor(this, stmt->AsBreakableStatement(), kExprLoop, true,
-                         0);
+    BlockVisitor visitor(this, stmt->AsBreakableStatement(), kExprLoop, true);
     if (stmt->cond() != nullptr) {
-      block_size_++;
-      current_function_builder_->Emit(kExprIf);
-      current_function_builder_->Emit(kExprI32Eqz);
       RECURSE(Visit(stmt->cond()));
-      current_function_builder_->EmitWithVarInt(kExprBr, 1);
+      current_function_builder_->Emit(kExprI32Eqz);
+      current_function_builder_->Emit(kExprIf);
       current_function_builder_->Emit(kExprNop);
+      current_function_builder_->EmitWithU8U8(kExprBr, ARITY_0, 2);
+      current_function_builder_->Emit(kExprEnd);
     }
     if (stmt->body() != nullptr) {
-      block_size_++;
       RECURSE(Visit(stmt->body()));
     }
     if (stmt->next() != nullptr) {
-      block_size_++;
       RECURSE(Visit(stmt->next()));
     }
-    block_size_++;
-    current_function_builder_->EmitWithVarInt(kExprBr, 0);
     current_function_builder_->Emit(kExprNop);
+    current_function_builder_->EmitWithU8U8(kExprBr, ARITY_0, 0);
   }
 
   void VisitForInStatement(ForInStatement* stmt) { UNREACHABLE(); }
@@ -348,16 +410,21 @@
 
   void VisitFunctionLiteral(FunctionLiteral* expr) {
     Scope* scope = expr->scope();
-    if (in_function_) {
-      if (expr->bounds().lower->IsFunction()) {
-        FunctionType* func_type = expr->bounds().lower->AsFunction();
+    if (scope_ == kFuncScope) {
+      if (bounds_->get(expr).lower->IsFunction()) {
+        // Build the signature for the function.
+        FunctionType* func_type = bounds_->get(expr).lower->AsFunction();
         LocalType return_type = TypeFrom(func_type->Result());
-        current_function_builder_->ReturnType(return_type);
+        FunctionSig::Builder b(zone(), return_type == kAstStmt ? 0 : 1,
+                               func_type->Arity());
+        if (return_type != kAstStmt) b.AddReturn(return_type);
         for (int i = 0; i < expr->parameter_count(); i++) {
           LocalType type = TypeFrom(func_type->Parameter(i));
           DCHECK_NE(kAstStmt, type);
-          LookupOrInsertLocal(scope->parameter(i), type);
+          b.AddParam(type);
+          InsertParameter(scope->parameter(i), type, i);
         }
+        current_function_builder_->SetSignature(b.Build());
       } else {
         UNREACHABLE();
       }
@@ -371,11 +438,16 @@
   }
 
   void VisitConditional(Conditional* expr) {
-    DCHECK(in_function_);
-    current_function_builder_->Emit(kExprIfElse);
+    DCHECK_EQ(kFuncScope, scope_);
     RECURSE(Visit(expr->condition()));
+    // WASM ifs come with implicit blocks for both arms.
+    breakable_blocks_.push_back(std::make_pair(nullptr, false));
+    current_function_builder_->Emit(kExprIf);
     RECURSE(Visit(expr->then_expression()));
+    current_function_builder_->Emit(kExprElse);
     RECURSE(Visit(expr->else_expression()));
+    current_function_builder_->Emit(kExprEnd);
+    breakable_blocks_.pop_back();
   }
 
   bool VisitStdlibConstant(Variable* var) {
@@ -431,41 +503,29 @@
   }
 
   void VisitVariableProxy(VariableProxy* expr) {
-    if (in_function_) {
+    if (scope_ == kFuncScope || scope_ == kInitScope) {
       Variable* var = expr->var();
-      if (is_set_op_) {
-        if (var->IsContextSlot()) {
-          current_function_builder_->Emit(kExprStoreGlobal);
-        } else {
-          current_function_builder_->Emit(kExprSetLocal);
-        }
-        is_set_op_ = false;
-      } else {
-        if (VisitStdlibConstant(var)) {
-          return;
-        }
-        if (var->IsContextSlot()) {
-          current_function_builder_->Emit(kExprLoadGlobal);
-        } else {
-          current_function_builder_->Emit(kExprGetLocal);
-        }
+      if (VisitStdlibConstant(var)) {
+        return;
       }
       LocalType var_type = TypeOf(expr);
       DCHECK_NE(kAstStmt, var_type);
       if (var->IsContextSlot()) {
-        AddLeb128(LookupOrInsertGlobal(var, var_type), false);
+        current_function_builder_->EmitWithVarInt(
+            kExprLoadGlobal, LookupOrInsertGlobal(var, var_type));
       } else {
-        AddLeb128(LookupOrInsertLocal(var, var_type), true);
+        current_function_builder_->EmitGetLocal(
+            LookupOrInsertLocal(var, var_type));
       }
     }
   }
 
   void VisitLiteral(Literal* expr) {
     Handle<Object> value = expr->value();
-    if (!in_function_ || !value->IsNumber()) {
+    if (!value->IsNumber() || (scope_ != kFuncScope && scope_ != kInitScope)) {
       return;
     }
-    Type* type = expr->bounds().upper;
+    Type* type = bounds_->get(expr).upper;
     if (type->Is(cache_.kAsmSigned)) {
       int32_t i = 0;
       if (!value->ToInt32(&i)) {
@@ -496,7 +556,7 @@
     ZoneList<ObjectLiteralProperty*>* props = expr->properties();
     for (int i = 0; i < props->length(); ++i) {
       ObjectLiteralProperty* prop = props->at(i);
-      DCHECK(marking_exported);
+      DCHECK_EQ(kExportScope, scope_);
       VariableProxy* expr = prop->value()->AsVariableProxy();
       DCHECK_NOT_NULL(expr);
       Variable* var = expr->var();
@@ -505,10 +565,11 @@
       DCHECK(name->IsPropertyName());
       const AstRawString* raw_name = name->AsRawPropertyName();
       if (var->is_function()) {
-        uint16_t index = LookupOrInsertFunction(var);
+        uint32_t index = LookupOrInsertFunction(var);
         builder_->FunctionAt(index)->Exported(1);
-        builder_->FunctionAt(index)
-            ->SetName(raw_name->raw_data(), raw_name->length());
+        builder_->FunctionAt(index)->SetName(
+            reinterpret_cast<const char*>(raw_name->raw_data()),
+            raw_name->length());
       }
     }
   }
@@ -517,17 +578,17 @@
 
   void LoadInitFunction() {
     current_function_builder_ = builder_->FunctionAt(init_function_index_);
-    in_function_ = true;
+    scope_ = kInitScope;
   }
 
   void UnLoadInitFunction() {
-    in_function_ = false;
+    scope_ = kModuleScope;
     current_function_builder_ = nullptr;
   }
 
   void AddFunctionTable(VariableProxy* table, ArrayLiteral* funcs) {
     FunctionType* func_type =
-        funcs->bounds().lower->AsArray()->Element()->AsFunction();
+        bounds_->get(funcs).lower->AsArray()->Element()->AsFunction();
     LocalType return_type = TypeFrom(func_type->Result());
     FunctionSig::Builder sig(zone(), return_type == kAstStmt ? 0 : 1,
                              func_type->Arity());
@@ -537,7 +598,7 @@
     for (int i = 0; i < func_type->Arity(); i++) {
       sig.AddParam(TypeFrom(func_type->Parameter(i)));
     }
-    uint16_t signature_index = builder_->AddSignature(sig.Build());
+    uint32_t signature_index = builder_->AddSignature(sig.Build());
     InsertFunctionTable(table->var(), next_table_index_, signature_index);
     next_table_index_ += funcs->values()->length();
     for (int i = 0; i < funcs->values()->length(); i++) {
@@ -549,11 +610,11 @@
 
   struct FunctionTableIndices : public ZoneObject {
     uint32_t start_index;
-    uint16_t signature_index;
+    uint32_t signature_index;
   };
 
   void InsertFunctionTable(Variable* v, uint32_t start_index,
-                           uint16_t signature_index) {
+                           uint32_t signature_index) {
     FunctionTableIndices* container = new (zone()) FunctionTableIndices();
     container->start_index = start_index;
     container->signature_index = signature_index;
@@ -573,12 +634,11 @@
    private:
     class ImportedFunctionIndices : public ZoneObject {
      public:
-      const unsigned char* name_;
+      const char* name_;
       int name_length_;
       WasmModuleBuilder::SignatureMap signature_to_index_;
 
-      ImportedFunctionIndices(const unsigned char* name, int name_length,
-                              Zone* zone)
+      ImportedFunctionIndices(const char* name, int name_length, Zone* zone)
           : name_(name), name_length_(name_length), signature_to_index_(zone) {}
     };
     ZoneHashMap table_;
@@ -590,7 +650,7 @@
                  ZoneAllocationPolicy(builder->zone())),
           builder_(builder) {}
 
-    void AddImport(Variable* v, const unsigned char* name, int name_length) {
+    void AddImport(Variable* v, const char* name, int name_length) {
       ImportedFunctionIndices* indices = new (builder_->zone())
           ImportedFunctionIndices(name, name_length, builder_->zone());
       ZoneHashMap::Entry* entry = table_.LookupOrInsert(
@@ -598,7 +658,7 @@
       entry->value = indices;
     }
 
-    uint16_t GetFunctionIndex(Variable* v, FunctionSig* sig) {
+    uint32_t GetFunctionIndex(Variable* v, FunctionSig* sig) {
       ZoneHashMap::Entry* entry = table_.Lookup(v, ComputePointerHash(v));
       DCHECK_NOT_NULL(entry);
       ImportedFunctionIndices* indices =
@@ -608,60 +668,137 @@
       if (pos != indices->signature_to_index_.end()) {
         return pos->second;
       } else {
-        uint16_t index = builder_->builder_->AddFunction();
+        uint32_t index = builder_->builder_->AddImport(
+            indices->name_, indices->name_length_, sig);
         indices->signature_to_index_[sig] = index;
-        WasmFunctionBuilder* function = builder_->builder_->FunctionAt(index);
-        function->External(1);
-        function->SetName(indices->name_, indices->name_length_);
-        if (sig->return_count() > 0) {
-          function->ReturnType(sig->GetReturn());
-        }
-        for (size_t i = 0; i < sig->parameter_count(); i++) {
-          function->AddParam(sig->GetParam(i));
-        }
         return index;
       }
     }
   };
 
-  void VisitAssignment(Assignment* expr) {
-    bool in_init = false;
-    if (!in_function_) {
-      BinaryOperation* binop = expr->value()->AsBinaryOperation();
-      if (binop != nullptr) {
+  void EmitAssignmentLhs(Expression* target, MachineType* mtype) {
+    // Match the left hand side of the assignment.
+    VariableProxy* target_var = target->AsVariableProxy();
+    if (target_var != nullptr) {
+      // Left hand side is a local or a global variable, no code on LHS.
+      return;
+    }
+
+    Property* target_prop = target->AsProperty();
+    if (target_prop != nullptr) {
+      // Left hand side is a property access, i.e. the asm.js heap.
+      VisitPropertyAndEmitIndex(target_prop, mtype);
+      return;
+    }
+
+    if (target_var == nullptr && target_prop == nullptr) {
+      UNREACHABLE();  // invalid assignment.
+    }
+  }
+
+  void EmitAssignmentRhs(Expression* target, Expression* value, bool* is_nop) {
+    BinaryOperation* binop = value->AsBinaryOperation();
+    if (binop != nullptr) {
+      if (scope_ == kInitScope) {
+        // Handle foreign variables in the initialization scope.
         Property* prop = binop->left()->AsProperty();
-        DCHECK_NOT_NULL(prop);
-        LoadInitFunction();
-        is_set_op_ = true;
-        RECURSE(Visit(expr->target()));
-        DCHECK(!is_set_op_);
         if (binop->op() == Token::MUL) {
           DCHECK(binop->right()->IsLiteral());
           DCHECK_EQ(1.0, binop->right()->AsLiteral()->raw_value()->AsNumber());
           DCHECK(binop->right()->AsLiteral()->raw_value()->ContainsDot());
           VisitForeignVariable(true, prop);
+          return;
         } else if (binop->op() == Token::BIT_OR) {
           DCHECK(binop->right()->IsLiteral());
           DCHECK_EQ(0.0, binop->right()->AsLiteral()->raw_value()->AsNumber());
           DCHECK(!binop->right()->AsLiteral()->raw_value()->ContainsDot());
           VisitForeignVariable(false, prop);
+          return;
         } else {
           UNREACHABLE();
         }
-        UnLoadInitFunction();
-        return;
       }
+      if (MatchBinaryOperation(binop) == kAsIs) {
+        VariableProxy* target_var = target->AsVariableProxy();
+        VariableProxy* effective_value_var = GetLeft(binop)->AsVariableProxy();
+        if (target_var != nullptr && effective_value_var != nullptr &&
+            target_var->var() == effective_value_var->var()) {
+          *is_nop = true;
+          return;
+        }
+      }
+    }
+    RECURSE(Visit(value));
+  }
+
+  void EmitAssignment(Assignment* expr, MachineType type) {
+    // Match the left hand side of the assignment.
+    VariableProxy* target_var = expr->target()->AsVariableProxy();
+    if (target_var != nullptr) {
+      // Left hand side is a local or a global variable.
+      Variable* var = target_var->var();
+      LocalType var_type = TypeOf(expr);
+      DCHECK_NE(kAstStmt, var_type);
+      if (var->IsContextSlot()) {
+        current_function_builder_->EmitWithVarInt(
+            kExprStoreGlobal, LookupOrInsertGlobal(var, var_type));
+      } else {
+        current_function_builder_->EmitSetLocal(
+            LookupOrInsertLocal(var, var_type));
+      }
+    }
+
+    Property* target_prop = expr->target()->AsProperty();
+    if (target_prop != nullptr) {
+      // Left hand side is a property access, i.e. the asm.js heap.
+      if (TypeOf(expr->value()) == kAstF64 && expr->target()->IsProperty() &&
+          bounds_->get(expr->target()->AsProperty()->obj())
+              .lower->Is(cache_.kFloat32Array)) {
+        current_function_builder_->Emit(kExprF32ConvertF64);
+      }
+      WasmOpcode opcode;
+      if (type == MachineType::Int8()) {
+        opcode = kExprI32AsmjsStoreMem8;
+      } else if (type == MachineType::Uint8()) {
+        opcode = kExprI32AsmjsStoreMem8;
+      } else if (type == MachineType::Int16()) {
+        opcode = kExprI32AsmjsStoreMem16;
+      } else if (type == MachineType::Uint16()) {
+        opcode = kExprI32AsmjsStoreMem16;
+      } else if (type == MachineType::Int32()) {
+        opcode = kExprI32AsmjsStoreMem;
+      } else if (type == MachineType::Uint32()) {
+        opcode = kExprI32AsmjsStoreMem;
+      } else if (type == MachineType::Float32()) {
+        opcode = kExprF32AsmjsStoreMem;
+      } else if (type == MachineType::Float64()) {
+        opcode = kExprF64AsmjsStoreMem;
+      } else {
+        UNREACHABLE();
+      }
+      current_function_builder_->Emit(opcode);
+    }
+
+    if (target_var == nullptr && target_prop == nullptr) {
+      UNREACHABLE();  // invalid assignment.
+    }
+  }
+
+  void VisitAssignment(Assignment* expr) {
+    bool as_init = false;
+    if (scope_ == kModuleScope) {
       Property* prop = expr->value()->AsProperty();
       if (prop != nullptr) {
         VariableProxy* vp = prop->obj()->AsVariableProxy();
         if (vp != nullptr && vp->var()->IsParameter() &&
             vp->var()->index() == 1) {
           VariableProxy* target = expr->target()->AsVariableProxy();
-          if (target->bounds().lower->Is(Type::Function())) {
+          if (bounds_->get(target).lower->Is(Type::Function())) {
             const AstRawString* name =
                 prop->key()->AsLiteral()->AsRawPropertyName();
-            imported_function_table_.AddImport(target->var(), name->raw_data(),
-                                               name->length());
+            imported_function_table_.AddImport(
+                target->var(), reinterpret_cast<const char*>(name->raw_data()),
+                name->length());
           }
         }
         // Property values in module scope don't emit code, so return.
@@ -669,7 +806,7 @@
       }
       ArrayLiteral* funcs = expr->value()->AsArrayLiteral();
       if (funcs != nullptr &&
-          funcs->bounds().lower->AsArray()->Element()->IsFunction()) {
+          bounds_->get(funcs).lower->AsArray()->Element()->IsFunction()) {
         VariableProxy* target = expr->target()->AsVariableProxy();
         DCHECK_NOT_NULL(target);
         AddFunctionTable(target, funcs);
@@ -680,32 +817,18 @@
         // No init code to emit for CallNew nodes.
         return;
       }
-      in_init = true;
-      LoadInitFunction();
+      as_init = true;
     }
-    BinaryOperation* value_op = expr->value()->AsBinaryOperation();
-    if (value_op != nullptr && MatchBinaryOperation(value_op) == kAsIs) {
-      VariableProxy* target_var = expr->target()->AsVariableProxy();
-      VariableProxy* effective_value_var = GetLeft(value_op)->AsVariableProxy();
-      if (target_var != nullptr && effective_value_var != nullptr &&
-          target_var->var() == effective_value_var->var()) {
-        block_size_--;
-        return;
-      }
+
+    if (as_init) LoadInitFunction();
+    MachineType mtype;
+    bool is_nop = false;
+    EmitAssignmentLhs(expr->target(), &mtype);
+    EmitAssignmentRhs(expr->target(), expr->value(), &is_nop);
+    if (!is_nop) {
+      EmitAssignment(expr, mtype);
     }
-    is_set_op_ = true;
-    RECURSE(Visit(expr->target()));
-    DCHECK(!is_set_op_);
-    // Assignment to heapf32 from float64 converts.
-    if (TypeOf(expr->value()) == kAstF64 && expr->target()->IsProperty() &&
-        expr->target()->AsProperty()->obj()->bounds().lower->Is(
-            cache_.kFloat32Array)) {
-      current_function_builder_->Emit(kExprF32ConvertF64);
-    }
-    RECURSE(Visit(expr->value()));
-    if (in_init) {
-      UnLoadInitFunction();
-    }
+    if (as_init) UnLoadInitFunction();
   }
 
   void VisitYield(Yield* expr) { UNREACHABLE(); }
@@ -744,9 +867,7 @@
             Handle<Object> nvalue = maybe_nvalue.ToHandleChecked();
             if (nvalue->IsNumber()) {
               int32_t val = static_cast<int32_t>(nvalue->Number());
-              // TODO(bradnelson): variable size
-              byte code[] = {WASM_I32V(val)};
-              current_function_builder_->EmitCode(code, sizeof(code));
+              current_function_builder_->EmitI32Const(val);
               return;
             }
           }
@@ -762,46 +883,41 @@
     }
   }
 
-  void VisitProperty(Property* expr) {
+  void VisitPropertyAndEmitIndex(Property* expr, MachineType* mtype) {
     Expression* obj = expr->obj();
-    DCHECK_EQ(obj->bounds().lower, obj->bounds().upper);
-    Type* type = obj->bounds().lower;
-    MachineType mtype;
+    DCHECK_EQ(bounds_->get(obj).lower, bounds_->get(obj).upper);
+    Type* type = bounds_->get(obj).lower;
     int size;
     if (type->Is(cache_.kUint8Array)) {
-      mtype = MachineType::Uint8();
+      *mtype = MachineType::Uint8();
       size = 1;
     } else if (type->Is(cache_.kInt8Array)) {
-      mtype = MachineType::Int8();
+      *mtype = MachineType::Int8();
       size = 1;
     } else if (type->Is(cache_.kUint16Array)) {
-      mtype = MachineType::Uint16();
+      *mtype = MachineType::Uint16();
       size = 2;
     } else if (type->Is(cache_.kInt16Array)) {
-      mtype = MachineType::Int16();
+      *mtype = MachineType::Int16();
       size = 2;
     } else if (type->Is(cache_.kUint32Array)) {
-      mtype = MachineType::Uint32();
+      *mtype = MachineType::Uint32();
       size = 4;
     } else if (type->Is(cache_.kInt32Array)) {
-      mtype = MachineType::Int32();
+      *mtype = MachineType::Int32();
       size = 4;
     } else if (type->Is(cache_.kUint32Array)) {
-      mtype = MachineType::Uint32();
+      *mtype = MachineType::Uint32();
       size = 4;
     } else if (type->Is(cache_.kFloat32Array)) {
-      mtype = MachineType::Float32();
+      *mtype = MachineType::Float32();
       size = 4;
     } else if (type->Is(cache_.kFloat64Array)) {
-      mtype = MachineType::Float64();
+      *mtype = MachineType::Float64();
       size = 8;
     } else {
       UNREACHABLE();
     }
-    // TODO(titzer): use special asm-compatibility opcodes?
-    current_function_builder_->EmitWithU8U8(
-        WasmOpcodes::LoadStoreOpcodeOf(mtype, is_set_op_), 0, 0);
-    is_set_op_ = false;
     if (size == 1) {
       // Allow more general expression in byte arrays than the spec
       // strictly permits.
@@ -809,87 +925,123 @@
       // places that strictly should be HEAP8[HEAP32[..]>>0].
       RECURSE(Visit(expr->key()));
       return;
-    } else {
-      Literal* value = expr->key()->AsLiteral();
-      if (value) {
-        DCHECK(value->raw_value()->IsNumber());
-        DCHECK_EQ(kAstI32, TypeOf(value));
-        int val = static_cast<int>(value->raw_value()->AsNumber());
-        // TODO(bradnelson): variable size
-        byte code[] = {WASM_I32V(val * size)};
-        current_function_builder_->EmitCode(code, sizeof(code));
-        return;
-      }
-      BinaryOperation* binop = expr->key()->AsBinaryOperation();
-      if (binop) {
-        DCHECK_EQ(Token::SAR, binop->op());
-        DCHECK(binop->right()->AsLiteral()->raw_value()->IsNumber());
-        DCHECK(kAstI32 == TypeOf(binop->right()->AsLiteral()));
-        DCHECK_EQ(size,
-                  1 << static_cast<int>(
-                      binop->right()->AsLiteral()->raw_value()->AsNumber()));
-        // Mask bottom bits to match asm.js behavior.
-        current_function_builder_->Emit(kExprI32And);
-        byte code[] = {WASM_I8(~(size - 1))};
-        current_function_builder_->EmitCode(code, sizeof(code));
-        RECURSE(Visit(binop->left()));
-        return;
-      }
+    }
+
+    Literal* value = expr->key()->AsLiteral();
+    if (value) {
+      DCHECK(value->raw_value()->IsNumber());
+      DCHECK_EQ(kAstI32, TypeOf(value));
+      int32_t val = static_cast<int32_t>(value->raw_value()->AsNumber());
+      // TODO(titzer): handle overflow here.
+      current_function_builder_->EmitI32Const(val * size);
+      return;
+    }
+    BinaryOperation* binop = expr->key()->AsBinaryOperation();
+    if (binop) {
+      DCHECK_EQ(Token::SAR, binop->op());
+      DCHECK(binop->right()->AsLiteral()->raw_value()->IsNumber());
+      DCHECK(kAstI32 == TypeOf(binop->right()->AsLiteral()));
+      DCHECK_EQ(size,
+                1 << static_cast<int>(
+                    binop->right()->AsLiteral()->raw_value()->AsNumber()));
+      // Mask bottom bits to match asm.js behavior.
+      byte mask = static_cast<byte>(~(size - 1));
+      RECURSE(Visit(binop->left()));
+      current_function_builder_->EmitWithU8(kExprI8Const, mask);
+      current_function_builder_->Emit(kExprI32And);
+      return;
     }
     UNREACHABLE();
   }
 
+  void VisitProperty(Property* expr) {
+    MachineType type;
+    VisitPropertyAndEmitIndex(expr, &type);
+    WasmOpcode opcode;
+    if (type == MachineType::Int8()) {
+      opcode = kExprI32AsmjsLoadMem8S;
+    } else if (type == MachineType::Uint8()) {
+      opcode = kExprI32AsmjsLoadMem8U;
+    } else if (type == MachineType::Int16()) {
+      opcode = kExprI32AsmjsLoadMem16S;
+    } else if (type == MachineType::Uint16()) {
+      opcode = kExprI32AsmjsLoadMem16U;
+    } else if (type == MachineType::Int32()) {
+      opcode = kExprI32AsmjsLoadMem;
+    } else if (type == MachineType::Uint32()) {
+      opcode = kExprI32AsmjsLoadMem;
+    } else if (type == MachineType::Float32()) {
+      opcode = kExprF32AsmjsLoadMem;
+    } else if (type == MachineType::Float64()) {
+      opcode = kExprF64AsmjsLoadMem;
+    } else {
+      UNREACHABLE();
+    }
+
+    current_function_builder_->Emit(opcode);
+  }
+
   bool VisitStdlibFunction(Call* call, VariableProxy* expr) {
     Variable* var = expr->var();
     AsmTyper::StandardMember standard_object =
         typer_->VariableAsStandardMember(var);
     ZoneList<Expression*>* args = call->arguments();
     LocalType call_type = TypeOf(call);
+
     switch (standard_object) {
       case AsmTyper::kNone: {
         return false;
       }
       case AsmTyper::kMathAcos: {
+        VisitCallArgs(call);
         DCHECK_EQ(kAstF64, call_type);
         current_function_builder_->Emit(kExprF64Acos);
         break;
       }
       case AsmTyper::kMathAsin: {
+        VisitCallArgs(call);
         DCHECK_EQ(kAstF64, call_type);
         current_function_builder_->Emit(kExprF64Asin);
         break;
       }
       case AsmTyper::kMathAtan: {
+        VisitCallArgs(call);
         DCHECK_EQ(kAstF64, call_type);
         current_function_builder_->Emit(kExprF64Atan);
         break;
       }
       case AsmTyper::kMathCos: {
+        VisitCallArgs(call);
         DCHECK_EQ(kAstF64, call_type);
         current_function_builder_->Emit(kExprF64Cos);
         break;
       }
       case AsmTyper::kMathSin: {
+        VisitCallArgs(call);
         DCHECK_EQ(kAstF64, call_type);
         current_function_builder_->Emit(kExprF64Sin);
         break;
       }
       case AsmTyper::kMathTan: {
+        VisitCallArgs(call);
         DCHECK_EQ(kAstF64, call_type);
         current_function_builder_->Emit(kExprF64Tan);
         break;
       }
       case AsmTyper::kMathExp: {
+        VisitCallArgs(call);
         DCHECK_EQ(kAstF64, call_type);
         current_function_builder_->Emit(kExprF64Exp);
         break;
       }
       case AsmTyper::kMathLog: {
+        VisitCallArgs(call);
         DCHECK_EQ(kAstF64, call_type);
         current_function_builder_->Emit(kExprF64Log);
         break;
       }
       case AsmTyper::kMathCeil: {
+        VisitCallArgs(call);
         if (call_type == kAstF32) {
           current_function_builder_->Emit(kExprF32Ceil);
         } else if (call_type == kAstF64) {
@@ -900,6 +1052,7 @@
         break;
       }
       case AsmTyper::kMathFloor: {
+        VisitCallArgs(call);
         if (call_type == kAstF32) {
           current_function_builder_->Emit(kExprF32Floor);
         } else if (call_type == kAstF64) {
@@ -910,6 +1063,7 @@
         break;
       }
       case AsmTyper::kMathSqrt: {
+        VisitCallArgs(call);
         if (call_type == kAstF32) {
           current_function_builder_->Emit(kExprF32Sqrt);
         } else if (call_type == kAstF64) {
@@ -920,19 +1074,33 @@
         break;
       }
       case AsmTyper::kMathAbs: {
-        // TODO(bradnelson): Should this be cast to float?
         if (call_type == kAstI32) {
-          current_function_builder_->Emit(kExprIfElse);
-          current_function_builder_->Emit(kExprI32LtS);
-          Visit(args->at(0));
+          uint32_t tmp = current_function_builder_->AddLocal(kAstI32);
+
+          // if set_local(tmp, x) < 0
+          Visit(call->arguments()->at(0));
+          current_function_builder_->EmitSetLocal(tmp);
           byte code[] = {WASM_I8(0)};
           current_function_builder_->EmitCode(code, sizeof(code));
-          current_function_builder_->Emit(kExprI32Sub);
+          current_function_builder_->Emit(kExprI32LtS);
+          current_function_builder_->Emit(kExprIf);
+
+          // then (0 - tmp)
           current_function_builder_->EmitCode(code, sizeof(code));
-          Visit(args->at(0));
+          current_function_builder_->EmitGetLocal(tmp);
+          current_function_builder_->Emit(kExprI32Sub);
+
+          // else tmp
+          current_function_builder_->Emit(kExprElse);
+          current_function_builder_->EmitGetLocal(tmp);
+          // end
+          current_function_builder_->Emit(kExprEnd);
+
         } else if (call_type == kAstF32) {
+          VisitCallArgs(call);
           current_function_builder_->Emit(kExprF32Abs);
         } else if (call_type == kAstF64) {
+          VisitCallArgs(call);
           current_function_builder_->Emit(kExprF64Abs);
         } else {
           UNREACHABLE();
@@ -942,13 +1110,32 @@
       case AsmTyper::kMathMin: {
         // TODO(bradnelson): Change wasm to match Math.min in asm.js mode.
         if (call_type == kAstI32) {
-          current_function_builder_->Emit(kExprIfElse);
+          uint32_t tmp_x = current_function_builder_->AddLocal(kAstI32);
+          uint32_t tmp_y = current_function_builder_->AddLocal(kAstI32);
+
+          // if set_local(tmp_x, x) <= set_local(tmp_y, y)
+          Visit(call->arguments()->at(0));
+          current_function_builder_->EmitSetLocal(tmp_x);
+
+          Visit(call->arguments()->at(1));
+          current_function_builder_->EmitSetLocal(tmp_y);
+
           current_function_builder_->Emit(kExprI32LeS);
-          Visit(args->at(0));
-          Visit(args->at(1));
+          current_function_builder_->Emit(kExprIf);
+
+          // then tmp_x
+          current_function_builder_->EmitGetLocal(tmp_x);
+
+          // else tmp_y
+          current_function_builder_->Emit(kExprElse);
+          current_function_builder_->EmitGetLocal(tmp_y);
+          current_function_builder_->Emit(kExprEnd);
+
         } else if (call_type == kAstF32) {
+          VisitCallArgs(call);
           current_function_builder_->Emit(kExprF32Min);
         } else if (call_type == kAstF64) {
+          VisitCallArgs(call);
           current_function_builder_->Emit(kExprF64Min);
         } else {
           UNREACHABLE();
@@ -958,13 +1145,33 @@
       case AsmTyper::kMathMax: {
         // TODO(bradnelson): Change wasm to match Math.max in asm.js mode.
         if (call_type == kAstI32) {
-          current_function_builder_->Emit(kExprIfElse);
-          current_function_builder_->Emit(kExprI32GtS);
-          Visit(args->at(0));
-          Visit(args->at(1));
+          uint32_t tmp_x = current_function_builder_->AddLocal(kAstI32);
+          uint32_t tmp_y = current_function_builder_->AddLocal(kAstI32);
+
+          // if set_local(tmp_x, x) <= set_local(tmp_y, y)
+          Visit(call->arguments()->at(0));
+
+          current_function_builder_->EmitSetLocal(tmp_x);
+
+          Visit(call->arguments()->at(1));
+          current_function_builder_->EmitSetLocal(tmp_y);
+
+          current_function_builder_->Emit(kExprI32LeS);
+          current_function_builder_->Emit(kExprIf);
+
+          // then tmp_y
+          current_function_builder_->EmitGetLocal(tmp_y);
+
+          // else tmp_x
+          current_function_builder_->Emit(kExprElse);
+          current_function_builder_->EmitGetLocal(tmp_x);
+          current_function_builder_->Emit(kExprEnd);
+
         } else if (call_type == kAstF32) {
+          VisitCallArgs(call);
           current_function_builder_->Emit(kExprF32Max);
         } else if (call_type == kAstF64) {
+          VisitCallArgs(call);
           current_function_builder_->Emit(kExprF64Max);
         } else {
           UNREACHABLE();
@@ -972,16 +1179,19 @@
         break;
       }
       case AsmTyper::kMathAtan2: {
+        VisitCallArgs(call);
         DCHECK_EQ(kAstF64, call_type);
         current_function_builder_->Emit(kExprF64Atan2);
         break;
       }
       case AsmTyper::kMathPow: {
+        VisitCallArgs(call);
         DCHECK_EQ(kAstF64, call_type);
         current_function_builder_->Emit(kExprF64Pow);
         break;
       }
       case AsmTyper::kMathImul: {
+        VisitCallArgs(call);
         current_function_builder_->Emit(kExprI32Mul);
         break;
       }
@@ -989,6 +1199,7 @@
         DCHECK(args->length() == 1);
         Literal* literal = args->at(0)->AsLiteral();
         if (literal != nullptr) {
+          // Constant-fold Math.fround(#const).
           if (literal->raw_value()->IsNumber()) {
             float val = static_cast<float>(literal->raw_value()->AsNumber());
             byte code[] = {WASM_F32(val)};
@@ -996,6 +1207,7 @@
             return true;
           }
         }
+        VisitCallArgs(call);
         switch (TypeIndexOf(args->at(0))) {
           case kInt32:
           case kFixnum:
@@ -1019,7 +1231,6 @@
         break;
       }
     }
-    VisitCallArgs(call);
     return true;
   }
 
@@ -1035,17 +1246,17 @@
     Call::CallType call_type = expr->GetCallType(isolate_);
     switch (call_type) {
       case Call::OTHER_CALL: {
-        DCHECK(in_function_);
+        DCHECK_EQ(kFuncScope, scope_);
         VariableProxy* proxy = expr->expression()->AsVariableProxy();
         if (proxy != nullptr) {
           if (VisitStdlibFunction(expr, proxy)) {
             return;
           }
         }
-        uint16_t index;
+        uint32_t index;
         VariableProxy* vp = expr->expression()->AsVariableProxy();
         if (vp != nullptr &&
-            Type::Any()->Is(vp->bounds().lower->AsFunction()->Result())) {
+            Type::Any()->Is(bounds_->get(vp).lower->AsFunction()->Result())) {
           LocalType return_type = TypeOf(expr);
           ZoneList<Expression*>* args = expr->arguments();
           FunctionSig::Builder sig(zone(), return_type == kAstStmt ? 0 : 1,
@@ -1058,35 +1269,38 @@
           }
           index =
               imported_function_table_.GetFunctionIndex(vp->var(), sig.Build());
+          VisitCallArgs(expr);
+          current_function_builder_->Emit(kExprCallImport);
+          current_function_builder_->EmitVarInt(expr->arguments()->length());
+          current_function_builder_->EmitVarInt(index);
         } else {
           index = LookupOrInsertFunction(vp->var());
+          VisitCallArgs(expr);
+          current_function_builder_->Emit(kExprCallFunction);
+          current_function_builder_->EmitVarInt(expr->arguments()->length());
+          current_function_builder_->EmitVarInt(index);
         }
-        current_function_builder_->Emit(kExprCallFunction);
-        std::vector<uint8_t> index_arr = UnsignedLEB128From(index);
-        current_function_builder_->EmitCode(
-            &index_arr[0], static_cast<uint32_t>(index_arr.size()));
         break;
       }
       case Call::KEYED_PROPERTY_CALL: {
-        DCHECK(in_function_);
+        DCHECK_EQ(kFuncScope, scope_);
         Property* p = expr->expression()->AsProperty();
         DCHECK_NOT_NULL(p);
         VariableProxy* var = p->obj()->AsVariableProxy();
         DCHECK_NOT_NULL(var);
         FunctionTableIndices* indices = LookupFunctionTable(var->var());
-        current_function_builder_->EmitWithVarInt(kExprCallIndirect,
-                                                  indices->signature_index);
-        current_function_builder_->Emit(kExprI32Add);
-        // TODO(bradnelson): variable size
-        byte code[] = {WASM_I32V(indices->start_index)};
-        current_function_builder_->EmitCode(code, sizeof(code));
         RECURSE(Visit(p->key()));
+        current_function_builder_->EmitI32Const(indices->start_index);
+        current_function_builder_->Emit(kExprI32Add);
+        VisitCallArgs(expr);
+        current_function_builder_->Emit(kExprCallIndirect);
+        current_function_builder_->EmitVarInt(expr->arguments()->length());
+        current_function_builder_->EmitVarInt(indices->signature_index);
         break;
       }
       default:
         UNREACHABLE();
     }
-    VisitCallArgs(expr);
   }
 
   void VisitCallNew(CallNew* expr) { UNREACHABLE(); }
@@ -1094,6 +1308,7 @@
   void VisitCallRuntime(CallRuntime* expr) { UNREACHABLE(); }
 
   void VisitUnaryOperation(UnaryOperation* expr) {
+    RECURSE(Visit(expr->expression()));
     switch (expr->op()) {
       case Token::NOT: {
         DCHECK_EQ(kAstI32, TypeOf(expr->expression()));
@@ -1103,7 +1318,6 @@
       default:
         UNREACHABLE();
     }
-    RECURSE(Visit(expr->expression()));
   }
 
   void VisitCountOperation(CountOperation* expr) { UNREACHABLE(); }
@@ -1207,9 +1421,6 @@
 #ifdef Mul
 #undef Mul
 #endif
-#ifdef Div
-#undef Div
-#endif
 
 #define NON_SIGNED_BINOP(op)      \
   static WasmOpcode opcodes[] = { \
@@ -1249,6 +1460,7 @@
   void VisitBinaryOperation(BinaryOperation* expr) {
     ConvertOperation convertOperation = MatchBinaryOperation(expr);
     if (convertOperation == kToDouble) {
+      RECURSE(Visit(expr->left()));
       TypeIndex type = TypeIndexOf(expr->left());
       if (type == kInt32 || type == kFixnum) {
         current_function_builder_->Emit(kExprF64SConvertI32);
@@ -1259,37 +1471,53 @@
       } else {
         UNREACHABLE();
       }
-      RECURSE(Visit(expr->left()));
     } else if (convertOperation == kToInt) {
+      RECURSE(Visit(GetLeft(expr)));
       TypeIndex type = TypeIndexOf(GetLeft(expr));
       if (type == kFloat32) {
-        current_function_builder_->Emit(kExprI32SConvertF32);
+        current_function_builder_->Emit(kExprI32AsmjsSConvertF32);
       } else if (type == kFloat64) {
-        current_function_builder_->Emit(kExprI32SConvertF64);
+        current_function_builder_->Emit(kExprI32AsmjsSConvertF64);
       } else {
         UNREACHABLE();
       }
-      RECURSE(Visit(GetLeft(expr)));
     } else if (convertOperation == kAsIs) {
       RECURSE(Visit(GetLeft(expr)));
     } else {
+      if (expr->op() == Token::COMMA) {
+        current_function_builder_->Emit(kExprBlock);
+      }
+
+      RECURSE(Visit(expr->left()));
+      RECURSE(Visit(expr->right()));
+
+      if (expr->op() == Token::COMMA) {
+        current_function_builder_->Emit(kExprEnd);
+      }
+
       switch (expr->op()) {
         BINOP_CASE(Token::ADD, Add, NON_SIGNED_BINOP, true);
         BINOP_CASE(Token::SUB, Sub, NON_SIGNED_BINOP, true);
         BINOP_CASE(Token::MUL, Mul, NON_SIGNED_BINOP, true);
-        BINOP_CASE(Token::DIV, Div, SIGNED_BINOP, false);
         BINOP_CASE(Token::BIT_OR, Ior, NON_SIGNED_INT_BINOP, true);
         BINOP_CASE(Token::BIT_AND, And, NON_SIGNED_INT_BINOP, true);
         BINOP_CASE(Token::BIT_XOR, Xor, NON_SIGNED_INT_BINOP, true);
         BINOP_CASE(Token::SHL, Shl, NON_SIGNED_INT_BINOP, true);
         BINOP_CASE(Token::SAR, ShrS, NON_SIGNED_INT_BINOP, true);
         BINOP_CASE(Token::SHR, ShrU, NON_SIGNED_INT_BINOP, true);
+        case Token::DIV: {
+          static WasmOpcode opcodes[] = {kExprI32AsmjsDivS, kExprI32AsmjsDivU,
+                                         kExprF32Div, kExprF64Div};
+          int type = TypeIndexOf(expr->left(), expr->right(), false);
+          current_function_builder_->Emit(opcodes[type]);
+          break;
+        }
         case Token::MOD: {
           TypeIndex type = TypeIndexOf(expr->left(), expr->right(), false);
           if (type == kInt32) {
-            current_function_builder_->Emit(kExprI32RemS);
+            current_function_builder_->Emit(kExprI32AsmjsRemS);
           } else if (type == kUint32) {
-            current_function_builder_->Emit(kExprI32RemU);
+            current_function_builder_->Emit(kExprI32AsmjsRemU);
           } else if (type == kFloat64) {
             current_function_builder_->Emit(kExprF64Mod);
             return;
@@ -1299,31 +1527,17 @@
           break;
         }
         case Token::COMMA: {
-          current_function_builder_->EmitWithVarInt(kExprBlock, 2);
           break;
         }
         default:
           UNREACHABLE();
       }
-      RECURSE(Visit(expr->left()));
-      RECURSE(Visit(expr->right()));
-    }
-  }
-
-  void AddLeb128(uint32_t index, bool is_local) {
-    std::vector<uint8_t> index_vec = UnsignedLEB128From(index);
-    if (is_local) {
-      uint32_t pos_of_index[1] = {0};
-      current_function_builder_->EmitCode(
-          &index_vec[0], static_cast<uint32_t>(index_vec.size()), pos_of_index,
-          1);
-    } else {
-      current_function_builder_->EmitCode(
-          &index_vec[0], static_cast<uint32_t>(index_vec.size()));
     }
   }
 
   void VisitCompareOperation(CompareOperation* expr) {
+    RECURSE(Visit(expr->left()));
+    RECURSE(Visit(expr->right()));
     switch (expr->op()) {
       BINOP_CASE(Token::EQ, Eq, NON_SIGNED_BINOP, false);
       BINOP_CASE(Token::LT, Lt, SIGNED_BINOP, false);
@@ -1333,8 +1547,6 @@
       default:
         UNREACHABLE();
     }
-    RECURSE(Visit(expr->left()));
-    RECURSE(Visit(expr->right()));
   }
 
 #undef BINOP_CASE
@@ -1369,8 +1581,8 @@
   }
 
   TypeIndex TypeIndexOf(Expression* expr) {
-    DCHECK_EQ(expr->bounds().lower, expr->bounds().upper);
-    Type* type = expr->bounds().lower;
+    DCHECK_EQ(bounds_->get(expr).lower, bounds_->get(expr).upper);
+    Type* type = bounds_->get(expr).lower;
     if (type->Is(cache_.kAsmFixnum)) {
       return kFixnum;
     } else if (type->Is(cache_.kAsmSigned)) {
@@ -1422,20 +1634,17 @@
   void VisitRewritableExpression(RewritableExpression* expr) { UNREACHABLE(); }
 
   struct IndexContainer : public ZoneObject {
-    uint16_t index;
+    uint32_t index;
   };
 
-  uint16_t LookupOrInsertLocal(Variable* v, LocalType type) {
+  uint32_t LookupOrInsertLocal(Variable* v, LocalType type) {
     DCHECK_NOT_NULL(current_function_builder_);
     ZoneHashMap::Entry* entry =
         local_variables_.Lookup(v, ComputePointerHash(v));
     if (entry == nullptr) {
-      uint16_t index;
-      if (v->IsParameter()) {
-        index = current_function_builder_->AddParam(type);
-      } else {
-        index = current_function_builder_->AddLocal(type);
-      }
+      uint32_t index;
+      DCHECK(!v->IsParameter());
+      index = current_function_builder_->AddLocal(type);
       IndexContainer* container = new (zone()) IndexContainer();
       container->index = index;
       entry = local_variables_.LookupOrInsert(v, ComputePointerHash(v),
@@ -1445,11 +1654,24 @@
     return (reinterpret_cast<IndexContainer*>(entry->value))->index;
   }
 
-  uint16_t LookupOrInsertGlobal(Variable* v, LocalType type) {
+  void InsertParameter(Variable* v, LocalType type, uint32_t index) {
+    DCHECK(v->IsParameter());
+    DCHECK_NOT_NULL(current_function_builder_);
+    ZoneHashMap::Entry* entry =
+        local_variables_.Lookup(v, ComputePointerHash(v));
+    DCHECK_NULL(entry);
+    IndexContainer* container = new (zone()) IndexContainer();
+    container->index = index;
+    entry = local_variables_.LookupOrInsert(v, ComputePointerHash(v),
+                                            ZoneAllocationPolicy(zone()));
+    entry->value = container;
+  }
+
+  uint32_t LookupOrInsertGlobal(Variable* v, LocalType type) {
     ZoneHashMap::Entry* entry =
         global_variables_.Lookup(v, ComputePointerHash(v));
     if (entry == nullptr) {
-      uint16_t index =
+      uint32_t index =
           builder_->AddGlobal(WasmOpcodes::MachineTypeFor(type), 0);
       IndexContainer* container = new (zone()) IndexContainer();
       container->index = index;
@@ -1460,11 +1682,11 @@
     return (reinterpret_cast<IndexContainer*>(entry->value))->index;
   }
 
-  uint16_t LookupOrInsertFunction(Variable* v) {
+  uint32_t LookupOrInsertFunction(Variable* v) {
     DCHECK_NOT_NULL(builder_);
     ZoneHashMap::Entry* entry = functions_.Lookup(v, ComputePointerHash(v));
     if (entry == nullptr) {
-      uint16_t index = builder_->AddFunction();
+      uint32_t index = builder_->AddFunction();
       IndexContainer* container = new (zone()) IndexContainer();
       container->index = index;
       entry = functions_.LookupOrInsert(v, ComputePointerHash(v),
@@ -1475,8 +1697,8 @@
   }
 
   LocalType TypeOf(Expression* expr) {
-    DCHECK_EQ(expr->bounds().lower, expr->bounds().upper);
-    return TypeFrom(expr->bounds().lower);
+    DCHECK_EQ(bounds_->get(expr).lower, bounds_->get(expr).upper);
+    return TypeFrom(bounds_->get(expr).lower);
   }
 
   LocalType TypeFrom(Type* type) {
@@ -1496,9 +1718,7 @@
   ZoneHashMap local_variables_;
   ZoneHashMap functions_;
   ZoneHashMap global_variables_;
-  bool in_function_;
-  bool is_set_op_;
-  bool marking_exported;
+  AsmScope scope_;
   WasmModuleBuilder* builder_;
   WasmFunctionBuilder* current_function_builder_;
   FunctionLiteral* literal_;
@@ -1508,11 +1728,11 @@
   AsmTyper* typer_;
   TypeCache const& cache_;
   ZoneVector<std::pair<BreakableStatement*, bool>> breakable_blocks_;
-  int block_size_;
-  uint16_t init_function_index_;
+  uint32_t init_function_index_;
   uint32_t next_table_index_;
   ZoneHashMap function_tables_;
   ImportedFunctionTable imported_function_table_;
+  const AstTypeBounds* bounds_;
 
   DEFINE_AST_VISITOR_SUBCLASS_MEMBERS();
 
diff --git a/src/wasm/ast-decoder.cc b/src/wasm/ast-decoder.cc
index e2f6a04..b8a86c3 100644
--- a/src/wasm/ast-decoder.cc
+++ b/src/wasm/ast-decoder.cc
@@ -42,17 +42,6 @@
   WasmOpcode opcode() const { return static_cast<WasmOpcode>(*pc); }
 };
 
-// A production represents an incomplete decoded tree in the LR decoder.
-struct Production {
-  Tree* tree;  // the root of the syntax tree.
-  int index;   // the current index into the children of the tree.
-
-  WasmOpcode opcode() const { return static_cast<WasmOpcode>(*pc()); }
-  const byte* pc() const { return tree->pc; }
-  bool done() const { return index >= static_cast<int>(tree->count); }
-  Tree* last() const { return index > 0 ? tree->children[index - 1] : nullptr; }
-};
-
 // An SsaEnv environment carries the current local variable renaming
 // as well as the current effect and control dependency in the TF graph.
 // It maintains a control state that tracks whether the environment
@@ -72,19 +61,30 @@
     control = nullptr;
     effect = nullptr;
   }
+  void SetNotMerged() {
+    if (state == kMerged) state = kReached;
+  }
 };
 
-// An entry in the stack of blocks during decoding.
-struct Block {
-  SsaEnv* ssa_env;  // SSA renaming environment.
-  int stack_depth;  // production stack depth.
+// An entry on the value stack.
+struct Value {
+  const byte* pc;
+  TFNode* node;
+  LocalType type;
 };
 
-// An entry in the stack of ifs during decoding.
-struct IfEnv {
-  SsaEnv* false_env;
-  SsaEnv* merge_env;
-  SsaEnv** case_envs;
+// An entry on the control stack (i.e. if, block, loop).
+struct Control {
+  const byte* pc;
+  int stack_depth;    // stack height at the beginning of the construct.
+  SsaEnv* end_env;    // end environment for the construct.
+  SsaEnv* false_env;  // false environment (only for if).
+  TFNode* node;       // result node for the construct.
+  LocalType type;     // result type for the construct.
+  bool is_loop;       // true if this is the inner label of a loop.
+
+  bool is_if() { return *pc == kExprIf; }
+  bool is_block() { return *pc == kExprBlock; }
 };
 
 // Macros that build nodes only if there is a graph and the current SSA
@@ -157,30 +157,50 @@
     return false;
   }
 
-  inline bool Validate(const byte* pc, FunctionIndexOperand& operand) {
+  inline bool Validate(const byte* pc, CallFunctionOperand& operand) {
     ModuleEnv* m = module_;
     if (m && m->module && operand.index < m->module->functions.size()) {
       operand.sig = m->module->functions[operand.index].sig;
+      uint32_t expected = static_cast<uint32_t>(operand.sig->parameter_count());
+      if (operand.arity != expected) {
+        error(pc, pc + 1,
+              "arity mismatch in direct function call (expected %u, got %u)",
+              expected, operand.arity);
+        return false;
+      }
       return true;
     }
     error(pc, pc + 1, "invalid function index");
     return false;
   }
 
-  inline bool Validate(const byte* pc, SignatureIndexOperand& operand) {
+  inline bool Validate(const byte* pc, CallIndirectOperand& operand) {
     ModuleEnv* m = module_;
     if (m && m->module && operand.index < m->module->signatures.size()) {
       operand.sig = m->module->signatures[operand.index];
+      uint32_t expected = static_cast<uint32_t>(operand.sig->parameter_count());
+      if (operand.arity != expected) {
+        error(pc, pc + 1,
+              "arity mismatch in indirect function call (expected %u, got %u)",
+              expected, operand.arity);
+        return false;
+      }
       return true;
     }
     error(pc, pc + 1, "invalid signature index");
     return false;
   }
 
-  inline bool Validate(const byte* pc, ImportIndexOperand& operand) {
+  inline bool Validate(const byte* pc, CallImportOperand& operand) {
     ModuleEnv* m = module_;
     if (m && m->module && operand.index < m->module->import_table.size()) {
       operand.sig = m->module->import_table[operand.index].sig;
+      uint32_t expected = static_cast<uint32_t>(operand.sig->parameter_count());
+      if (operand.arity != expected) {
+        error(pc, pc + 1, "arity mismatch in import call (expected %u, got %u)",
+              expected, operand.arity);
+        return false;
+      }
       return true;
     }
     error(pc, pc + 1, "invalid signature index");
@@ -188,9 +208,13 @@
   }
 
   inline bool Validate(const byte* pc, BreakDepthOperand& operand,
-                       ZoneVector<Block>& blocks) {
-    if (operand.depth < blocks.size()) {
-      operand.target = &blocks[blocks.size() - operand.depth - 1];
+                       ZoneVector<Control>& control) {
+    if (operand.arity > 1) {
+      error(pc, pc + 1, "invalid arity for br or br_if");
+      return false;
+    }
+    if (operand.depth < control.size()) {
+      operand.target = &control[control.size() - operand.depth - 1];
       return true;
     }
     error(pc, pc + 1, "invalid break depth");
@@ -199,6 +223,10 @@
 
   bool Validate(const byte* pc, BranchTableOperand& operand,
                 size_t block_depth) {
+    if (operand.arity > 1) {
+      error(pc, pc + 1, "invalid arity for break");
+      return false;
+    }
     // Verify table.
     for (uint32_t i = 0; i < operand.table_count + 1; i++) {
       uint32_t target = operand.read_entry(this, i);
@@ -229,46 +257,49 @@
       case kExprLoadGlobal:
       case kExprNop:
       case kExprUnreachable:
+      case kExprEnd:
+      case kExprBlock:
+      case kExprLoop:
         return 0;
 
-      case kExprBr:
       case kExprStoreGlobal:
       case kExprSetLocal:
+      case kExprElse:
         return 1;
 
+      case kExprBr: {
+        BreakDepthOperand operand(this, pc);
+        return operand.arity;
+      }
+      case kExprBrIf: {
+        BreakDepthOperand operand(this, pc);
+        return 1 + operand.arity;
+      }
+      case kExprBrTable: {
+        BranchTableOperand operand(this, pc);
+        return 1 + operand.arity;
+      }
+
       case kExprIf:
-      case kExprBrIf:
-        return 2;
-      case kExprIfElse:
+        return 1;
       case kExprSelect:
         return 3;
 
-      case kExprBlock:
-      case kExprLoop: {
-        BlockCountOperand operand(this, pc);
-        return operand.count;
-      }
-
       case kExprCallFunction: {
-        FunctionIndexOperand operand(this, pc);
-        return static_cast<int>(
-            module_->GetFunctionSignature(operand.index)->parameter_count());
+        CallFunctionOperand operand(this, pc);
+        return operand.arity;
       }
       case kExprCallIndirect: {
-        SignatureIndexOperand operand(this, pc);
-        return 1 + static_cast<int>(
-                       module_->GetSignature(operand.index)->parameter_count());
+        CallIndirectOperand operand(this, pc);
+        return 1 + operand.arity;
       }
       case kExprCallImport: {
-        ImportIndexOperand operand(this, pc);
-        return static_cast<int>(
-            module_->GetImportSignature(operand.index)->parameter_count());
+        CallImportOperand operand(this, pc);
+        return operand.arity;
       }
       case kExprReturn: {
-        return static_cast<int>(sig_->return_count());
-      }
-      case kExprBrTable: {
-        return 1;
+        ReturnArityOperand operand(this, pc);
+        return operand.arity;
       }
 
 #define DECLARE_OPCODE_CASE(name, opcode, sig) \
@@ -281,7 +312,6 @@
         FOREACH_SIMPLE_OPCODE(DECLARE_OPCODE_CASE)
         FOREACH_ASMJS_COMPAT_OPCODE(DECLARE_OPCODE_CASE)
 #undef DECLARE_OPCODE_CASE
-      case kExprDeclLocals:
       default:
         UNREACHABLE();
         return 0;
@@ -298,11 +328,6 @@
         MemoryAccessOperand operand(this, pc);
         return 1 + operand.length;
       }
-      case kExprBlock:
-      case kExprLoop: {
-        BlockCountOperand operand(this, pc);
-        return 1 + operand.length;
-      }
       case kExprBr:
       case kExprBrIf: {
         BreakDepthOperand operand(this, pc);
@@ -315,15 +340,15 @@
       }
 
       case kExprCallFunction: {
-        FunctionIndexOperand operand(this, pc);
+        CallFunctionOperand operand(this, pc);
         return 1 + operand.length;
       }
       case kExprCallIndirect: {
-        SignatureIndexOperand operand(this, pc);
+        CallIndirectOperand operand(this, pc);
         return 1 + operand.length;
       }
       case kExprCallImport: {
-        ImportIndexOperand operand(this, pc);
+        CallImportOperand operand(this, pc);
         return 1 + operand.length;
       }
 
@@ -350,6 +375,10 @@
         return 5;
       case kExprF64Const:
         return 9;
+      case kExprReturn: {
+        ReturnArityOperand operand(this, pc);
+        return 1 + operand.length;
+      }
 
       default:
         return 1;
@@ -357,7 +386,6 @@
   }
 };
 
-
 // A shift-reduce-parser strategy for decoding Wasm code that uses an explicit
 // shift-reduce strategy with multiple internal stacks.
 class SR_WasmDecoder : public WasmDecoder {
@@ -368,55 +396,62 @@
         builder_(builder),
         base_(body.base),
         local_type_vec_(zone),
-        trees_(zone),
         stack_(zone),
-        blocks_(zone),
-        ifs_(zone) {
+        control_(zone) {
     local_types_ = &local_type_vec_;
   }
 
-  TreeResult Decode() {
+  bool Decode() {
+    base::ElapsedTimer decode_timer;
+    if (FLAG_trace_wasm_decode_time) {
+      decode_timer.Start();
+    }
+    stack_.clear();
+    control_.clear();
+
     if (end_ < pc_) {
       error(pc_, "function body end < start");
-      return result_;
+      return false;
     }
 
     DecodeLocalDecls();
     InitSsaEnv();
     DecodeFunctionBody();
 
-    Tree* tree = nullptr;
-    if (ok()) {
-      if (ssa_env_->go()) {
-        if (stack_.size() > 0) {
-          error(stack_.back().pc(), end_, "fell off end of code");
-        }
-        AddImplicitReturnAtEnd();
-      }
-      if (trees_.size() == 0) {
-        if (sig_->return_count() > 0) {
-          error(start_, "no trees created");
-        }
-      } else {
-        tree = trees_[0];
-      }
+    if (failed()) return TraceFailed();
+
+    if (!control_.empty()) {
+      error(pc_, control_.back().pc, "unterminated control structure");
+      return TraceFailed();
     }
 
-    if (ok()) {
-      TRACE("wasm-decode ok\n");
+    if (ssa_env_->go()) {
+      TRACE("  @%-6d #xx:%-20s|", startrel(pc_), "ImplicitReturn");
+      DoReturn();
+      if (failed()) return TraceFailed();
+      TRACE("\n");
+    }
+
+    if (FLAG_trace_wasm_decode_time) {
+      double ms = decode_timer.Elapsed().InMillisecondsF();
+      PrintF("wasm-decode ok (%0.3f ms)\n\n", ms);
     } else {
-      TRACE("wasm-error module+%-6d func+%d: %s\n\n", baserel(error_pc_),
-            startrel(error_pc_), error_msg_.get());
+      TRACE("wasm-decode ok\n\n");
     }
 
-    return toResult(tree);
+    return true;
+  }
+
+  bool TraceFailed() {
+    TRACE("wasm-error module+%-6d func+%d: %s\n\n", baserel(error_pc_),
+          startrel(error_pc_), error_msg_.get());
+    return false;
   }
 
   bool DecodeLocalDecls(AstLocalDecls& decls) {
     DecodeLocalDecls();
     if (failed()) return false;
     decls.decls_encoded_size = pc_offset();
-    decls.total_local_count = 0;
     decls.local_types.reserve(local_type_vec_.size());
     for (size_t pos = 0; pos < local_type_vec_.size();) {
       uint32_t count = 0;
@@ -425,9 +460,9 @@
         pos++;
         count++;
       }
-      decls.total_local_count += count;
       decls.local_types.push_back(std::pair<LocalType, uint32_t>(type, count));
     }
+    decls.total_local_count = static_cast<uint32_t>(local_type_vec_.size());
     return true;
   }
 
@@ -448,15 +483,12 @@
   Zone* zone_;
   TFBuilder* builder_;
   const byte* base_;
-  TreeResult result_;
 
   SsaEnv* ssa_env_;
 
-  ZoneVector<LocalType> local_type_vec_;
-  ZoneVector<Tree*> trees_;
-  ZoneVector<Production> stack_;
-  ZoneVector<Block> blocks_;
-  ZoneVector<IfEnv> ifs_;
+  ZoneVector<LocalType> local_type_vec_;  // types of local variables.
+  ZoneVector<Value> stack_;               // stack of values.
+  ZoneVector<Control> control_;           // stack of blocks, loops, and ifs.
 
   inline bool build() { return builder_ && ssa_env_->go(); }
 
@@ -508,53 +540,6 @@
     }
   }
 
-  void Leaf(LocalType type, TFNode* node = nullptr) {
-    size_t size = sizeof(Tree);
-    Tree* tree = reinterpret_cast<Tree*>(zone_->New(size));
-    tree->type = type;
-    tree->count = 0;
-    tree->pc = pc_;
-    tree->node = node;
-    tree->children[0] = nullptr;
-    Reduce(tree);
-  }
-
-  void Shift(LocalType type, uint32_t count) {
-    size_t size =
-        sizeof(Tree) + (count == 0 ? 0 : ((count - 1) * sizeof(Tree*)));
-    Tree* tree = reinterpret_cast<Tree*>(zone_->New(size));
-    tree->type = type;
-    tree->count = count;
-    tree->pc = pc_;
-    tree->node = nullptr;
-    for (uint32_t i = 0; i < count; i++) tree->children[i] = nullptr;
-    if (count == 0) {
-      Production p = {tree, 0};
-      Reduce(&p);
-      Reduce(tree);
-    } else {
-      stack_.push_back({tree, 0});
-    }
-  }
-
-  void Reduce(Tree* tree) {
-    while (true) {
-      if (stack_.size() == 0) {
-        trees_.push_back(tree);
-        break;
-      }
-      Production* p = &stack_.back();
-      p->tree->children[p->index++] = tree;
-      Reduce(p);
-      if (p->done()) {
-        tree = p->tree;
-        stack_.pop_back();
-      } else {
-        break;
-      }
-    }
-  }
-
   char* indentation() {
     static const int kMaxIndent = 64;
     static char bytes[kMaxIndent + 1];
@@ -605,11 +590,11 @@
     total_locals_ = local_type_vec_.size();
   }
 
-  // Decodes the body of a function, producing reduced trees into {result}.
+  // Decodes the body of a function.
   void DecodeFunctionBody() {
-    TRACE("wasm-decode %p...%p (%d bytes) %s\n",
+    TRACE("wasm-decode %p...%p (module+%d, %d bytes) %s\n",
           reinterpret_cast<const void*>(start_),
-          reinterpret_cast<const void*>(limit_),
+          reinterpret_cast<const void*>(limit_), baserel(pc_),
           static_cast<int>(limit_ - start_), builder_ ? "graph building" : "");
 
     if (pc_ >= limit_) return;  // Nothing to do.
@@ -617,49 +602,45 @@
     while (true) {  // decoding loop.
       int len = 1;
       WasmOpcode opcode = static_cast<WasmOpcode>(*pc_);
-      TRACE("wasm-decode module+%-6d %s func+%d: 0x%02x %s\n", baserel(pc_),
-            indentation(), startrel(pc_), opcode,
-            WasmOpcodes::OpcodeName(opcode));
+      TRACE("  @%-6d #%02x:%-20s|", startrel(pc_), opcode,
+            WasmOpcodes::ShortOpcodeName(opcode));
 
       FunctionSig* sig = WasmOpcodes::Signature(opcode);
       if (sig) {
-        // A simple expression with a fixed signature.
-        Shift(sig->GetReturn(), static_cast<uint32_t>(sig->parameter_count()));
-        pc_ += len;
-        if (pc_ >= limit_) {
-          // End of code reached or exceeded.
-          if (pc_ > limit_ && ok()) {
-            error("Beyond end of code");
+        // Fast case of a simple operator.
+        TFNode* node;
+        switch (sig->parameter_count()) {
+          case 1: {
+            Value val = Pop(0, sig->GetParam(0));
+            node = BUILD(Unop, opcode, val.node, position());
+            break;
           }
-          return;
+          case 2: {
+            Value rval = Pop(1, sig->GetParam(1));
+            Value lval = Pop(0, sig->GetParam(0));
+            node = BUILD(Binop, opcode, lval.node, rval.node, position());
+            break;
+          }
+          default:
+            UNREACHABLE();
+            node = nullptr;
+            break;
         }
-        continue;  // back to decoding loop.
-      }
-
-      switch (opcode) {
-        case kExprNop:
-          Leaf(kAstStmt);
-          break;
-        case kExprBlock: {
-          BlockCountOperand operand(this, pc_);
-          if (operand.count < 1) {
-            Leaf(kAstStmt);
-          } else {
-            Shift(kAstEnd, operand.count);
+        Push(GetReturnType(sig), node);
+      } else {
+        // Complex bytecode.
+        switch (opcode) {
+          case kExprNop:
+            Push(kAstStmt, nullptr);
+            break;
+          case kExprBlock: {
             // The break environment is the outer environment.
             SsaEnv* break_env = ssa_env_;
             PushBlock(break_env);
             SetEnv("block:start", Steal(break_env));
+            break;
           }
-          len = 1 + operand.length;
-          break;
-        }
-        case kExprLoop: {
-          BlockCountOperand operand(this, pc_);
-          if (operand.count < 1) {
-            Leaf(kAstStmt);
-          } else {
-            Shift(kAstEnd, operand.count);
+          case kExprLoop: {
             // The break environment is the outer environment.
             SsaEnv* break_env = ssa_env_;
             PushBlock(break_env);
@@ -667,268 +648,535 @@
             // The continue environment is the inner environment.
             PrepareForLoop(pc_, cont_env);
             SetEnv("loop:start", Split(cont_env));
-            if (ssa_env_->go()) ssa_env_->state = SsaEnv::kReached;
-            PushBlock(cont_env);
-            blocks_.back().stack_depth = -1;  // no production for inner block.
+            ssa_env_->SetNotMerged();
+            PushLoop(cont_env);
+            break;
           }
-          len = 1 + operand.length;
-          break;
-        }
-        case kExprIf:
-          Shift(kAstStmt, 2);
-          break;
-        case kExprIfElse:
-          Shift(kAstEnd, 3);  // Result type is typeof(x) in {c ? x : y}.
-          break;
-        case kExprSelect:
-          Shift(kAstStmt, 3);  // Result type is typeof(x) in {c ? x : y}.
-          break;
-        case kExprBr: {
-          BreakDepthOperand operand(this, pc_);
-          if (Validate(pc_, operand, blocks_)) {
-            Shift(kAstEnd, 1);
+          case kExprIf: {
+            // Condition on top of stack. Split environments for branches.
+            Value cond = Pop(0, kAstI32);
+            TFNode* if_true = nullptr;
+            TFNode* if_false = nullptr;
+            BUILD(Branch, cond.node, &if_true, &if_false);
+            SsaEnv* end_env = ssa_env_;
+            SsaEnv* false_env = Split(ssa_env_);
+            false_env->control = if_false;
+            SsaEnv* true_env = Steal(ssa_env_);
+            true_env->control = if_true;
+            PushIf(end_env, false_env);
+            SetEnv("if:true", true_env);
+            break;
           }
-          len = 1 + operand.length;
-          break;
-        }
-        case kExprBrIf: {
-          BreakDepthOperand operand(this, pc_);
-          if (Validate(pc_, operand, blocks_)) {
-            Shift(kAstStmt, 2);
+          case kExprElse: {
+            if (control_.empty()) {
+              error(pc_, "else does not match any if");
+              break;
+            }
+            Control* c = &control_.back();
+            if (!c->is_if()) {
+              error(pc_, c->pc, "else does not match an if");
+              break;
+            }
+            if (c->false_env == nullptr) {
+              error(pc_, c->pc, "else already present for if");
+              break;
+            }
+            Value val = PopUpTo(c->stack_depth);
+            MergeInto(c->end_env, &c->node, &c->type, val);
+            // Switch to environment for false branch.
+            SetEnv("if_else:false", c->false_env);
+            c->false_env = nullptr;  // record that an else is already seen
+            break;
           }
-          len = 1 + operand.length;
-          break;
-        }
-        case kExprBrTable: {
-          BranchTableOperand operand(this, pc_);
-          if (Validate(pc_, operand, blocks_.size())) {
-            Shift(kAstEnd, 1);
+          case kExprEnd: {
+            if (control_.empty()) {
+              error(pc_, "end does not match any if or block");
+              break;
+            }
+            const char* name = "block:end";
+            Control* c = &control_.back();
+            if (c->is_loop) {
+              // Loops always push control in pairs.
+              control_.pop_back();
+              c = &control_.back();
+              name = "loop:end";
+            }
+            Value val = PopUpTo(c->stack_depth);
+            if (c->is_if()) {
+              if (c->false_env != nullptr) {
+                // End the true branch of a one-armed if.
+                Goto(c->false_env, c->end_env);
+                val = {val.pc, nullptr, kAstStmt};
+                name = "if:merge";
+              } else {
+                // End the false branch of a two-armed if.
+                name = "if_else:merge";
+              }
+            }
+            if (ssa_env_->go()) {
+              MergeInto(c->end_env, &c->node, &c->type, val);
+            }
+            SetEnv(name, c->end_env);
+            stack_.resize(c->stack_depth);
+            Push(c->type, c->node);
+            control_.pop_back();
+            break;
           }
-          len = 1 + operand.length;
-          break;
-        }
-        case kExprReturn: {
-          int count = static_cast<int>(sig_->return_count());
-          if (count == 0) {
-            BUILD(Return, 0, builder_->Buffer(0));
-            ssa_env_->Kill();
-            Leaf(kAstEnd);
-          } else {
-            Shift(kAstEnd, count);
+          case kExprSelect: {
+            Value cond = Pop(2, kAstI32);
+            Value fval = Pop();
+            Value tval = Pop();
+            if (tval.type == kAstStmt || tval.type != fval.type) {
+              if (tval.type != kAstEnd && fval.type != kAstEnd) {
+                error(pc_, "type mismatch in select");
+                break;
+              }
+            }
+            if (build()) {
+              DCHECK(tval.type != kAstEnd);
+              DCHECK(fval.type != kAstEnd);
+              DCHECK(cond.type != kAstEnd);
+              TFNode* controls[2];
+              builder_->Branch(cond.node, &controls[0], &controls[1]);
+              TFNode* merge = builder_->Merge(2, controls);
+              TFNode* vals[2] = {tval.node, fval.node};
+              TFNode* phi = builder_->Phi(tval.type, 2, vals, merge);
+              Push(tval.type, phi);
+              ssa_env_->control = merge;
+            } else {
+              Push(tval.type, nullptr);
+            }
+            break;
           }
-          break;
-        }
-        case kExprUnreachable: {
-          BUILD0(Unreachable);
-          ssa_env_->Kill(SsaEnv::kControlEnd);
-          Leaf(kAstEnd, nullptr);
-          break;
-        }
-        case kExprI8Const: {
-          ImmI8Operand operand(this, pc_);
-          Leaf(kAstI32, BUILD(Int32Constant, operand.value));
-          len = 1 + operand.length;
-          break;
-        }
-        case kExprI32Const: {
-          ImmI32Operand operand(this, pc_);
-          Leaf(kAstI32, BUILD(Int32Constant, operand.value));
-          len = 1 + operand.length;
-          break;
-        }
-        case kExprI64Const: {
-          ImmI64Operand operand(this, pc_);
-          Leaf(kAstI64, BUILD(Int64Constant, operand.value));
-          len = 1 + operand.length;
-          break;
-        }
-        case kExprF32Const: {
-          ImmF32Operand operand(this, pc_);
-          Leaf(kAstF32, BUILD(Float32Constant, operand.value));
-          len = 1 + operand.length;
-          break;
-        }
-        case kExprF64Const: {
-          ImmF64Operand operand(this, pc_);
-          Leaf(kAstF64, BUILD(Float64Constant, operand.value));
-          len = 1 + operand.length;
-          break;
-        }
-        case kExprGetLocal: {
-          LocalIndexOperand operand(this, pc_);
-          if (Validate(pc_, operand)) {
-            TFNode* val = build() ? ssa_env_->locals[operand.index] : nullptr;
-            Leaf(operand.type, val);
+          case kExprBr: {
+            BreakDepthOperand operand(this, pc_);
+            Value val = {pc_, nullptr, kAstStmt};
+            if (operand.arity) val = Pop();
+            if (Validate(pc_, operand, control_)) {
+              BreakTo(operand.target, val);
+            }
+            len = 1 + operand.length;
+            Push(kAstEnd, nullptr);
+            break;
           }
-          len = 1 + operand.length;
-          break;
-        }
-        case kExprSetLocal: {
-          LocalIndexOperand operand(this, pc_);
-          if (Validate(pc_, operand)) {
-            Shift(operand.type, 1);
+          case kExprBrIf: {
+            BreakDepthOperand operand(this, pc_);
+            Value cond = Pop(operand.arity, kAstI32);
+            Value val = {pc_, nullptr, kAstStmt};
+            if (operand.arity == 1) val = Pop();
+            if (Validate(pc_, operand, control_)) {
+              SsaEnv* fenv = ssa_env_;
+              SsaEnv* tenv = Split(fenv);
+              fenv->SetNotMerged();
+              BUILD(Branch, cond.node, &tenv->control, &fenv->control);
+              ssa_env_ = tenv;
+              BreakTo(operand.target, val);
+              ssa_env_ = fenv;
+            }
+            len = 1 + operand.length;
+            Push(kAstStmt, nullptr);
+            break;
           }
-          len = 1 + operand.length;
-          break;
-        }
-        case kExprLoadGlobal: {
-          GlobalIndexOperand operand(this, pc_);
-          if (Validate(pc_, operand)) {
-            Leaf(operand.type, BUILD(LoadGlobal, operand.index));
+          case kExprBrTable: {
+            BranchTableOperand operand(this, pc_);
+            if (Validate(pc_, operand, control_.size())) {
+              Value key = Pop(operand.arity, kAstI32);
+              Value val = {pc_, nullptr, kAstStmt};
+              if (operand.arity == 1) val = Pop();
+              if (failed()) break;
+
+              SsaEnv* break_env = ssa_env_;
+              if (operand.table_count > 0) {
+                // Build branches to the various blocks based on the table.
+                TFNode* sw = BUILD(Switch, operand.table_count + 1, key.node);
+
+                SsaEnv* copy = Steal(break_env);
+                ssa_env_ = copy;
+                for (uint32_t i = 0; i < operand.table_count + 1; i++) {
+                  uint16_t target = operand.read_entry(this, i);
+                  ssa_env_ = Split(copy);
+                  ssa_env_->control = (i == operand.table_count)
+                                          ? BUILD(IfDefault, sw)
+                                          : BUILD(IfValue, i, sw);
+                  int depth = target;
+                  Control* c = &control_[control_.size() - depth - 1];
+                  MergeInto(c->end_env, &c->node, &c->type, val);
+                }
+              } else {
+                // Only a default target. Do the equivalent of br.
+                uint16_t target = operand.read_entry(this, 0);
+                int depth = target;
+                Control* c = &control_[control_.size() - depth - 1];
+                MergeInto(c->end_env, &c->node, &c->type, val);
+              }
+              // br_table ends the control flow like br.
+              ssa_env_ = break_env;
+              Push(kAstStmt, nullptr);
+            }
+            len = 1 + operand.length;
+            break;
           }
-          len = 1 + operand.length;
-          break;
-        }
-        case kExprStoreGlobal: {
-          GlobalIndexOperand operand(this, pc_);
-          if (Validate(pc_, operand)) {
-            Shift(operand.type, 1);
+          case kExprReturn: {
+            ReturnArityOperand operand(this, pc_);
+            if (operand.arity != sig_->return_count()) {
+              error(pc_, pc_ + 1, "arity mismatch in return");
+            }
+            DoReturn();
+            len = 1 + operand.length;
+            break;
           }
-          len = 1 + operand.length;
-          break;
-        }
-        case kExprI32LoadMem8S:
-        case kExprI32LoadMem8U:
-        case kExprI32LoadMem16S:
-        case kExprI32LoadMem16U:
-        case kExprI32LoadMem:
-          len = DecodeLoadMem(pc_, kAstI32);
-          break;
-        case kExprI64LoadMem8S:
-        case kExprI64LoadMem8U:
-        case kExprI64LoadMem16S:
-        case kExprI64LoadMem16U:
-        case kExprI64LoadMem32S:
-        case kExprI64LoadMem32U:
-        case kExprI64LoadMem:
-          len = DecodeLoadMem(pc_, kAstI64);
-          break;
-        case kExprF32LoadMem:
-          len = DecodeLoadMem(pc_, kAstF32);
-          break;
-        case kExprF64LoadMem:
-          len = DecodeLoadMem(pc_, kAstF64);
-          break;
-        case kExprI32StoreMem8:
-        case kExprI32StoreMem16:
-        case kExprI32StoreMem:
-          len = DecodeStoreMem(pc_, kAstI32);
-          break;
-        case kExprI64StoreMem8:
-        case kExprI64StoreMem16:
-        case kExprI64StoreMem32:
-        case kExprI64StoreMem:
-          len = DecodeStoreMem(pc_, kAstI64);
-          break;
-        case kExprF32StoreMem:
-          len = DecodeStoreMem(pc_, kAstF32);
-          break;
-        case kExprF64StoreMem:
-          len = DecodeStoreMem(pc_, kAstF64);
-          break;
-        case kExprMemorySize:
-          Leaf(kAstI32, BUILD(MemSize, 0));
-          break;
-        case kExprGrowMemory:
-          Shift(kAstI32, 1);
-          break;
-        case kExprCallFunction: {
-          FunctionIndexOperand operand(this, pc_);
-          if (Validate(pc_, operand)) {
-            LocalType type = operand.sig->return_count() == 0
-                                 ? kAstStmt
-                                 : operand.sig->GetReturn();
-            Shift(type, static_cast<int>(operand.sig->parameter_count()));
+          case kExprUnreachable: {
+            Push(kAstEnd, BUILD(Unreachable, position()));
+            ssa_env_->Kill(SsaEnv::kControlEnd);
+            break;
           }
-          len = 1 + operand.length;
-          break;
-        }
-        case kExprCallIndirect: {
-          SignatureIndexOperand operand(this, pc_);
-          if (Validate(pc_, operand)) {
-            LocalType type = operand.sig->return_count() == 0
-                                 ? kAstStmt
-                                 : operand.sig->GetReturn();
-            Shift(type, static_cast<int>(1 + operand.sig->parameter_count()));
+          case kExprI8Const: {
+            ImmI8Operand operand(this, pc_);
+            Push(kAstI32, BUILD(Int32Constant, operand.value));
+            len = 1 + operand.length;
+            break;
           }
-          len = 1 + operand.length;
-          break;
-        }
-        case kExprCallImport: {
-          ImportIndexOperand operand(this, pc_);
-          if (Validate(pc_, operand)) {
-            LocalType type = operand.sig->return_count() == 0
-                                 ? kAstStmt
-                                 : operand.sig->GetReturn();
-            Shift(type, static_cast<int>(operand.sig->parameter_count()));
+          case kExprI32Const: {
+            ImmI32Operand operand(this, pc_);
+            Push(kAstI32, BUILD(Int32Constant, operand.value));
+            len = 1 + operand.length;
+            break;
           }
-          len = 1 + operand.length;
-          break;
+          case kExprI64Const: {
+            ImmI64Operand operand(this, pc_);
+            Push(kAstI64, BUILD(Int64Constant, operand.value));
+            len = 1 + operand.length;
+            break;
+          }
+          case kExprF32Const: {
+            ImmF32Operand operand(this, pc_);
+            Push(kAstF32, BUILD(Float32Constant, operand.value));
+            len = 1 + operand.length;
+            break;
+          }
+          case kExprF64Const: {
+            ImmF64Operand operand(this, pc_);
+            Push(kAstF64, BUILD(Float64Constant, operand.value));
+            len = 1 + operand.length;
+            break;
+          }
+          case kExprGetLocal: {
+            LocalIndexOperand operand(this, pc_);
+            if (Validate(pc_, operand)) {
+              if (build()) {
+                Push(operand.type, ssa_env_->locals[operand.index]);
+              } else {
+                Push(operand.type, nullptr);
+              }
+            }
+            len = 1 + operand.length;
+            break;
+          }
+          case kExprSetLocal: {
+            LocalIndexOperand operand(this, pc_);
+            if (Validate(pc_, operand)) {
+              Value val = Pop(0, local_type_vec_[operand.index]);
+              if (ssa_env_->locals) ssa_env_->locals[operand.index] = val.node;
+              Push(val.type, val.node);
+            }
+            len = 1 + operand.length;
+            break;
+          }
+          case kExprLoadGlobal: {
+            GlobalIndexOperand operand(this, pc_);
+            if (Validate(pc_, operand)) {
+              Push(operand.type, BUILD(LoadGlobal, operand.index));
+            }
+            len = 1 + operand.length;
+            break;
+          }
+          case kExprStoreGlobal: {
+            GlobalIndexOperand operand(this, pc_);
+            if (Validate(pc_, operand)) {
+              Value val = Pop(0, operand.type);
+              BUILD(StoreGlobal, operand.index, val.node);
+              Push(val.type, val.node);
+            }
+            len = 1 + operand.length;
+            break;
+          }
+          case kExprI32LoadMem8S:
+            len = DecodeLoadMem(kAstI32, MachineType::Int8());
+            break;
+          case kExprI32LoadMem8U:
+            len = DecodeLoadMem(kAstI32, MachineType::Uint8());
+            break;
+          case kExprI32LoadMem16S:
+            len = DecodeLoadMem(kAstI32, MachineType::Int16());
+            break;
+          case kExprI32LoadMem16U:
+            len = DecodeLoadMem(kAstI32, MachineType::Uint16());
+            break;
+          case kExprI32LoadMem:
+            len = DecodeLoadMem(kAstI32, MachineType::Int32());
+            break;
+
+          case kExprI64LoadMem8S:
+            len = DecodeLoadMem(kAstI64, MachineType::Int8());
+            break;
+          case kExprI64LoadMem8U:
+            len = DecodeLoadMem(kAstI64, MachineType::Uint8());
+            break;
+          case kExprI64LoadMem16S:
+            len = DecodeLoadMem(kAstI64, MachineType::Int16());
+            break;
+          case kExprI64LoadMem16U:
+            len = DecodeLoadMem(kAstI64, MachineType::Uint16());
+            break;
+          case kExprI64LoadMem32S:
+            len = DecodeLoadMem(kAstI64, MachineType::Int32());
+            break;
+          case kExprI64LoadMem32U:
+            len = DecodeLoadMem(kAstI64, MachineType::Uint32());
+            break;
+          case kExprI64LoadMem:
+            len = DecodeLoadMem(kAstI64, MachineType::Int64());
+            break;
+          case kExprF32LoadMem:
+            len = DecodeLoadMem(kAstF32, MachineType::Float32());
+            break;
+          case kExprF64LoadMem:
+            len = DecodeLoadMem(kAstF64, MachineType::Float64());
+            break;
+          case kExprI32StoreMem8:
+            len = DecodeStoreMem(kAstI32, MachineType::Int8());
+            break;
+          case kExprI32StoreMem16:
+            len = DecodeStoreMem(kAstI32, MachineType::Int16());
+            break;
+          case kExprI32StoreMem:
+            len = DecodeStoreMem(kAstI32, MachineType::Int32());
+            break;
+          case kExprI64StoreMem8:
+            len = DecodeStoreMem(kAstI64, MachineType::Int8());
+            break;
+          case kExprI64StoreMem16:
+            len = DecodeStoreMem(kAstI64, MachineType::Int16());
+            break;
+          case kExprI64StoreMem32:
+            len = DecodeStoreMem(kAstI64, MachineType::Int32());
+            break;
+          case kExprI64StoreMem:
+            len = DecodeStoreMem(kAstI64, MachineType::Int64());
+            break;
+          case kExprF32StoreMem:
+            len = DecodeStoreMem(kAstF32, MachineType::Float32());
+            break;
+          case kExprF64StoreMem:
+            len = DecodeStoreMem(kAstF64, MachineType::Float64());
+            break;
+
+          case kExprMemorySize:
+            Push(kAstI32, BUILD(MemSize, 0));
+            break;
+          case kExprGrowMemory: {
+            Value val = Pop(0, kAstI32);
+            USE(val);  // TODO(titzer): build node for grow memory
+            Push(kAstI32, BUILD(Int32Constant, 0));
+            break;
+          }
+          case kExprCallFunction: {
+            CallFunctionOperand operand(this, pc_);
+            if (Validate(pc_, operand)) {
+              TFNode** buffer = PopArgs(operand.sig);
+              TFNode* call =
+                  BUILD(CallDirect, operand.index, buffer, position());
+              Push(GetReturnType(operand.sig), call);
+            }
+            len = 1 + operand.length;
+            break;
+          }
+          case kExprCallIndirect: {
+            CallIndirectOperand operand(this, pc_);
+            if (Validate(pc_, operand)) {
+              TFNode** buffer = PopArgs(operand.sig);
+              Value index = Pop(0, kAstI32);
+              if (buffer) buffer[0] = index.node;
+              TFNode* call =
+                  BUILD(CallIndirect, operand.index, buffer, position());
+              Push(GetReturnType(operand.sig), call);
+            }
+            len = 1 + operand.length;
+            break;
+          }
+          case kExprCallImport: {
+            CallImportOperand operand(this, pc_);
+            if (Validate(pc_, operand)) {
+              TFNode** buffer = PopArgs(operand.sig);
+              TFNode* call =
+                  BUILD(CallImport, operand.index, buffer, position());
+              Push(GetReturnType(operand.sig), call);
+            }
+            len = 1 + operand.length;
+            break;
+          }
+          default:
+            error("Invalid opcode");
+            return;
         }
-        case kExprDeclLocals:
-        default:
-          error("Invalid opcode");
-          return;
+      }  // end complex bytecode
+
+#if DEBUG
+      if (FLAG_trace_wasm_decoder) {
+        for (size_t i = 0; i < stack_.size(); i++) {
+          Value& val = stack_[i];
+          WasmOpcode opcode = static_cast<WasmOpcode>(*val.pc);
+          PrintF(" %c@%d:%s", WasmOpcodes::ShortNameOf(val.type),
+                 static_cast<int>(val.pc - start_),
+                 WasmOpcodes::ShortOpcodeName(opcode));
+          switch (opcode) {
+            case kExprI32Const: {
+              ImmI32Operand operand(this, val.pc);
+              PrintF("[%d]", operand.value);
+              break;
+            }
+            case kExprGetLocal: {
+              LocalIndexOperand operand(this, val.pc);
+              PrintF("[%u]", operand.index);
+              break;
+            }
+            case kExprSetLocal: {
+              LocalIndexOperand operand(this, val.pc);
+              PrintF("[%u]", operand.index);
+              break;
+            }
+            default:
+              break;
+          }
+        }
+        PrintF("\n");
       }
+#endif
       pc_ += len;
       if (pc_ >= limit_) {
         // End of code reached or exceeded.
-        if (pc_ > limit_ && ok()) {
-          error("Beyond end of code");
-        }
+        if (pc_ > limit_ && ok()) error("Beyond end of code");
         return;
       }
+    }  // end decode loop
+  }    // end DecodeFunctionBody()
+
+  // Pops the arguments for a call with signature {sig}. The last parameter is
+  // on top of the stack, so parameters are popped from last to first.
+  // When building a graph, returns a buffer holding the argument nodes in
+  // slots [1..count]; otherwise only type-checks and returns nullptr.
+  TFNode** PopArgs(FunctionSig* sig) {
+    const int count = static_cast<int>(sig->parameter_count());
+    TFNode** buffer = nullptr;
+    if (build()) {
+      buffer = builder_->Buffer(count + 1);
+      buffer[0] = nullptr;  // reserved for code object or function index.
+    }
+    for (int i = count - 1; i >= 0; i--) {
+      Value arg = Pop(i, sig->GetParam(i));
+      if (buffer) buffer[i + 1] = arg.node;
     }
+    return buffer;
   }
 
-  void PushBlock(SsaEnv* ssa_env) {
-    blocks_.push_back({ssa_env, static_cast<int>(stack_.size() - 1)});
+  LocalType GetReturnType(FunctionSig* sig) {  // kAstStmt if {sig} is void.
+    return sig->return_count() > 0 ? sig->GetReturn() : kAstStmt;
   }
 
-  int DecodeLoadMem(const byte* pc, LocalType type) {
-    MemoryAccessOperand operand(this, pc);
-    Shift(type, 1);
+  void PushBlock(SsaEnv* end_env) {
+    // Records pc_ and the current stack height; the type starts as kAstEnd.
+    control_.push_back({pc_, static_cast<int>(stack_.size()), end_env, nullptr,
+                        nullptr, kAstEnd, false});
+  }
+
+  void PushLoop(SsaEnv* end_env) {
+    // Same entry as PushBlock, but the final flag (presumably is_loop) is set.
+    control_.push_back({pc_, static_cast<int>(stack_.size()), end_env, nullptr,
+                        nullptr, kAstEnd, true});
+  }
+
+  void PushIf(SsaEnv* end_env, SsaEnv* false_env) {
+    // Unlike blocks and loops, an if records {false_env} and starts as kAstStmt.
+    control_.push_back({pc_, static_cast<int>(stack_.size()), end_env,
+                        false_env, nullptr, kAstStmt, false});
+  }
+
+  int DecodeLoadMem(LocalType type, MachineType mem_type) {
+    // Pops the i32 index, pushes the loaded value, returns bytes consumed.
+    MemoryAccessOperand operand(this, pc_);
+    TFNode* index = Pop(0, kAstI32).node;
+    Push(type,
+         BUILD(LoadMem, type, mem_type, index, operand.offset, position()));
     return 1 + operand.length;
   }
 
-  int DecodeStoreMem(const byte* pc, LocalType type) {
-    MemoryAccessOperand operand(this, pc);
-    Shift(type, 2);
+  int DecodeStoreMem(LocalType type, MachineType mem_type) {
+    MemoryAccessOperand operand(this, pc_);
+    TFNode* stored = Pop(1, type).node;      // value is on top of the stack,
+    TFNode* address = Pop(0, kAstI32).node;  // the i32 index just below it.
+    BUILD(StoreMem, mem_type, address, operand.offset, stored, position());
+    Push(type, stored);  // a store evaluates to the value it stored.
     return 1 + operand.length;
   }
 
-  void AddImplicitReturnAtEnd() {
-    int retcount = static_cast<int>(sig_->return_count());
-    if (retcount == 0) {
-      BUILD0(ReturnVoid);
-      return;
+  void DoReturn() {
+    const int count = static_cast<int>(sig_->return_count());
+    TFNode** buffer = build() ? builder_->Buffer(count) : nullptr;
+
+    // The last return value is on top of the stack, so pop from the highest
+    // index down; nodes are only recorded while a graph is being built.
+    for (int i = count - 1; i >= 0; i--) {
+      TFNode* node = Pop(i, sig_->GetReturn(i)).node;
+      if (buffer) buffer[i] = node;
     }
 
-    if (static_cast<int>(trees_.size()) < retcount) {
-      error(limit_, nullptr,
-            "ImplicitReturn expects %d arguments, only %d remain", retcount,
-            static_cast<int>(trees_.size()));
-      return;
-    }
+    Push(kAstEnd, BUILD(Return, count, buffer));
+    ssa_env_->Kill(SsaEnv::kControlEnd);
+  }
 
-    TRACE("wasm-decode implicit return of %d args\n", retcount);
+  void Push(LocalType type, TFNode* node) {
+    stack_.push_back({pc_, node, type});  // value is tagged with current pc_.
+  }
 
-    TFNode** buffer = BUILD(Buffer, retcount);
-    for (int index = 0; index < retcount; index++) {
-      Tree* tree = trees_[trees_.size() - 1 - index];
-      if (buffer) buffer[index] = tree->node;
-      LocalType expected = sig_->GetReturn(index);
-      if (tree->type != expected) {
-        error(limit_, tree->pc,
-              "ImplicitReturn[%d] expected type %s, found %s of type %s", index,
-              WasmOpcodes::TypeName(expected),
-              WasmOpcodes::OpcodeName(tree->opcode()),
-              WasmOpcodes::TypeName(tree->type));
-        return;
+  const char* SafeOpcodeNameAt(const byte* pc) {
+    if (pc >= end_) return "<end>";  // do not read at or past end_.
+    return WasmOpcodes::ShortOpcodeName(static_cast<WasmOpcode>(*pc));
+  }
+
+  Value Pop(int index, LocalType expected) {  // {index} is for messages only.
+    Value val = Pop();
+    if (val.type != expected) {
+      if (val.type != kAstEnd) {  // a kAstEnd value is accepted as any type.
+        error(pc_, val.pc, "%s[%d] expected type %s, found %s of type %s",
+              SafeOpcodeNameAt(pc_), index, WasmOpcodes::TypeName(expected),
+              SafeOpcodeNameAt(val.pc), WasmOpcodes::TypeName(val.type));
       }
     }
+    return val;  // returned even after a type error.
+  }
 
-    BUILD(Return, retcount, buffer);
+  Value Pop() {
+    size_t limit = control_.empty() ? 0 : control_.back().stack_depth;
+    Value result = {pc_, nullptr, kAstStmt};  // placeholder if nothing to pop.
+    if (stack_.size() > limit) {
+      result = stack_.back();
+      stack_.pop_back();
+    } else {  // values below the innermost control's base are off limits.
+      error(pc_, pc_, "%s found empty stack", SafeOpcodeNameAt(pc_));
+    }
+    return result;
+  }
+
+  // Returns the top value and truncates the stack to {stack_depth}.
+  Value PopUpTo(int stack_depth) {
+    if (stack_depth == static_cast<int>(stack_.size())) {  // as in the DCHECK.
+      Value val = {pc_, nullptr, kAstStmt};  // nothing above: no value.
+      return val;
+    }
+    DCHECK_LE(stack_depth, static_cast<int>(stack_.size()));
+    Value val = Pop();
+    stack_.resize(stack_depth);
+    return val;
   }
 
   int baserel(const byte* ptr) {
@@ -937,437 +1185,65 @@
 
   int startrel(const byte* ptr) { return static_cast<int>(ptr - start_); }
 
-  void Reduce(Production* p) {
-    WasmOpcode opcode = p->opcode();
-    TRACE("-----reduce module+%-6d %s func+%d: 0x%02x %s\n", baserel(p->pc()),
-          indentation(), startrel(p->pc()), opcode,
-          WasmOpcodes::OpcodeName(opcode));
-    FunctionSig* sig = WasmOpcodes::Signature(opcode);
-    if (sig) {
-      // A simple expression with a fixed signature.
-      TypeCheckLast(p, sig->GetParam(p->index - 1));
-      if (p->done() && build()) {
-        if (sig->parameter_count() == 2) {
-          p->tree->node = builder_->Binop(opcode, p->tree->children[0]->node,
-                                          p->tree->children[1]->node);
-        } else if (sig->parameter_count() == 1) {
-          p->tree->node = builder_->Unop(opcode, p->tree->children[0]->node);
-        } else {
-          UNREACHABLE();
-        }
-      }
-      return;
-    }
-
-    switch (opcode) {
-      case kExprBlock: {
-        if (p->done()) {
-          Block* last = &blocks_.back();
-          DCHECK_EQ(stack_.size() - 1, last->stack_depth);
-          // fallthrough with the last expression.
-          ReduceBreakToExprBlock(p, last);
-          SetEnv("block:end", last->ssa_env);
-          blocks_.pop_back();
-        }
-        break;
-      }
-      case kExprLoop: {
-        if (p->done()) {
-          // Pop the continue environment.
-          blocks_.pop_back();
-          // Get the break environment.
-          Block* last = &blocks_.back();
-          DCHECK_EQ(stack_.size() - 1, last->stack_depth);
-          // fallthrough with the last expression.
-          ReduceBreakToExprBlock(p, last);
-          SetEnv("loop:end", last->ssa_env);
-          blocks_.pop_back();
-        }
-        break;
-      }
-      case kExprIf: {
-        if (p->index == 1) {
-          // Condition done. Split environment for true branch.
-          TypeCheckLast(p, kAstI32);
-          SsaEnv* false_env = ssa_env_;
-          SsaEnv* true_env = Split(ssa_env_);
-          ifs_.push_back({nullptr, false_env, nullptr});
-          BUILD(Branch, p->last()->node, &true_env->control,
-                &false_env->control);
-          SetEnv("if:true", true_env);
-        } else if (p->index == 2) {
-          // True block done. Merge true and false environments.
-          IfEnv* env = &ifs_.back();
-          SsaEnv* merge = env->merge_env;
-          if (merge->go()) {
-            merge->state = SsaEnv::kReached;
-            Goto(ssa_env_, merge);
-          }
-          SetEnv("if:merge", merge);
-          ifs_.pop_back();
-        }
-        break;
-      }
-      case kExprIfElse: {
-        if (p->index == 1) {
-          // Condition done. Split environment for true and false branches.
-          TypeCheckLast(p, kAstI32);
-          SsaEnv* merge_env = ssa_env_;
-          TFNode* if_true = nullptr;
-          TFNode* if_false = nullptr;
-          BUILD(Branch, p->last()->node, &if_true, &if_false);
-          SsaEnv* false_env = Split(ssa_env_);
-          SsaEnv* true_env = Steal(ssa_env_);
-          false_env->control = if_false;
-          true_env->control = if_true;
-          ifs_.push_back({false_env, merge_env, nullptr});
-          SetEnv("if_else:true", true_env);
-        } else if (p->index == 2) {
-          // True expr done.
-          IfEnv* env = &ifs_.back();
-          MergeIntoProduction(p, env->merge_env, p->last());
-          // Switch to environment for false branch.
-          SsaEnv* false_env = ifs_.back().false_env;
-          SetEnv("if_else:false", false_env);
-        } else if (p->index == 3) {
-          // False expr done.
-          IfEnv* env = &ifs_.back();
-          MergeIntoProduction(p, env->merge_env, p->last());
-          SetEnv("if_else:merge", env->merge_env);
-          ifs_.pop_back();
-        }
-        break;
-      }
-      case kExprSelect: {
-        if (p->index == 1) {
-          // True expression done.
-          p->tree->type = p->last()->type;
-          if (p->tree->type == kAstStmt) {
-            error(p->pc(), p->tree->children[1]->pc,
-                  "select operand should be expression");
-          }
-        } else if (p->index == 2) {
-          // False expression done.
-          TypeCheckLast(p, p->tree->type);
-        } else {
-          // Condition done.
-          DCHECK(p->done());
-          TypeCheckLast(p, kAstI32);
-          if (build()) {
-            TFNode* controls[2];
-            builder_->Branch(p->tree->children[2]->node, &controls[0],
-                             &controls[1]);
-            TFNode* merge = builder_->Merge(2, controls);
-            TFNode* vals[2] = {p->tree->children[0]->node,
-                               p->tree->children[1]->node};
-            TFNode* phi = builder_->Phi(p->tree->type, 2, vals, merge);
-            p->tree->node = phi;
-            ssa_env_->control = merge;
-          }
-        }
-        break;
-      }
-      case kExprBr: {
-        BreakDepthOperand operand(this, p->pc());
-        CHECK(Validate(p->pc(), operand, blocks_));
-        ReduceBreakToExprBlock(p, operand.target);
-        break;
-      }
-      case kExprBrIf: {
-        if (p->done()) {
-          TypeCheckLast(p, kAstI32);
-          BreakDepthOperand operand(this, p->pc());
-          CHECK(Validate(p->pc(), operand, blocks_));
-          SsaEnv* fenv = ssa_env_;
-          SsaEnv* tenv = Split(fenv);
-          BUILD(Branch, p->tree->children[1]->node, &tenv->control,
-                &fenv->control);
-          ssa_env_ = tenv;
-          ReduceBreakToExprBlock(p, operand.target, p->tree->children[0]);
-          ssa_env_ = fenv;
-        }
-        break;
-      }
-      case kExprBrTable: {
-        if (p->index == 1) {
-          // Switch key finished.
-          TypeCheckLast(p, kAstI32);
-          if (failed()) break;
-
-          BranchTableOperand operand(this, p->pc());
-          DCHECK(Validate(p->pc(), operand, blocks_.size()));
-
-          // Build a switch only if it has more than just a default target.
-          bool build_switch = operand.table_count > 0;
-          TFNode* sw = nullptr;
-          if (build_switch) {
-            sw = BUILD(Switch, operand.table_count + 1, p->last()->node);
-          }
-
-          // Process the targets of the break table.
-          SsaEnv* prev = ssa_env_;
-          SsaEnv* copy = Steal(prev);
-          for (uint32_t i = 0; i < operand.table_count + 1; i++) {
-            uint32_t target = operand.read_entry(this, i);
-            SsaEnv* env = copy;
-            if (build_switch) {
-              ssa_env_ = env = Split(env);
-              env->control = i == operand.table_count ? BUILD(IfDefault, sw)
-                                                      : BUILD(IfValue, i, sw);
-            }
-            SsaEnv* tenv = blocks_[blocks_.size() - target - 1].ssa_env;
-            Goto(env, tenv);
-          }
-          ssa_env_ = prev;
-        }
-        break;
-      }
-      case kExprReturn: {
-        TypeCheckLast(p, sig_->GetReturn(p->index - 1));
-        if (p->done()) {
-          if (build()) {
-            int count = p->tree->count;
-            TFNode** buffer = builder_->Buffer(count);
-            for (int i = 0; i < count; i++) {
-              buffer[i] = p->tree->children[i]->node;
-            }
-            BUILD(Return, count, buffer);
-          }
-          ssa_env_->Kill(SsaEnv::kControlEnd);
-        }
-        break;
-      }
-      case kExprSetLocal: {
-        LocalIndexOperand operand(this, p->pc());
-        CHECK(Validate(p->pc(), operand));
-        Tree* val = p->last();
-        if (operand.type == val->type) {
-          if (build()) ssa_env_->locals[operand.index] = val->node;
-          p->tree->node = val->node;
-        } else {
-          error(p->pc(), val->pc, "Typecheck failed in SetLocal");
-        }
-        break;
-      }
-      case kExprStoreGlobal: {
-        GlobalIndexOperand operand(this, p->pc());
-        CHECK(Validate(p->pc(), operand));
-        Tree* val = p->last();
-        if (operand.type == val->type) {
-          BUILD(StoreGlobal, operand.index, val->node);
-          p->tree->node = val->node;
-        } else {
-          error(p->pc(), val->pc, "Typecheck failed in StoreGlobal");
-        }
-        break;
-      }
-
-      case kExprI32LoadMem8S:
-        return ReduceLoadMem(p, kAstI32, MachineType::Int8());
-      case kExprI32LoadMem8U:
-        return ReduceLoadMem(p, kAstI32, MachineType::Uint8());
-      case kExprI32LoadMem16S:
-        return ReduceLoadMem(p, kAstI32, MachineType::Int16());
-      case kExprI32LoadMem16U:
-        return ReduceLoadMem(p, kAstI32, MachineType::Uint16());
-      case kExprI32LoadMem:
-        return ReduceLoadMem(p, kAstI32, MachineType::Int32());
-
-      case kExprI64LoadMem8S:
-        return ReduceLoadMem(p, kAstI64, MachineType::Int8());
-      case kExprI64LoadMem8U:
-        return ReduceLoadMem(p, kAstI64, MachineType::Uint8());
-      case kExprI64LoadMem16S:
-        return ReduceLoadMem(p, kAstI64, MachineType::Int16());
-      case kExprI64LoadMem16U:
-        return ReduceLoadMem(p, kAstI64, MachineType::Uint16());
-      case kExprI64LoadMem32S:
-        return ReduceLoadMem(p, kAstI64, MachineType::Int32());
-      case kExprI64LoadMem32U:
-        return ReduceLoadMem(p, kAstI64, MachineType::Uint32());
-      case kExprI64LoadMem:
-        return ReduceLoadMem(p, kAstI64, MachineType::Int64());
-
-      case kExprF32LoadMem:
-        return ReduceLoadMem(p, kAstF32, MachineType::Float32());
-
-      case kExprF64LoadMem:
-        return ReduceLoadMem(p, kAstF64, MachineType::Float64());
-
-      case kExprI32StoreMem8:
-        return ReduceStoreMem(p, kAstI32, MachineType::Int8());
-      case kExprI32StoreMem16:
-        return ReduceStoreMem(p, kAstI32, MachineType::Int16());
-      case kExprI32StoreMem:
-        return ReduceStoreMem(p, kAstI32, MachineType::Int32());
-
-      case kExprI64StoreMem8:
-        return ReduceStoreMem(p, kAstI64, MachineType::Int8());
-      case kExprI64StoreMem16:
-        return ReduceStoreMem(p, kAstI64, MachineType::Int16());
-      case kExprI64StoreMem32:
-        return ReduceStoreMem(p, kAstI64, MachineType::Int32());
-      case kExprI64StoreMem:
-        return ReduceStoreMem(p, kAstI64, MachineType::Int64());
-
-      case kExprF32StoreMem:
-        return ReduceStoreMem(p, kAstF32, MachineType::Float32());
-
-      case kExprF64StoreMem:
-        return ReduceStoreMem(p, kAstF64, MachineType::Float64());
-
-      case kExprGrowMemory:
-        TypeCheckLast(p, kAstI32);
-        // TODO(titzer): build node for GrowMemory
-        p->tree->node = BUILD(Int32Constant, 0);
-        return;
-
-      case kExprCallFunction: {
-        FunctionIndexOperand operand(this, p->pc());
-        CHECK(Validate(p->pc(), operand));
-        if (p->index > 0) {
-          TypeCheckLast(p, operand.sig->GetParam(p->index - 1));
-        }
-        if (p->done() && build()) {
-          uint32_t count = p->tree->count + 1;
-          TFNode** buffer = builder_->Buffer(count);
-          buffer[0] = nullptr;  // reserved for code object.
-          for (uint32_t i = 1; i < count; i++) {
-            buffer[i] = p->tree->children[i - 1]->node;
-          }
-          p->tree->node = builder_->CallDirect(operand.index, buffer);
-        }
-        break;
-      }
-      case kExprCallIndirect: {
-        SignatureIndexOperand operand(this, p->pc());
-        CHECK(Validate(p->pc(), operand));
-        if (p->index == 1) {
-          TypeCheckLast(p, kAstI32);
-        } else {
-          TypeCheckLast(p, operand.sig->GetParam(p->index - 2));
-        }
-        if (p->done() && build()) {
-          uint32_t count = p->tree->count;
-          TFNode** buffer = builder_->Buffer(count);
-          for (uint32_t i = 0; i < count; i++) {
-            buffer[i] = p->tree->children[i]->node;
-          }
-          p->tree->node = builder_->CallIndirect(operand.index, buffer);
-        }
-        break;
-      }
-      case kExprCallImport: {
-        ImportIndexOperand operand(this, p->pc());
-        CHECK(Validate(p->pc(), operand));
-        if (p->index > 0) {
-          TypeCheckLast(p, operand.sig->GetParam(p->index - 1));
-        }
-        if (p->done() && build()) {
-          uint32_t count = p->tree->count + 1;
-          TFNode** buffer = builder_->Buffer(count);
-          buffer[0] = nullptr;  // reserved for code object.
-          for (uint32_t i = 1; i < count; i++) {
-            buffer[i] = p->tree->children[i - 1]->node;
-          }
-          p->tree->node = builder_->CallImport(operand.index, buffer);
-        }
-        break;
-      }
-      default:
-        break;
-    }
-  }
-
-  void ReduceBreakToExprBlock(Production* p, Block* block) {
-    ReduceBreakToExprBlock(p, block, p->tree->count > 0 ? p->last() : nullptr);
-  }
-
-  void ReduceBreakToExprBlock(Production* p, Block* block, Tree* val) {
-    if (block->stack_depth < 0) {
+  void BreakTo(Control* block, Value& val) {  // {val} is unused for loops.
+    if (block->is_loop) {
       // This is the inner loop block, which does not have a value.
-      Goto(ssa_env_, block->ssa_env);
+      Goto(ssa_env_, block->end_env);
     } else {
       // Merge the value into the production for the block.
-      Production* bp = &stack_[block->stack_depth];
-      MergeIntoProduction(bp, block->ssa_env, val);
+      MergeInto(block->end_env, &block->node, &block->type, val);
     }
   }
 
-  void MergeIntoProduction(Production* p, SsaEnv* target, Tree* expr) {
+  void MergeInto(SsaEnv* target, TFNode** node, LocalType* type, Value& val) {
     if (!ssa_env_->go()) return;
+    DCHECK_NE(kAstEnd, val.type);  // callers must not pass a kAstEnd value.
 
     bool first = target->state == SsaEnv::kUnreachable;
     Goto(ssa_env_, target);
-    if (expr == nullptr || expr->type == kAstEnd) return;
 
     if (first) {
       // first merge to this environment; set the type and the node.
-      p->tree->type = expr->type;
-      p->tree->node = expr->node;
-    } else {
+      *type = val.type;
+      *node = val.node;
+    } else if (val.type == *type && val.type != kAstStmt) {  // types agree.
       // merge with the existing value for this block.
-      LocalType type = p->tree->type;
-      if (expr->type != type) {
-        type = kAstStmt;
-        p->tree->type = kAstStmt;
-        p->tree->node = nullptr;
-      } else if (type != kAstStmt) {
-        p->tree->node = CreateOrMergeIntoPhi(type, target->control,
-                                             p->tree->node, expr->node);
-      }
-    }
-  }
-
-  void ReduceLoadMem(Production* p, LocalType type, MachineType mem_type) {
-    DCHECK_EQ(1, p->index);
-    TypeCheckLast(p, kAstI32);  // index
-    if (build()) {
-      MemoryAccessOperand operand(this, p->pc());
-      p->tree->node =
-          builder_->LoadMem(type, mem_type, p->last()->node, operand.offset);
-    }
-  }
-
-  void ReduceStoreMem(Production* p, LocalType type, MachineType mem_type) {
-    if (p->index == 1) {
-      TypeCheckLast(p, kAstI32);  // index
+      *node = CreateOrMergeIntoPhi(*type, target->control, *node, val.node);
     } else {
-      DCHECK_EQ(2, p->index);
-      TypeCheckLast(p, type);
-      if (build()) {
-        MemoryAccessOperand operand(this, p->pc());
-        TFNode* val = p->tree->children[1]->node;
-        builder_->StoreMem(mem_type, p->tree->children[0]->node, operand.offset,
-                           val);
-        p->tree->node = val;
-      }
-    }
-  }
-
-  void TypeCheckLast(Production* p, LocalType expected) {
-    LocalType result = p->last()->type;
-    if (result == expected) return;
-    if (result == kAstEnd) return;
-    if (expected != kAstStmt) {
-      error(p->pc(), p->last()->pc,
-            "%s[%d] expected type %s, found %s of type %s",
-            WasmOpcodes::OpcodeName(p->opcode()), p->index - 1,
-            WasmOpcodes::TypeName(expected),
-            WasmOpcodes::OpcodeName(p->last()->opcode()),
-            WasmOpcodes::TypeName(p->last()->type));
+      // types don't match, or block is already a stmt.
+      *type = kAstStmt;
+      *node = nullptr;  // the merged result carries no value.
     }
   }
 
   void SetEnv(const char* reason, SsaEnv* env) {
 #if DEBUG
-    TRACE("  env = %p, block depth = %d, reason = %s", static_cast<void*>(env),
-          static_cast<int>(blocks_.size()), reason);
-    if (FLAG_trace_wasm_decoder && env && env->control) {
-      TRACE(", control = ");
-      compiler::WasmGraphBuilder::PrintDebugName(env->control);
+    if (FLAG_trace_wasm_decoder) {
+      char state = 'X';
+      if (env) {
+        switch (env->state) {
+          case SsaEnv::kReached:
+            state = 'R';
+            break;
+          case SsaEnv::kUnreachable:
+            state = 'U';
+            break;
+          case SsaEnv::kMerged:
+            state = 'M';
+            break;
+          case SsaEnv::kControlEnd:
+            state = 'E';
+            break;
+        }
+      }
+      PrintF("  env = %p, state = %c, reason = %s", static_cast<void*>(env),
+             state, reason);
+      if (env && env->control) {
+        PrintF(", control = ");
+        compiler::WasmGraphBuilder::PrintDebugName(env->control);
+      }
+      PrintF("\n");
     }
-    TRACE("\n");
 #endif
     ssa_env_ = env;
     if (builder_) {
@@ -1417,7 +1293,7 @@
         builder_->AppendToMerge(merge, from->control);
         // Merge effects.
         if (builder_->IsPhiWithMerge(to->effect, merge)) {
-          builder_->AppendToPhi(merge, to->effect, from->effect);
+          builder_->AppendToPhi(to->effect, from->effect);
         } else if (to->effect != from->effect) {
           uint32_t count = builder_->InputCount(merge);
           TFNode** effects = builder_->Buffer(count);
@@ -1432,7 +1308,7 @@
           TFNode* tnode = to->locals[i];
           TFNode* fnode = from->locals[i];
           if (builder_->IsPhiWithMerge(tnode, merge)) {
-            builder_->AppendToPhi(merge, tnode, fnode);
+            builder_->AppendToPhi(tnode, fnode);
           } else if (tnode != fnode) {
             uint32_t count = builder_->InputCount(merge);
             TFNode** vals = builder_->Buffer(count);
@@ -1455,7 +1331,7 @@
   TFNode* CreateOrMergeIntoPhi(LocalType type, TFNode* merge, TFNode* tnode,
                                TFNode* fnode) {
     if (builder_->IsPhiWithMerge(tnode, merge)) {
-      builder_->AppendToPhi(merge, tnode, fnode);
+      builder_->AppendToPhi(tnode, fnode);
     } else if (tnode != fnode) {
       uint32_t count = builder_->InputCount(merge);
       TFNode** vals = builder_->Buffer(count);
@@ -1501,8 +1377,6 @@
     size_t size = sizeof(TFNode*) * EnvironmentCount();
     result->control = from->control;
     result->effect = from->effect;
-    result->state = from->state == SsaEnv::kUnreachable ? SsaEnv::kUnreachable
-                                                        : SsaEnv::kReached;
 
     if (from->go()) {
       result->state = SsaEnv::kReached;
@@ -1549,89 +1423,54 @@
   virtual void onFirstError() {
     limit_ = start_;     // Terminate decoding loop.
     builder_ = nullptr;  // Don't build any more nodes.
-#if DEBUG
-    PrintStackForDebugging();
-#endif
+    TRACE(" !%s\n", error_msg_.get());
   }
-
-#if DEBUG
-  void PrintStackForDebugging() { PrintProduction(0); }
-
-  void PrintProduction(size_t depth) {
-    if (depth >= stack_.size()) return;
-    Production* p = &stack_[depth];
-    for (size_t d = 0; d < depth; d++) PrintF("  ");
-
-    PrintF("@%d %s [%d]\n", static_cast<int>(p->tree->pc - start_),
-           WasmOpcodes::OpcodeName(p->opcode()), p->tree->count);
-    for (int i = 0; i < p->index; i++) {
-      Tree* child = p->tree->children[i];
-      for (size_t d = 0; d <= depth; d++) PrintF("  ");
-      PrintF("@%d %s [%d]", static_cast<int>(child->pc - start_),
-             WasmOpcodes::OpcodeName(child->opcode()), child->count);
-      if (child->node) {
-        PrintF(" => TF");
-        compiler::WasmGraphBuilder::PrintDebugName(child->node);
-      }
-      PrintF("\n");
-    }
-    PrintProduction(depth + 1);
-  }
-#endif
-
   BitVector* AnalyzeLoopAssignment(const byte* pc) {
     if (pc >= limit_) return nullptr;
     if (*pc != kExprLoop) return nullptr;
 
     BitVector* assigned =
-        new (zone_) BitVector(static_cast<int>(total_locals_), zone_);
-    // Keep a stack to model the nesting of expressions.
-    std::vector<int> arity_stack;
-    arity_stack.push_back(OpcodeArity(pc));
-    pc += OpcodeLength(pc);
-
+        new (zone_) BitVector(static_cast<int>(local_type_vec_.size()), zone_);
+    int depth = 0;  // count of block/loop/if constructs currently open.
     // Iteratively process all AST nodes nested inside the loop.
     while (pc < limit_) {
       WasmOpcode opcode = static_cast<WasmOpcode>(*pc);
-      int arity = 0;
       int length = 1;
-      int assigned_index = -1;
-      if (opcode == kExprSetLocal) {
-        LocalIndexOperand operand(this, pc);
-        if (assigned->length() > 0 &&
-            static_cast<int>(operand.index) < assigned->length()) {
-          // Unverified code might have an out-of-bounds index.
-          // Ignore out-of-bounds indices, as the main verification will fail.
-          assigned->Add(operand.index);
-          assigned_index = operand.index;
+      switch (opcode) {
+        case kExprLoop:
+        case kExprIf:
+        case kExprBlock:
+          depth++;  // undone by a later kExprEnd.
+          DCHECK_EQ(1, OpcodeLength(pc));
+          break;
+        case kExprSetLocal: {
+          LocalIndexOperand operand(this, pc);
+          if (assigned->length() > 0 &&
+              static_cast<int>(operand.index) < assigned->length()) {
+            // Unverified code might have an out-of-bounds index.
+            assigned->Add(operand.index);
+          }
+          length = 1 + operand.length;
+          break;
         }
-        arity = 1;
-        length = 1 + operand.length;
-      } else {
-        arity = OpcodeArity(pc);
-        length = OpcodeLength(pc);
+        case kExprEnd:
+          depth--;
+          break;
+        default:
+          length = OpcodeLength(pc);
+          break;
       }
-
-      TRACE("loop-assign module+%-6d %s func+%d: 0x%02x %s", baserel(pc),
-            indentation(), startrel(pc), opcode,
-            WasmOpcodes::OpcodeName(opcode));
-
-      if (assigned_index >= 0) {
-        TRACE(" (assigned local #%d)\n", assigned_index);
-      } else {
-        TRACE("\n");
-      }
-
+      if (depth <= 0) break;  // the loop opened at the start pc is closed.
       pc += length;
-      arity_stack.push_back(arity);
-      while (arity_stack.back() == 0) {
-        arity_stack.pop_back();
-        if (arity_stack.empty()) return assigned;  // reached end of loop
-        arity_stack.back()--;
-      }
     }
     return assigned;
   }
+
+  inline wasm::WasmCodePosition position() {
+    int offset = static_cast<int>(pc_ - start_);  // byte offset of pc_.
+    DCHECK_EQ(pc_ - start_, offset);  // overflows cannot happen
+    return offset;
+  }
 };
 
 bool DecodeLocalDecls(AstLocalDecls& decls, const byte* start,
@@ -1647,19 +1486,18 @@
                           FunctionBody& body) {
   Zone zone(allocator);
   SR_WasmDecoder decoder(&zone, nullptr, body);
-  TreeResult result = decoder.Decode();
-  return result;
+  decoder.Decode();
+  return decoder.toResult<Tree*>(nullptr);
 }
 
 TreeResult BuildTFGraph(base::AccountingAllocator* allocator,
                         TFBuilder* builder, FunctionBody& body) {
   Zone zone(allocator);
   SR_WasmDecoder decoder(&zone, builder, body);
-  TreeResult result = decoder.Decode();
-  return result;
+  decoder.Decode();  // its return value, if any, is not used here.
+  return decoder.toResult<Tree*>(nullptr);  // result value is a null Tree*.
 }
 
-
 std::ostream& operator<<(std::ostream& os, const Tree& tree) {
   if (tree.pc == nullptr) {
     os << "null";
@@ -1675,28 +1513,22 @@
   return os;
 }
 
-
-ReadUnsignedLEB128ErrorCode ReadUnsignedLEB128Operand(const byte* pc,
-                                                      const byte* limit,
-                                                      int* length,
-                                                      uint32_t* result) {
-  Decoder decoder(pc, limit);
-  *result = decoder.checked_read_u32v(pc, 0, length);
-  if (decoder.ok()) return kNoError;
-  return (limit - pc) > 1 ? kInvalidLEB128 : kMissingLEB128;
-}
-
 int OpcodeLength(const byte* pc, const byte* end) {
   WasmDecoder decoder(nullptr, nullptr, pc, end);
   return decoder.OpcodeLength(pc);
 }
 
-int OpcodeArity(ModuleEnv* module, FunctionSig* sig, const byte* pc,
-                const byte* end) {
-  WasmDecoder decoder(module, sig, pc, end);
+int OpcodeArity(const byte* pc, const byte* end) {
+  WasmDecoder decoder(nullptr, nullptr, pc, end);
   return decoder.OpcodeArity(pc);
 }
 
+void PrintAstForDebugging(const byte* start, const byte* end) {
+  FunctionBody body = {nullptr, nullptr, start, start, end};
+  base::AccountingAllocator allocator;
+  PrintAst(&allocator, body);
+}
+
 void PrintAst(base::AccountingAllocator* allocator, FunctionBody& body) {
   Zone zone(allocator);
   SR_WasmDecoder decoder(&zone, nullptr, body);
@@ -1713,7 +1545,7 @@
   decoder.DecodeLocalDecls(decls);
   const byte* pc = decoder.pc();
   if (body.start != decoder.pc()) {
-    printf("// locals:");
+    os << "// locals: ";
     for (auto p : decls.local_types) {
       LocalType type = p.first;
       uint32_t count = p.second;
@@ -1724,64 +1556,90 @@
     for (const byte* locals = body.start; locals < pc; locals++) {
       printf(" 0x%02x,", *locals);
     }
-    printf("\n");
+    os << std::endl;
   }
 
-  printf("// body: \n");
-  std::vector<int> arity_stack;
+  os << "// body: \n";
+  int control_depth = 0;
   while (pc < body.end) {
-    int arity = decoder.OpcodeArity(pc);
     size_t length = decoder.OpcodeLength(pc);
 
-    for (auto arity : arity_stack) {
-      printf("  ");
-      USE(arity);
-    }
-
     WasmOpcode opcode = static_cast<WasmOpcode>(*pc);
+    if (opcode == kExprElse) control_depth--;
+
+    for (int i = 0; i < control_depth && i < 32; i++) printf("  ");
     printf("k%s,", WasmOpcodes::OpcodeName(opcode));
 
     for (size_t i = 1; i < length; i++) {
       printf(" 0x%02x,", pc[i]);
     }
 
-    if (body.module) {
-      switch (opcode) {
-        case kExprCallIndirect: {
-          SignatureIndexOperand operand(&decoder, pc);
-          if (decoder.Validate(pc, operand)) {
-            os << " // sig #" << operand.index << ": " << *operand.sig;
-          }
-          break;
-        }
-        case kExprCallImport: {
-          ImportIndexOperand operand(&decoder, pc);
-          if (decoder.Validate(pc, operand)) {
-            os << " // import #" << operand.index << ": " << *operand.sig;
-          }
-          break;
-        }
-        case kExprCallFunction: {
-          FunctionIndexOperand operand(&decoder, pc);
-          if (decoder.Validate(pc, operand)) {
-            os << " // function #" << operand.index << ": " << *operand.sig;
-          }
-          break;
-        }
-        default:
-          break;
+    switch (opcode) {
+      case kExprIf:
+      case kExprElse:
+      case kExprLoop:
+      case kExprBlock:
+        os << "   // @" << static_cast<int>(pc - body.start);
+        control_depth++;
+        break;
+      case kExprEnd:
+        os << "   // @" << static_cast<int>(pc - body.start);
+        control_depth--;
+        break;
+      case kExprBr: {
+        BreakDepthOperand operand(&decoder, pc);
+        os << "   // arity=" << operand.arity << " depth=" << operand.depth;
+        break;
       }
-    }
+      case kExprBrIf: {
+        BreakDepthOperand operand(&decoder, pc);
+        os << "   // arity=" << operand.arity << " depth=" << operand.depth;
+        break;
+      }
+      case kExprBrTable: {
+        BranchTableOperand operand(&decoder, pc);
+        os << "   // arity=" << operand.arity
+           << " entries=" << operand.table_count;
+        break;
+      }
+      case kExprCallIndirect: {
+        CallIndirectOperand operand(&decoder, pc);
+        if (decoder.Validate(pc, operand)) {
+          os << "   // sig #" << operand.index << ": " << *operand.sig;
+        } else {
+          os << " // arity=" << operand.arity << " sig #" << operand.index;
+        }
+        break;
+      }
+      case kExprCallImport: {
+        CallImportOperand operand(&decoder, pc);
+        if (decoder.Validate(pc, operand)) {
+          os << "   // import #" << operand.index << ": " << *operand.sig;
+        } else {
+          os << " // arity=" << operand.arity << " import #" << operand.index;
+        }
+        break;
+      }
+      case kExprCallFunction: {
+        CallFunctionOperand operand(&decoder, pc);
+        if (decoder.Validate(pc, operand)) {
+          os << "   // function #" << operand.index << ": " << *operand.sig;
+        } else {
+          os << " // arity=" << operand.arity << " function #" << operand.index;
+        }
+        break;
+      }
+      case kExprReturn: {
+        ReturnArityOperand operand(&decoder, pc);
+        os << "   // arity=" << operand.arity;
+        break;
+      }
+      default:
+        break;
+      }
 
     pc += length;
-    printf("\n");
-
-    arity_stack.push_back(arity);
-    while (arity_stack.back() == 0) {
-      arity_stack.pop_back();
-      if (arity_stack.empty()) break;
-      arity_stack.back()--;
-    }
+    os << std::endl;
   }
 }
 
diff --git a/src/wasm/ast-decoder.h b/src/wasm/ast-decoder.h
index 5376e7b..9e96053 100644
--- a/src/wasm/ast-decoder.h
+++ b/src/wasm/ast-decoder.h
@@ -89,66 +89,81 @@
   }
 };
 
-struct Block;
+struct Control;
 struct BreakDepthOperand {
+  uint32_t arity;
   uint32_t depth;
-  Block* target;
+  Control* target;
   int length;
   inline BreakDepthOperand(Decoder* decoder, const byte* pc) {
-    depth = decoder->checked_read_u32v(pc, 1, &length, "break depth");
+    int len1 = 0;
+    int len2 = 0;
+    arity = decoder->checked_read_u32v(pc, 1, &len1, "argument count");
+    depth = decoder->checked_read_u32v(pc, 1 + len1, &len2, "break depth");
+    length = len1 + len2;
     target = nullptr;
   }
 };
 
-struct BlockCountOperand {
-  uint32_t count;
-  int length;
-  inline BlockCountOperand(Decoder* decoder, const byte* pc) {
-    count = decoder->checked_read_u32v(pc, 1, &length, "block count");
-  }
-};
-
-struct SignatureIndexOperand {
+struct CallIndirectOperand {
+  uint32_t arity;
   uint32_t index;
   FunctionSig* sig;
   int length;
-  inline SignatureIndexOperand(Decoder* decoder, const byte* pc) {
-    index = decoder->checked_read_u32v(pc, 1, &length, "signature index");
+  inline CallIndirectOperand(Decoder* decoder, const byte* pc) {
+    int len1 = 0;
+    int len2 = 0;
+    arity = decoder->checked_read_u32v(pc, 1, &len1, "argument count");
+    index = decoder->checked_read_u32v(pc, 1 + len1, &len2, "signature index");
+    length = len1 + len2;
     sig = nullptr;
   }
 };
 
-struct FunctionIndexOperand {
+struct CallFunctionOperand {
+  uint32_t arity;
   uint32_t index;
   FunctionSig* sig;
   int length;
-  inline FunctionIndexOperand(Decoder* decoder, const byte* pc) {
-    index = decoder->checked_read_u32v(pc, 1, &length, "function index");
+  inline CallFunctionOperand(Decoder* decoder, const byte* pc) {
+    int len1 = 0;
+    int len2 = 0;
+    arity = decoder->checked_read_u32v(pc, 1, &len1, "argument count");
+    index = decoder->checked_read_u32v(pc, 1 + len1, &len2, "function index");
+    length = len1 + len2;
     sig = nullptr;
   }
 };
 
-struct ImportIndexOperand {
+struct CallImportOperand {
+  uint32_t arity;
   uint32_t index;
   FunctionSig* sig;
   int length;
-  inline ImportIndexOperand(Decoder* decoder, const byte* pc) {
-    index = decoder->checked_read_u32v(pc, 1, &length, "import index");
+  inline CallImportOperand(Decoder* decoder, const byte* pc) {
+    int len1 = 0;
+    int len2 = 0;
+    arity = decoder->checked_read_u32v(pc, 1, &len1, "argument count");
+    index = decoder->checked_read_u32v(pc, 1 + len1, &len2, "import index");
+    length = len1 + len2;
     sig = nullptr;
   }
 };
 
 struct BranchTableOperand {
+  uint32_t arity;
   uint32_t table_count;
   const byte* table;
   int length;
   inline BranchTableOperand(Decoder* decoder, const byte* pc) {
-    int varint_length;
+    int len1 = 0;
+    int len2 = 0;
+    arity = decoder->checked_read_u32v(pc, 1, &len1, "argument count");
     table_count =
-        decoder->checked_read_u32v(pc, 1, &varint_length, "expected #entries");
-    length = varint_length + (table_count + 1) * sizeof(uint32_t);
+        decoder->checked_read_u32v(pc, 1 + len1, &len2, "table count");
+    length = len1 + len2 + (table_count + 1) * sizeof(uint32_t);
 
-    uint32_t table_start = 1 + varint_length;
+    uint32_t table_start = 1 + len1 + len2;
     if (decoder->check(pc, table_start, (table_count + 1) * sizeof(uint32_t),
                        "expected <table entries>")) {
       table = pc + table_start;
@@ -177,6 +192,15 @@
   }
 };
 
+struct ReturnArityOperand {
+  uint32_t arity;
+  int length;
+
+  inline ReturnArityOperand(Decoder* decoder, const byte* pc) {
+    arity = decoder->checked_read_u32v(pc, 1, &length, "return count");
+  }
+};
+
 typedef compiler::WasmGraphBuilder TFBuilder;
 struct ModuleEnv;  // forward declaration of module interface.
 
@@ -200,6 +224,9 @@
                         TFBuilder* builder, FunctionBody& body);
 void PrintAst(base::AccountingAllocator* allocator, FunctionBody& body);
 
+// A simplified form of AST printing, e.g. from a debugger.
+void PrintAstForDebugging(const byte* start, const byte* end);
+
 inline TreeResult VerifyWasmCode(base::AccountingAllocator* allocator,
                                  ModuleEnv* module, FunctionSig* sig,
                                  const byte* start, const byte* end) {
@@ -215,11 +242,6 @@
   return BuildTFGraph(allocator, builder, body);
 }
 
-enum ReadUnsignedLEB128ErrorCode { kNoError, kInvalidLEB128, kMissingLEB128 };
-
-ReadUnsignedLEB128ErrorCode ReadUnsignedLEB128Operand(const byte*, const byte*,
-                                                      int*, uint32_t*);
-
 struct AstLocalDecls {
   // The size of the encoded declarations.
   uint32_t decls_encoded_size;  // size of encoded declarations
@@ -243,8 +265,8 @@
 int OpcodeLength(const byte* pc, const byte* end);
 
 // Computes the arity (number of sub-nodes) of the opcode at the given address.
-int OpcodeArity(ModuleEnv* module, FunctionSig* sig, const byte* pc,
-                const byte* end);
+int OpcodeArity(const byte* pc, const byte* end);
+
 }  // namespace wasm
 }  // namespace internal
 }  // namespace v8
diff --git a/src/wasm/decoder.h b/src/wasm/decoder.h
index f9de2e1..685f5d0 100644
--- a/src/wasm/decoder.h
+++ b/src/wasm/decoder.h
@@ -5,9 +5,11 @@
 #ifndef V8_WASM_DECODER_H_
 #define V8_WASM_DECODER_H_
 
+#include "src/base/compiler-specific.h"
 #include "src/base/smart-pointers.h"
 #include "src/flags.h"
 #include "src/signature.h"
+#include "src/utils.h"
 #include "src/wasm/wasm-result.h"
 #include "src/zone-containers.h"
 
@@ -47,7 +49,7 @@
   inline bool check(const byte* base, int offset, int length, const char* msg) {
     DCHECK_GE(base, start_);
     if ((base + offset + length) > limit_) {
-      error(base, base + offset, msg);
+      error(base, base + offset, "%s", msg);
       return false;
     }
     return true;
@@ -258,12 +260,13 @@
     }
   }
 
-  void error(const char* msg) { error(pc_, nullptr, msg); }
+  void error(const char* msg) { error(pc_, nullptr, "%s", msg); }
 
-  void error(const byte* pc, const char* msg) { error(pc, nullptr, msg); }
+  void error(const byte* pc, const char* msg) { error(pc, nullptr, "%s", msg); }
 
   // Sets internal error state.
-  void error(const byte* pc, const byte* pt, const char* format, ...) {
+  void PRINTF_FORMAT(4, 5)
+      error(const byte* pc, const byte* pt, const char* format, ...) {
     if (ok()) {
 #if DEBUG
       if (FLAG_wasm_break_on_decoder_error) {
@@ -392,7 +395,7 @@
         return 0;
       }
       if ((b & 0x80) != 0) {
-        error(base, ptr, msg);
+        error(base, ptr, "%s", msg);
         return 0;
       }
     }
diff --git a/src/wasm/encoder.cc b/src/wasm/encoder.cc
index 92e6b11..39a2f5a 100644
--- a/src/wasm/encoder.cc
+++ b/src/wasm/encoder.cc
@@ -10,6 +10,7 @@
 
 #include "src/wasm/ast-decoder.h"
 #include "src/wasm/encoder.h"
+#include "src/wasm/leb-helper.h"
 #include "src/wasm/wasm-macro-gen.h"
 #include "src/wasm/wasm-module.h"
 #include "src/wasm/wasm-opcodes.h"
@@ -38,55 +39,34 @@
   *b += 1;
 }
 
-
 void EmitUint16(byte** b, uint16_t x) {
   WriteUnalignedUInt16(*b, x);
   *b += 2;
 }
 
-
 void EmitUint32(byte** b, uint32_t x) {
   WriteUnalignedUInt32(*b, x);
   *b += 4;
 }
 
+void EmitVarInt(byte** b, size_t val) {
+  LEBHelper::write_u32v(b, static_cast<uint32_t>(val));
+}
+
 // Sections all start with a size, but it's unknown at the start.
 // We generate a large varint which we then fixup later when the size is known.
 //
 // TODO(jfb) Not strictly necessary since sizes are calculated ahead of time.
-const size_t padded_varint = 5;
-
-void EmitVarInt(byte** b, size_t val) {
-  while (true) {
-    size_t next = val >> 7;
-    byte out = static_cast<byte>(val & 0x7f);
-    if (next) {
-      *((*b)++) = 0x80 | out;
-      val = next;
-    } else {
-      *((*b)++) = out;
-      break;
-    }
-  }
-}
-
-size_t SizeOfVarInt(size_t value) {
-  size_t size = 0;
-  do {
-    size++;
-    value = value >> 7;
-  } while (value > 0);
-  return size;
-}
+const size_t kPaddedVarintSize = 5;
 
 void FixupSection(byte* start, byte* end) {
-  // Same as EmitVarInt, but fixed-width with zeroes in the MSBs.
-  size_t val = end - start - padded_varint;
+  // Same as LEBHelper::write_u32v, but fixed-width with zeroes in the MSBs.
+  size_t val = end - start - kPaddedVarintSize;
   TRACE("  fixup %u\n", (unsigned)val);
-  for (size_t pos = 0; pos != padded_varint; ++pos) {
+  for (size_t pos = 0; pos != kPaddedVarintSize; ++pos) {
     size_t next = val >> 7;
     byte out = static_cast<byte>(val & 0x7f);
-    if (pos != padded_varint - 1) {
+    if (pos != kPaddedVarintSize - 1) {
       *(start++) = 0x80 | out;
       val = next;
     } else {
@@ -98,77 +78,63 @@
 
 // Returns the start of the section, where the section VarInt size is.
 byte* EmitSection(WasmSection::Code code, byte** b) {
-  byte* start = *b;
+  // Emit the section name.
   const char* name = WasmSection::getName(code);
-  size_t length = WasmSection::getNameLength(code);
   TRACE("emit section: %s\n", name);
-  for (size_t padding = 0; padding != padded_varint; ++padding) {
-    EmitUint8(b, 0xff);  // Will get fixed up later.
-  }
+  size_t length = WasmSection::getNameLength(code);
   EmitVarInt(b, length);  // Section name string size.
   for (size_t i = 0; i != length; ++i) EmitUint8(b, name[i]);
+
+  // Emit a placeholder for the length.
+  byte* start = *b;
+  for (size_t padding = 0; padding != kPaddedVarintSize; ++padding) {
+    EmitUint8(b, 0xff);  // Will get fixed up later.
+  }
+
   return start;
 }
 }  // namespace
 
-struct WasmFunctionBuilder::Type {
-  bool param_;
-  LocalType type_;
-};
-
-
 WasmFunctionBuilder::WasmFunctionBuilder(Zone* zone)
-    : return_type_(kAstI32),
-      locals_(zone),
-      exported_(0),
-      external_(0),
-      body_(zone),
-      local_indices_(zone),
-      name_(zone) {}
+    : locals_(zone), exported_(0), body_(zone), name_(zone) {}
 
-
-uint16_t WasmFunctionBuilder::AddParam(LocalType type) {
-  return AddVar(type, true);
+void WasmFunctionBuilder::EmitVarInt(uint32_t val) {
+  byte buffer[8];
+  byte* ptr = buffer;
+  LEBHelper::write_u32v(&ptr, val);
+  for (byte* p = buffer; p < ptr; p++) {
+    body_.push_back(*p);
+  }
 }
 
-
-uint16_t WasmFunctionBuilder::AddLocal(LocalType type) {
-  return AddVar(type, false);
+void WasmFunctionBuilder::SetSignature(FunctionSig* sig) {
+  DCHECK(!locals_.has_sig());
+  locals_.set_sig(sig);
 }
 
-
-uint16_t WasmFunctionBuilder::AddVar(LocalType type, bool param) {
-  locals_.push_back({param, type});
-  return static_cast<uint16_t>(locals_.size() - 1);
+uint32_t WasmFunctionBuilder::AddLocal(LocalType type) {
+  DCHECK(locals_.has_sig());
+  return locals_.AddLocals(1, type);
 }
 
+void WasmFunctionBuilder::EmitGetLocal(uint32_t local_index) {
+  EmitWithVarInt(kExprGetLocal, local_index);
+}
 
-void WasmFunctionBuilder::ReturnType(LocalType type) { return_type_ = type; }
-
+void WasmFunctionBuilder::EmitSetLocal(uint32_t local_index) {
+  EmitWithVarInt(kExprSetLocal, local_index);
+}
 
 void WasmFunctionBuilder::EmitCode(const byte* code, uint32_t code_size) {
-  EmitCode(code, code_size, nullptr, 0);
-}
-
-
-void WasmFunctionBuilder::EmitCode(const byte* code, uint32_t code_size,
-                                   const uint32_t* local_indices,
-                                   uint32_t indices_size) {
-  size_t size = body_.size();
   for (size_t i = 0; i < code_size; i++) {
     body_.push_back(code[i]);
   }
-  for (size_t i = 0; i < indices_size; i++) {
-    local_indices_.push_back(local_indices[i] + static_cast<uint32_t>(size));
-  }
 }
 
-
 void WasmFunctionBuilder::Emit(WasmOpcode opcode) {
   body_.push_back(static_cast<byte>(opcode));
 }
 
-
 void WasmFunctionBuilder::EmitWithU8(WasmOpcode opcode, const byte immediate) {
   body_.push_back(static_cast<byte>(opcode));
   body_.push_back(immediate);
@@ -184,47 +150,22 @@
 void WasmFunctionBuilder::EmitWithVarInt(WasmOpcode opcode,
                                          uint32_t immediate) {
   body_.push_back(static_cast<byte>(opcode));
-  size_t immediate_size = SizeOfVarInt(immediate);
-  body_.insert(body_.end(), immediate_size, 0);
-  byte* p = &body_[body_.size() - immediate_size];
-  EmitVarInt(&p, immediate);
+  EmitVarInt(immediate);
 }
 
-uint32_t WasmFunctionBuilder::EmitEditableVarIntImmediate() {
-  // Guess that the immediate will be 1 byte. If it is more, we'll have to
-  // shift everything down.
-  body_.push_back(0);
-  return static_cast<uint32_t>(body_.size()) - 1;
-}
-
-void WasmFunctionBuilder::EditVarIntImmediate(uint32_t offset,
-                                              const uint32_t immediate) {
-  uint32_t immediate_size = static_cast<uint32_t>(SizeOfVarInt(immediate));
-  // In EmitEditableVarIntImmediate, we guessed that we'd only need one byte.
-  // If we need more, shift everything down to make room for the larger
-  // immediate.
-  if (immediate_size > 1) {
-    uint32_t diff = immediate_size - 1;
-    body_.insert(body_.begin() + offset, diff, 0);
-
-    for (size_t i = 0; i < local_indices_.size(); ++i) {
-      if (local_indices_[i] >= offset) {
-        local_indices_[i] += diff;
-      }
-    }
+void WasmFunctionBuilder::EmitI32Const(int32_t value) {
+  // TODO(titzer): variable-length signed and unsigned i32 constants.
+  if (-128 <= value && value <= 127) {
+    EmitWithU8(kExprI8Const, static_cast<byte>(value));
+  } else {
+    byte code[] = {WASM_I32V_5(value)};
+    EmitCode(code, sizeof(code));
   }
-  DCHECK(offset + immediate_size <= body_.size());
-  byte* p = &body_[offset];
-  EmitVarInt(&p, immediate);
 }
 
-
 void WasmFunctionBuilder::Exported(uint8_t flag) { exported_ = flag; }
 
-
-void WasmFunctionBuilder::External(uint8_t flag) { external_ = flag; }
-
-void WasmFunctionBuilder::SetName(const unsigned char* name, int name_length) {
+void WasmFunctionBuilder::SetName(const char* name, int name_length) {
   name_.clear();
   if (name_length > 0) {
     for (int i = 0; i < name_length; i++) {
@@ -233,139 +174,43 @@
   }
 }
 
-
 WasmFunctionEncoder* WasmFunctionBuilder::Build(Zone* zone,
                                                 WasmModuleBuilder* mb) const {
   WasmFunctionEncoder* e =
-      new (zone) WasmFunctionEncoder(zone, return_type_, exported_, external_);
-  uint16_t* var_index = zone->NewArray<uint16_t>(locals_.size());
-  IndexVars(e, var_index);
-  if (body_.size() > 0) {
-    // TODO(titzer): iterate over local indexes, not the bytes.
-    const byte* start = &body_[0];
-    const byte* end = start + body_.size();
-    size_t local_index = 0;
-    for (size_t i = 0; i < body_.size();) {
-      if (local_index < local_indices_.size() &&
-          i == local_indices_[local_index]) {
-        int length = 0;
-        uint32_t index;
-        ReadUnsignedLEB128Operand(start + i, end, &length, &index);
-        uint16_t new_index = var_index[index];
-        const std::vector<uint8_t>& index_vec = UnsignedLEB128From(new_index);
-        for (size_t j = 0; j < index_vec.size(); j++) {
-          e->body_.push_back(index_vec.at(j));
-        }
-        i += length;
-        local_index++;
-      } else {
-        e->body_.push_back(*(start + i));
-        i++;
-      }
-    }
-  }
-  FunctionSig::Builder sig(zone, return_type_ == kAstStmt ? 0 : 1,
-                           e->params_.size());
-  if (return_type_ != kAstStmt) {
-    sig.AddReturn(static_cast<LocalType>(return_type_));
-  }
-  for (size_t i = 0; i < e->params_.size(); i++) {
-    sig.AddParam(static_cast<LocalType>(e->params_[i]));
-  }
-  e->signature_index_ = mb->AddSignature(sig.Build());
+      new (zone) WasmFunctionEncoder(zone, locals_, exported_);
+  // TODO(titzer): lame memcpy here.
+  e->body_.insert(e->body_.begin(), body_.begin(), body_.end());
+  e->signature_index_ = mb->AddSignature(locals_.get_sig());
   e->name_.insert(e->name_.begin(), name_.begin(), name_.end());
   return e;
 }
 
-
-void WasmFunctionBuilder::IndexVars(WasmFunctionEncoder* e,
-                                    uint16_t* var_index) const {
-  uint16_t param = 0;
-  uint16_t i32 = 0;
-  uint16_t i64 = 0;
-  uint16_t f32 = 0;
-  uint16_t f64 = 0;
-  for (size_t i = 0; i < locals_.size(); i++) {
-    if (locals_.at(i).param_) {
-      param++;
-    } else if (locals_.at(i).type_ == kAstI32) {
-      i32++;
-    } else if (locals_.at(i).type_ == kAstI64) {
-      i64++;
-    } else if (locals_.at(i).type_ == kAstF32) {
-      f32++;
-    } else if (locals_.at(i).type_ == kAstF64) {
-      f64++;
-    }
-  }
-  e->local_i32_count_ = i32;
-  e->local_i64_count_ = i64;
-  e->local_f32_count_ = f32;
-  e->local_f64_count_ = f64;
-  f64 = param + i32 + i64 + f32;
-  f32 = param + i32 + i64;
-  i64 = param + i32;
-  i32 = param;
-  param = 0;
-  for (size_t i = 0; i < locals_.size(); i++) {
-    if (locals_.at(i).param_) {
-      e->params_.push_back(locals_.at(i).type_);
-      var_index[i] = param++;
-    } else if (locals_.at(i).type_ == kAstI32) {
-      var_index[i] = i32++;
-    } else if (locals_.at(i).type_ == kAstI64) {
-      var_index[i] = i64++;
-    } else if (locals_.at(i).type_ == kAstF32) {
-      var_index[i] = f32++;
-    } else if (locals_.at(i).type_ == kAstF64) {
-      var_index[i] = f64++;
-    }
-  }
-}
-
-
-WasmFunctionEncoder::WasmFunctionEncoder(Zone* zone, LocalType return_type,
-                                         bool exported, bool external)
-    : params_(zone),
-      exported_(exported),
-      external_(external),
-      body_(zone),
-      name_(zone) {}
-
+WasmFunctionEncoder::WasmFunctionEncoder(Zone* zone, LocalDeclEncoder locals,
+                                         bool exported)
+    : locals_(locals), exported_(exported), body_(zone), name_(zone) {}
 
 uint32_t WasmFunctionEncoder::HeaderSize() const {
   uint32_t size = 3;
-  if (!external_) size += 2;
+  size += 2;
   if (HasName()) {
     uint32_t name_size = NameSize();
-    size += static_cast<uint32_t>(SizeOfVarInt(name_size)) + name_size;
+    size +=
+        static_cast<uint32_t>(LEBHelper::sizeof_u32v(name_size)) + name_size;
   }
   return size;
 }
 
-
 uint32_t WasmFunctionEncoder::BodySize(void) const {
-  // TODO(titzer): embed a LocalDeclEncoder in the WasmFunctionEncoder
-  LocalDeclEncoder local_decl;
-  local_decl.AddLocals(local_i32_count_, kAstI32);
-  local_decl.AddLocals(local_i64_count_, kAstI64);
-  local_decl.AddLocals(local_f32_count_, kAstF32);
-  local_decl.AddLocals(local_f64_count_, kAstF64);
-
-  return external_ ? 0
-                   : static_cast<uint32_t>(body_.size() + local_decl.Size());
+  return static_cast<uint32_t>(body_.size() + locals_.Size());
 }
 
-
 uint32_t WasmFunctionEncoder::NameSize() const {
   return HasName() ? static_cast<uint32_t>(name_.size()) : 0;
 }
 
-
 void WasmFunctionEncoder::Serialize(byte* buffer, byte** header,
                                     byte** body) const {
   uint8_t decl_bits = (exported_ ? kDeclFunctionExport : 0) |
-                      (external_ ? kDeclFunctionImport : 0) |
                       (HasName() ? kDeclFunctionName : 0);
 
   EmitUint8(header, decl_bits);
@@ -378,25 +223,14 @@
     }
   }
 
-
-  if (!external_) {
-    // TODO(titzer): embed a LocalDeclEncoder in the WasmFunctionEncoder
-    LocalDeclEncoder local_decl;
-    local_decl.AddLocals(local_i32_count_, kAstI32);
-    local_decl.AddLocals(local_i64_count_, kAstI64);
-    local_decl.AddLocals(local_f32_count_, kAstF32);
-    local_decl.AddLocals(local_f64_count_, kAstF64);
-
-    EmitUint16(header, static_cast<uint16_t>(body_.size() + local_decl.Size()));
-    (*header) += local_decl.Emit(*header);
-    if (body_.size() > 0) {
-      std::memcpy(*header, &body_[0], body_.size());
-      (*header) += body_.size();
-    }
+  EmitUint16(header, static_cast<uint16_t>(body_.size() + locals_.Size()));
+  (*header) += locals_.Emit(*header);
+  if (body_.size() > 0) {
+    std::memcpy(*header, &body_[0], body_.size());
+    (*header) += body_.size();
   }
 }
 
-
 WasmDataSegmentEncoder::WasmDataSegmentEncoder(Zone* zone, const byte* data,
                                                uint32_t size, uint32_t dest)
     : data_(zone), dest_(dest) {
@@ -405,18 +239,15 @@
   }
 }
 
-
 uint32_t WasmDataSegmentEncoder::HeaderSize() const {
   static const int kDataSegmentSize = 13;
   return kDataSegmentSize;
 }
 
-
 uint32_t WasmDataSegmentEncoder::BodySize() const {
   return static_cast<uint32_t>(data_.size());
 }
 
-
 void WasmDataSegmentEncoder::Serialize(byte* buffer, byte** header,
                                        byte** body) const {
   EmitVarInt(header, dest_);
@@ -429,6 +260,7 @@
 WasmModuleBuilder::WasmModuleBuilder(Zone* zone)
     : zone_(zone),
       signatures_(zone),
+      imports_(zone),
       functions_(zone),
       data_segments_(zone),
       indirect_functions_(zone),
@@ -436,12 +268,11 @@
       signature_map_(zone),
       start_function_index_(-1) {}
 
-uint16_t WasmModuleBuilder::AddFunction() {
+uint32_t WasmModuleBuilder::AddFunction() {
   functions_.push_back(new (zone_) WasmFunctionBuilder(zone_));
-  return static_cast<uint16_t>(functions_.size() - 1);
+  return static_cast<uint32_t>(functions_.size() - 1);
 }
 
-
 WasmFunctionBuilder* WasmModuleBuilder::FunctionAt(size_t index) {
   if (functions_.size() > index) {
     return functions_.at(index);
@@ -450,12 +281,10 @@
   }
 }
 
-
 void WasmModuleBuilder::AddDataSegment(WasmDataSegmentEncoder* data) {
   data_segments_.push_back(data);
 }
 
-
 bool WasmModuleBuilder::CompareFunctionSigs::operator()(FunctionSig* a,
                                                         FunctionSig* b) const {
   if (a->return_count() < b->return_count()) return true;
@@ -473,30 +302,37 @@
   return false;
 }
 
-
-uint16_t WasmModuleBuilder::AddSignature(FunctionSig* sig) {
+uint32_t WasmModuleBuilder::AddSignature(FunctionSig* sig) {
   SignatureMap::iterator pos = signature_map_.find(sig);
   if (pos != signature_map_.end()) {
     return pos->second;
   } else {
-    uint16_t index = static_cast<uint16_t>(signatures_.size());
+    uint32_t index = static_cast<uint32_t>(signatures_.size());
     signature_map_[sig] = index;
     signatures_.push_back(sig);
     return index;
   }
 }
 
-
-void WasmModuleBuilder::AddIndirectFunction(uint16_t index) {
+void WasmModuleBuilder::AddIndirectFunction(uint32_t index) {
   indirect_functions_.push_back(index);
 }
 
-void WasmModuleBuilder::MarkStartFunction(uint16_t index) {
+uint32_t WasmModuleBuilder::AddImport(const char* name, int name_length,
+                                      FunctionSig* sig) {
+  imports_.push_back({AddSignature(sig), name, name_length});
+  return static_cast<uint32_t>(imports_.size() - 1);
+}
+
+void WasmModuleBuilder::MarkStartFunction(uint32_t index) {
   start_function_index_ = index;
 }
 
 WasmModuleWriter* WasmModuleBuilder::Build(Zone* zone) {
   WasmModuleWriter* writer = new (zone) WasmModuleWriter(zone);
+  for (auto import : imports_) {
+    writer->imports_.push_back(import);
+  }
   for (auto function : functions_) {
     writer->functions_.push_back(function->Build(zone, this));
   }
@@ -516,15 +352,14 @@
   return writer;
 }
 
-
 uint32_t WasmModuleBuilder::AddGlobal(MachineType type, bool exported) {
   globals_.push_back(std::make_pair(type, exported));
   return static_cast<uint32_t>(globals_.size() - 1);
 }
 
-
 WasmModuleWriter::WasmModuleWriter(Zone* zone)
-    : functions_(zone),
+    : imports_(zone),
+      functions_(zone),
       data_segments_(zone),
       signatures_(zone),
       indirect_functions_(zone),
@@ -542,10 +377,11 @@
   }
 
   void AddSection(WasmSection::Code code, size_t other_size) {
-    Add(padded_varint + SizeOfVarInt(WasmSection::getNameLength(code)) +
+    Add(kPaddedVarintSize +
+            LEBHelper::sizeof_u32v(WasmSection::getNameLength(code)) +
             WasmSection::getNameLength(code),
         0);
-    if (other_size) Add(SizeOfVarInt(other_size), 0);
+    if (other_size) Add(LEBHelper::sizeof_u32v(other_size), 0);
   }
 };
 
@@ -554,11 +390,6 @@
 
   sizes.Add(2 * sizeof(uint32_t), 0);  // header
 
-  sizes.AddSection(WasmSection::Code::Memory, 0);
-  sizes.Add(kDeclMemorySize, 0);
-  TRACE("Size after memory: %u, %u\n", (unsigned)sizes.header_size,
-        (unsigned)sizes.body_size);
-
   if (globals_.size() > 0) {
     sizes.AddSection(WasmSection::Code::Globals, globals_.size());
     /* These globals never have names, so are always 3 bytes. */
@@ -570,15 +401,18 @@
   if (signatures_.size() > 0) {
     sizes.AddSection(WasmSection::Code::Signatures, signatures_.size());
     for (auto sig : signatures_) {
-      sizes.Add(
-          1 + SizeOfVarInt(sig->parameter_count()) + sig->parameter_count(), 0);
+      sizes.Add(1 + LEBHelper::sizeof_u32v(sig->parameter_count()) +
+                    sig->parameter_count() +
+                    LEBHelper::sizeof_u32v(sig->return_count()) +
+                    sig->return_count(),
+                0);
     }
     TRACE("Size after signatures: %u, %u\n", (unsigned)sizes.header_size,
           (unsigned)sizes.body_size);
   }
 
   if (functions_.size() > 0) {
-    sizes.AddSection(WasmSection::Code::Functions, functions_.size());
+    sizes.AddSection(WasmSection::Code::OldFunctions, functions_.size());
     for (auto function : functions_) {
       sizes.Add(function->HeaderSize() + function->BodySize(),
                 function->NameSize());
@@ -587,9 +421,36 @@
           (unsigned)sizes.body_size);
   }
 
+  if (imports_.size() > 0) {
+    sizes.AddSection(WasmSection::Code::ImportTable, imports_.size());
+    for (auto import : imports_) {
+      sizes.Add(LEBHelper::sizeof_u32v(import.sig_index), 0);
+      sizes.Add(LEBHelper::sizeof_u32v(import.name_length), 0);
+      sizes.Add(import.name_length, 0);
+      sizes.Add(1, 0);
+    }
+    TRACE("Size after imports: %u, %u\n", (unsigned)sizes.header_size,
+          (unsigned)sizes.body_size);
+  }
+
+  if (indirect_functions_.size() > 0) {
+    sizes.AddSection(WasmSection::Code::FunctionTable,
+                     indirect_functions_.size());
+    for (auto function_index : indirect_functions_) {
+      sizes.Add(LEBHelper::sizeof_u32v(function_index), 0);
+    }
+    TRACE("Size after indirect functions: %u, %u\n",
+          (unsigned)sizes.header_size, (unsigned)sizes.body_size);
+  }
+
+  sizes.AddSection(WasmSection::Code::Memory, 0);
+  sizes.Add(kDeclMemorySize, 0);
+  TRACE("Size after memory: %u, %u\n", (unsigned)sizes.header_size,
+        (unsigned)sizes.body_size);
+
   if (start_function_index_ >= 0) {
     sizes.AddSection(WasmSection::Code::StartFunction, 0);
-    sizes.Add(SizeOfVarInt(start_function_index_), 0);
+    sizes.Add(LEBHelper::sizeof_u32v(start_function_index_), 0);
     TRACE("Size after start: %u, %u\n", (unsigned)sizes.header_size,
           (unsigned)sizes.body_size);
   }
@@ -603,16 +464,6 @@
           (unsigned)sizes.body_size);
   }
 
-  if (indirect_functions_.size() > 0) {
-    sizes.AddSection(WasmSection::Code::FunctionTable,
-                     indirect_functions_.size());
-    for (auto function_index : indirect_functions_) {
-      sizes.Add(SizeOfVarInt(function_index), 0);
-    }
-    TRACE("Size after indirect functions: %u, %u\n",
-          (unsigned)sizes.header_size, (unsigned)sizes.body_size);
-  }
-
   if (sizes.body_size > 0) {
     sizes.AddSection(WasmSection::Code::End, 0);
     TRACE("Size after end: %u, %u\n", (unsigned)sizes.header_size,
@@ -629,16 +480,6 @@
   EmitUint32(&header, kWasmMagic);
   EmitUint32(&header, kWasmVersion);
 
-  // -- emit memory declaration ------------------------------------------------
-  {
-    byte* section = EmitSection(WasmSection::Code::Memory, &header);
-    EmitVarInt(&header, 16);  // min memory size
-    EmitVarInt(&header, 16);  // max memory size
-    EmitUint8(&header, 0);    // memory export
-    static_assert(kDeclMemorySize == 3, "memory size must match emit above");
-    FixupSection(section, header);
-  }
-
   // -- emit globals -----------------------------------------------------------
   if (globals_.size() > 0) {
     byte* section = EmitSection(WasmSection::Code::Globals, &header);
@@ -658,22 +499,36 @@
     EmitVarInt(&header, signatures_.size());
 
     for (FunctionSig* sig : signatures_) {
+      EmitUint8(&header, kWasmFunctionTypeForm);
       EmitVarInt(&header, sig->parameter_count());
-      if (sig->return_count() > 0) {
-        EmitUint8(&header, WasmOpcodes::LocalTypeCodeFor(sig->GetReturn()));
-      } else {
-        EmitUint8(&header, kLocalVoid);
-      }
       for (size_t j = 0; j < sig->parameter_count(); j++) {
         EmitUint8(&header, WasmOpcodes::LocalTypeCodeFor(sig->GetParam(j)));
       }
+      EmitVarInt(&header, sig->return_count());
+      for (size_t j = 0; j < sig->return_count(); j++) {
+        EmitUint8(&header, WasmOpcodes::LocalTypeCodeFor(sig->GetReturn(j)));
+      }
+    }
+    FixupSection(section, header);
+  }
+
+  // -- emit imports -----------------------------------------------------------
+  if (imports_.size() > 0) {
+    byte* section = EmitSection(WasmSection::Code::ImportTable, &header);
+    EmitVarInt(&header, imports_.size());
+    for (auto import : imports_) {
+      EmitVarInt(&header, import.sig_index);
+      EmitVarInt(&header, import.name_length);
+      std::memcpy(header, import.name, import.name_length);
+      header += import.name_length;
+      EmitVarInt(&header, 0);
     }
     FixupSection(section, header);
   }
 
   // -- emit functions ---------------------------------------------------------
   if (functions_.size() > 0) {
-    byte* section = EmitSection(WasmSection::Code::Functions, &header);
+    byte* section = EmitSection(WasmSection::Code::OldFunctions, &header);
     EmitVarInt(&header, functions_.size());
 
     for (auto func : functions_) {
@@ -682,6 +537,27 @@
     FixupSection(section, header);
   }
 
+  // -- emit function table ----------------------------------------------------
+  if (indirect_functions_.size() > 0) {
+    byte* section = EmitSection(WasmSection::Code::FunctionTable, &header);
+    EmitVarInt(&header, indirect_functions_.size());
+
+    for (auto index : indirect_functions_) {
+      EmitVarInt(&header, index);
+    }
+    FixupSection(section, header);
+  }
+
+  // -- emit memory declaration ------------------------------------------------
+  {
+    byte* section = EmitSection(WasmSection::Code::Memory, &header);
+    EmitVarInt(&header, 16);  // min memory size
+    EmitVarInt(&header, 16);  // max memory size
+    EmitUint8(&header, 0);    // memory export
+    static_assert(kDeclMemorySize == 3, "memory size must match emit above");
+    FixupSection(section, header);
+  }
+
   // -- emit start function index ----------------------------------------------
   if (start_function_index_ >= 0) {
     byte* section = EmitSection(WasmSection::Code::StartFunction, &header);
@@ -700,17 +576,6 @@
     FixupSection(section, header);
   }
 
-  // -- emit function table ----------------------------------------------------
-  if (indirect_functions_.size() > 0) {
-    byte* section = EmitSection(WasmSection::Code::FunctionTable, &header);
-    EmitVarInt(&header, indirect_functions_.size());
-
-    for (auto index : indirect_functions_) {
-      EmitVarInt(&header, index);
-    }
-    FixupSection(section, header);
-  }
-
   if (sizes.body_size > 0) {
     byte* section = EmitSection(WasmSection::Code::End, &header);
     FixupSection(section, header);
@@ -718,22 +583,6 @@
 
   return new (zone) WasmModuleIndex(buffer, buffer + sizes.total());
 }
-
-
-std::vector<uint8_t> UnsignedLEB128From(uint32_t result) {
-  std::vector<uint8_t> output;
-  uint8_t next = 0;
-  int shift = 0;
-  do {
-    next = static_cast<uint8_t>(result >> shift);
-    if (((result >> shift) & 0xFFFFFF80) != 0) {
-      next = next | 0x80;
-    }
-    output.push_back(next);
-    shift += 7;
-  } while ((next & 0x80) != 0);
-  return output;
-}
 }  // namespace wasm
 }  // namespace internal
 }  // namespace v8
diff --git a/src/wasm/encoder.h b/src/wasm/encoder.h
index 49a7bf7..0f2118d 100644
--- a/src/wasm/encoder.h
+++ b/src/wasm/encoder.h
@@ -10,6 +10,7 @@
 
 #include "src/base/smart-pointers.h"
 
+#include "src/wasm/wasm-macro-gen.h"
 #include "src/wasm/wasm-module.h"
 #include "src/wasm/wasm-opcodes.h"
 #include "src/wasm/wasm-result.h"
@@ -28,55 +29,42 @@
   void Serialize(byte* buffer, byte** header, byte** body) const;
 
  private:
-  WasmFunctionEncoder(Zone* zone, LocalType return_type, bool exported,
-                      bool external);
+  WasmFunctionEncoder(Zone* zone, LocalDeclEncoder locals, bool exported);
   friend class WasmFunctionBuilder;
-  uint16_t signature_index_;
-  ZoneVector<LocalType> params_;
-  uint16_t local_i32_count_;
-  uint16_t local_i64_count_;
-  uint16_t local_f32_count_;
-  uint16_t local_f64_count_;
+  uint32_t signature_index_;
+  LocalDeclEncoder locals_;
   bool exported_;
-  bool external_;
   ZoneVector<uint8_t> body_;
   ZoneVector<char> name_;
 
-  bool HasName() const { return (exported_ || external_) && name_.size() > 0; }
+  bool HasName() const { return exported_ && name_.size() > 0; }
 };
 
 class WasmFunctionBuilder : public ZoneObject {
  public:
-  uint16_t AddParam(LocalType type);
-  uint16_t AddLocal(LocalType type);
-  void ReturnType(LocalType type);
+  void SetSignature(FunctionSig* sig);
+  uint32_t AddLocal(LocalType type);
+  void EmitVarInt(uint32_t val);
   void EmitCode(const byte* code, uint32_t code_size);
-  void EmitCode(const byte* code, uint32_t code_size,
-                const uint32_t* local_indices, uint32_t indices_size);
   void Emit(WasmOpcode opcode);
+  void EmitGetLocal(uint32_t index);
+  void EmitSetLocal(uint32_t index);
+  void EmitI32Const(int32_t val);
   void EmitWithU8(WasmOpcode opcode, const byte immediate);
   void EmitWithU8U8(WasmOpcode opcode, const byte imm1, const byte imm2);
   void EmitWithVarInt(WasmOpcode opcode, uint32_t immediate);
-  uint32_t EmitEditableVarIntImmediate();
-  void EditVarIntImmediate(uint32_t offset, const uint32_t immediate);
   void Exported(uint8_t flag);
-  void External(uint8_t flag);
-  void SetName(const unsigned char* name, int name_length);
+  void SetName(const char* name, int name_length);
   WasmFunctionEncoder* Build(Zone* zone, WasmModuleBuilder* mb) const;
 
  private:
   explicit WasmFunctionBuilder(Zone* zone);
   friend class WasmModuleBuilder;
-  LocalType return_type_;
-  struct Type;
-  ZoneVector<Type> locals_;
+  LocalDeclEncoder locals_;
   uint8_t exported_;
-  uint8_t external_;
   ZoneVector<uint8_t> body_;
-  ZoneVector<uint32_t> local_indices_;
   ZoneVector<char> name_;
-  uint16_t AddVar(LocalType type, bool param);
-  void IndexVars(WasmFunctionEncoder* e, uint16_t* var_index) const;
+  void IndexVars(WasmFunctionEncoder* e, uint32_t* var_index) const;
 };
 
 class WasmDataSegmentEncoder : public ZoneObject {
@@ -105,6 +93,12 @@
   const byte* end_;
 };
 
+struct WasmFunctionImport {
+  uint32_t sig_index;  // Signature index; written as a varint per import.
+  const char* name;    // Import name bytes; {name_length} bytes are copied.
+  int name_length;     // Length of {name} in bytes; written as a varint.
+};
+
 class WasmModuleWriter : public ZoneObject {
  public:
   WasmModuleIndex* WriteTo(Zone* zone) const;
@@ -112,10 +106,11 @@
  private:
   friend class WasmModuleBuilder;
   explicit WasmModuleWriter(Zone* zone);
+  ZoneVector<WasmFunctionImport> imports_;
   ZoneVector<WasmFunctionEncoder*> functions_;
   ZoneVector<WasmDataSegmentEncoder*> data_segments_;
   ZoneVector<FunctionSig*> signatures_;
-  ZoneVector<uint16_t> indirect_functions_;
+  ZoneVector<uint32_t> indirect_functions_;
   ZoneVector<std::pair<MachineType, bool>> globals_;
   int start_function_index_;
 };
@@ -123,32 +118,33 @@
 class WasmModuleBuilder : public ZoneObject {
  public:
   explicit WasmModuleBuilder(Zone* zone);
-  uint16_t AddFunction();
+  uint32_t AddFunction();
   uint32_t AddGlobal(MachineType type, bool exported);
   WasmFunctionBuilder* FunctionAt(size_t index);
   void AddDataSegment(WasmDataSegmentEncoder* data);
-  uint16_t AddSignature(FunctionSig* sig);
-  void AddIndirectFunction(uint16_t index);
-  void MarkStartFunction(uint16_t index);
+  uint32_t AddSignature(FunctionSig* sig);
+  void AddIndirectFunction(uint32_t index);
+  void MarkStartFunction(uint32_t index);
+  uint32_t AddImport(const char* name, int name_length, FunctionSig* sig);
   WasmModuleWriter* Build(Zone* zone);
 
   struct CompareFunctionSigs {
     bool operator()(FunctionSig* a, FunctionSig* b) const;
   };
-  typedef ZoneMap<FunctionSig*, uint16_t, CompareFunctionSigs> SignatureMap;
+  typedef ZoneMap<FunctionSig*, uint32_t, CompareFunctionSigs> SignatureMap;
 
  private:
   Zone* zone_;
   ZoneVector<FunctionSig*> signatures_;
+  ZoneVector<WasmFunctionImport> imports_;
   ZoneVector<WasmFunctionBuilder*> functions_;
   ZoneVector<WasmDataSegmentEncoder*> data_segments_;
-  ZoneVector<uint16_t> indirect_functions_;
+  ZoneVector<uint32_t> indirect_functions_;
   ZoneVector<std::pair<MachineType, bool>> globals_;
   SignatureMap signature_map_;
   int start_function_index_;
 };
 
-std::vector<uint8_t> UnsignedLEB128From(uint32_t result);
 }  // namespace wasm
 }  // namespace internal
 }  // namespace v8
diff --git a/src/wasm/leb-helper.h b/src/wasm/leb-helper.h
new file mode 100644
index 0000000..7ba244d
--- /dev/null
+++ b/src/wasm/leb-helper.h
@@ -0,0 +1,131 @@
+// Copyright 2016 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_WASM_LEB_HELPER_H_
+#define V8_WASM_LEB_HELPER_H_
+
+namespace v8 {
+namespace internal {
+namespace wasm {
+
+class LEBHelper {
+ public:
+  // Write a 32-bit unsigned LEB to {dest}, updating {dest} to point after
+  // the last uint8_t written. No safety checks.
+  static void write_u32v(uint8_t** dest, uint32_t val) {
+    while (val >= 0x80) {
+      *((*dest)++) = static_cast<uint8_t>(0x80 | (val & 0x7F));
+      val >>= 7;
+    }
+    *((*dest)++) = static_cast<uint8_t>(val & 0x7F);
+  }
+
+  // Write a 32-bit signed LEB to {dest}, updating {dest} to point after
+  // the last uint8_t written. No safety checks.
+  static void write_i32v(uint8_t** dest, int32_t val) {
+    if (val >= 0) {
+      while (val >= 0x40) {  // prevent sign extension.
+        *((*dest)++) = static_cast<uint8_t>(0x80 | (val & 0x7F));
+        val >>= 7;
+      }
+      *((*dest)++) = static_cast<uint8_t>(val & 0xFF);
+    } else {
+      while ((val >> 6) != -1) {  // until bits 6 and up are all 1s.
+        *((*dest)++) = static_cast<uint8_t>(0x80 | (val & 0x7F));
+        val >>= 7;
+      }
+      *((*dest)++) = static_cast<uint8_t>(val & 0x7F);
+    }
+  }
+
+  // Write a 64-bit unsigned LEB to {dest}, updating {dest} to point after
+  // the last uint8_t written. No safety checks.
+  static void write_u64v(uint8_t** dest, uint64_t val) {
+    while (val >= 0x80) {
+      *((*dest)++) = static_cast<uint8_t>(0x80 | (val & 0x7F));
+      val >>= 7;
+    }
+    *((*dest)++) = static_cast<uint8_t>(val & 0x7F);
+  }
+
+  // Write a 64-bit signed LEB to {dest}, updating {dest} to point after
+  // the last uint8_t written. No safety checks.
+  static void write_i64v(uint8_t** dest, int64_t val) {
+    if (val >= 0) {
+      while (val >= 0x40) {  // prevent sign extension.
+        *((*dest)++) = static_cast<uint8_t>(0x80 | (val & 0x7F));
+        val >>= 7;
+      }
+      *((*dest)++) = static_cast<uint8_t>(val & 0xFF);
+    } else {
+      while ((val >> 6) != -1) {  // until bits 6 and up are all 1s.
+        *((*dest)++) = static_cast<uint8_t>(0x80 | (val & 0x7F));
+        val >>= 7;
+      }
+      *((*dest)++) = static_cast<uint8_t>(val & 0x7F);
+    }
+  }
+
+  // TODO(titzer): move core logic for decoding LEBs from decoder.h to here.
+
+  // Compute the size of {val} if emitted as an unsigned LEB32.
+  static inline size_t sizeof_u32v(size_t val) {
+    size_t size = 0;
+    do {
+      size++;
+      val = val >> 7;
+    } while (val > 0);
+    return size;
+  }
+
+  // Compute the size of {val} if emitted as a signed LEB32.
+  static inline size_t sizeof_i32v(int32_t val) {
+    size_t size = 1;
+    if (val >= 0) {
+      while (val >= 0x40) {  // prevent sign extension.
+        size++;
+        val >>= 7;
+      }
+    } else {
+      while ((val >> 6) != -1) {  // until bits 6 and up are all 1s.
+        size++;
+        val >>= 7;
+      }
+    }
+    return size;
+  }
+
+  // Compute the size of {val} if emitted as an unsigned LEB64.
+  static inline size_t sizeof_u64v(uint64_t val) {
+    size_t size = 0;
+    do {
+      size++;
+      val = val >> 7;
+    } while (val > 0);
+    return size;
+  }
+
+  // Compute the size of {val} if emitted as a signed LEB64.
+  static inline size_t sizeof_i64v(int64_t val) {
+    size_t size = 1;
+    if (val >= 0) {
+      while (val >= 0x40) {  // prevent sign extension.
+        size++;
+        val >>= 7;
+      }
+    } else {
+      while ((val >> 6) != -1) {  // until bits 6 and up are all 1s.
+        size++;
+        val >>= 7;
+      }
+    }
+    return size;
+  }
+};
+
+}  // namespace wasm
+}  // namespace internal
+}  // namespace v8
+
+#endif  // V8_WASM_LEB_HELPER_H_
diff --git a/src/wasm/module-decoder.cc b/src/wasm/module-decoder.cc
index 3e85a1b..f7d26a5 100644
--- a/src/wasm/module-decoder.cc
+++ b/src/wasm/module-decoder.cc
@@ -25,7 +25,6 @@
 #define TRACE(...)
 #endif
 
-
 // The main logic for decoding the bytes of a module.
 class ModuleDecoder : public Decoder {
  public:
@@ -79,9 +78,8 @@
     module->mem_external = false;
     module->origin = origin_;
 
-    bool sections[(size_t)WasmSection::Code::Max] = {false};
-
     const byte* pos = pc_;
+    int current_order = 0;
     uint32_t magic_word = consume_u32("wasm magic");
 #define BYTES(x) (x & 0xff), (x >> 8) & 0xff, (x >> 16) & 0xff, (x >> 24) & 0xff
     if (magic_word != kWasmMagic) {
@@ -109,45 +107,45 @@
       TRACE("DecodeSection\n");
       pos = pc_;
 
-      int length;
-      uint32_t section_length = consume_u32v(&length, "section size");
-
-      int section_string_leb_length = 0;
-      uint32_t section_string_length = 0;
-      WasmSection::Code section = consume_section_name(
-          &section_string_leb_length, &section_string_length);
-      uint32_t string_and_leb_length =
-          section_string_leb_length + section_string_length;
-      if (string_and_leb_length > section_length) {
-        error(pos, pos,
-              "section string of size %u longer than total section bytes %u",
-              string_and_leb_length, section_length);
+      // Read the section name.
+      int string_leb_length = 0;
+      uint32_t string_length =
+          consume_u32v(&string_leb_length, "section name length");
+      const byte* section_name_start = pc_;
+      consume_bytes(string_length);
+      if (failed()) {
+        TRACE("Section name of length %u couldn't be read\n", string_length);
         break;
       }
 
-      if (section == WasmSection::Code::Max) {
-        // Skip unknown section.
-        uint32_t skip = section_length - string_and_leb_length;
-        TRACE("skipping %u bytes from unknown section\n", skip);
-        consume_bytes(skip);
-        continue;
-      }
+      WasmSection::Code section =
+          WasmSection::lookup(section_name_start, string_length);
 
-      // Each section should appear at most once.
-      CheckForPreviousSection(sections, section, false);
-      sections[(size_t)section] = true;
+      // Read and check the section size.
+      int section_leb_length = 0;
+      uint32_t section_length =
+          consume_u32v(&section_leb_length, "section length");
+      if (!checkAvailable(section_length)) {
+        // The section would extend beyond the end of the module.
+        break;
+      }
+      const byte* section_start = pc_;
+      const byte* expected_section_end = pc_ + section_length;
+
+      current_order = CheckSectionOrder(current_order, section);
 
       switch (section) {
         case WasmSection::Code::End:
           // Terminate section decoding.
           limit_ = pc_;
           break;
-        case WasmSection::Code::Memory:
+        case WasmSection::Code::Memory: {
           int length;
           module->min_mem_pages = consume_u32v(&length, "min memory");
           module->max_mem_pages = consume_u32v(&length, "max memory");
           module->mem_export = consume_u8("export memory") != 0;
           break;
+        }
         case WasmSection::Code::Signatures: {
           int length;
           uint32_t signatures_count = consume_u32v(&length, "signatures count");
@@ -157,30 +155,30 @@
             if (failed()) break;
             TRACE("DecodeSignature[%d] module+%d\n", i,
                   static_cast<int>(pc_ - start_));
-            FunctionSig* s = consume_sig();  // read function sig.
+            FunctionSig* s = consume_sig();
             module->signatures.push_back(s);
           }
           break;
         }
         case WasmSection::Code::FunctionSignatures: {
-          // Functions require a signature table first.
-          CheckForPreviousSection(sections, WasmSection::Code::Signatures,
-                                  true);
           int length;
           uint32_t functions_count = consume_u32v(&length, "functions count");
           module->functions.reserve(SafeReserve(functions_count));
           for (uint32_t i = 0; i < functions_count; i++) {
-            module->functions.push_back(
-                {nullptr, i, 0, 0, 0, 0, 0, 0, false, false});
+            module->functions.push_back({nullptr,  // sig
+                                         i,        // func_index
+                                         0,        // sig_index
+                                         0,        // name_offset
+                                         0,        // name_length
+                                         0,        // code_start_offset
+                                         0,        // code_end_offset
+                                         false});  // exported
             WasmFunction* function = &module->functions.back();
             function->sig_index = consume_sig_index(module, &function->sig);
           }
           break;
         }
         case WasmSection::Code::FunctionBodies: {
-          // Function bodies should follow signatures.
-          CheckForPreviousSection(sections,
-                                  WasmSection::Code::FunctionSignatures, true);
           int length;
           const byte* pos = pc_;
           uint32_t functions_count = consume_u32v(&length, "functions count");
@@ -206,10 +204,7 @@
           }
           break;
         }
-        case WasmSection::Code::Functions: {
-          // Functions require a signature table first.
-          CheckForPreviousSection(sections, WasmSection::Code::Signatures,
-                                  true);
+        case WasmSection::Code::OldFunctions: {
           int length;
           uint32_t functions_count = consume_u32v(&length, "functions count");
           module->functions.reserve(SafeReserve(functions_count));
@@ -224,8 +219,14 @@
             TRACE("DecodeFunction[%d] module+%d\n", i,
                   static_cast<int>(pc_ - start_));
 
-            module->functions.push_back(
-                {nullptr, i, 0, 0, 0, 0, 0, 0, false, false});
+            module->functions.push_back({nullptr,  // sig
+                                         i,        // func_index
+                                         0,        // sig_index
+                                         0,        // name_offset
+                                         0,        // name_length
+                                         0,        // code_start_offset
+                                         0,        // code_end_offset
+                                         false});  // exported
             WasmFunction* function = &module->functions.back();
             DecodeFunctionInModule(module, function, false);
           }
@@ -233,19 +234,15 @@
             for (uint32_t i = 0; i < functions_count; i++) {
               if (failed()) break;
               WasmFunction* function = &module->functions[i];
-              if (!function->external) {
-                VerifyFunctionBody(i, &menv, function);
-                if (result_.failed())
-                  error(result_.error_pc, result_.error_msg.get());
+              VerifyFunctionBody(i, &menv, function);
+              if (result_.failed()) {
+                error(result_.error_pc, result_.error_msg.get());
               }
             }
           }
           break;
         }
         case WasmSection::Code::Names: {
-          // Names correspond to functions.
-          CheckForPreviousSection(sections,
-                                  WasmSection::Code::FunctionSignatures, true);
           int length;
           const byte* pos = pc_;
           uint32_t functions_count = consume_u32v(&length, "functions count");
@@ -259,13 +256,13 @@
           for (uint32_t i = 0; i < functions_count; i++) {
             WasmFunction* function = &module->functions[i];
             function->name_offset =
-                consume_string(&function->name_length, "function name");
+                consume_string(&function->name_length, false);
 
             uint32_t local_names_count =
                 consume_u32v(&length, "local names count");
             for (uint32_t j = 0; j < local_names_count; j++) {
               uint32_t unused = 0;
-              uint32_t offset = consume_string(&unused, "local name");
+              uint32_t offset = consume_string(&unused, false);
               USE(unused);
               USE(offset);
             }
@@ -297,7 +294,10 @@
             if (failed()) break;
             TRACE("DecodeDataSegment[%d] module+%d\n", i,
                   static_cast<int>(pc_ - start_));
-            module->data_segments.push_back({0, 0, 0});
+            module->data_segments.push_back({0,        // dest_addr
+                                             0,        // source_offset
+                                             0,        // source_size
+                                             false});  // init
             WasmDataSegment* segment = &module->data_segments.back();
             DecodeDataSegmentInModule(module, segment);
           }
@@ -341,9 +341,6 @@
           break;
         }
         case WasmSection::Code::ImportTable: {
-          // Declares an import table.
-          CheckForPreviousSection(sections, WasmSection::Code::Signatures,
-                                  true);
           int length;
           uint32_t import_table_count =
               consume_u32v(&length, "import table count");
@@ -354,18 +351,23 @@
             TRACE("DecodeImportTable[%d] module+%d\n", i,
                   static_cast<int>(pc_ - start_));
 
-            module->import_table.push_back({nullptr, 0, 0});
+            module->import_table.push_back({nullptr,  // sig
+                                            0,        // sig_index
+                                            0,        // module_name_offset
+                                            0,        // module_name_length
+                                            0,        // function_name_offset
+                                            0});      // function_name_length
             WasmImport* import = &module->import_table.back();
 
             import->sig_index = consume_sig_index(module, &import->sig);
             const byte* pos = pc_;
-            import->module_name_offset = consume_string(
-                &import->module_name_length, "import module name");
+            import->module_name_offset =
+                consume_string(&import->module_name_length, true);
             if (import->module_name_length == 0) {
               error(pos, "import module name cannot be NULL");
             }
-            import->function_name_offset = consume_string(
-                &import->function_name_length, "import function name");
+            import->function_name_offset =
+                consume_string(&import->function_name_length, true);
           }
           break;
         }
@@ -382,17 +384,37 @@
             TRACE("DecodeExportTable[%d] module+%d\n", i,
                   static_cast<int>(pc_ - start_));
 
-            module->export_table.push_back({0, 0});
+            module->export_table.push_back({0,    // func_index
+                                            0,    // name_offset
+                                            0});  // name_length
             WasmExport* exp = &module->export_table.back();
 
             WasmFunction* func;
             exp->func_index = consume_func_index(module, &func);
-            exp->name_offset = consume_string(&exp->name_length, "export name");
+            exp->name_offset = consume_string(&exp->name_length, true);
           }
           break;
         }
         case WasmSection::Code::Max:
-          UNREACHABLE();  // Already skipped unknown sections.
+          // Skip unknown sections.
+          TRACE("Unknown section: '");
+          for (uint32_t i = 0; i != string_length; ++i) {
+            TRACE("%c", *(section_name_start + i));
+          }
+          TRACE("'\n");
+          consume_bytes(section_length);
+          break;
+      }
+
+      if (pc_ != expected_section_end) {
+        const char* diff = pc_ < expected_section_end ? "shorter" : "longer";
+        size_t expected_length = static_cast<size_t>(section_length);
+        size_t actual_length = static_cast<size_t>(pc_ - section_start);
+        error(pc_, pc_,
+              "section \"%s\" %s (%zu bytes) than specified (%zu bytes)",
+              WasmSection::getName(section), diff, actual_length,
+              expected_length);
+        break;
       }
     }
 
@@ -417,17 +439,18 @@
     }
   }
 
-  void CheckForPreviousSection(bool* sections, WasmSection::Code section,
-                               bool present) {
-    if (section >= WasmSection::Code::Max) return;
-    if (sections[(size_t)section] == present) return;
-    if (present) {
-      error(pc_ - 1, nullptr, "required %s section missing",
-            WasmSection::getName(section));
-    } else {
-      error(pc_ - 1, nullptr, "%s section already present",
+  int CheckSectionOrder(int current_order, WasmSection::Code section) {
+    int next_order = WasmSection::getOrder(section);
+    if (next_order == 0) return current_order;
+    if (next_order == current_order) {
+      error(pc_, pc_, "section \"%s\" already defined",
             WasmSection::getName(section));
     }
+    if (next_order < current_order) {
+      error(pc_, pc_, "section \"%s\" out of order",
+            WasmSection::getName(section));
+    }
+    return next_order;
   }
 
   // Decodes a single anonymous function starting at {start_}.
@@ -440,7 +463,6 @@
     function->code_start_offset = off(pc_);   // ---- code start
     function->code_end_offset = off(limit_);  // ---- code end
     function->exported = false;               // ---- exported
-    function->external = false;               // ---- external
 
     if (ok()) VerifyFunctionBody(0, module_env, function);
 
@@ -466,7 +488,9 @@
 
   // Decodes a single global entry inside a module starting at {pc_}.
   void DecodeGlobalInModule(WasmGlobal* global) {
-    global->name_offset = consume_string(&global->name_length, "global name");
+    global->name_offset = consume_string(&global->name_length, false);
+    DCHECK(unibrow::Utf8::Validate(start_ + global->name_offset,
+                                   global->name_length));
     global->type = mem_type();
     global->offset = 0;
     global->exported = consume_u8("exported") != 0;
@@ -487,32 +511,15 @@
       function->sig = module->signatures[function->sig_index];
     }
 
-    TRACE("  +%d  <function attributes:%s%s%s%s%s>\n",
-          static_cast<int>(pc_ - start_),
+    TRACE("  +%d  <function attributes:%s%s>\n", static_cast<int>(pc_ - start_),
           decl_bits & kDeclFunctionName ? " name" : "",
-          decl_bits & kDeclFunctionImport ? " imported" : "",
-          decl_bits & kDeclFunctionLocals ? " locals" : "",
-          decl_bits & kDeclFunctionExport ? " exported" : "",
-          (decl_bits & kDeclFunctionImport) == 0 ? " body" : "");
-
-    if (decl_bits & kDeclFunctionName) {
-      function->name_offset =
-          consume_string(&function->name_length, "function name");
-    }
+          decl_bits & kDeclFunctionExport ? " exported" : "");
 
     function->exported = decl_bits & kDeclFunctionExport;
 
-    // Imported functions have no locals or body.
-    if (decl_bits & kDeclFunctionImport) {
-      function->external = true;
-      return;
-    }
-
-    if (decl_bits & kDeclFunctionLocals) {
-      function->local_i32_count = consume_u16("i32 count");
-      function->local_i64_count = consume_u16("i64 count");
-      function->local_f32_count = consume_u16("f32 count");
-      function->local_f64_count = consume_u16("f64 count");
+    if (decl_bits & kDeclFunctionName) {
+      function->name_offset =
+          consume_string(&function->name_length, function->exported);
     }
 
     uint16_t size = consume_u16("body size");
@@ -566,11 +573,10 @@
   // Verifies the body (code) of a given function.
   void VerifyFunctionBody(uint32_t func_num, ModuleEnv* menv,
                           WasmFunction* function) {
-    if (FLAG_trace_wasm_decode_time) {
+    if (FLAG_trace_wasm_decoder || FLAG_trace_wasm_decode_time) {
       OFStream os(stdout);
       os << "Verifying WASM function " << WasmFunctionName(function, menv)
          << std::endl;
-      os << std::endl;
     }
     FunctionBody body = {menv, function->sig, start_,
                          start_ + function->code_start_offset,
@@ -606,11 +612,18 @@
 
   // Reads a length-prefixed string, checking that it is within bounds. Returns
   // the offset of the string, and the length as an out parameter.
-  uint32_t consume_string(uint32_t* length, const char* name = nullptr) {
+  // If {validate_utf8} is set, an error is reported for malformed UTF-8.
+  uint32_t consume_string(uint32_t* length, bool validate_utf8) {
     int varint_length;
     *length = consume_u32v(&varint_length, "string length");
     uint32_t offset = pc_offset();
     TRACE("  +%u  %-20s: (%u bytes)\n", offset, "string", *length);
+    // {*length} was just decoded from the module bytes and is not yet bounds
+    // checked, so only run the validator if the whole string is available.
+    if (validate_utf8 && checkAvailable(*length) &&
+        !unibrow::Utf8::Validate(pc_, *length)) {
+      error(pc_, "no valid UTF-8 string");
+    }
     consume_bytes(*length);
     return offset;
   }
@@ -643,30 +652,6 @@
     return func_index;
   }
 
-  // Reads a section name.
-  WasmSection::Code consume_section_name(int* string_leb_length,
-                                         uint32_t* string_length) {
-    *string_length = consume_u32v(string_leb_length, "name length");
-    const byte* start = pc_;
-    consume_bytes(*string_length);
-    if (failed()) {
-      TRACE("Section name of length %u couldn't be read\n", *string_length);
-      return WasmSection::Code::Max;
-    }
-    // TODO(jfb) Linear search, it may be better to do a common-prefix search.
-    for (WasmSection::Code i = WasmSection::begin(); i != WasmSection::end();
-         i = WasmSection::next(i)) {
-      if (WasmSection::getNameLength(i) == *string_length &&
-          0 == memcmp(WasmSection::getName(i), start, *string_length)) {
-        return i;
-      }
-    }
-    TRACE("Unknown section: '");
-    for (uint32_t i = 0; i != *string_length; ++i) TRACE("%c", *(start + i));
-    TRACE("'\n");
-    return WasmSection::Code::Max;
-  }
-
   // Reads a single 8-bit integer, interpreting it as a local type.
   LocalType consume_local_type() {
     byte val = consume_u8("local type");
@@ -719,24 +704,51 @@
     }
   }
 
-  // Parses an inline function signature.
+  // Parses a type entry (currently functions only); nullptr on any error.
   FunctionSig* consume_sig() {
+    const byte* pos = pc_;
+    byte form = consume_u8("type form");
+    if (form != kWasmFunctionTypeForm) {
+      error(pos, pos, "expected function type form (0x%02x), got: 0x%02x",
+            kWasmFunctionTypeForm, form);
+      return nullptr;
+    }
     int length;
-    byte count = consume_u32v(&length, "param count");
-    LocalType ret = consume_local_type();
-    FunctionSig::Builder builder(module_zone, ret == kAstStmt ? 0 : 1, count);
-    if (ret != kAstStmt) builder.AddReturn(ret);
-
-    for (int i = 0; i < count; i++) {
+    // Parse parameter types; param_count is untrusted, so stop on any error.
+    uint32_t param_count = consume_u32v(&length, "param count");
+    std::vector<LocalType> params;
+    for (uint32_t i = 0; !failed() && i < param_count; i++) {
       LocalType param = consume_local_type();
       if (param == kAstStmt) error(pc_ - 1, "invalid void parameter type");
-      builder.AddParam(param);
+      params.push_back(param);
     }
-    return builder.Build();
+
+    // parse return types
+    const byte* pt = pc_;
+    uint32_t return_count = consume_u32v(&length, "return count");
+    if (return_count > kMaxReturnCount) {
+      error(pt, pt, "return count of %u exceeds maximum of %u", return_count,
+            kMaxReturnCount);
+      return nullptr;
+    }
+    std::vector<LocalType> returns;
+    for (uint32_t i = 0; i < return_count; i++) {
+      LocalType ret = consume_local_type();
+      if (ret == kAstStmt) error(pc_ - 1, "invalid void return type");
+      returns.push_back(ret);
+    }
+    if (failed()) return nullptr;  // params/returns may be incomplete here.
+    // FunctionSig stores the return types first.
+    LocalType* buffer =
+        module_zone->NewArray<LocalType>(param_count + return_count);
+    uint32_t b = 0;
+    for (uint32_t i = 0; i < return_count; i++) buffer[b++] = returns[i];
+    for (uint32_t i = 0; i < param_count; i++) buffer[b++] = params[i];
+
+    return new (module_zone) FunctionSig(return_count, param_count, buffer);
   }
 };
 
-
 // Helpers for nice error messages.
 class ModuleError : public ModuleResult {
  public:
@@ -750,7 +762,6 @@
   }
 };
 
-
 // Helpers for nice error messages.
 class FunctionError : public FunctionResult {
  public:
@@ -767,30 +778,42 @@
 ModuleResult DecodeWasmModule(Isolate* isolate, Zone* zone,
                               const byte* module_start, const byte* module_end,
                               bool verify_functions, ModuleOrigin origin) {
+  size_t decode_memory_start = zone->allocation_size();  // Baseline.
+  HistogramTimerScope wasm_decode_module_time_scope(
+      isolate->counters()->wasm_decode_module_time());
   size_t size = module_end - module_start;
   if (module_start > module_end) return ModuleError("start > end");
   if (size >= kMaxModuleSize) return ModuleError("size > maximum module size");
+  // TODO(bradnelson): Improve histogram handling of size_t (cast to int here).
+  isolate->counters()->wasm_module_size_bytes()->AddSample(
+      static_cast<int>(size));
   WasmModule* module = new WasmModule();
   ModuleDecoder decoder(zone, module_start, module_end, origin);
-  return decoder.DecodeModule(module, verify_functions);
+  ModuleResult result = decoder.DecodeModule(module, verify_functions);
+  // TODO(bradnelson): As above; the sample is the zone growth during decoding.
+  isolate->counters()->wasm_decode_module_peak_memory_bytes()->AddSample(
+      static_cast<int>(zone->allocation_size() - decode_memory_start));
+  return result;
 }
 
-
 FunctionSig* DecodeWasmSignatureForTesting(Zone* zone, const byte* start,
                                            const byte* end) {
   ModuleDecoder decoder(zone, start, end, kWasmOrigin);
   return decoder.DecodeFunctionSignature(start);
 }
 
-
 FunctionResult DecodeWasmFunction(Isolate* isolate, Zone* zone,
                                   ModuleEnv* module_env,
                                   const byte* function_start,
                                   const byte* function_end) {
+  HistogramTimerScope wasm_decode_function_time_scope(
+      isolate->counters()->wasm_decode_function_time());
   size_t size = function_end - function_start;
   if (function_start > function_end) return FunctionError("start > end");
   if (size > kMaxFunctionSize)
     return FunctionError("size > maximum function size");
+  isolate->counters()->wasm_function_size_bytes()->AddSample(
+      static_cast<int>(size));
   WasmFunction* function = new WasmFunction();
   ModuleDecoder decoder(zone, function_start, function_end, kWasmOrigin);
   return decoder.DecodeSingleFunction(module_env, function);
diff --git a/src/wasm/switch-logic.cc b/src/wasm/switch-logic.cc
new file mode 100644
index 0000000..f8e3f0d
--- /dev/null
+++ b/src/wasm/switch-logic.cc
@@ -0,0 +1,63 @@
+// Copyright 2016 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/wasm/switch-logic.h"
+
+namespace v8 {
+namespace internal {
+namespace wasm {
+
+namespace {
+// Links nodes[begin..end] (both bounds inclusive) into a binary tree and
+// returns its root. The middle element becomes the root and the two halves
+// are linked recursively; an empty range (end < begin) yields nullptr.
+CaseNode* CreateBst(ZoneVector<CaseNode*>* nodes, size_t begin, size_t end) {
+  if (end < begin) return nullptr;
+  // A single element is returned as is, without touching its children.
+  if (end == begin) return nodes->at(begin);
+  const size_t mid = (begin + end) / 2;
+  CaseNode* const subtree = nodes->at(mid);
+  // "mid - 1" would wrap around for mid == 0; the left range is empty in that
+  // case anyway, so the left child is simply not assigned.
+  if (mid != 0) subtree->left = CreateBst(nodes, begin, mid - 1);
+  subtree->right = CreateBst(nodes, mid + 1, end);
+  return subtree;
+}
+}  // namespace
+
+CaseNode* OrderCases(ZoneVector<int>* cases, Zone* zone) {
+  // Sorts |cases| in place and returns the root of a tree over them. Runs of
+  // >= min_size values with gaps <= max_distance become a single range node.
+  const int max_distance = 2;
+  const int min_size = 4;
+  if (cases->empty()) return nullptr;
+  std::sort(cases->begin(), cases->end());
+  ZoneVector<size_t> table_breaks(zone);
+  for (size_t i = 1; i < cases->size(); i++) {
+    // Sorted, so the gap is non-negative; subtract unsigned to avoid overflow.
+    unsigned gap = static_cast<unsigned>(cases->at(i)) - cases->at(i - 1);
+    if (gap > static_cast<unsigned>(max_distance)) table_breaks.push_back(i);
+  }
+  table_breaks.push_back(cases->size());
+  ZoneVector<CaseNode*> nodes(zone);
+  size_t curr_pos = 0;
+  for (size_t i = 0; i < table_breaks.size(); i++) {
+    size_t break_pos = table_breaks[i];
+    if (break_pos - curr_pos >= min_size) {
+      int begin = cases->at(curr_pos);
+      int end = cases->at(break_pos - 1);
+      nodes.push_back(new (zone) CaseNode(begin, end));
+      curr_pos = break_pos;
+    } else {
+      for (; curr_pos < break_pos; curr_pos++) {
+        nodes.push_back(new (zone)
+                            CaseNode(cases->at(curr_pos), cases->at(curr_pos)));
+      }
+    }
+  }
+  return CreateBst(&nodes, 0, nodes.size() - 1);
+}
+}  // namespace wasm
+}  // namespace internal
+}  // namespace v8
diff --git a/src/wasm/switch-logic.h b/src/wasm/switch-logic.h
new file mode 100644
index 0000000..8cef08b
--- /dev/null
+++ b/src/wasm/switch-logic.h
@@ -0,0 +1,31 @@
+// Copyright 2016 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_WASM_SWITCH_LOGIC_H
+#define V8_WASM_SWITCH_LOGIC_H
+
+#include "src/zone-containers.h"
+#include "src/zone.h"
+
+namespace v8 {
+namespace internal {
+namespace wasm {
+
+// A node of the tree built by OrderCases. It stands for the case values
+// begin..end, which is a single value when begin == end.
+struct CaseNode : public ZoneObject {
+  const int begin;
+  const int end;
+  CaseNode* left;
+  CaseNode* right;
+  CaseNode(int begin, int end)
+      : begin(begin), end(end), left(nullptr), right(nullptr) {}
+};
+
+CaseNode* OrderCases(ZoneVector<int>* cases, Zone* zone);
+}  // namespace wasm
+}  // namespace internal
+}  // namespace v8
+
+#endif
diff --git a/src/wasm/wasm-external-refs.cc b/src/wasm/wasm-external-refs.cc
new file mode 100644
index 0000000..e155f3c
--- /dev/null
+++ b/src/wasm/wasm-external-refs.cc
@@ -0,0 +1,199 @@
+// Copyright 2016 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <math.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <limits>
+
+#include "include/v8config.h"
+
+#include "src/base/bits.h"
+#include "src/wasm/wasm-external-refs.h"
+
+namespace v8 {
+namespace internal {
+namespace wasm {
+
+void f32_trunc_wrapper(float* param) { *param = truncf(*param); }
+
+void f32_floor_wrapper(float* param) { *param = floorf(*param); }
+
+void f32_ceil_wrapper(float* param) { *param = ceilf(*param); }
+
+void f32_nearest_int_wrapper(float* param) { *param = nearbyintf(*param); }
+
+void f64_trunc_wrapper(double* param) { *param = trunc(*param); }
+
+void f64_floor_wrapper(double* param) { *param = floor(*param); }
+
+void f64_ceil_wrapper(double* param) { *param = ceil(*param); }
+
+void f64_nearest_int_wrapper(double* param) { *param = nearbyint(*param); }
+
+void int64_to_float32_wrapper(int64_t* input, float* output) {
+  *output = static_cast<float>(*input);
+}
+
+void uint64_to_float32_wrapper(uint64_t* input, float* output) {
+#if V8_CC_MSVC
+  // With MSVC we use static_cast<float>(uint32_t) instead of
+  // static_cast<float>(uint64_t) to achieve round-to-nearest-ties-even
+  // semantics. The idea is to calculate
+  // static_cast<float>(high_word) * 2^32 + static_cast<float>(low_word). To
+  // achieve proper rounding in all cases we have to adjust the high_word
+  // with a "rounding bit" sometimes. The rounding bit is stored in the LSB of
+  // the high_word if the low_word may affect the rounding of the high_word.
+  uint32_t low_word = static_cast<uint32_t>(*input & 0xffffffff);
+  uint32_t high_word = static_cast<uint32_t>(*input >> 32);
+
+  float shift = static_cast<float>(1ull << 32);
+  // If the MSB of the high_word is clear, we make space for a rounding bit.
+  if (high_word < 0x80000000) {
+    high_word <<= 1;
+    shift = static_cast<float>(1ull << 31);
+  }
+
+  if ((high_word & 0xfe000000) && low_word) {
+    // Set the rounding bit.
+    high_word |= 1;
+  }
+
+  float result = static_cast<float>(high_word);
+  result *= shift;
+  result += static_cast<float>(low_word);
+  *output = result;
+
+#else
+  *output = static_cast<float>(*input);
+#endif
+}
+
+void int64_to_float64_wrapper(int64_t* input, double* output) {
+  *output = static_cast<double>(*input);
+}
+
+void uint64_to_float64_wrapper(uint64_t* input, double* output) {
+#if V8_CC_MSVC
+  // With MSVC we use static_cast<double>(uint32_t) instead of
+  // static_cast<double>(uint64_t) to achieve round-to-nearest-ties-even
+  // semantics: both 32-bit halves fit into a double, so the result is built as
+  // static_cast<double>(high_word) * 2^32 + static_cast<double>(low_word).
+  uint32_t low_word = static_cast<uint32_t>(*input & 0xffffffff);
+  uint32_t high_word = static_cast<uint32_t>(*input >> 32);
+
+  double shift = static_cast<double>(1ull << 32);
+
+  double result = static_cast<double>(high_word);
+  result *= shift;
+  result += static_cast<double>(low_word);
+  *output = result;
+
+#else
+  *output = static_cast<double>(*input);
+#endif
+}
+
+int32_t float32_to_int64_wrapper(float* input, int64_t* output) {
+  // The upper bound is exclusive on purpose: int64 max is rounded when it is
+  // converted to float, and with "<=" some inputs that do not fit into int64
+  // would be accepted. Returns 1 and stores the result on success, else 0.
+  const float value = *input;
+  const float lo = static_cast<float>(std::numeric_limits<int64_t>::min());
+  const float hi = static_cast<float>(std::numeric_limits<int64_t>::max());
+  if (!(value >= lo && value < hi)) return 0;
+  *output = static_cast<int64_t>(value);
+  return 1;
+}
+
+int32_t float32_to_uint64_wrapper(float* input, uint64_t* output) {
+  // Accepts inputs greater than -1 and below uint64 max converted to float.
+  // The upper bound is exclusive because that conversion rounds; with "<="
+  // some inputs outside the uint64 range would pass. Returns 1 on success.
+  const float value = *input;
+  const float hi = static_cast<float>(std::numeric_limits<uint64_t>::max());
+  const bool in_range = value > -1.0 && value < hi;
+  if (!in_range) return 0;
+  *output = static_cast<uint64_t>(value);
+  return 1;
+}
+
+int32_t float64_to_int64_wrapper(double* input, int64_t* output) {
+  // The upper bound is exclusive on purpose: int64 max is rounded when it is
+  // converted to double, and with "<=" some inputs that do not fit into int64
+  // would be accepted. Returns 1 and stores the result on success, else 0.
+  const double value = *input;
+  const double lo = static_cast<double>(std::numeric_limits<int64_t>::min());
+  const double hi = static_cast<double>(std::numeric_limits<int64_t>::max());
+  if (!(value >= lo && value < hi)) return 0;
+  *output = static_cast<int64_t>(value);
+  return 1;
+}
+
+int32_t float64_to_uint64_wrapper(double* input, uint64_t* output) {
+  // Accepts inputs greater than -1 and below uint64 max converted to double.
+  // The upper bound is exclusive because that conversion rounds; with "<="
+  // some inputs outside the uint64 range would pass. Returns 1 on success.
+  const double value = *input;
+  const double hi = static_cast<double>(std::numeric_limits<uint64_t>::max());
+  const bool in_range = value > -1.0 && value < hi;
+  if (!in_range) return 0;
+  *output = static_cast<uint64_t>(value);
+  return 1;
+}
+
+int32_t int64_div_wrapper(int64_t* dst, int64_t* src) {
+  // Divides *dst by *src in place. Returns 1 on success, 0 if the divisor is
+  // zero and -1 if the quotient would overflow (int64 min / -1); *dst is left
+  // unchanged in both failure cases.
+  const int64_t divisor = *src;
+  if (divisor == 0) return 0;
+  if (divisor == -1 && *dst == std::numeric_limits<int64_t>::min()) return -1;
+  *dst = *dst / divisor;
+  return 1;
+}
+
+int32_t int64_mod_wrapper(int64_t* dst, int64_t* src) {
+  // Stores *dst % *src in *dst and returns 1; returns 0 if *src is zero.
+  if (*src == 0) return 0;
+  // x % -1 is always 0, but evaluating int64 min % -1 overflows (UB in C++).
+  *dst = (*src == -1) ? 0 : *dst % *src;
+  return 1;
+}
+
+int32_t uint64_div_wrapper(uint64_t* dst, uint64_t* src) {
+  // Stores *dst / *src in *dst and returns 1; returns 0 if *src is zero.
+  const uint64_t divisor = *src;
+  if (divisor == 0) return 0;
+  *dst = *dst / divisor;
+  return 1;
+}
+
+int32_t uint64_mod_wrapper(uint64_t* dst, uint64_t* src) {
+  // Stores *dst % *src in *dst and returns 1; returns 0 if *src is zero.
+  const uint64_t divisor = *src;
+  if (divisor == 0) return 0;
+  *dst = *dst % divisor;
+  return 1;
+}
+
+uint32_t word32_ctz_wrapper(uint32_t* input) {
+  return static_cast<uint32_t>(base::bits::CountTrailingZeros32(*input));
+}
+
+uint32_t word64_ctz_wrapper(uint64_t* input) {
+  return static_cast<uint32_t>(base::bits::CountTrailingZeros64(*input));
+}
+
+uint32_t word32_popcnt_wrapper(uint32_t* input) {
+  return static_cast<uint32_t>(base::bits::CountPopulation(*input));
+}
+
+uint32_t word64_popcnt_wrapper(uint64_t* input) {
+  return static_cast<uint32_t>(base::bits::CountPopulation(*input));
+}
+
+}  // namespace wasm
+}  // namespace internal
+}  // namespace v8
diff --git a/src/wasm/wasm-external-refs.h b/src/wasm/wasm-external-refs.h
index 4aa452b..ac938d6 100644
--- a/src/wasm/wasm-external-refs.h
+++ b/src/wasm/wasm-external-refs.h
@@ -2,6 +2,8 @@
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
+#include <stdint.h>
+
 #ifndef WASM_EXTERNAL_REFS_H
 #define WASM_EXTERNAL_REFS_H
 
@@ -9,173 +11,54 @@
 namespace internal {
 namespace wasm {
 
-static void f32_trunc_wrapper(float* param) { *param = truncf(*param); }
+void f32_trunc_wrapper(float* param);
 
-static void f32_floor_wrapper(float* param) { *param = floorf(*param); }
+void f32_floor_wrapper(float* param);
 
-static void f32_ceil_wrapper(float* param) { *param = ceilf(*param); }
+void f32_ceil_wrapper(float* param);
 
-static void f32_nearest_int_wrapper(float* param) {
-  *param = nearbyintf(*param);
-}
+void f32_nearest_int_wrapper(float* param);
 
-static void f64_trunc_wrapper(double* param) { *param = trunc(*param); }
+void f64_trunc_wrapper(double* param);
 
-static void f64_floor_wrapper(double* param) { *param = floor(*param); }
+void f64_floor_wrapper(double* param);
 
-static void f64_ceil_wrapper(double* param) { *param = ceil(*param); }
+void f64_ceil_wrapper(double* param);
 
-static void f64_nearest_int_wrapper(double* param) {
-  *param = nearbyint(*param);
-}
+void f64_nearest_int_wrapper(double* param);
 
-static void int64_to_float32_wrapper(int64_t* input, float* output) {
-  *output = static_cast<float>(*input);
-}
+void int64_to_float32_wrapper(int64_t* input, float* output);
 
-static void uint64_to_float32_wrapper(uint64_t* input, float* output) {
-#if V8_CC_MSVC
-  // With MSVC we use static_cast<float>(uint32_t) instead of
-  // static_cast<float>(uint64_t) to achieve round-to-nearest-ties-even
-  // semantics. The idea is to calculate
-  // static_cast<float>(high_word) * 2^32 + static_cast<float>(low_word). To
-  // achieve proper rounding in all cases we have to adjust the high_word
-  // with a "rounding bit" sometimes. The rounding bit is stored in the LSB of
-  // the high_word if the low_word may affect the rounding of the high_word.
-  uint32_t low_word = static_cast<uint32_t>(*input & 0xffffffff);
-  uint32_t high_word = static_cast<uint32_t>(*input >> 32);
+void uint64_to_float32_wrapper(uint64_t* input, float* output);
 
-  float shift = static_cast<float>(1ull << 32);
-  // If the MSB of the high_word is set, then we make space for a rounding bit.
-  if (high_word < 0x80000000) {
-    high_word <<= 1;
-    shift = static_cast<float>(1ull << 31);
-  }
+void int64_to_float64_wrapper(int64_t* input, double* output);
 
-  if ((high_word & 0xfe000000) && low_word) {
-    // Set the rounding bit.
-    high_word |= 1;
-  }
+void uint64_to_float64_wrapper(uint64_t* input, double* output);
 
-  float result = static_cast<float>(high_word);
-  result *= shift;
-  result += static_cast<float>(low_word);
-  *output = result;
+int32_t float32_to_int64_wrapper(float* input, int64_t* output);
 
-#else
-  *output = static_cast<float>(*input);
-#endif
-}
+int32_t float32_to_uint64_wrapper(float* input, uint64_t* output);
 
-static void int64_to_float64_wrapper(int64_t* input, double* output) {
-  *output = static_cast<double>(*input);
-}
+int32_t float64_to_int64_wrapper(double* input, int64_t* output);
 
-static void uint64_to_float64_wrapper(uint64_t* input, double* output) {
-#if V8_CC_MSVC
-  // With MSVC we use static_cast<double>(uint32_t) instead of
-  // static_cast<double>(uint64_t) to achieve round-to-nearest-ties-even
-  // semantics. The idea is to calculate
-  // static_cast<double>(high_word) * 2^32 + static_cast<double>(low_word).
-  uint32_t low_word = static_cast<uint32_t>(*input & 0xffffffff);
-  uint32_t high_word = static_cast<uint32_t>(*input >> 32);
+int32_t float64_to_uint64_wrapper(double* input, uint64_t* output);
 
-  double shift = static_cast<double>(1ull << 32);
+int32_t int64_div_wrapper(int64_t* dst, int64_t* src);
 
-  double result = static_cast<double>(high_word);
-  result *= shift;
-  result += static_cast<double>(low_word);
-  *output = result;
+int32_t int64_mod_wrapper(int64_t* dst, int64_t* src);
 
-#else
-  *output = static_cast<double>(*input);
-#endif
-}
+int32_t uint64_div_wrapper(uint64_t* dst, uint64_t* src);
 
-static int32_t float32_to_int64_wrapper(float* input, int64_t* output) {
-  // We use "<" here to check the upper bound because of rounding problems: With
-  // "<=" some inputs would be considered within int64 range which are actually
-  // not within int64 range.
-  if (*input >= static_cast<float>(std::numeric_limits<int64_t>::min()) &&
-      *input < static_cast<float>(std::numeric_limits<int64_t>::max())) {
-    *output = static_cast<int64_t>(*input);
-    return 1;
-  }
-  return 0;
-}
+int32_t uint64_mod_wrapper(uint64_t* dst, uint64_t* src);
 
-static int32_t float32_to_uint64_wrapper(float* input, uint64_t* output) {
-  // We use "<" here to check the upper bound because of rounding problems: With
-  // "<=" some inputs would be considered within uint64 range which are actually
-  // not within uint64 range.
-  if (*input > -1.0 &&
-      *input < static_cast<float>(std::numeric_limits<uint64_t>::max())) {
-    *output = static_cast<uint64_t>(*input);
-    return 1;
-  }
-  return 0;
-}
+uint32_t word32_ctz_wrapper(uint32_t* input);
 
-static int32_t float64_to_int64_wrapper(double* input, int64_t* output) {
-  // We use "<" here to check the upper bound because of rounding problems: With
-  // "<=" some inputs would be considered within int64 range which are actually
-  // not within int64 range.
-  if (*input >= static_cast<double>(std::numeric_limits<int64_t>::min()) &&
-      *input < static_cast<double>(std::numeric_limits<int64_t>::max())) {
-    *output = static_cast<int64_t>(*input);
-    return 1;
-  }
-  return 0;
-}
+uint32_t word64_ctz_wrapper(uint64_t* input);
 
-static int32_t float64_to_uint64_wrapper(double* input, uint64_t* output) {
-  // We use "<" here to check the upper bound because of rounding problems: With
-  // "<=" some inputs would be considered within uint64 range which are actually
-  // not within uint64 range.
-  if (*input > -1.0 &&
-      *input < static_cast<double>(std::numeric_limits<uint64_t>::max())) {
-    *output = static_cast<uint64_t>(*input);
-    return 1;
-  }
-  return 0;
-}
+uint32_t word32_popcnt_wrapper(uint32_t* input);
 
-static int32_t int64_div_wrapper(int64_t* dst, int64_t* src) {
-  if (*src == 0) {
-    return 0;
-  }
-  if (*src == -1 && *dst == std::numeric_limits<int64_t>::min()) {
-    return -1;
-  }
-  *dst /= *src;
-  return 1;
-}
-
-static int32_t int64_mod_wrapper(int64_t* dst, int64_t* src) {
-  if (*src == 0) {
-    return 0;
-  }
-  *dst %= *src;
-  return 1;
-}
-
-static int32_t uint64_div_wrapper(uint64_t* dst, uint64_t* src) {
-  if (*src == 0) {
-    return 0;
-  }
-  *dst /= *src;
-  return 1;
-}
-
-static int32_t uint64_mod_wrapper(uint64_t* dst, uint64_t* src) {
-  if (*src == 0) {
-    return 0;
-  }
-  *dst %= *src;
-  return 1;
-}
+uint32_t word64_popcnt_wrapper(uint64_t* input);
 }  // namespace wasm
 }  // namespace internal
 }  // namespace v8
-
 #endif
diff --git a/src/wasm/wasm-function-name-table.cc b/src/wasm/wasm-function-name-table.cc
new file mode 100644
index 0000000..f082704
--- /dev/null
+++ b/src/wasm/wasm-function-name-table.cc
@@ -0,0 +1,74 @@
+// Copyright 2016 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/wasm/wasm-function-name-table.h"
+
+#include "src/wasm/wasm-module.h"
+
+namespace v8 {
+namespace internal {
+namespace wasm {
+
+// Build an array with all function names. If there are N functions in the
+// module, then the first (kIntSize * (N+1)) bytes are integer entries.
+// The first integer entry encodes the number of functions in the module.
+// The entries 1 to N contain the byte offsets, counted from the start of the
+// array, at which the names begin. After these N+1 integer entries, the second
+// part begins, which holds a concatenation of all function names.
+//
+// Returns undefined if the array length would not fit in an integer value.
+Handle<Object> BuildFunctionNamesTable(Isolate* isolate, WasmModule* module) {
+  uint64_t func_names_length = 0;
+  for (auto& func : module->functions) func_names_length += func.name_length;
+  int num_funcs_int = static_cast<int>(module->functions.size());
+  int current_offset = (num_funcs_int + 1) * kIntSize;
+  uint64_t total_array_length = current_offset + func_names_length;
+  int total_array_length_int = static_cast<int>(total_array_length);
+  // Check for overflow. Just skip function names if it happens.
+  if (total_array_length_int != total_array_length || num_funcs_int < 0 ||
+      num_funcs_int != module->functions.size())
+    return isolate->factory()->undefined_value();
+  Handle<ByteArray> func_names_array =
+      isolate->factory()->NewByteArray(total_array_length_int, TENURED);
+  if (func_names_array.is_null()) return isolate->factory()->undefined_value();
+  func_names_array->set_int(0, num_funcs_int);
+  int func_index = 0;
+  for (WasmFunction& fun : module->functions) {
+    WasmName name = module->GetNameOrNull(&fun);
+    func_names_array->copy_in(current_offset,
+                              reinterpret_cast<const byte*>(name.start()),
+                              name.length());
+    func_names_array->set_int(func_index + 1, current_offset);
+    current_offset += name.length();
+    ++func_index;
+  }
+  return func_names_array;
+}
+
+Handle<Object> GetWasmFunctionNameFromTable(Handle<ByteArray> func_names_array,
+                                            uint32_t func_index) {
+  uint32_t num_funcs = static_cast<uint32_t>(func_names_array->get_int(0));
+  DCHECK(static_cast<int>(num_funcs) >= 0);  // Entry 0 is the function count.
+  auto undefined = [&func_names_array]() -> Handle<Object> {
+    return func_names_array->GetIsolate()->factory()->undefined_value();
+  };
+  if (func_index >= num_funcs) return undefined();  // Invalid index.
+  int offset = func_names_array->get_int(func_index + 1);  // Start of name.
+  int next_offset = func_index == num_funcs - 1
+                        ? func_names_array->length()  // Last name: to the end.
+                        : func_names_array->get_int(func_index + 2);
+  ScopedVector<byte> buffer(next_offset - offset);
+  func_names_array->copy_out(offset, buffer.start(), next_offset - offset);
+  if (!unibrow::Utf8::Validate(buffer.start(), buffer.length())) {
+    return undefined();  // Names that are not valid UTF-8 are not exposed.
+  }
+  MaybeHandle<Object> maybe_name =
+      func_names_array->GetIsolate()->factory()->NewStringFromUtf8(
+          Vector<const char>::cast(buffer));
+  return maybe_name.is_null() ? undefined() : maybe_name.ToHandleChecked();
+}
+
+}  // namespace wasm
+}  // namespace internal
+}  // namespace v8
diff --git a/src/wasm/wasm-function-name-table.h b/src/wasm/wasm-function-name-table.h
new file mode 100644
index 0000000..1a71372
--- /dev/null
+++ b/src/wasm/wasm-function-name-table.h
@@ -0,0 +1,30 @@
+// Copyright 2016 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_WASM_FUNCTION_NAME_TABLE_H_
+#define V8_WASM_FUNCTION_NAME_TABLE_H_
+
+#include "src/handles.h"
+#include "src/objects.h"
+
+namespace v8 {
+namespace internal {
+namespace wasm {
+
+// Forward declarations for some WASM data structures.
+struct WasmModule;
+
+// Encode all function names of the WasmModule into one ByteArray.
+Handle<Object> BuildFunctionNamesTable(Isolate* isolate, WasmModule* module);
+
+// Extract the function name for the given func_index from the names table.
+// Returns undefined if the index is invalid or the name is not valid UTF-8.
+Handle<Object> GetWasmFunctionNameFromTable(Handle<ByteArray> wasm_names_table,
+                                            uint32_t func_index);
+
+}  // namespace wasm
+}  // namespace internal
+}  // namespace v8
+
+#endif  // V8_WASM_FUNCTION_NAME_TABLE_H_
diff --git a/src/wasm/wasm-js.cc b/src/wasm/wasm-js.cc
index 83009d7..8a4b2ff 100644
--- a/src/wasm/wasm-js.cc
+++ b/src/wasm/wasm-js.cc
@@ -34,7 +34,6 @@
   size_t size() { return static_cast<size_t>(end - start); }
 };
 
-
 RawBuffer GetRawBufferArgument(
     ErrorThrower& thrower, const v8::FunctionCallbackInfo<v8::Value>& args) {
   if (args.Length() < 1) {
@@ -77,7 +76,6 @@
   return {start, end};
 }
 
-
 void VerifyModule(const v8::FunctionCallbackInfo<v8::Value>& args) {
   HandleScope scope(args.GetIsolate());
   i::Isolate* isolate = reinterpret_cast<i::Isolate*>(args.GetIsolate());
@@ -98,7 +96,6 @@
   if (result.val) delete result.val;
 }
 
-
 void VerifyFunction(const v8::FunctionCallbackInfo<v8::Value>& args) {
   HandleScope scope(args.GetIsolate());
   i::Isolate* isolate = reinterpret_cast<i::Isolate*>(args.GetIsolate());
@@ -134,6 +131,11 @@
     return nullptr;
   }
 
+  if (info->scope()->declarations()->length() == 0) {
+    thrower->Error("Asm.js validation failed: no declarations in scope");
+    return nullptr;
+  }
+
   info->set_literal(
       info->scope()->declarations()->at(0)->AsFunctionDeclaration()->fun());
 
@@ -180,10 +182,10 @@
     thrower->Failed("", result);
   } else {
     // Success. Instantiate the module and return the object.
-    i::Handle<i::JSObject> ffi = i::Handle<i::JSObject>::null();
+    i::Handle<i::JSReceiver> ffi = i::Handle<i::JSObject>::null();
     if (args.Length() > 1 && args[1]->IsObject()) {
       Local<Object> obj = Local<Object>::Cast(args[1]);
-      ffi = i::Handle<i::JSObject>::cast(v8::Utils::OpenHandle(*obj));
+      ffi = i::Handle<i::JSReceiver>::cast(v8::Utils::OpenHandle(*obj));
     }
 
     i::MaybeHandle<i::JSObject> object =
@@ -197,7 +199,6 @@
   if (result.val) delete result.val;
 }
 
-
 void InstantiateModuleFromAsm(const v8::FunctionCallbackInfo<v8::Value>& args) {
   HandleScope scope(args.GetIsolate());
   i::Isolate* isolate = reinterpret_cast<i::Isolate*>(args.GetIsolate());
@@ -229,7 +230,6 @@
                           internal::wasm::kAsmJsOrigin);
 }
 
-
 void InstantiateModule(const v8::FunctionCallbackInfo<v8::Value>& args) {
   HandleScope scope(args.GetIsolate());
   i::Isolate* isolate = reinterpret_cast<i::Isolate*>(args.GetIsolate());
@@ -243,7 +243,6 @@
 }
 }  // namespace
 
-
 // TODO(titzer): we use the API to create the function template because the
 // internal guts are too ugly to replicate here.
 static i::Handle<i::FunctionTemplateInfo> NewTemplate(i::Isolate* i_isolate,
@@ -253,13 +252,11 @@
   return v8::Utils::OpenHandle(*local);
 }
 
-
 namespace internal {
 static Handle<String> v8_str(Isolate* isolate, const char* str) {
   return isolate->factory()->NewStringFromAsciiChecked(str);
 }
 
-
 static void InstallFunc(Isolate* isolate, Handle<JSObject> object,
                         const char* str, FunctionCallback func) {
   Handle<String> name = v8_str(isolate, str);
@@ -271,7 +268,6 @@
   JSObject::AddProperty(object, name, function, attributes);
 }
 
-
 void WasmJs::Install(Isolate* isolate, Handle<JSGlobalObject> global) {
   // Setup wasm function map.
   Handle<Context> context(global->native_context(), isolate);
@@ -294,8 +290,16 @@
   InstallFunc(isolate, wasm_object, "instantiateModule", InstantiateModule);
   InstallFunc(isolate, wasm_object, "instantiateModuleFromAsm",
               InstantiateModuleFromAsm);
-}
 
+  {
+    // Add the Wasm.experimentalVersion property.
+    Handle<String> name = v8_str(isolate, "experimentalVersion");
+    PropertyAttributes attributes =
+        static_cast<PropertyAttributes>(DONT_DELETE | READ_ONLY);
+    Handle<Smi> value = Handle<Smi>(Smi::FromInt(wasm::kWasmVersion), isolate);
+    JSObject::AddProperty(wasm_object, name, value, attributes);
+  }
+}
 
 void WasmJs::InstallWasmFunctionMap(Isolate* isolate, Handle<Context> context) {
   if (!context->get(Context::WASM_FUNCTION_MAP_INDEX)->IsMap()) {
diff --git a/src/wasm/wasm-macro-gen.h b/src/wasm/wasm-macro-gen.h
index d9199e8..83ac86a 100644
--- a/src/wasm/wasm-macro-gen.h
+++ b/src/wasm/wasm-macro-gen.h
@@ -7,6 +7,8 @@
 
 #include "src/wasm/wasm-opcodes.h"
 
+#include "src/zone-containers.h"
+
 #define U32_LE(v)                                    \
   static_cast<byte>(v), static_cast<byte>((v) >> 8), \
       static_cast<byte>((v) >> 16), static_cast<byte>((v) >> 24)
@@ -58,27 +60,38 @@
 //------------------------------------------------------------------------------
 #define WASM_NOP kExprNop
 
-#define WASM_BLOCK(count, ...) kExprBlock, static_cast<byte>(count), __VA_ARGS__
-#define WASM_INFINITE_LOOP kExprLoop, 1, kExprBr, 0, kExprNop
-#define WASM_LOOP(count, ...) kExprLoop, static_cast<byte>(count), __VA_ARGS__
-#define WASM_IF(cond, tstmt) kExprIf, cond, tstmt
-#define WASM_IF_ELSE(cond, tstmt, fstmt) kExprIfElse, cond, tstmt, fstmt
-#define WASM_SELECT(cond, tval, fval) kExprSelect, cond, tval, fval
-#define WASM_BR(depth) kExprBr, static_cast<byte>(depth), kExprNop
+#define ARITY_0 0
+#define ARITY_1 1
+#define DEPTH_0 0
+#define DEPTH_1 1
+
+#define WASM_BLOCK(count, ...) kExprBlock, __VA_ARGS__, kExprEnd
+#define WASM_INFINITE_LOOP kExprLoop, kExprBr, ARITY_0, DEPTH_0, kExprEnd
+#define WASM_LOOP(count, ...) kExprLoop, __VA_ARGS__, kExprEnd
+#define WASM_IF(cond, tstmt) cond, kExprIf, tstmt, kExprEnd
+#define WASM_IF_ELSE(cond, tstmt, fstmt) \
+  cond, kExprIf, tstmt, kExprElse, fstmt, kExprEnd
+#define WASM_SELECT(tval, fval, cond) tval, fval, cond, kExprSelect
+#define WASM_BR(depth) kExprBr, ARITY_0, static_cast<byte>(depth)
 #define WASM_BR_IF(depth, cond) \
-  kExprBrIf, static_cast<byte>(depth), kExprNop, cond
-#define WASM_BRV(depth, val) kExprBr, static_cast<byte>(depth), val
+  cond, kExprBrIf, ARITY_0, static_cast<byte>(depth)
+#define WASM_BRV(depth, val) val, kExprBr, ARITY_1, static_cast<byte>(depth)
 #define WASM_BRV_IF(depth, val, cond) \
-  kExprBrIf, static_cast<byte>(depth), val, cond
-#define WASM_BREAK(depth) kExprBr, static_cast<byte>(depth + 1), kExprNop
-#define WASM_CONTINUE(depth) kExprBr, static_cast<byte>(depth), kExprNop
-#define WASM_BREAKV(depth, val) kExprBr, static_cast<byte>(depth + 1), val
-#define WASM_RETURN0 kExprReturn
-#define WASM_RETURN(...) kExprReturn, __VA_ARGS__
+  val, cond, kExprBrIf, ARITY_1, static_cast<byte>(depth)
+#define WASM_BREAK(depth) kExprBr, ARITY_0, static_cast<byte>(depth + 1)
+#define WASM_CONTINUE(depth) kExprBr, ARITY_0, static_cast<byte>(depth)
+#define WASM_BREAKV(depth, val) \
+  val, kExprBr, ARITY_1, static_cast<byte>(depth + 1)
+#define WASM_RETURN0 kExprReturn, ARITY_0
+#define WASM_RETURN1(val) val, kExprReturn, ARITY_1
+#define WASM_RETURNN(count, ...) __VA_ARGS__, kExprReturn, count
 #define WASM_UNREACHABLE kExprUnreachable
 
 #define WASM_BR_TABLE(key, count, ...) \
-  kExprBrTable, U32V_1(count), __VA_ARGS__, key
+  key, kExprBrTable, ARITY_0, U32V_1(count), __VA_ARGS__
+
+#define WASM_BR_TABLEV(val, key, count, ...) \
+  val, key, kExprBrTable, ARITY_1, U32V_1(count), __VA_ARGS__
 
 #define WASM_CASE(x) static_cast<byte>(x), static_cast<byte>(x >> 8)
 #define WASM_CASE_BR(x) static_cast<byte>(x), static_cast<byte>(0x80 | (x) >> 8)
@@ -119,8 +132,12 @@
 
 // A helper for encoding local declarations prepended to the body of a
 // function.
+// TODO(titzer): move this to an appropriate header.
 class LocalDeclEncoder {
  public:
+  explicit LocalDeclEncoder(Zone* zone, FunctionSig* s = nullptr)
+      : sig(s), local_decls(zone), total(0) {}
+
   // Prepend local declarations by creating a new buffer and copying data
   // over. The new buffer must be delete[]'d by the caller.
   void Prepend(const byte** start, const byte** end) const {
@@ -146,19 +163,16 @@
 
   // Add locals declarations to this helper. Return the index of the newly added
   // local(s), with an optional adjustment for the parameters.
-  uint32_t AddLocals(uint32_t count, LocalType type,
-                     FunctionSig* sig = nullptr) {
-    if (count == 0) {
-      return static_cast<uint32_t>((sig ? sig->parameter_count() : 0) +
-                                   local_decls.size());
-    }
-    size_t pos = local_decls.size();
+  uint32_t AddLocals(uint32_t count, LocalType type) {
+    uint32_t result =
+        static_cast<uint32_t>(total + (sig ? sig->parameter_count() : 0));
+    total += count;
     if (local_decls.size() > 0 && local_decls.back().second == type) {
       count += local_decls.back().first;
       local_decls.pop_back();
     }
     local_decls.push_back(std::pair<uint32_t, LocalType>(count, type));
-    return static_cast<uint32_t>(pos + (sig ? sig->parameter_count() : 0));
+    return result;
   }
 
   size_t Size() const {
@@ -167,8 +181,14 @@
     return size;
   }
 
+  bool has_sig() const { return sig != nullptr; }
+  FunctionSig* get_sig() const { return sig; }
+  void set_sig(FunctionSig* s) { sig = s; }
+
  private:
-  std::vector<std::pair<uint32_t, LocalType>> local_decls;
+  FunctionSig* sig;
+  ZoneVector<std::pair<uint32_t, LocalType>> local_decls;
+  size_t total;
 
   size_t SizeofUint32v(uint32_t val) const {
     size_t size = 1;
@@ -322,193 +342,251 @@
       static_cast<byte>(bit_cast<uint64_t>(val) >> 48),      \
       static_cast<byte>(bit_cast<uint64_t>(val) >> 56)
 #define WASM_GET_LOCAL(index) kExprGetLocal, static_cast<byte>(index)
-#define WASM_SET_LOCAL(index, val) kExprSetLocal, static_cast<byte>(index), val
+#define WASM_SET_LOCAL(index, val) val, kExprSetLocal, static_cast<byte>(index)
 #define WASM_LOAD_GLOBAL(index) kExprLoadGlobal, static_cast<byte>(index)
 #define WASM_STORE_GLOBAL(index, val) \
-  kExprStoreGlobal, static_cast<byte>(index), val
-#define WASM_LOAD_MEM(type, index)                                      \
-  static_cast<byte>(                                                    \
-      v8::internal::wasm::WasmOpcodes::LoadStoreOpcodeOf(type, false)), \
-      ZERO_ALIGNMENT, ZERO_OFFSET, index
-#define WASM_STORE_MEM(type, index, val)                               \
-  static_cast<byte>(                                                   \
-      v8::internal::wasm::WasmOpcodes::LoadStoreOpcodeOf(type, true)), \
-      ZERO_ALIGNMENT, ZERO_OFFSET, index, val
-#define WASM_LOAD_MEM_OFFSET(type, offset, index)                       \
-  static_cast<byte>(                                                    \
-      v8::internal::wasm::WasmOpcodes::LoadStoreOpcodeOf(type, false)), \
-      ZERO_ALIGNMENT, U32V_1(offset), index
-#define WASM_STORE_MEM_OFFSET(type, offset, index, val)                \
-  static_cast<byte>(                                                   \
-      v8::internal::wasm::WasmOpcodes::LoadStoreOpcodeOf(type, true)), \
-      ZERO_ALIGNMENT, U32V_1(offset), index, val
-#define WASM_CALL_FUNCTION(index, ...) \
-  kExprCallFunction, static_cast<byte>(index), __VA_ARGS__
-#define WASM_CALL_IMPORT(index, ...) \
-  kExprCallImport, static_cast<byte>(index), __VA_ARGS__
-#define WASM_CALL_INDIRECT(index, func, ...) \
-  kExprCallIndirect, static_cast<byte>(index), func, __VA_ARGS__
-#define WASM_CALL_FUNCTION0(index) kExprCallFunction, static_cast<byte>(index)
-#define WASM_CALL_IMPORT0(index) kExprCallImport, static_cast<byte>(index)
+  val, kExprStoreGlobal, static_cast<byte>(index)
+#define WASM_LOAD_MEM(type, index)                                             \
+  index, static_cast<byte>(                                                    \
+             v8::internal::wasm::WasmOpcodes::LoadStoreOpcodeOf(type, false)), \
+      ZERO_ALIGNMENT, ZERO_OFFSET
+#define WASM_STORE_MEM(type, index, val)                                   \
+  index, val,                                                              \
+      static_cast<byte>(                                                   \
+          v8::internal::wasm::WasmOpcodes::LoadStoreOpcodeOf(type, true)), \
+      ZERO_ALIGNMENT, ZERO_OFFSET
+#define WASM_LOAD_MEM_OFFSET(type, offset, index)                              \
+  index, static_cast<byte>(                                                    \
+             v8::internal::wasm::WasmOpcodes::LoadStoreOpcodeOf(type, false)), \
+      ZERO_ALIGNMENT, static_cast<byte>(offset)
+#define WASM_STORE_MEM_OFFSET(type, offset, index, val)                    \
+  index, val,                                                              \
+      static_cast<byte>(                                                   \
+          v8::internal::wasm::WasmOpcodes::LoadStoreOpcodeOf(type, true)), \
+      ZERO_ALIGNMENT, static_cast<byte>(offset)
+
+#define WASM_CALL_FUNCTION0(index) \
+  kExprCallFunction, 0, static_cast<byte>(index)
+#define WASM_CALL_FUNCTION1(index, a) \
+  a, kExprCallFunction, 1, static_cast<byte>(index)
+#define WASM_CALL_FUNCTION2(index, a, b) \
+  a, b, kExprCallFunction, 2, static_cast<byte>(index)
+#define WASM_CALL_FUNCTION3(index, a, b, c) \
+  a, b, c, kExprCallFunction, 3, static_cast<byte>(index)
+#define WASM_CALL_FUNCTION4(index, a, b, c, d) \
+  a, b, c, d, kExprCallFunction, 4, static_cast<byte>(index)
+#define WASM_CALL_FUNCTION5(index, a, b, c, d, e) \
+  a, b, c, d, e, kExprCallFunction, 5, static_cast<byte>(index)
+#define WASM_CALL_FUNCTIONN(arity, index, ...) \
+  __VA_ARGS__, kExprCallFunction, arity, static_cast<byte>(index)
+
+#define WASM_CALL_IMPORT0(index) kExprCallImport, 0, static_cast<byte>(index)
+#define WASM_CALL_IMPORT1(index, a) \
+  a, kExprCallImport, 1, static_cast<byte>(index)
+#define WASM_CALL_IMPORT2(index, a, b) \
+  a, b, kExprCallImport, 2, static_cast<byte>(index)
+#define WASM_CALL_IMPORT3(index, a, b, c) \
+  a, b, c, kExprCallImport, 3, static_cast<byte>(index)
+#define WASM_CALL_IMPORT4(index, a, b, c, d) \
+  a, b, c, d, kExprCallImport, 4, static_cast<byte>(index)
+#define WASM_CALL_IMPORT5(index, a, b, c, d, e) \
+  a, b, c, d, e, kExprCallImport, 5, static_cast<byte>(index)
+#define WASM_CALL_IMPORTN(arity, index, ...) \
+  __VA_ARGS__, kExprCallImport, U32V_1(arity), static_cast<byte>(index)
+
 #define WASM_CALL_INDIRECT0(index, func) \
-  kExprCallIndirect, static_cast<byte>(index), func
-#define WASM_NOT(x) kExprI32Eqz, x
+  func, kExprCallIndirect, 0, static_cast<byte>(index)
+#define WASM_CALL_INDIRECT1(index, func, a) \
+  func, a, kExprCallIndirect, 1, static_cast<byte>(index)
+#define WASM_CALL_INDIRECT2(index, func, a, b) \
+  func, a, b, kExprCallIndirect, 2, static_cast<byte>(index)
+#define WASM_CALL_INDIRECT3(index, func, a, b, c) \
+  func, a, b, c, kExprCallIndirect, 3, static_cast<byte>(index)
+#define WASM_CALL_INDIRECT4(index, func, a, b, c, d) \
+  func, a, b, c, d, kExprCallIndirect, 4, static_cast<byte>(index)
+#define WASM_CALL_INDIRECT5(index, func, a, b, c, d, e) \
+  func, a, b, c, d, e, kExprCallIndirect, 5, static_cast<byte>(index)
+#define WASM_CALL_INDIRECTN(arity, index, func, ...) \
+  func, __VA_ARGS__, kExprCallIndirect, U32V_1(arity), static_cast<byte>(index)
+
+#define WASM_NOT(x) x, kExprI32Eqz
+#define WASM_SEQ(...) __VA_ARGS__
 
 //------------------------------------------------------------------------------
 // Constructs that are composed of multiple bytecodes.
 //------------------------------------------------------------------------------
-#define WASM_WHILE(x, y) kExprLoop, 1, kExprIf, x, kExprBr, 0, y
-#define WASM_INC_LOCAL(index)                                          \
-  kExprSetLocal, static_cast<byte>(index), kExprI32Add, kExprGetLocal, \
-      static_cast<byte>(index), kExprI8Const, 1
-#define WASM_INC_LOCAL_BY(index, count)                                \
-  kExprSetLocal, static_cast<byte>(index), kExprI32Add, kExprGetLocal, \
-      static_cast<byte>(index), kExprI8Const, static_cast<int8_t>(count)
-
-#define WASM_UNOP(opcode, x) static_cast<byte>(opcode), x
-#define WASM_BINOP(opcode, x, y) static_cast<byte>(opcode), x, y
+#define WASM_WHILE(x, y) \
+  kExprLoop, x, kExprIf, y, kExprBr, ARITY_1, DEPTH_1, kExprEnd, kExprEnd
+#define WASM_INC_LOCAL(index)                                            \
+  kExprGetLocal, static_cast<byte>(index), kExprI8Const, 1, kExprI32Add, \
+      kExprSetLocal, static_cast<byte>(index)
+#define WASM_INC_LOCAL_BY(index, count)                     \
+  kExprGetLocal, static_cast<byte>(index), kExprI8Const,    \
+      static_cast<byte>(count), kExprI32Add, kExprSetLocal, \
+      static_cast<byte>(index)
+#define WASM_UNOP(opcode, x) x, static_cast<byte>(opcode)
+#define WASM_BINOP(opcode, x, y) x, y, static_cast<byte>(opcode)
 
 //------------------------------------------------------------------------------
 // Int32 operations
 //------------------------------------------------------------------------------
-#define WASM_I32_ADD(x, y) kExprI32Add, x, y
-#define WASM_I32_SUB(x, y) kExprI32Sub, x, y
-#define WASM_I32_MUL(x, y) kExprI32Mul, x, y
-#define WASM_I32_DIVS(x, y) kExprI32DivS, x, y
-#define WASM_I32_DIVU(x, y) kExprI32DivU, x, y
-#define WASM_I32_REMS(x, y) kExprI32RemS, x, y
-#define WASM_I32_REMU(x, y) kExprI32RemU, x, y
-#define WASM_I32_AND(x, y) kExprI32And, x, y
-#define WASM_I32_IOR(x, y) kExprI32Ior, x, y
-#define WASM_I32_XOR(x, y) kExprI32Xor, x, y
-#define WASM_I32_SHL(x, y) kExprI32Shl, x, y
-#define WASM_I32_SHR(x, y) kExprI32ShrU, x, y
-#define WASM_I32_SAR(x, y) kExprI32ShrS, x, y
-#define WASM_I32_ROR(x, y) kExprI32Ror, x, y
-#define WASM_I32_ROL(x, y) kExprI32Rol, x, y
-#define WASM_I32_EQ(x, y) kExprI32Eq, x, y
-#define WASM_I32_NE(x, y) kExprI32Ne, x, y
-#define WASM_I32_LTS(x, y) kExprI32LtS, x, y
-#define WASM_I32_LES(x, y) kExprI32LeS, x, y
-#define WASM_I32_LTU(x, y) kExprI32LtU, x, y
-#define WASM_I32_LEU(x, y) kExprI32LeU, x, y
-#define WASM_I32_GTS(x, y) kExprI32GtS, x, y
-#define WASM_I32_GES(x, y) kExprI32GeS, x, y
-#define WASM_I32_GTU(x, y) kExprI32GtU, x, y
-#define WASM_I32_GEU(x, y) kExprI32GeU, x, y
-#define WASM_I32_CLZ(x) kExprI32Clz, x
-#define WASM_I32_CTZ(x) kExprI32Ctz, x
-#define WASM_I32_POPCNT(x) kExprI32Popcnt, x
-#define WASM_I32_EQZ(x) kExprI32Eqz, x
+#define WASM_I32_ADD(x, y) x, y, kExprI32Add
+#define WASM_I32_SUB(x, y) x, y, kExprI32Sub
+#define WASM_I32_MUL(x, y) x, y, kExprI32Mul
+#define WASM_I32_DIVS(x, y) x, y, kExprI32DivS
+#define WASM_I32_DIVU(x, y) x, y, kExprI32DivU
+#define WASM_I32_REMS(x, y) x, y, kExprI32RemS
+#define WASM_I32_REMU(x, y) x, y, kExprI32RemU
+#define WASM_I32_AND(x, y) x, y, kExprI32And
+#define WASM_I32_IOR(x, y) x, y, kExprI32Ior
+#define WASM_I32_XOR(x, y) x, y, kExprI32Xor
+#define WASM_I32_SHL(x, y) x, y, kExprI32Shl
+#define WASM_I32_SHR(x, y) x, y, kExprI32ShrU
+#define WASM_I32_SAR(x, y) x, y, kExprI32ShrS
+#define WASM_I32_ROR(x, y) x, y, kExprI32Ror
+#define WASM_I32_ROL(x, y) x, y, kExprI32Rol
+#define WASM_I32_EQ(x, y) x, y, kExprI32Eq
+#define WASM_I32_NE(x, y) x, y, kExprI32Ne
+#define WASM_I32_LTS(x, y) x, y, kExprI32LtS
+#define WASM_I32_LES(x, y) x, y, kExprI32LeS
+#define WASM_I32_LTU(x, y) x, y, kExprI32LtU
+#define WASM_I32_LEU(x, y) x, y, kExprI32LeU
+#define WASM_I32_GTS(x, y) x, y, kExprI32GtS
+#define WASM_I32_GES(x, y) x, y, kExprI32GeS
+#define WASM_I32_GTU(x, y) x, y, kExprI32GtU
+#define WASM_I32_GEU(x, y) x, y, kExprI32GeU
+#define WASM_I32_CLZ(x) x, kExprI32Clz
+#define WASM_I32_CTZ(x) x, kExprI32Ctz
+#define WASM_I32_POPCNT(x) x, kExprI32Popcnt
+#define WASM_I32_EQZ(x) x, kExprI32Eqz
 
 //------------------------------------------------------------------------------
 // Int64 operations
 //------------------------------------------------------------------------------
-#define WASM_I64_ADD(x, y) kExprI64Add, x, y
-#define WASM_I64_SUB(x, y) kExprI64Sub, x, y
-#define WASM_I64_MUL(x, y) kExprI64Mul, x, y
-#define WASM_I64_DIVS(x, y) kExprI64DivS, x, y
-#define WASM_I64_DIVU(x, y) kExprI64DivU, x, y
-#define WASM_I64_REMS(x, y) kExprI64RemS, x, y
-#define WASM_I64_REMU(x, y) kExprI64RemU, x, y
-#define WASM_I64_AND(x, y) kExprI64And, x, y
-#define WASM_I64_IOR(x, y) kExprI64Ior, x, y
-#define WASM_I64_XOR(x, y) kExprI64Xor, x, y
-#define WASM_I64_SHL(x, y) kExprI64Shl, x, y
-#define WASM_I64_SHR(x, y) kExprI64ShrU, x, y
-#define WASM_I64_SAR(x, y) kExprI64ShrS, x, y
-#define WASM_I64_ROR(x, y) kExprI64Ror, x, y
-#define WASM_I64_ROL(x, y) kExprI64Rol, x, y
-#define WASM_I64_EQ(x, y) kExprI64Eq, x, y
-#define WASM_I64_NE(x, y) kExprI64Ne, x, y
-#define WASM_I64_LTS(x, y) kExprI64LtS, x, y
-#define WASM_I64_LES(x, y) kExprI64LeS, x, y
-#define WASM_I64_LTU(x, y) kExprI64LtU, x, y
-#define WASM_I64_LEU(x, y) kExprI64LeU, x, y
-#define WASM_I64_GTS(x, y) kExprI64GtS, x, y
-#define WASM_I64_GES(x, y) kExprI64GeS, x, y
-#define WASM_I64_GTU(x, y) kExprI64GtU, x, y
-#define WASM_I64_GEU(x, y) kExprI64GeU, x, y
-#define WASM_I64_CLZ(x) kExprI64Clz, x
-#define WASM_I64_CTZ(x) kExprI64Ctz, x
-#define WASM_I64_POPCNT(x) kExprI64Popcnt, x
-#define WASM_I64_EQZ(x) kExprI64Eqz, x
+#define WASM_I64_ADD(x, y) x, y, kExprI64Add
+#define WASM_I64_SUB(x, y) x, y, kExprI64Sub
+#define WASM_I64_MUL(x, y) x, y, kExprI64Mul
+#define WASM_I64_DIVS(x, y) x, y, kExprI64DivS
+#define WASM_I64_DIVU(x, y) x, y, kExprI64DivU
+#define WASM_I64_REMS(x, y) x, y, kExprI64RemS
+#define WASM_I64_REMU(x, y) x, y, kExprI64RemU
+#define WASM_I64_AND(x, y) x, y, kExprI64And
+#define WASM_I64_IOR(x, y) x, y, kExprI64Ior
+#define WASM_I64_XOR(x, y) x, y, kExprI64Xor
+#define WASM_I64_SHL(x, y) x, y, kExprI64Shl
+#define WASM_I64_SHR(x, y) x, y, kExprI64ShrU
+#define WASM_I64_SAR(x, y) x, y, kExprI64ShrS
+#define WASM_I64_ROR(x, y) x, y, kExprI64Ror
+#define WASM_I64_ROL(x, y) x, y, kExprI64Rol
+#define WASM_I64_EQ(x, y) x, y, kExprI64Eq
+#define WASM_I64_NE(x, y) x, y, kExprI64Ne
+#define WASM_I64_LTS(x, y) x, y, kExprI64LtS
+#define WASM_I64_LES(x, y) x, y, kExprI64LeS
+#define WASM_I64_LTU(x, y) x, y, kExprI64LtU
+#define WASM_I64_LEU(x, y) x, y, kExprI64LeU
+#define WASM_I64_GTS(x, y) x, y, kExprI64GtS
+#define WASM_I64_GES(x, y) x, y, kExprI64GeS
+#define WASM_I64_GTU(x, y) x, y, kExprI64GtU
+#define WASM_I64_GEU(x, y) x, y, kExprI64GeU
+#define WASM_I64_CLZ(x) x, kExprI64Clz
+#define WASM_I64_CTZ(x) x, kExprI64Ctz
+#define WASM_I64_POPCNT(x) x, kExprI64Popcnt
+#define WASM_I64_EQZ(x) x, kExprI64Eqz
 
 //------------------------------------------------------------------------------
 // Float32 operations
 //------------------------------------------------------------------------------
-#define WASM_F32_ADD(x, y) kExprF32Add, x, y
-#define WASM_F32_SUB(x, y) kExprF32Sub, x, y
-#define WASM_F32_MUL(x, y) kExprF32Mul, x, y
-#define WASM_F32_DIV(x, y) kExprF32Div, x, y
-#define WASM_F32_MIN(x, y) kExprF32Min, x, y
-#define WASM_F32_MAX(x, y) kExprF32Max, x, y
-#define WASM_F32_ABS(x) kExprF32Abs, x
-#define WASM_F32_NEG(x) kExprF32Neg, x
-#define WASM_F32_COPYSIGN(x, y) kExprF32CopySign, x, y
-#define WASM_F32_CEIL(x) kExprF32Ceil, x
-#define WASM_F32_FLOOR(x) kExprF32Floor, x
-#define WASM_F32_TRUNC(x) kExprF32Trunc, x
-#define WASM_F32_NEARESTINT(x) kExprF32NearestInt, x
-#define WASM_F32_SQRT(x) kExprF32Sqrt, x
-#define WASM_F32_EQ(x, y) kExprF32Eq, x, y
-#define WASM_F32_NE(x, y) kExprF32Ne, x, y
-#define WASM_F32_LT(x, y) kExprF32Lt, x, y
-#define WASM_F32_LE(x, y) kExprF32Le, x, y
-#define WASM_F32_GT(x, y) kExprF32Gt, x, y
-#define WASM_F32_GE(x, y) kExprF32Ge, x, y
+#define WASM_F32_ADD(x, y) x, y, kExprF32Add
+#define WASM_F32_SUB(x, y) x, y, kExprF32Sub
+#define WASM_F32_MUL(x, y) x, y, kExprF32Mul
+#define WASM_F32_DIV(x, y) x, y, kExprF32Div
+#define WASM_F32_MIN(x, y) x, y, kExprF32Min
+#define WASM_F32_MAX(x, y) x, y, kExprF32Max
+#define WASM_F32_ABS(x) x, kExprF32Abs
+#define WASM_F32_NEG(x) x, kExprF32Neg
+#define WASM_F32_COPYSIGN(x, y) x, y, kExprF32CopySign
+#define WASM_F32_CEIL(x) x, kExprF32Ceil
+#define WASM_F32_FLOOR(x) x, kExprF32Floor
+#define WASM_F32_TRUNC(x) x, kExprF32Trunc
+#define WASM_F32_NEARESTINT(x) x, kExprF32NearestInt
+#define WASM_F32_SQRT(x) x, kExprF32Sqrt
+#define WASM_F32_EQ(x, y) x, y, kExprF32Eq
+#define WASM_F32_NE(x, y) x, y, kExprF32Ne
+#define WASM_F32_LT(x, y) x, y, kExprF32Lt
+#define WASM_F32_LE(x, y) x, y, kExprF32Le
+#define WASM_F32_GT(x, y) x, y, kExprF32Gt
+#define WASM_F32_GE(x, y) x, y, kExprF32Ge
 
 //------------------------------------------------------------------------------
 // Float64 operations
 //------------------------------------------------------------------------------
-#define WASM_F64_ADD(x, y) kExprF64Add, x, y
-#define WASM_F64_SUB(x, y) kExprF64Sub, x, y
-#define WASM_F64_MUL(x, y) kExprF64Mul, x, y
-#define WASM_F64_DIV(x, y) kExprF64Div, x, y
-#define WASM_F64_MIN(x, y) kExprF64Min, x, y
-#define WASM_F64_MAX(x, y) kExprF64Max, x, y
-#define WASM_F64_ABS(x) kExprF64Abs, x
-#define WASM_F64_NEG(x) kExprF64Neg, x
-#define WASM_F64_COPYSIGN(x, y) kExprF64CopySign, x, y
-#define WASM_F64_CEIL(x) kExprF64Ceil, x
-#define WASM_F64_FLOOR(x) kExprF64Floor, x
-#define WASM_F64_TRUNC(x) kExprF64Trunc, x
-#define WASM_F64_NEARESTINT(x) kExprF64NearestInt, x
-#define WASM_F64_SQRT(x) kExprF64Sqrt, x
-#define WASM_F64_EQ(x, y) kExprF64Eq, x, y
-#define WASM_F64_NE(x, y) kExprF64Ne, x, y
-#define WASM_F64_LT(x, y) kExprF64Lt, x, y
-#define WASM_F64_LE(x, y) kExprF64Le, x, y
-#define WASM_F64_GT(x, y) kExprF64Gt, x, y
-#define WASM_F64_GE(x, y) kExprF64Ge, x, y
+#define WASM_F64_ADD(x, y) x, y, kExprF64Add
+#define WASM_F64_SUB(x, y) x, y, kExprF64Sub
+#define WASM_F64_MUL(x, y) x, y, kExprF64Mul
+#define WASM_F64_DIV(x, y) x, y, kExprF64Div
+#define WASM_F64_MIN(x, y) x, y, kExprF64Min
+#define WASM_F64_MAX(x, y) x, y, kExprF64Max
+#define WASM_F64_ABS(x) x, kExprF64Abs
+#define WASM_F64_NEG(x) x, kExprF64Neg
+#define WASM_F64_COPYSIGN(x, y) x, y, kExprF64CopySign
+#define WASM_F64_CEIL(x) x, kExprF64Ceil
+#define WASM_F64_FLOOR(x) x, kExprF64Floor
+#define WASM_F64_TRUNC(x) x, kExprF64Trunc
+#define WASM_F64_NEARESTINT(x) x, kExprF64NearestInt
+#define WASM_F64_SQRT(x) x, kExprF64Sqrt
+#define WASM_F64_EQ(x, y) x, y, kExprF64Eq
+#define WASM_F64_NE(x, y) x, y, kExprF64Ne
+#define WASM_F64_LT(x, y) x, y, kExprF64Lt
+#define WASM_F64_LE(x, y) x, y, kExprF64Le
+#define WASM_F64_GT(x, y) x, y, kExprF64Gt
+#define WASM_F64_GE(x, y) x, y, kExprF64Ge
 
 //------------------------------------------------------------------------------
 // Type conversions.
 //------------------------------------------------------------------------------
-#define WASM_I32_SCONVERT_F32(x) kExprI32SConvertF32, x
-#define WASM_I32_SCONVERT_F64(x) kExprI32SConvertF64, x
-#define WASM_I32_UCONVERT_F32(x) kExprI32UConvertF32, x
-#define WASM_I32_UCONVERT_F64(x) kExprI32UConvertF64, x
-#define WASM_I32_CONVERT_I64(x) kExprI32ConvertI64, x
-#define WASM_I64_SCONVERT_F32(x) kExprI64SConvertF32, x
-#define WASM_I64_SCONVERT_F64(x) kExprI64SConvertF64, x
-#define WASM_I64_UCONVERT_F32(x) kExprI64UConvertF32, x
-#define WASM_I64_UCONVERT_F64(x) kExprI64UConvertF64, x
-#define WASM_I64_SCONVERT_I32(x) kExprI64SConvertI32, x
-#define WASM_I64_UCONVERT_I32(x) kExprI64UConvertI32, x
-#define WASM_F32_SCONVERT_I32(x) kExprF32SConvertI32, x
-#define WASM_F32_UCONVERT_I32(x) kExprF32UConvertI32, x
-#define WASM_F32_SCONVERT_I64(x) kExprF32SConvertI64, x
-#define WASM_F32_UCONVERT_I64(x) kExprF32UConvertI64, x
-#define WASM_F32_CONVERT_F64(x) kExprF32ConvertF64, x
-#define WASM_F32_REINTERPRET_I32(x) kExprF32ReinterpretI32, x
-#define WASM_F64_SCONVERT_I32(x) kExprF64SConvertI32, x
-#define WASM_F64_UCONVERT_I32(x) kExprF64UConvertI32, x
-#define WASM_F64_SCONVERT_I64(x) kExprF64SConvertI64, x
-#define WASM_F64_UCONVERT_I64(x) kExprF64UConvertI64, x
-#define WASM_F64_CONVERT_F32(x) kExprF64ConvertF32, x
-#define WASM_F64_REINTERPRET_I64(x) kExprF64ReinterpretI64, x
-#define WASM_I32_REINTERPRET_F32(x) kExprI32ReinterpretF32, x
-#define WASM_I64_REINTERPRET_F64(x) kExprI64ReinterpretF64, x
+#define WASM_I32_SCONVERT_F32(x) x, kExprI32SConvertF32
+#define WASM_I32_SCONVERT_F64(x) x, kExprI32SConvertF64
+#define WASM_I32_UCONVERT_F32(x) x, kExprI32UConvertF32
+#define WASM_I32_UCONVERT_F64(x) x, kExprI32UConvertF64
+#define WASM_I32_CONVERT_I64(x) x, kExprI32ConvertI64
+#define WASM_I64_SCONVERT_F32(x) x, kExprI64SConvertF32
+#define WASM_I64_SCONVERT_F64(x) x, kExprI64SConvertF64
+#define WASM_I64_UCONVERT_F32(x) x, kExprI64UConvertF32
+#define WASM_I64_UCONVERT_F64(x) x, kExprI64UConvertF64
+#define WASM_I64_SCONVERT_I32(x) x, kExprI64SConvertI32
+#define WASM_I64_UCONVERT_I32(x) x, kExprI64UConvertI32
+#define WASM_F32_SCONVERT_I32(x) x, kExprF32SConvertI32
+#define WASM_F32_UCONVERT_I32(x) x, kExprF32UConvertI32
+#define WASM_F32_SCONVERT_I64(x) x, kExprF32SConvertI64
+#define WASM_F32_UCONVERT_I64(x) x, kExprF32UConvertI64
+#define WASM_F32_CONVERT_F64(x) x, kExprF32ConvertF64
+#define WASM_F32_REINTERPRET_I32(x) x, kExprF32ReinterpretI32
+#define WASM_F64_SCONVERT_I32(x) x, kExprF64SConvertI32
+#define WASM_F64_UCONVERT_I32(x) x, kExprF64UConvertI32
+#define WASM_F64_SCONVERT_I64(x) x, kExprF64SConvertI64
+#define WASM_F64_UCONVERT_I64(x) x, kExprF64UConvertI64
+#define WASM_F64_CONVERT_F32(x) x, kExprF64ConvertF32
+#define WASM_F64_REINTERPRET_I64(x) x, kExprF64ReinterpretI64
+#define WASM_I32_REINTERPRET_F32(x) x, kExprI32ReinterpretF32
+#define WASM_I64_REINTERPRET_F64(x) x, kExprI64ReinterpretF64
+
+#define SIG_ENTRY_v_v kWasmFunctionTypeForm, 0, 0
+#define SIZEOF_SIG_ENTRY_v_v 3
+
+#define SIG_ENTRY_v_x(a) kWasmFunctionTypeForm, 1, a, 0
+#define SIG_ENTRY_v_xx(a, b) kWasmFunctionTypeForm, 2, a, b, 0
+#define SIG_ENTRY_v_xxx(a, b, c) kWasmFunctionTypeForm, 3, a, b, c, 0
+#define SIZEOF_SIG_ENTRY_v_x 4
+#define SIZEOF_SIG_ENTRY_v_xx 5
+#define SIZEOF_SIG_ENTRY_v_xxx 6
+
+#define SIG_ENTRY_x(r) kWasmFunctionTypeForm, 0, 1, r
+#define SIG_ENTRY_x_x(r, a) kWasmFunctionTypeForm, 1, a, 1, r
+#define SIG_ENTRY_x_xx(r, a, b) kWasmFunctionTypeForm, 2, a, b, 1, r
+#define SIG_ENTRY_x_xxx(r, a, b, c) kWasmFunctionTypeForm, 3, a, b, c, 1, r
+#define SIZEOF_SIG_ENTRY_x 4
+#define SIZEOF_SIG_ENTRY_x_x 5
+#define SIZEOF_SIG_ENTRY_x_xx 6
+#define SIZEOF_SIG_ENTRY_x_xxx 7
 
 #endif  // V8_WASM_MACRO_GEN_H_
diff --git a/src/wasm/wasm-module.cc b/src/wasm/wasm-module.cc
index a1c2a7a..c9a4279 100644
--- a/src/wasm/wasm-module.cc
+++ b/src/wasm/wasm-module.cc
@@ -2,14 +2,17 @@
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
+#include "src/base/atomic-utils.h"
 #include "src/macro-assembler.h"
 #include "src/objects.h"
+#include "src/property-descriptor.h"
 #include "src/v8.h"
 
 #include "src/simulator.h"
 
 #include "src/wasm/ast-decoder.h"
 #include "src/wasm/module-decoder.h"
+#include "src/wasm/wasm-function-name-table.h"
 #include "src/wasm/wasm-module.h"
 #include "src/wasm/wasm-result.h"
 
@@ -20,19 +23,28 @@
 namespace wasm {
 
 static const char* wasmSections[] = {
-#define F(enumerator, string) string,
+#define F(enumerator, order, string) string,
     FOR_EACH_WASM_SECTION_TYPE(F)
 #undef F
+        "<unknown>"  // entry for "Max"
 };
 
 static uint8_t wasmSectionsLengths[]{
-#define F(enumerator, string) sizeof(string) - 1,
+#define F(enumerator, order, string) sizeof(string) - 1,
     FOR_EACH_WASM_SECTION_TYPE(F)
 #undef F
+        9  // entry for "Max"
+};
+
+static uint8_t wasmSectionsOrders[]{
+#define F(enumerator, order, string) order,
+    FOR_EACH_WASM_SECTION_TYPE(F)
+#undef F
+        0  // entry for "Max"
 };
 
 static_assert(sizeof(wasmSections) / sizeof(wasmSections[0]) ==
-                  (size_t)WasmSection::Code::Max,
+                  (size_t)WasmSection::Code::Max + 1,
               "expected enum WasmSection::Code to be monotonic from 0");
 
 WasmSection::Code WasmSection::begin() { return (WasmSection::Code)0; }
@@ -49,6 +61,20 @@
   return wasmSectionsLengths[(size_t)code];
 }
 
+int WasmSection::getOrder(WasmSection::Code code) {
+  return wasmSectionsOrders[(size_t)code];
+}
+
+WasmSection::Code WasmSection::lookup(const byte* string, uint32_t length) {
+  // TODO(jfb) Linear search, it may be better to do a common-prefix search.
+  for (Code i = begin(); i != end(); i = next(i)) {
+    if (getNameLength(i) == length && 0 == memcmp(getName(i), string, length)) {
+      return i;
+    }
+  }
+  return Code::Max;
+}
+
 std::ostream& operator<<(std::ostream& os, const WasmModule& module) {
   os << "WASM module with ";
   os << (module.min_mem_pages * module.kPageSize) << " min mem";
@@ -59,16 +85,9 @@
   return os;
 }
 
-
 std::ostream& operator<<(std::ostream& os, const WasmFunction& function) {
   os << "WASM function with signature " << *function.sig;
 
-  os << " locals: ";
-  if (function.local_i32_count) os << function.local_i32_count << " i32s ";
-  if (function.local_i64_count) os << function.local_i64_count << " i64s ";
-  if (function.local_f32_count) os << function.local_f32_count << " f32s ";
-  if (function.local_f64_count) os << function.local_f64_count << " f64s ";
-
   os << " code bytes: "
      << (function.code_end_offset - function.code_start_offset);
   return os;
@@ -80,7 +99,7 @@
     if (pair.module_) {
       WasmName name = pair.module_->GetName(pair.function_->name_offset,
                                             pair.function_->name_length);
-      os.write(name.name, name.length);
+      os.write(name.start(), name.length());
     } else {
       os << "+" << pair.function_->func_index;
     }
@@ -105,11 +124,11 @@
       // Create a placeholder code object and encode the corresponding index in
       // the {constant_pool_offset} field of the code object.
       // TODO(titzer): placeholder code objects are somewhat dangerous.
-      Handle<Code> self(nullptr, isolate_);
       byte buffer[] = {0, 0, 0, 0, 0, 0, 0, 0};  // fake instructions.
       CodeDesc desc = {buffer, 8, 8, 0, 0, nullptr};
       Handle<Code> code = isolate_->factory()->NewCode(
-          desc, Code::KindField::encode(Code::WASM_FUNCTION), self);
+          desc, Code::KindField::encode(Code::WASM_FUNCTION),
+          Handle<Object>::null());
       code->set_constant_pool_offset(index + kPlaceholderMarker);
       placeholder_code_[index] = code;
       function_code_[index] = code;
@@ -177,11 +196,12 @@
 
 namespace {
 // Internal constants for the layout of the module object.
-const int kWasmModuleInternalFieldCount = 4;
+const int kWasmModuleInternalFieldCount = 5;
 const int kWasmModuleFunctionTable = 0;
 const int kWasmModuleCodeTable = 1;
 const int kWasmMemArrayBuffer = 2;
 const int kWasmGlobalsArrayBuffer = 3;
+const int kWasmFunctionNamesArray = 4;
 
 size_t AllocateGlobalsOffsets(std::vector<WasmGlobal>& globals) {
   uint32_t offset = 0;
@@ -195,7 +215,6 @@
   return offset;
 }
 
-
 void LoadDataSegments(WasmModule* module, byte* mem_addr, size_t mem_size) {
   for (const WasmDataSegment& segment : module->data_segments) {
     if (!segment.init) continue;
@@ -209,7 +228,6 @@
   }
 }
 
-
 Handle<FixedArray> BuildFunctionTable(Isolate* isolate, WasmModule* module) {
   if (module->function_table.size() == 0) {
     return Handle<FixedArray>::null();
@@ -314,20 +332,20 @@
                                               const char* error, uint32_t index,
                                               wasm::WasmName module_name,
                                               wasm::WasmName function_name) {
-  if (function_name.name) {
+  if (function_name.start()) {
     thrower.Error("Import #%d module=\"%.*s\" function=\"%.*s\" error: %s",
-                  index, module_name.length, module_name.name,
-                  function_name.length, function_name.name, error);
+                  index, module_name.length(), module_name.start(),
+                  function_name.length(), function_name.start(), error);
   } else {
     thrower.Error("Import #%d module=\"%.*s\" error: %s", index,
-                  module_name.length, module_name.name, error);
+                  module_name.length(), module_name.start(), error);
   }
   thrower.Error("Import ");
   return MaybeHandle<JSFunction>();
 }
 
 static MaybeHandle<JSFunction> LookupFunction(
-    ErrorThrower& thrower, Factory* factory, Handle<JSObject> ffi,
+    ErrorThrower& thrower, Factory* factory, Handle<JSReceiver> ffi,
     uint32_t index, wasm::WasmName module_name, wasm::WasmName function_name) {
   if (ffi.is_null()) {
     return ReportFFIError(thrower, "FFI is not an object", index, module_name,
@@ -335,8 +353,7 @@
   }
 
   // Look up the module first.
-  Handle<String> name = factory->InternalizeUtf8String(
-      Vector<const char>(module_name.name, module_name.length));
+  Handle<String> name = factory->InternalizeUtf8String(module_name);
   MaybeHandle<Object> result = Object::GetProperty(ffi, name);
   if (result.is_null()) {
     return ReportFFIError(thrower, "module not found", index, module_name,
@@ -351,10 +368,9 @@
   }
 
   Handle<Object> function;
-  if (function_name.name) {
+  if (function_name.start()) {
     // Look up the function in the module.
-    Handle<String> name = factory->InternalizeUtf8String(
-        Vector<const char>(function_name.name, function_name.length));
+    Handle<String> name = factory->InternalizeUtf8String(function_name);
     MaybeHandle<Object> result = Object::GetProperty(module, name);
     if (result.is_null()) {
       return ReportFFIError(thrower, "function not found", index, module_name,
@@ -374,18 +390,253 @@
   return Handle<JSFunction>::cast(function);
 }
 
+namespace {
+// Fetches the compilation unit of a wasm function and executes its parallel
+// phase.
+bool FetchAndExecuteCompilationUnit(
+    Isolate* isolate,
+    std::vector<compiler::WasmCompilationUnit*>* compilation_units,
+    std::queue<compiler::WasmCompilationUnit*>* executed_units,
+    base::Mutex* result_mutex, base::AtomicNumber<size_t>* next_unit) {
+  DisallowHeapAllocation no_allocation;
+  DisallowHandleAllocation no_handles;
+  DisallowHandleDereference no_deref;
+  DisallowCodeDependencyChange no_dependency_change;
+
+  // - 1 because Increment() returns the value after the atomic increment.
+  size_t index = next_unit->Increment(1) - 1;
+  if (index >= compilation_units->size()) {
+    return false;
+  }
+
+  compiler::WasmCompilationUnit* unit = compilation_units->at(index);
+  if (unit != nullptr) {
+    compiler::ExecuteCompilation(unit);
+    {
+      base::LockGuard<base::Mutex> guard(result_mutex);
+      executed_units->push(unit);
+    }
+  }
+  return true;
+}
+
+class WasmCompilationTask : public CancelableTask {
+ public:
+  WasmCompilationTask(
+      Isolate* isolate,
+      std::vector<compiler::WasmCompilationUnit*>* compilation_units,
+      std::queue<compiler::WasmCompilationUnit*>* executed_units,
+      base::Semaphore* on_finished, base::Mutex* result_mutex,
+      base::AtomicNumber<size_t>* next_unit)
+      : CancelableTask(isolate),
+        isolate_(isolate),
+        compilation_units_(compilation_units),
+        executed_units_(executed_units),
+        on_finished_(on_finished),
+        result_mutex_(result_mutex),
+        next_unit_(next_unit) {}
+
+  void RunInternal() override {
+    while (FetchAndExecuteCompilationUnit(isolate_, compilation_units_,
+                                          executed_units_, result_mutex_,
+                                          next_unit_)) {
+    }
+    on_finished_->Signal();
+  }
+
+  Isolate* isolate_;
+  std::vector<compiler::WasmCompilationUnit*>* compilation_units_;
+  std::queue<compiler::WasmCompilationUnit*>* executed_units_;
+  base::Semaphore* on_finished_;
+  base::Mutex* result_mutex_;
+  base::AtomicNumber<size_t>* next_unit_;
+};
+
+void record_code_size(uint32_t& total_code_size, Code* code) {
+  if (FLAG_print_wasm_code_size) {
+    total_code_size += code->body_size() + code->relocation_info()->length();
+  }
+}
+
+bool CompileWrappersToImportedFunctions(Isolate* isolate, WasmModule* module,
+                                        const Handle<JSReceiver> ffi,
+                                        WasmModuleInstance* instance,
+                                        ErrorThrower* thrower, Factory* factory,
+                                        ModuleEnv* module_env,
+                                        uint32_t& total_code_size) {
+  uint32_t index = 0;
+  if (module->import_table.size() > 0) {
+    instance->import_code.reserve(module->import_table.size());
+    for (const WasmImport& import : module->import_table) {
+      WasmName module_name = module->GetNameOrNull(import.module_name_offset,
+                                                   import.module_name_length);
+      WasmName function_name = module->GetNameOrNull(
+          import.function_name_offset, import.function_name_length);
+      MaybeHandle<JSFunction> function = LookupFunction(
+          *thrower, factory, ffi, index, module_name, function_name);
+      if (function.is_null()) return false;
+
+      Handle<Code> code = compiler::CompileWasmToJSWrapper(
+          isolate, module_env, function.ToHandleChecked(), import.sig,
+          module_name, function_name);
+      instance->import_code.push_back(code);
+      record_code_size(total_code_size, *code);
+      index++;
+    }
+  }
+  return true;
+}
+
+void InitializeParallelCompilation(
+    Isolate* isolate, std::vector<WasmFunction>& functions,
+    std::vector<compiler::WasmCompilationUnit*>& compilation_units,
+    ModuleEnv& module_env, ErrorThrower& thrower) {
+  // Create a placeholder code object for all functions.
+  // TODO(ahaas): Maybe we could skip this for external functions.
+  for (uint32_t i = 0; i < functions.size(); i++) {
+    module_env.linker->GetFunctionCode(i);
+  }
+
+  for (uint32_t i = FLAG_skip_compiling_wasm_funcs; i < functions.size(); i++) {
+    compilation_units[i] = compiler::CreateWasmCompilationUnit(
+        &thrower, isolate, &module_env, &functions[i], i);
+  }
+}
+
+uint32_t* StartCompilationTasks(
+    Isolate* isolate,
+    std::vector<compiler::WasmCompilationUnit*>& compilation_units,
+    std::queue<compiler::WasmCompilationUnit*>& executed_units,
+    const base::SmartPointer<base::Semaphore>& pending_tasks,
+    base::Mutex& result_mutex, base::AtomicNumber<size_t>& next_unit) {
+  const size_t num_tasks =
+      Min(static_cast<size_t>(FLAG_wasm_num_compilation_tasks),
+          V8::GetCurrentPlatform()->NumberOfAvailableBackgroundThreads());
+  uint32_t* task_ids = new uint32_t[num_tasks];
+  for (size_t i = 0; i < num_tasks; i++) {
+    WasmCompilationTask* task =
+        new WasmCompilationTask(isolate, &compilation_units, &executed_units,
+                                pending_tasks.get(), &result_mutex, &next_unit);
+    task_ids[i] = task->id();
+    V8::GetCurrentPlatform()->CallOnBackgroundThread(
+        task, v8::Platform::kShortRunningTask);
+  }
+  return task_ids;
+}
+
+void WaitForCompilationTasks(
+    Isolate* isolate, uint32_t* task_ids,
+    const base::SmartPointer<base::Semaphore>& pending_tasks) {
+  const size_t num_tasks =
+      Min(static_cast<size_t>(FLAG_wasm_num_compilation_tasks),
+          V8::GetCurrentPlatform()->NumberOfAvailableBackgroundThreads());
+  for (size_t i = 0; i < num_tasks; i++) {
+    // If the task has not started yet, then we abort it. Otherwise we wait for
+    // it to finish.
+    if (!isolate->cancelable_task_manager()->TryAbort(task_ids[i])) {
+      pending_tasks->Wait();
+    }
+  }
+}
+
+void FinishCompilationUnits(
+    WasmModule* module,
+    std::queue<compiler::WasmCompilationUnit*>& executed_units,
+    std::vector<Handle<Code>>& results, base::Mutex& result_mutex) {
+  while (true) {
+    compiler::WasmCompilationUnit* unit = nullptr;
+    {
+      base::LockGuard<base::Mutex> guard(&result_mutex);
+      if (executed_units.empty()) {
+        break;
+      }
+      unit = executed_units.front();
+      executed_units.pop();
+    }
+    int j = compiler::GetIndexOfWasmCompilationUnit(unit);
+    results[j] = compiler::FinishCompilation(unit);
+  }
+}
+
+bool FinishCompilation(Isolate* isolate, WasmModule* module,
+                       const Handle<JSReceiver> ffi,
+                       const std::vector<Handle<Code>>& results,
+                       const WasmModuleInstance& instance,
+                       const Handle<FixedArray>& code_table,
+                       ErrorThrower& thrower, Factory* factory,
+                       ModuleEnv& module_env, uint32_t& total_code_size,
+                       PropertyDescriptor& desc) {
+  for (uint32_t i = FLAG_skip_compiling_wasm_funcs;
+       i < module->functions.size(); i++) {
+    const WasmFunction& func = module->functions[i];
+    if (thrower.error()) break;
+
+    DCHECK_EQ(i, func.func_index);
+    WasmName str = module->GetName(func.name_offset, func.name_length);
+    Handle<Code> code = Handle<Code>::null();
+    Handle<JSFunction> function = Handle<JSFunction>::null();
+    Handle<String> function_name = Handle<String>::null();
+    if (FLAG_wasm_num_compilation_tasks != 0) {
+      code = results[i];
+    } else {
+      // Compile the function.
+      code =
+          compiler::CompileWasmFunction(&thrower, isolate, &module_env, &func);
+    }
+    if (code.is_null()) {
+      thrower.Error("Compilation of #%d:%.*s failed.", i, str.length(),
+                    str.start());
+      return false;
+    }
+    if (func.exported) {
+      function_name = factory->InternalizeUtf8String(str);
+      function = compiler::CompileJSToWasmWrapper(
+          isolate, &module_env, function_name, code, instance.js_object, i);
+      record_code_size(total_code_size, function->code());
+    }
+    if (!code.is_null()) {
+      // Install the code into the linker table.
+      module_env.linker->Finish(i, code);
+      code_table->set(i, *code);
+      record_code_size(total_code_size, *code);
+    }
+    if (func.exported) {
+      // Exported functions are installed as read-only properties on the
+      // module.
+      desc.set_value(function);
+      Maybe<bool> status = JSReceiver::DefineOwnProperty(
+          isolate, instance.js_object, function_name, &desc,
+          Object::THROW_ON_ERROR);
+      if (!status.IsJust())
+        thrower.Error("export of %.*s failed.", str.length(), str.start());
+    }
+  }
+  return true;
+}
+}  // namespace
+
 // Instantiates a wasm module as a JSObject.
 //  * allocates a backing store of {mem_size} bytes.
 //  * installs a named property "memory" for that buffer if exported
 //  * installs named properties on the object for exported functions
 //  * compiles wasm code to machine code
 MaybeHandle<JSObject> WasmModule::Instantiate(Isolate* isolate,
-                                              Handle<JSObject> ffi,
+                                              Handle<JSReceiver> ffi,
                                               Handle<JSArrayBuffer> memory) {
+  HistogramTimerScope wasm_instantiate_module_time_scope(
+      isolate->counters()->wasm_instantiate_module_time());
   this->shared_isolate = isolate;  // TODO(titzer): have a real shared isolate.
   ErrorThrower thrower(isolate, "WasmModule::Instantiate()");
   Factory* factory = isolate->factory();
 
+  PropertyDescriptor desc;
+  desc.set_writable(false);
+
+  // If FLAG_print_wasm_code_size is set, this aggregates the sum of all code
+  // objects created for this module.
+  // TODO(titzer): switch this to TRACE_EVENT
+  uint32_t total_code_size = 0;
+
   //-------------------------------------------------------------------------
   // Allocate the instance and its JS counterpart.
   //-------------------------------------------------------------------------
@@ -402,6 +653,10 @@
   //-------------------------------------------------------------------------
   // Allocate and initialize the linear memory.
   //-------------------------------------------------------------------------
+  isolate->counters()->wasm_min_mem_pages_count()->AddSample(
+      instance.module->min_mem_pages);
+  isolate->counters()->wasm_max_mem_pages_count()->AddSample(
+      instance.module->max_mem_pages);
   if (memory.is_null()) {
     if (!AllocateMemory(&thrower, isolate, &instance)) {
       return MaybeHandle<JSObject>();
@@ -424,10 +679,9 @@
                                          *instance.globals_buffer);
   }
 
-  //-------------------------------------------------------------------------
-  // Compile wrappers to imported functions.
-  //-------------------------------------------------------------------------
-  uint32_t index = 0;
+  HistogramTimerScope wasm_compile_module_time_scope(
+      isolate->counters()->wasm_compile_module_time());
+
   instance.function_table = BuildFunctionTable(isolate, this);
   WasmLinker linker(isolate, functions.size());
   ModuleEnv module_env;
@@ -436,113 +690,147 @@
   module_env.linker = &linker;
   module_env.origin = origin;
 
-  if (import_table.size() > 0) {
-    instance.import_code.reserve(import_table.size());
-    for (const WasmImport& import : import_table) {
-      WasmName module_name =
-          GetNameOrNull(import.module_name_offset, import.module_name_length);
-      WasmName function_name = GetNameOrNull(import.function_name_offset,
-                                             import.function_name_length);
-      MaybeHandle<JSFunction> function = LookupFunction(
-          thrower, factory, ffi, index, module_name, function_name);
-      if (function.is_null()) return MaybeHandle<JSObject>();
-      Handle<Code> code = compiler::CompileWasmToJSWrapper(
-          isolate, &module_env, function.ToHandleChecked(), import.sig,
-          module_name, function_name);
-      instance.import_code.push_back(code);
-      index++;
-    }
+  //-------------------------------------------------------------------------
+  // Compile wrappers to imported functions.
+  //-------------------------------------------------------------------------
+  if (!CompileWrappersToImportedFunctions(isolate, this, ffi, &instance,
+                                          &thrower, factory, &module_env,
+                                          total_code_size)) {
+    return MaybeHandle<JSObject>();
   }
-
   //-------------------------------------------------------------------------
   // Compile all functions in the module.
   //-------------------------------------------------------------------------
+  {
+    isolate->counters()->wasm_functions_per_module()->AddSample(
+        static_cast<int>(functions.size()));
 
-  // First pass: compile each function and initialize the code table.
-  index = FLAG_skip_compiling_wasm_funcs;
-  while (index < functions.size()) {
-    const WasmFunction& func = functions[index];
-    if (thrower.error()) break;
-    DCHECK_EQ(index, func.func_index);
+    // Data structures for the parallel compilation.
+    std::vector<compiler::WasmCompilationUnit*> compilation_units(
+        functions.size());
+    std::queue<compiler::WasmCompilationUnit*> executed_units;
+    std::vector<Handle<Code>> results(functions.size());
 
-    WasmName str = GetName(func.name_offset, func.name_length);
-    WasmName str_null = {nullptr, 0};
-    Handle<String> name = factory->InternalizeUtf8String(
-        Vector<const char>(str.name, str.length));
-    Handle<Code> code = Handle<Code>::null();
-    Handle<JSFunction> function = Handle<JSFunction>::null();
-    if (func.external) {
-      // Lookup external function in FFI object.
-      MaybeHandle<JSFunction> function =
-          LookupFunction(thrower, factory, ffi, index, str, str_null);
-      if (function.is_null()) return MaybeHandle<JSObject>();
-      code = compiler::CompileWasmToJSWrapper(isolate, &module_env,
-                                              function.ToHandleChecked(),
-                                              func.sig, str, str_null);
-    } else {
-      // Compile the function.
-      code = compiler::CompileWasmFunction(thrower, isolate, &module_env, func);
-      if (code.is_null()) {
-        thrower.Error("Compilation of #%d:%.*s failed.", index, str.length,
-                      str.name);
-        return MaybeHandle<JSObject>();
+    if (FLAG_wasm_num_compilation_tasks != 0) {
+      //-----------------------------------------------------------------------
+      // For parallel compilation:
+      // 1) The main thread allocates a compilation unit for each wasm function
+      //    and stores them in the vector {compilation_units}.
+      // 2) The main thread spawns {WasmCompilationTask} instances which run on
+      //    the background threads.
+      // 3.a) The background threads and the main thread pick one compilation
+      //      unit at a time and execute the parallel phase of the compilation
+      //      unit. After finishing the execution of the parallel phase, the
+      //      result is enqueued in {executed_units}.
+      // 3.b) If {executed_units} contains a compilation unit, the main thread
+      //      dequeues it and finishes the compilation.
+      // 4) After the parallel phase of all compilation units has started, the
+      //    main thread waits for all {WasmCompilationTask} instances to finish.
+      // 5) The main thread finishes the compilation.
+
+      // Turn on the {CanonicalHandleScope} so that the background threads can
+      // use the node cache.
+      CanonicalHandleScope canonical(isolate);
+
+      // 1) The main thread allocates a compilation unit for each wasm function
+      //    and stores them in the vector {compilation_units}.
+      InitializeParallelCompilation(isolate, functions, compilation_units,
+                                    module_env, thrower);
+
+      // Objects for the synchronization with the background threads.
+      base::SmartPointer<base::Semaphore> pending_tasks(new base::Semaphore(0));
+      base::Mutex result_mutex;
+      base::AtomicNumber<size_t> next_unit(
+          static_cast<size_t>(FLAG_skip_compiling_wasm_funcs));
+
+      // 2) The main thread spawns {WasmCompilationTask} instances which run on
+      //    the background threads.
+      base::SmartArrayPointer<uint32_t> task_ids(
+          StartCompilationTasks(isolate, compilation_units, executed_units,
+                                pending_tasks, result_mutex, next_unit));
+
+      // 3.a) The background threads and the main thread pick one compilation
+      //      unit at a time and execute the parallel phase of the compilation
+      //      unit. After finishing the execution of the parallel phase, the
+      //      result is enqueued in {executed_units}.
+      while (FetchAndExecuteCompilationUnit(isolate, &compilation_units,
+                                            &executed_units, &result_mutex,
+                                            &next_unit)) {
+        // 3.b) If {executed_units} contains a compilation unit, the main thread
+        //      dequeues it and finishes the compilation unit. Compilation units
+        //      are finished concurrently to the background threads to save
+        //      memory.
+        FinishCompilationUnits(this, executed_units, results, result_mutex);
       }
-      if (func.exported) {
-        function = compiler::CompileJSToWasmWrapper(
-            isolate, &module_env, name, code, instance.js_object, index);
-      }
+      // 4) After the parallel phase of all compilation units has started, the
+      //    main thread waits for all {WasmCompilationTask} instances to finish.
+      WaitForCompilationTasks(isolate, task_ids.get(), pending_tasks);
+      // Finish the compilation of the remaining compilation units.
+      FinishCompilationUnits(this, executed_units, results, result_mutex);
     }
-    if (!code.is_null()) {
-      // Install the code into the linker table.
-      linker.Finish(index, code);
-      code_table->set(index, *code);
-    }
-    if (func.exported) {
-      // Exported functions are installed as read-only properties on the module.
-      JSObject::AddProperty(instance.js_object, name, function, READ_ONLY);
-    }
-    index++;
-  }
-
-  // Second pass: patch all direct call sites.
-  linker.Link(instance.function_table, this->function_table);
-  instance.js_object->SetInternalField(kWasmModuleFunctionTable,
-                                       Smi::FromInt(0));
-
-  //-------------------------------------------------------------------------
-  // Create and populate the exports object.
-  //-------------------------------------------------------------------------
-  if (export_table.size() > 0 || mem_export) {
-    index = 0;
-    // Create the "exports" object.
-    Handle<JSFunction> object_function = Handle<JSFunction>(
-        isolate->native_context()->object_function(), isolate);
-    Handle<JSObject> exports_object =
-        factory->NewJSObject(object_function, TENURED);
-    Handle<String> exports_name = factory->InternalizeUtf8String("exports");
-    JSObject::AddProperty(instance.js_object, exports_name, exports_object,
-                          READ_ONLY);
-
-    // Compile wrappers and add them to the exports object.
-    for (const WasmExport& exp : export_table) {
-      if (thrower.error()) break;
-      WasmName str = GetName(exp.name_offset, exp.name_length);
-      Handle<String> name = factory->InternalizeUtf8String(
-          Vector<const char>(str.name, str.length));
-      Handle<Code> code = linker.GetFunctionCode(exp.func_index);
-      Handle<JSFunction> function = compiler::CompileJSToWasmWrapper(
-          isolate, &module_env, name, code, instance.js_object, exp.func_index);
-      JSObject::AddProperty(exports_object, name, function, READ_ONLY);
+    // 5) The main thread finishes the compilation.
+    if (!FinishCompilation(isolate, this, ffi, results, instance, code_table,
+                           thrower, factory, module_env, total_code_size,
+                           desc)) {
+      return MaybeHandle<JSObject>();
     }
 
-    if (mem_export) {
-      // Export the memory as a named property.
-      Handle<String> name = factory->InternalizeUtf8String("memory");
-      JSObject::AddProperty(exports_object, name, instance.mem_buffer,
+    // Patch all direct call sites.
+    linker.Link(instance.function_table, this->function_table);
+    instance.js_object->SetInternalField(kWasmModuleFunctionTable,
+                                         Smi::FromInt(0));
+
+    //-------------------------------------------------------------------------
+    // Create and populate the exports object.
+    //-------------------------------------------------------------------------
+    if (export_table.size() > 0 || mem_export) {
+      // Create the "exports" object.
+      Handle<JSFunction> object_function = Handle<JSFunction>(
+          isolate->native_context()->object_function(), isolate);
+      Handle<JSObject> exports_object =
+          factory->NewJSObject(object_function, TENURED);
+      Handle<String> exports_name = factory->InternalizeUtf8String("exports");
+      JSObject::AddProperty(instance.js_object, exports_name, exports_object,
                             READ_ONLY);
+
+      // Compile wrappers and add them to the exports object.
+      for (const WasmExport& exp : export_table) {
+        if (thrower.error()) break;
+        WasmName str = GetName(exp.name_offset, exp.name_length);
+        Handle<String> name = factory->InternalizeUtf8String(str);
+        Handle<Code> code = linker.GetFunctionCode(exp.func_index);
+        Handle<JSFunction> function = compiler::CompileJSToWasmWrapper(
+            isolate, &module_env, name, code, instance.js_object,
+            exp.func_index);
+        record_code_size(total_code_size, function->code());
+        desc.set_value(function);
+        Maybe<bool> status = JSReceiver::DefineOwnProperty(
+            isolate, exports_object, name, &desc, Object::THROW_ON_ERROR);
+        if (!status.IsJust())
+          thrower.Error("export of %.*s failed.", str.length(), str.start());
+      }
+
+      if (mem_export) {
+        // Export the memory as a named property.
+        Handle<String> name = factory->InternalizeUtf8String("memory");
+        JSObject::AddProperty(exports_object, name, instance.mem_buffer,
+                              READ_ONLY);
+      }
     }
   }
 
+  //-------------------------------------------------------------------------
+  // Attach an array with function names and an array with offsets into that
+  // first array.
+  //-------------------------------------------------------------------------
+  {
+    Handle<Object> arr = BuildFunctionNamesTable(isolate, module_env.module);
+    instance.js_object->SetInternalField(kWasmFunctionNamesArray, *arr);
+  }
+
+  if (FLAG_print_wasm_code_size)
+    printf("Total generated wasm code: %u bytes\n", total_code_size);
+
   // Run the start function if one was specified.
   if (this->start_function_index >= 0) {
     HandleScope scope(isolate);
@@ -564,7 +852,6 @@
   return instance.js_object;
 }
 
-
 Handle<Code> ModuleEnv::GetFunctionCode(uint32_t index) {
   DCHECK(IsValidFunction(index));
   if (linker) return linker->GetFunctionCode(index);
@@ -585,7 +872,6 @@
   return GetWasmCallDescriptor(zone, function->sig);
 }
 
-
 int32_t CompileAndRunWasmModule(Isolate* isolate, const byte* module_start,
                                 const byte* module_end, bool asm_js) {
   HandleScope scope(isolate);
@@ -611,7 +897,6 @@
   return retval;
 }
 
-
 int32_t CompileAndRunWasmModule(Isolate* isolate, WasmModule* module) {
   ErrorThrower thrower(isolate, "CompileAndRunWasmModule");
   WasmModuleInstance instance(module);
@@ -644,19 +929,17 @@
   int main_index = 0;
   for (const WasmFunction& func : module->functions) {
     DCHECK_EQ(index, func.func_index);
-    if (!func.external) {
-      // Compile the function and install it in the code table.
-      Handle<Code> code =
-          compiler::CompileWasmFunction(thrower, isolate, &module_env, func);
-      if (!code.is_null()) {
-        if (func.exported) {
-          main_code = code;
-          main_index = index;
-        }
-        linker.Finish(index, code);
+    // Compile the function and install it in the code table.
+    Handle<Code> code =
+        compiler::CompileWasmFunction(&thrower, isolate, &module_env, &func);
+    if (!code.is_null()) {
+      if (func.exported) {
+        main_code = code;
+        main_index = index;
       }
-      if (thrower.error()) return -1;
+      linker.Finish(index, code);
     }
+    if (thrower.error()) return -1;
     index++;
   }
 
@@ -693,6 +976,16 @@
   thrower.Error("WASM.compileRun() failed: Return value should be number");
   return -1;
 }
+
+Handle<Object> GetWasmFunctionName(Handle<JSObject> wasm, uint32_t func_index) {
+  Handle<Object> func_names_arr_obj = handle(
+      wasm->GetInternalField(kWasmFunctionNamesArray), wasm->GetIsolate());
+  if (func_names_arr_obj->IsUndefined())
+    return func_names_arr_obj;  // Return undefined.
+  return GetWasmFunctionNameFromTable(
+      Handle<ByteArray>::cast(func_names_arr_obj), func_index);
+}
+
 }  // namespace wasm
 }  // namespace internal
 }  // namespace v8
diff --git a/src/wasm/wasm-module.h b/src/wasm/wasm-module.h
index 4e5aa78..2ac0425 100644
--- a/src/wasm/wasm-module.h
+++ b/src/wasm/wasm-module.h
@@ -16,6 +16,7 @@
 
 namespace compiler {
 class CallDescriptor;
+class WasmCompilationUnit;
 }
 
 namespace wasm {
@@ -23,69 +24,63 @@
 const size_t kMaxFunctionSize = 128 * 1024;
 const size_t kMaxStringSize = 256;
 const uint32_t kWasmMagic = 0x6d736100;
-const uint32_t kWasmVersion = 0x0a;
+const uint32_t kWasmVersion = 0x0b;
+const uint8_t kWasmFunctionTypeForm = 0x40;
 
 // WebAssembly sections are named as strings in the binary format, but
 // internally V8 uses an enum to handle them.
 //
 // Entries have the form F(enumerator, string).
-#define FOR_EACH_WASM_SECTION_TYPE(F)          \
-  F(Memory, "memory")                          \
-  F(Signatures, "signatures")                  \
-  F(Functions, "functions")                    \
-  F(Globals, "globals")                        \
-  F(DataSegments, "data_segments")             \
-  F(FunctionTable, "function_table")           \
-  F(End, "end")                                \
-  F(StartFunction, "start_function")           \
-  F(ImportTable, "import_table")               \
-  F(ExportTable, "export_table")               \
-  F(FunctionSignatures, "function_signatures") \
-  F(FunctionBodies, "function_bodies")         \
-  F(Names, "names")
+#define FOR_EACH_WASM_SECTION_TYPE(F)  \
+  F(Signatures, 1, "type")             \
+  F(ImportTable, 2, "import")          \
+  F(FunctionSignatures, 3, "function") \
+  F(FunctionTable, 4, "table")         \
+  F(Memory, 5, "memory")               \
+  F(ExportTable, 6, "export")          \
+  F(StartFunction, 7, "start")         \
+  F(FunctionBodies, 8, "code")         \
+  F(DataSegments, 9, "data")           \
+  F(Names, 10, "name")                 \
+  F(OldFunctions, 0, "old_function")   \
+  F(Globals, 0, "global")              \
+  F(End, 0, "end")
 
 // Contants for the above section types: {LEB128 length, characters...}.
 #define WASM_SECTION_MEMORY 6, 'm', 'e', 'm', 'o', 'r', 'y'
-#define WASM_SECTION_SIGNATURES \
-  10, 's', 'i', 'g', 'n', 'a', 't', 'u', 'r', 'e', 's'
-#define WASM_SECTION_FUNCTIONS 9, 'f', 'u', 'n', 'c', 't', 'i', 'o', 'n', 's'
-#define WASM_SECTION_GLOBALS 7, 'g', 'l', 'o', 'b', 'a', 'l', 's'
-#define WASM_SECTION_DATA_SEGMENTS \
-  13, 'd', 'a', 't', 'a', '_', 's', 'e', 'g', 'm', 'e', 'n', 't', 's'
-#define WASM_SECTION_FUNCTION_TABLE \
-  14, 'f', 'u', 'n', 'c', 't', 'i', 'o', 'n', '_', 't', 'a', 'b', 'l', 'e'
+#define WASM_SECTION_SIGNATURES 4, 't', 'y', 'p', 'e'
+#define WASM_SECTION_OLD_FUNCTIONS \
+  12, 'o', 'l', 'd', '_', 'f', 'u', 'n', 'c', 't', 'i', 'o', 'n'
+#define WASM_SECTION_GLOBALS 6, 'g', 'l', 'o', 'b', 'a', 'l'
+#define WASM_SECTION_DATA_SEGMENTS 4, 'd', 'a', 't', 'a'
+#define WASM_SECTION_FUNCTION_TABLE 5, 't', 'a', 'b', 'l', 'e'
 #define WASM_SECTION_END 3, 'e', 'n', 'd'
-#define WASM_SECTION_START_FUNCTION \
-  14, 's', 't', 'a', 'r', 't', '_', 'f', 'u', 'n', 'c', 't', 'i', 'o', 'n'
-#define WASM_SECTION_IMPORT_TABLE \
-  12, 'i', 'm', 'p', 'o', 'r', 't', '_', 't', 'a', 'b', 'l', 'e'
-#define WASM_SECTION_EXPORT_TABLE \
-  12, 'e', 'x', 'p', 'o', 'r', 't', '_', 't', 'a', 'b', 'l', 'e'
-#define WASM_SECTION_FUNCTION_SIGNATURES                                    \
-  19, 'f', 'u', 'n', 'c', 't', 'i', 'o', 'n', '_', 's', 'i', 'g', 'n', 'a', \
-      't', 'u', 'r', 'e', 's'
-#define WASM_SECTION_FUNCTION_BODIES \
-  15, 'f', 'u', 'n', 'c', 't', 'i', 'o', 'n', '_', 'b', 'o', 'd', 'i', 'e', 's'
-#define WASM_SECTION_NAMES 5, 'n', 'a', 'm', 'e', 's'
+#define WASM_SECTION_START_FUNCTION 5, 's', 't', 'a', 'r', 't'
+#define WASM_SECTION_IMPORT_TABLE 6, 'i', 'm', 'p', 'o', 'r', 't'
+#define WASM_SECTION_EXPORT_TABLE 6, 'e', 'x', 'p', 'o', 'r', 't'
+#define WASM_SECTION_FUNCTION_SIGNATURES \
+  8, 'f', 'u', 'n', 'c', 't', 'i', 'o', 'n'
+#define WASM_SECTION_FUNCTION_BODIES 4, 'c', 'o', 'd', 'e'
+#define WASM_SECTION_NAMES 4, 'n', 'a', 'm', 'e'
 
 // Constants for the above section headers' size (LEB128 + characters).
 #define WASM_SECTION_MEMORY_SIZE ((size_t)7)
-#define WASM_SECTION_SIGNATURES_SIZE ((size_t)11)
-#define WASM_SECTION_FUNCTIONS_SIZE ((size_t)10)
-#define WASM_SECTION_GLOBALS_SIZE ((size_t)8)
-#define WASM_SECTION_DATA_SEGMENTS_SIZE ((size_t)14)
-#define WASM_SECTION_FUNCTION_TABLE_SIZE ((size_t)15)
+#define WASM_SECTION_SIGNATURES_SIZE ((size_t)5)
+#define WASM_SECTION_OLD_FUNCTIONS_SIZE ((size_t)13)
+#define WASM_SECTION_GLOBALS_SIZE ((size_t)7)
+#define WASM_SECTION_DATA_SEGMENTS_SIZE ((size_t)5)
+#define WASM_SECTION_FUNCTION_TABLE_SIZE ((size_t)6)
 #define WASM_SECTION_END_SIZE ((size_t)4)
-#define WASM_SECTION_START_FUNCTION_SIZE ((size_t)15)
-#define WASM_SECTION_IMPORT_TABLE_SIZE ((size_t)13)
-#define WASM_SECTION_EXPORT_TABLE_SIZE ((size_t)13)
-#define WASM_SECTION_FUNCTION_SIGNATURES_SIZE ((size_t)20)
-#define WASM_SECTION_FUNCTION_BODIES_SIZE ((size_t)16)
-#define WASM_SECTION_NAMES_SIZE ((size_t)6)
+#define WASM_SECTION_START_FUNCTION_SIZE ((size_t)6)
+#define WASM_SECTION_IMPORT_TABLE_SIZE ((size_t)7)
+#define WASM_SECTION_EXPORT_TABLE_SIZE ((size_t)7)
+#define WASM_SECTION_FUNCTION_SIGNATURES_SIZE ((size_t)9)
+#define WASM_SECTION_FUNCTION_BODIES_SIZE ((size_t)5)
+#define WASM_SECTION_NAMES_SIZE ((size_t)5)
 
 struct WasmSection {
   enum class Code : uint32_t {
-#define F(enumerator, string) enumerator,
+#define F(enumerator, order, string) enumerator,
     FOR_EACH_WASM_SECTION_TYPE(F)
 #undef F
         Max
@@ -94,13 +89,13 @@
   static WasmSection::Code end();
   static WasmSection::Code next(WasmSection::Code code);
   static const char* getName(Code code);
+  static int getOrder(Code code);
   static size_t getNameLength(Code code);
+  static WasmSection::Code lookup(const byte* string, uint32_t length);
 };
 
 enum WasmFunctionDeclBit {
   kDeclFunctionName = 0x01,
-  kDeclFunctionImport = 0x02,
-  kDeclFunctionLocals = 0x04,
   kDeclFunctionExport = 0x08
 };
 
@@ -108,6 +103,8 @@
 static const size_t kDeclMemorySize = 3;
 static const size_t kDeclDataSegmentSize = 13;
 
+static const uint32_t kMaxReturnCount = 1;
+
 // Static representation of a WASM function.
 struct WasmFunction {
   FunctionSig* sig;      // signature of the function.
@@ -117,12 +114,7 @@
   uint32_t name_length;  // length in bytes of the name.
   uint32_t code_start_offset;    // offset in the module bytes of code start.
   uint32_t code_end_offset;      // offset in the module bytes of code end.
-  uint16_t local_i32_count;      // number of i32 local variables.
-  uint16_t local_i64_count;      // number of i64 local variables.
-  uint16_t local_f32_count;      // number of f32 local variables.
-  uint16_t local_f64_count;      // number of f64 local variables.
   bool exported;                 // true if this function is exported.
-  bool external;  // true if this function is externally supplied.
 };
 
 // Static representation of an imported WASM function.
@@ -191,24 +183,38 @@
   WasmName GetName(uint32_t offset, uint32_t length) const {
     if (length == 0) return {"<?>", 3};  // no name.
     CHECK(BoundsCheck(offset, offset + length));
-    return {reinterpret_cast<const char*>(module_start + offset), length};
+    DCHECK_GE(static_cast<int>(length), 0);
+    return {reinterpret_cast<const char*>(module_start + offset),
+            static_cast<int>(length)};
+  }
+
+  // Get a function's name from the module bytes; "<?>" if it has none.
+  WasmName GetName(WasmFunction* function) const {
+    return GetName(function->name_offset, function->name_length);
   }
 
   // Get a string stored in the module bytes representing a name.
   WasmName GetNameOrNull(uint32_t offset, uint32_t length) const {
     if (length == 0) return {NULL, 0};  // no name.
     CHECK(BoundsCheck(offset, offset + length));
-    return {reinterpret_cast<const char*>(module_start + offset), length};
+    DCHECK_GE(static_cast<int>(length), 0);
+    return {reinterpret_cast<const char*>(module_start + offset),
+            static_cast<int>(length)};
+  }
+
+  // Get a function's name from the module bytes; {NULL, 0} if it has none.
+  WasmName GetNameOrNull(WasmFunction* function) const {
+    return GetNameOrNull(function->name_offset, function->name_length);
   }
 
   // Checks the given offset range is contained within the module bytes.
   bool BoundsCheck(uint32_t start, uint32_t end) const {
     size_t size = module_end - module_start;
-    return start < size && end < size;
+    return start <= end && end <= size;  // start > end means the range wrapped
   }
 
   // Creates a new instantiation of the module in the given isolate.
-  MaybeHandle<JSObject> Instantiate(Isolate* isolate, Handle<JSObject> ffi,
+  MaybeHandle<JSObject> Instantiate(Isolate* isolate, Handle<JSReceiver> ffi,
                                     Handle<JSArrayBuffer> memory);
 };
 
@@ -318,6 +324,11 @@
 // given decoded module.
 int32_t CompileAndRunWasmModule(Isolate* isolate, WasmModule* module);
 
+// Extract a function name from the given wasm object.
+// Returns undefined if the function is unnamed or the function index is
+// invalid.
+Handle<Object> GetWasmFunctionName(Handle<JSObject> wasm, uint32_t func_index);
+
 }  // namespace wasm
 }  // namespace internal
 }  // namespace v8
diff --git a/src/wasm/wasm-opcodes.cc b/src/wasm/wasm-opcodes.cc
index 736c4d9..a08fa8d 100644
--- a/src/wasm/wasm-opcodes.cc
+++ b/src/wasm/wasm-opcodes.cc
@@ -3,6 +3,7 @@
 // found in the LICENSE file.
 
 #include "src/wasm/wasm-opcodes.h"
+#include "src/messages.h"
 #include "src/signature.h"
 
 namespace v8 {
@@ -24,6 +25,18 @@
   return "Unknown";
 }
 
+const char* WasmOpcodes::ShortOpcodeName(WasmOpcode opcode) {
+  switch (opcode) {
+#define DECLARE_NAME_CASE(name, opcode, sig) \
+  case kExpr##name:                          \
+    return #name;
+    FOREACH_OPCODE(DECLARE_NAME_CASE)
+#undef DECLARE_NAME_CASE
+    default:
+      break;
+  }
+  return "Unknown";
+}
 
 std::ostream& operator<<(std::ostream& os, const FunctionSig& sig) {
   if (sig.return_count() == 0) os << "v";
@@ -38,13 +51,10 @@
   return os;
 }
 
-
 #define DECLARE_SIG_ENUM(name, ...) kSigEnum_##name,
 
-
 enum WasmOpcodeSig { FOREACH_SIGNATURE(DECLARE_SIG_ENUM) };
 
-
 // TODO(titzer): not static-initializer safe. Wrap in LazyInstance.
 #define DECLARE_SIG(name, ...)                      \
   static LocalType kTypes_##name[] = {__VA_ARGS__}; \
@@ -60,7 +70,6 @@
 
 static byte kSimpleExprSigTable[256];
 
-
 // Initialize the signature table.
 static void InitSigTable() {
 #define SET_SIG_TABLE(name, opcode, sig) \
@@ -70,15 +79,24 @@
 #undef SET_SIG_TABLE
 }
 
+class SigTable {
+ public:
+  SigTable() {
+    // TODO(ahaas): Move {InitSigTable} into the class.
+    InitSigTable();
+  }
+  FunctionSig* Signature(WasmOpcode opcode) const {
+    return const_cast<FunctionSig*>(
+        kSimpleExprSigs[kSimpleExprSigTable[static_cast<byte>(opcode)]]);
+  }
+};
+
+static base::LazyInstance<SigTable>::type sig_table = LAZY_INSTANCE_INITIALIZER;
 
 FunctionSig* WasmOpcodes::Signature(WasmOpcode opcode) {
-  // TODO(titzer): use LazyInstance to make this thread safe.
-  if (kSimpleExprSigTable[kExprI32Add] == 0) InitSigTable();
-  return const_cast<FunctionSig*>(
-      kSimpleExprSigs[kSimpleExprSigTable[static_cast<byte>(opcode)]]);
+  return sig_table.Get().Signature(opcode);
 }
 
-
 // TODO(titzer): pull WASM_64 up to a common header.
 #if !V8_TARGET_ARCH_32_BIT || V8_TARGET_ARCH_X64
 #define WASM_64 1
@@ -86,64 +104,20 @@
 #define WASM_64 0
 #endif
 
-
-bool WasmOpcodes::IsSupported(WasmOpcode opcode) {
-#if !WASM_64
-  switch (opcode) {
-    // Opcodes not supported on 32-bit platforms.
-    case kExprI64Add:
-    case kExprI64Sub:
-    case kExprI64Mul:
-    case kExprI64DivS:
-    case kExprI64DivU:
-    case kExprI64RemS:
-    case kExprI64RemU:
-    case kExprI64And:
-    case kExprI64Ior:
-    case kExprI64Xor:
-    case kExprI64Shl:
-    case kExprI64ShrU:
-    case kExprI64ShrS:
-    case kExprI64Ror:
-    case kExprI64Rol:
-    case kExprI64Eq:
-    case kExprI64Ne:
-    case kExprI64LtS:
-    case kExprI64LeS:
-    case kExprI64LtU:
-    case kExprI64LeU:
-    case kExprI64GtS:
-    case kExprI64GeS:
-    case kExprI64GtU:
-    case kExprI64GeU:
-
-    case kExprI32ConvertI64:
-    case kExprI64SConvertI32:
-    case kExprI64UConvertI32:
-
-    case kExprF64ReinterpretI64:
-    case kExprI64ReinterpretF64:
-
-    case kExprI64Clz:
-    case kExprI64Ctz:
-    case kExprI64Popcnt:
-
-    case kExprF32SConvertI64:
-    case kExprF32UConvertI64:
-    case kExprF64SConvertI64:
-    case kExprF64UConvertI64:
-    case kExprI64SConvertF32:
-    case kExprI64SConvertF64:
-    case kExprI64UConvertF32:
-    case kExprI64UConvertF64:
-
-      return false;
+int WasmOpcodes::TrapReasonToMessageId(TrapReason reason) {
+  switch (reason) {
+#define TRAPREASON_TO_MESSAGE(name) \
+  case k##name:                     \
+    return MessageTemplate::kWasm##name;
+    FOREACH_WASM_TRAPREASON(TRAPREASON_TO_MESSAGE)
+#undef TRAPREASON_TO_MESSAGE
     default:
-      return true;
+      return MessageTemplate::kNone;
   }
-#else
-  return true;
-#endif
+}
+
+const char* WasmOpcodes::TrapReasonMessage(TrapReason reason) {
+  return MessageTemplate::TemplateString(TrapReasonToMessageId(reason));
 }
 }  // namespace wasm
 }  // namespace internal
diff --git a/src/wasm/wasm-opcodes.h b/src/wasm/wasm-opcodes.h
index 52f85aa..764c503 100644
--- a/src/wasm/wasm-opcodes.h
+++ b/src/wasm/wasm-opcodes.h
@@ -49,12 +49,10 @@
 typedef Signature<LocalType> FunctionSig;
 std::ostream& operator<<(std::ostream& os, const FunctionSig& function);
 
-struct WasmName {
-  const char* name;
-  uint32_t length;
-};
+typedef Vector<const char> WasmName;
 
-// TODO(titzer): Renumber all the opcodes to fill in holes.
+typedef int WasmCodePosition;
+const WasmCodePosition kNoCodePosition = -1;
 
 // Control expressions and blocks.
 #define FOREACH_CONTROL_OPCODE(V) \
@@ -62,29 +60,29 @@
   V(Block, 0x01, _)               \
   V(Loop, 0x02, _)                \
   V(If, 0x03, _)                  \
-  V(IfElse, 0x04, _)              \
+  V(Else, 0x04, _)                \
   V(Select, 0x05, _)              \
   V(Br, 0x06, _)                  \
   V(BrIf, 0x07, _)                \
   V(BrTable, 0x08, _)             \
-  V(Return, 0x14, _)              \
-  V(Unreachable, 0x15, _)
+  V(Return, 0x09, _)              \
+  V(Unreachable, 0x0a, _)         \
+  V(End, 0x0F, _)
 
 // Constants, locals, globals, and calls.
 #define FOREACH_MISC_OPCODE(V) \
-  V(I8Const, 0x09, _)          \
-  V(I32Const, 0x0a, _)         \
-  V(I64Const, 0x0b, _)         \
-  V(F64Const, 0x0c, _)         \
-  V(F32Const, 0x0d, _)         \
-  V(GetLocal, 0x0e, _)         \
-  V(SetLocal, 0x0f, _)         \
-  V(LoadGlobal, 0x10, _)       \
-  V(StoreGlobal, 0x11, _)      \
-  V(CallFunction, 0x12, _)     \
-  V(CallIndirect, 0x13, _)     \
-  V(CallImport, 0x1F, _)       \
-  V(DeclLocals, 0x1E, _)
+  V(I32Const, 0x10, _)         \
+  V(I64Const, 0x11, _)         \
+  V(F64Const, 0x12, _)         \
+  V(F32Const, 0x13, _)         \
+  V(GetLocal, 0x14, _)         \
+  V(SetLocal, 0x15, _)         \
+  V(CallFunction, 0x16, _)     \
+  V(CallIndirect, 0x17, _)     \
+  V(CallImport, 0x18, _)       \
+  V(I8Const, 0xcb, _)          \
+  V(LoadGlobal, 0xcc, _)       \
+  V(StoreGlobal, 0xcd, _)
 
 // Load memory expressions.
 #define FOREACH_LOAD_MEM_OPCODE(V) \
@@ -258,29 +256,27 @@
   V(F64Log, 0xc7, d_d)                 \
   V(F64Atan2, 0xc8, d_dd)              \
   V(F64Pow, 0xc9, d_dd)                \
-  V(F64Mod, 0xca, d_dd)
-
-// TODO(titzer): sketch of asm-js compatibility bytecodes
-/* V(I32AsmjsDivS, 0xd0, i_ii)          \ */
-/* V(I32AsmjsDivU, 0xd1, i_ii)          \ */
-/* V(I32AsmjsRemS, 0xd2, i_ii)          \ */
-/* V(I32AsmjsRemU, 0xd3, i_ii)          \ */
-/* V(I32AsmjsLoad8S, 0xd4, i_i)         \ */
-/* V(I32AsmjsLoad8U, 0xd5, i_i)         \ */
-/* V(I32AsmjsLoad16S, 0xd6, i_i)        \ */
-/* V(I32AsmjsLoad16U, 0xd7, i_i)        \ */
-/* V(I32AsmjsLoad, 0xd8, i_i)           \ */
-/* V(F32AsmjsLoad, 0xd9, f_i)           \ */
-/* V(F64AsmjsLoad, 0xda, d_i)           \ */
-/* V(I32AsmjsStore8, 0xdb, i_i)         \ */
-/* V(I32AsmjsStore16, 0xdc, i_i)        \ */
-/* V(I32AsmjsStore, 0xdd, i_ii)         \ */
-/* V(F32AsmjsStore, 0xde, i_if)         \ */
-/* V(F64AsmjsStore, 0xdf, i_id)         \ */
-/* V(I32SAsmjsConvertF32, 0xe0, i_f)    \ */
-/* V(I32UAsmjsConvertF32, 0xe1, i_f)    \ */
-/* V(I32SAsmjsConvertF64, 0xe2, i_d)    \ */
-/* V(I32SAsmjsConvertF64, 0xe3, i_d) */
+  V(F64Mod, 0xca, d_dd)                \
+  V(I32AsmjsDivS, 0xd0, i_ii)          \
+  V(I32AsmjsDivU, 0xd1, i_ii)          \
+  V(I32AsmjsRemS, 0xd2, i_ii)          \
+  V(I32AsmjsRemU, 0xd3, i_ii)          \
+  V(I32AsmjsLoadMem8S, 0xd4, i_i)      \
+  V(I32AsmjsLoadMem8U, 0xd5, i_i)      \
+  V(I32AsmjsLoadMem16S, 0xd6, i_i)     \
+  V(I32AsmjsLoadMem16U, 0xd7, i_i)     \
+  V(I32AsmjsLoadMem, 0xd8, i_i)        \
+  V(F32AsmjsLoadMem, 0xd9, f_i)        \
+  V(F64AsmjsLoadMem, 0xda, d_i)        \
+  V(I32AsmjsStoreMem8, 0xdb, i_ii)     \
+  V(I32AsmjsStoreMem16, 0xdc, i_ii)    \
+  V(I32AsmjsStoreMem, 0xdd, i_ii)      \
+  V(F32AsmjsStoreMem, 0xde, f_if)      \
+  V(F64AsmjsStoreMem, 0xdf, d_id)      \
+  V(I32AsmjsSConvertF32, 0xe0, i_f)    \
+  V(I32AsmjsUConvertF32, 0xe1, i_f)    \
+  V(I32AsmjsSConvertF64, 0xe2, i_d)    \
+  V(I32AsmjsUConvertF64, 0xe3, i_d)
 
 // All opcodes.
 #define FOREACH_OPCODE(V)     \
@@ -330,25 +326,33 @@
 };
 
 // The reason for a trap.
+#define FOREACH_WASM_TRAPREASON(V) \
+  V(TrapUnreachable)          \
+  V(TrapMemOutOfBounds)       \
+  V(TrapDivByZero)            \
+  V(TrapDivUnrepresentable)   \
+  V(TrapRemByZero)            \
+  V(TrapFloatUnrepresentable) \
+  V(TrapFuncInvalid)          \
+  V(TrapFuncSigMismatch)
+
 enum TrapReason {
-  kTrapUnreachable,
-  kTrapMemOutOfBounds,
-  kTrapDivByZero,
-  kTrapDivUnrepresentable,
-  kTrapRemByZero,
-  kTrapFloatUnrepresentable,
-  kTrapFuncInvalid,
-  kTrapFuncSigMismatch,
+#define DECLARE_ENUM(name) k##name,
+  FOREACH_WASM_TRAPREASON(DECLARE_ENUM)
   kTrapCount
+#undef DECLARE_ENUM
 };
 
 // A collection of opcode-related static methods.
 class WasmOpcodes {
  public:
-  static bool IsSupported(WasmOpcode opcode);
   static const char* OpcodeName(WasmOpcode opcode);
+  static const char* ShortOpcodeName(WasmOpcode opcode);
   static FunctionSig* Signature(WasmOpcode opcode);
 
+  static int TrapReasonToMessageId(TrapReason reason);
+  static const char* TrapReasonMessage(TrapReason reason);
+
   static byte MemSize(MachineType type) {
     return 1 << ElementSizeLog2Of(type.representation());
   }
@@ -508,29 +512,6 @@
         return "<unknown>";
     }
   }
-
-  static const char* TrapReasonName(TrapReason reason) {
-    switch (reason) {
-      case kTrapUnreachable:
-        return "unreachable";
-      case kTrapMemOutOfBounds:
-        return "memory access out of bounds";
-      case kTrapDivByZero:
-        return "divide by zero";
-      case kTrapDivUnrepresentable:
-        return "divide result unrepresentable";
-      case kTrapRemByZero:
-        return "remainder by zero";
-      case kTrapFloatUnrepresentable:
-        return "integer result unrepresentable";
-      case kTrapFuncInvalid:
-        return "invalid function";
-      case kTrapFuncSigMismatch:
-        return "function signature mismatch";
-      default:
-        return "<?>";
-    }
-  }
 };
 }  // namespace wasm
 }  // namespace internal
diff --git a/src/wasm/wasm-result.cc b/src/wasm/wasm-result.cc
index 4fd17ee..3de5812 100644
--- a/src/wasm/wasm-result.cc
+++ b/src/wasm/wasm-result.cc
@@ -28,7 +28,6 @@
   return os;
 }
 
-
 void ErrorThrower::Error(const char* format, ...) {
   if (error_) return;  // only report the first error.
   error_ = true;
diff --git a/src/wasm/wasm-result.h b/src/wasm/wasm-result.h
index 59ab29e..b650c33 100644
--- a/src/wasm/wasm-result.h
+++ b/src/wasm/wasm-result.h
@@ -5,6 +5,7 @@
 #ifndef V8_WASM_RESULT_H_
 #define V8_WASM_RESULT_H_
 
+#include "src/base/compiler-specific.h"
 #include "src/base/smart-pointers.h"
 
 #include "src/globals.h"
@@ -93,13 +94,13 @@
   ErrorThrower(Isolate* isolate, const char* context)
       : isolate_(isolate), context_(context), error_(false) {}
 
-  void Error(const char* fmt, ...);
+  PRINTF_FORMAT(2, 3) void Error(const char* fmt, ...);
 
   template <typename T>
   void Failed(const char* error, Result<T>& result) {
     std::ostringstream str;
     str << error << result;
-    return Error(str.str().c_str());
+    return Error("%s", str.str().c_str());
   }
 
   bool error() const { return error_; }