Version 1.3.3

Fix issue 417: incorrect %t placeholder expansion.

Add a .gitignore file similar to Chromium's.

Fix SConstruct file to build with new logging code for Android.

API: Added function to find instance of template in prototype
chain.  Inlined Object::IsInstanceOf.

Land change to notify Valgrind when we modify code on x86.

Add an API call to determine whether a string can be externalized.

Add a write() command to d8.


git-svn-id: http://v8.googlecode.com/svn/trunk@2669 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..685e9a2
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,21 @@
+*.a
+*.exe
+*.lib
+*.log
+*.map
+*.mk
+*.ncb
+*.pyc
+*.scons*
+*.suo
+*.user
+*.xcodeproj
+d8
+d8_g
+shell
+shell_g
+/obj/
+/tools/visual_studio/Debug
+/tools/visual_studio/Release
+/xcodebuild/
+TAGS
diff --git a/ChangeLog b/ChangeLog
index 03b96f1..e786802 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,21 @@
+2009-08-12: Version 1.3.3
+
+        Fix issue 417: incorrect %t placeholder expansion.
+
+        Add a .gitignore file similar to Chromium's.
+
+        Fix SConstruct file to build with new logging code for Android.
+
+        API: Added function to find instance of template in prototype
+        chain.  Inlined Object::IsInstanceOf.
+
+        Land change to notify Valgrind when we modify code on x86.
+
+        Add an API call to determine whether a string can be externalized.
+
+        Add a write() command to d8.
+
+
 2009-08-05: Version 1.3.2
 
         Started new compiler infrastructure for two-pass compilation using a
diff --git a/LICENSE b/LICENSE
index 7301556..553cf47 100644
--- a/LICENSE
+++ b/LICENSE
@@ -20,6 +20,9 @@
     copyrighted by Douglas Crockford and Baruch Even and released under
     an MIT license.
 
+  - Valgrind client API header, located at third_party/valgrind/valgrind.h
+    This is released under the BSD license.
+
 These libraries have their own licenses; we recommend you read them,
 as their terms may differ from the terms below.
 
diff --git a/SConstruct b/SConstruct
index 8b48ad4..c981ef9 100644
--- a/SConstruct
+++ b/SConstruct
@@ -79,7 +79,9 @@
                     ANDROID_TOP + '/bionic/libc/kernel/arch-arm',
                     ANDROID_TOP + '/bionic/libm/include',
                     ANDROID_TOP + '/bionic/libm/include/arch/arm',
-                    ANDROID_TOP + '/bionic/libthread_db/include']
+                    ANDROID_TOP + '/bionic/libthread_db/include',
+                    ANDROID_TOP + '/frameworks/base/include',
+                    ANDROID_TOP + '/system/core/include']
 
 ANDROID_LINKFLAGS = ['-nostdlib',
                      '-Bdynamic',
@@ -331,7 +333,7 @@
       'CPPPATH':      ANDROID_INCLUDES,
       'LIBPATH':     [ANDROID_TOP + '/out/target/product/generic/obj/lib'],
       'LINKFLAGS':    ANDROID_LINKFLAGS,
-      'LIBS':         ['c', 'stdc++', 'm'],
+      'LIBS':         ['log', 'c', 'stdc++', 'm'],
       'mode:release': {
         'CPPDEFINES': ['SK_RELEASE', 'NDEBUG']
       }
@@ -382,7 +384,7 @@
       'CPPPATH':      ANDROID_INCLUDES,
       'LIBPATH':     [ANDROID_TOP + '/out/target/product/generic/obj/lib'],
       'LINKFLAGS':    ANDROID_LINKFLAGS,
-      'LIBS':         ['c', 'stdc++', 'm'],
+      'LIBS':         ['log', 'c', 'stdc++', 'm'],
       'mode:release': {
         'CPPDEFINES': ['SK_RELEASE', 'NDEBUG']
       }
@@ -470,7 +472,7 @@
     'os:android': {
       'LIBPATH':     [ANDROID_TOP + '/out/target/product/generic/obj/lib'],
       'LINKFLAGS':    ANDROID_LINKFLAGS,
-      'LIBS':         ['c', 'stdc++', 'm'],
+      'LIBS':         ['log', 'c', 'stdc++', 'm'],
     },
     'os:win32': {
       'LIBS': ['winmm', 'ws2_32'],
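
The new 'log' entry in LIBS is what the Android logging code links against.
As a rough illustration (not code from this patch), any call into the liblog
API creates the dependency that '-llog' satisfies at link time:

    #include <android/log.h>

    // Calling into liblog is what makes the extra 'log' library necessary.
    static void LogStartup() {
      __android_log_print(ANDROID_LOG_INFO, "v8", "engine initialized");
    }
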
diff --git a/include/v8.h b/include/v8.h
index 83d5bed..23c4425 100644
--- a/include/v8.h
+++ b/include/v8.h
@@ -901,6 +901,11 @@
    */
   bool MakeExternal(ExternalAsciiStringResource* resource);
 
+  /**
+   * Returns true if this string can be made external.
+   */
+  bool CanMakeExternal();
+
   /** Creates an undetectable string from the supplied ascii or utf-8 data.*/
   static Local<String> NewUndetectable(const char* data, int length = -1);
 
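A minimal embedder sketch of the new call (illustrative only; the resource
class and helper below are not part of this patch, and the backing buffer
must outlive the string):

    #include <v8.h>
    #include <cstddef>

    // Resource backed by memory the embedder keeps alive.
    class StaticAsciiResource : public v8::String::ExternalAsciiStringResource {
     public:
      StaticAsciiResource(const char* data, size_t length)
          : data_(data), length_(length) {}
      const char* data() const { return data_; }
      size_t length() const { return length_; }
     private:
      const char* data_;
      size_t length_;
    };

    void MaybeExternalize(v8::Handle<v8::String> str,
                          StaticAsciiResource* resource) {
      // CanMakeExternal() returns false for strings that are too small or
      // already external, so MakeExternal() is only attempted when it can
      // succeed.
      if (str->CanMakeExternal()) str->MakeExternal(resource);
    }
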
diff --git a/src/api.cc b/src/api.cc
index 0cbbb46..d02a2e4 100644
--- a/src/api.cc
+++ b/src/api.cc
@@ -2589,9 +2589,12 @@
   i::Handle<i::Context> env;
   {
     ENTER_V8;
+#if defined(ANDROID)
+    // On mobile devices, full GC is expensive; skip it when creating a
+    // context.
+#else
     // Give the heap a chance to cleanup if we've disposed contexts.
     i::Heap::CollectAllGarbageIfContextDisposed();
-
+#endif
     v8::Handle<ObjectTemplate> proxy_template = global_template;
     i::Handle<i::FunctionTemplateInfo> proxy_constructor;
     i::Handle<i::FunctionTemplateInfo> global_constructor;
@@ -2983,7 +2986,7 @@
   if (IsDeadCheck("v8::String::MakeExternal()")) return false;
   if (this->IsExternal()) return false;  // Already an external string.
   ENTER_V8;
-  i::Handle <i::String> obj = Utils::OpenHandle(this);
+  i::Handle<i::String> obj = Utils::OpenHandle(this);
   bool result = obj->MakeExternal(resource);
   if (result && !obj->IsSymbol()) {
     // Operation was successful and the string is not a symbol. In this case
@@ -3019,7 +3022,7 @@
   if (IsDeadCheck("v8::String::MakeExternal()")) return false;
   if (this->IsExternal()) return false;  // Already an external string.
   ENTER_V8;
-  i::Handle <i::String> obj = Utils::OpenHandle(this);
+  i::Handle<i::String> obj = Utils::OpenHandle(this);
   bool result = obj->MakeExternal(resource);
   if (result && !obj->IsSymbol()) {
     // Operation was successful and the string is not a symbol. In this case
@@ -3034,6 +3037,17 @@
 }
 
 
+bool v8::String::CanMakeExternal() {
+  if (IsDeadCheck("v8::String::CanMakeExternal()")) return false;
+  i::Handle<i::String> obj = Utils::OpenHandle(this);
+  int size = obj->Size();  // Byte size of the original string.
+  if (size < i::ExternalString::kSize)
+    return false;
+  i::StringShape shape(*obj);
+  return !shape.IsExternal();
+}
+
+
 Local<v8::Object> v8::Object::New() {
   EnsureInitialized("v8::Object::New()");
   LOG_API("Object::New");
diff --git a/src/arm/cfg-arm.cc b/src/arm/cfg-arm.cc
index 109067b..34e64b3 100644
--- a/src/arm/cfg-arm.cc
+++ b/src/arm/cfg-arm.cc
@@ -29,6 +29,7 @@
 
 #include "cfg.h"
 #include "codegen-inl.h"
+#include "codegen-arm.h"  // Include after codegen-inl.h.
 #include "macro-assembler-arm.h"
 
 namespace v8 {
@@ -42,6 +43,14 @@
   {
     Comment cmt(masm, "[ InstructionBlock");
     for (int i = 0, len = instructions_.length(); i < len; i++) {
+      // If the location of the current instruction is a temp, then the
+      // instruction cannot be in tail position in the block.  Allocate the
+      // temp based on peeking ahead to the next instruction.
+      Instruction* instr = instructions_[i];
+      Location* loc = instr->location();
+      if (loc->is_temporary()) {
+        instructions_[i + 1]->FastAllocate(TempLocation::cast(loc));
+      }
       instructions_[i]->Compile(masm);
     }
   }
@@ -91,31 +100,201 @@
 }
 
 
-void ReturnInstr::Compile(MacroAssembler* masm) {
-  Comment cmnt(masm, "[ ReturnInstr");
-  value_->ToRegister(masm, r0);
+void PropLoadInstr::Compile(MacroAssembler* masm) {
+  // The key should not be on the stack---if it is a compiler-generated
+  // temporary it is in the accumulator.
+  ASSERT(!key()->is_on_stack());
+
+  Comment cmnt(masm, "[ Load from Property");
+  // If the key is known at compile-time we may be able to use a load IC.
+  bool is_keyed_load = true;
+  if (key()->is_constant()) {
+    // Still use the keyed load IC if the key can be parsed as an integer so
+    // we will get into the case that handles [] on string objects.
+    Handle<Object> key_val = Constant::cast(key())->handle();
+    uint32_t ignored;
+    if (key_val->IsSymbol() &&
+        !String::cast(*key_val)->AsArrayIndex(&ignored)) {
+      is_keyed_load = false;
+    }
+  }
+
+  if (!object()->is_on_stack()) object()->Push(masm);
+
+  if (is_keyed_load) {
+    key()->Push(masm);
+    Handle<Code> ic(Builtins::builtin(Builtins::KeyedLoadIC_Initialize));
+    __ Call(ic, RelocInfo::CODE_TARGET);
+    // Discard key and receiver.
+    __ add(sp, sp, Operand(2 * kPointerSize));
+  } else {
+    key()->Get(masm, r2);
+    Handle<Code> ic(Builtins::builtin(Builtins::LoadIC_Initialize));
+    __ Call(ic, RelocInfo::CODE_TARGET);
+    __ pop();  // Discard receiver.
+  }
+  location()->Set(masm, r0);
 }
 
 
-void Constant::ToRegister(MacroAssembler* masm, Register reg) {
+void BinaryOpInstr::Compile(MacroAssembler* masm) {
+  // The right-hand value should not be on the stack---if it is a
+  // compiler-generated temporary it is in the accumulator.
+  ASSERT(!right()->is_on_stack());
+
+  Comment cmnt(masm, "[ BinaryOpInstr");
+  // We can overwrite one of the operands if it is a temporary.
+  OverwriteMode mode = NO_OVERWRITE;
+  if (left()->is_temporary()) {
+    mode = OVERWRITE_LEFT;
+  } else if (right()->is_temporary()) {
+    mode = OVERWRITE_RIGHT;
+  }
+
+  // Move left to r1 and right to r0.
+  left()->Get(masm, r1);
+  right()->Get(masm, r0);
+  GenericBinaryOpStub stub(op(), mode);
+  __ CallStub(&stub);
+  location()->Set(masm, r0);
+}
+
+
+void ReturnInstr::Compile(MacroAssembler* masm) {
+  // The location should be 'Effect'.  As a side effect, move the value to
+  // the accumulator.
+  Comment cmnt(masm, "[ ReturnInstr");
+  value()->Get(masm, r0);
+}
+
+
+void Constant::Get(MacroAssembler* masm, Register reg) {
   __ mov(reg, Operand(handle_));
 }
 
 
-void SlotLocation::ToRegister(MacroAssembler* masm, Register reg) {
-  switch (type_) {
+void Constant::Push(MacroAssembler* masm) {
+  __ mov(ip, Operand(handle_));
+  __ push(ip);
+}
+
+
+static MemOperand ToMemOperand(SlotLocation* loc) {
+  switch (loc->type()) {
     case Slot::PARAMETER: {
       int count = CfgGlobals::current()->fun()->scope()->num_parameters();
-      __ ldr(reg, MemOperand(fp, (1 + count - index_) * kPointerSize));
-      break;
+      return MemOperand(fp, (1 + count - loc->index()) * kPointerSize);
     }
     case Slot::LOCAL: {
       const int kOffset = JavaScriptFrameConstants::kLocal0Offset;
-      __ ldr(reg, MemOperand(fp, kOffset - index_ * kPointerSize));
-      break;
+      return MemOperand(fp, kOffset - loc->index() * kPointerSize);
     }
     default:
       UNREACHABLE();
+      return MemOperand(r0);
+  }
+}
+
+
+void Constant::MoveToSlot(MacroAssembler* masm, SlotLocation* loc) {
+  __ mov(ip, Operand(handle_));
+  __ str(ip, ToMemOperand(loc));
+}
+
+
+void SlotLocation::Get(MacroAssembler* masm, Register reg) {
+  __ ldr(reg, ToMemOperand(this));
+}
+
+
+void SlotLocation::Set(MacroAssembler* masm, Register reg) {
+  __ str(reg, ToMemOperand(this));
+}
+
+
+void SlotLocation::Push(MacroAssembler* masm) {
+  __ ldr(ip, ToMemOperand(this));
+  __ push(ip);  // Push will not destroy ip.
+}
+
+
+void SlotLocation::Move(MacroAssembler* masm, Value* value) {
+  // Double dispatch.
+  value->MoveToSlot(masm, this);
+}
+
+
+void SlotLocation::MoveToSlot(MacroAssembler* masm, SlotLocation* loc) {
+  __ ldr(ip, ToMemOperand(this));
+  __ str(ip, ToMemOperand(loc));
+}
+
+
+void TempLocation::Get(MacroAssembler* masm, Register reg) {
+  switch (where_) {
+    case ACCUMULATOR:
+      if (!reg.is(r0)) __ mov(reg, r0);
+      break;
+    case STACK:
+      __ pop(reg);
+      break;
+    case NOT_ALLOCATED:
+      UNREACHABLE();
+  }
+}
+
+
+void TempLocation::Set(MacroAssembler* masm, Register reg) {
+  switch (where_) {
+    case ACCUMULATOR:
+      if (!reg.is(r0)) __ mov(r0, reg);
+      break;
+    case STACK:
+      __ push(reg);
+      break;
+    case NOT_ALLOCATED:
+      UNREACHABLE();
+  }
+}
+
+
+void TempLocation::Push(MacroAssembler* masm) {
+  switch (where_) {
+    case ACCUMULATOR:
+      __ push(r0);
+      break;
+    case STACK:
+    case NOT_ALLOCATED:
+      UNREACHABLE();
+  }
+}
+
+
+void TempLocation::Move(MacroAssembler* masm, Value* value) {
+  switch (where_) {
+    case ACCUMULATOR:
+      value->Get(masm, r0);
+      break;
+    case STACK:
+      value->Push(masm);
+      break;
+    case NOT_ALLOCATED:
+      UNREACHABLE();
+  }
+}
+
+
+void TempLocation::MoveToSlot(MacroAssembler* masm, SlotLocation* loc) {
+  switch (where_) {
+    case ACCUMULATOR:
+      __ str(r0, ToMemOperand(loc));
+      break;
+    case STACK:
+      __ pop(ip);
+      __ str(ip, ToMemOperand(loc));
+      break;
+    case NOT_ALLOCATED:
+      UNREACHABLE();
   }
 }
 
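The Move()/MoveToSlot() pairs above are classic double dispatch: the
destination location dispatches on itself first, then hands off to the
source value so the right move sequence is chosen for each
source/destination pair. A minimal sketch of the pattern (illustrative
types, not the real CFG classes):

    struct SlotDest;

    struct Value {
      virtual ~Value() {}
      // Second dispatch: each source kind knows how to store itself.
      virtual void MoveToSlot(SlotDest* dst) = 0;
    };

    struct SlotDest {
      // First dispatch: the destination forwards to the source.
      void Move(Value* src) { src->MoveToSlot(this); }
    };

    struct ConstantValue : public Value {
      virtual void MoveToSlot(SlotDest* dst) {
        // Would emit: mov ip, #imm; str ip, [slot].
      }
    };
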
diff --git a/src/arm/codegen-arm-inl.h b/src/arm/codegen-arm-inl.h
index 5a29a45..9ff02cb 100644
--- a/src/arm/codegen-arm-inl.h
+++ b/src/arm/codegen-arm-inl.h
@@ -34,6 +34,37 @@
 
 #define __ ACCESS_MASM(masm_)
 
+void CodeGenerator::LoadConditionAndSpill(Expression* expression,
+                                          TypeofState typeof_state,
+                                          JumpTarget* true_target,
+                                          JumpTarget* false_target,
+                                          bool force_control) {
+  LoadCondition(expression, typeof_state, true_target, false_target,
+                force_control);
+}
+
+
+void CodeGenerator::LoadAndSpill(Expression* expression,
+                                 TypeofState typeof_state) {
+  Load(expression, typeof_state);
+}
+
+
+void CodeGenerator::VisitAndSpill(Statement* statement) {
+  Visit(statement);
+}
+
+
+void CodeGenerator::VisitStatementsAndSpill(ZoneList<Statement*>* statements) {
+  VisitStatements(statements);
+}
+
+
+void Reference::GetValueAndSpill(TypeofState typeof_state) {
+  GetValue(typeof_state);
+}
+
+
 // Platform-specific inline functions.
 
 void DeferredCode::Jump() { __ jmp(&entry_label_); }
diff --git a/src/arm/codegen-arm.cc b/src/arm/codegen-arm.cc
index 5f8149e..d3507ec 100644
--- a/src/arm/codegen-arm.cc
+++ b/src/arm/codegen-arm.cc
@@ -133,8 +133,7 @@
       allocator_(NULL),
       cc_reg_(al),
       state_(NULL),
-      function_return_is_shadowed_(false),
-      in_spilled_code_(false) {
+      function_return_is_shadowed_(false) {
 }
 
 
@@ -156,7 +155,6 @@
   ASSERT(frame_ == NULL);
   frame_ = new VirtualFrame();
   cc_reg_ = al;
-  set_in_spilled_code(false);
   {
     CodeGenState state(this);
 
@@ -423,22 +421,6 @@
 }
 
 
-void CodeGenerator::LoadConditionAndSpill(Expression* expression,
-                                          TypeofState typeof_state,
-                                          JumpTarget* true_target,
-                                          JumpTarget* false_target,
-                                          bool force_control) {
-  ASSERT(in_spilled_code());
-  set_in_spilled_code(false);
-  LoadCondition(expression, typeof_state, true_target, false_target,
-                force_control);
-  if (frame_ != NULL) {
-    frame_->SpillAll();
-  }
-  set_in_spilled_code(true);
-}
-
-
 // Loads a value on TOS. If it is a boolean value, the result may have been
 // (partially) translated into branches, or it may have set the condition
 // code register. If force_cc is set, the value is forced to set the
@@ -450,7 +432,6 @@
                                   JumpTarget* true_target,
                                   JumpTarget* false_target,
                                   bool force_cc) {
-  ASSERT(!in_spilled_code());
   ASSERT(!has_cc());
   int original_height = frame_->height();
 
@@ -484,21 +465,10 @@
 }
 
 
-void CodeGenerator::LoadAndSpill(Expression* expression,
-                                 TypeofState typeof_state) {
-  ASSERT(in_spilled_code());
-  set_in_spilled_code(false);
-  Load(expression, typeof_state);
-  frame_->SpillAll();
-  set_in_spilled_code(true);
-}
-
-
 void CodeGenerator::Load(Expression* x, TypeofState typeof_state) {
 #ifdef DEBUG
   int original_height = frame_->height();
 #endif
-  ASSERT(!in_spilled_code());
   JumpTarget true_target;
   JumpTarget false_target;
   LoadCondition(x, typeof_state, &true_target, &false_target, false);
@@ -697,96 +667,6 @@
 }
 
 
-class GenericBinaryOpStub : public CodeStub {
- public:
-  GenericBinaryOpStub(Token::Value op,
-                      OverwriteMode mode,
-                      int constant_rhs = CodeGenerator::kUnknownIntValue)
-      : op_(op),
-        mode_(mode),
-        constant_rhs_(constant_rhs),
-        specialized_on_rhs_(RhsIsOneWeWantToOptimizeFor(op, constant_rhs)) { }
-
- private:
-  Token::Value op_;
-  OverwriteMode mode_;
-  int constant_rhs_;
-  bool specialized_on_rhs_;
-
-  static const int kMaxKnownRhs = 0x40000000;
-
-  // Minor key encoding in 16 bits.
-  class ModeBits: public BitField<OverwriteMode, 0, 2> {};
-  class OpBits: public BitField<Token::Value, 2, 6> {};
-  class KnownIntBits: public BitField<int, 8, 8> {};
-
-  Major MajorKey() { return GenericBinaryOp; }
-  int MinorKey() {
-    // Encode the parameters in a unique 16 bit value.
-    return OpBits::encode(op_)
-           | ModeBits::encode(mode_)
-           | KnownIntBits::encode(MinorKeyForKnownInt());
-  }
-
-  void Generate(MacroAssembler* masm);
-  void HandleNonSmiBitwiseOp(MacroAssembler* masm);
-
-  static bool RhsIsOneWeWantToOptimizeFor(Token::Value op, int constant_rhs) {
-    if (constant_rhs == CodeGenerator::kUnknownIntValue) return false;
-    if (op == Token::DIV) return constant_rhs >= 2 && constant_rhs <= 3;
-    if (op == Token::MOD) {
-      if (constant_rhs <= 1) return false;
-      if (constant_rhs <= 10) return true;
-      if (constant_rhs <= kMaxKnownRhs && IsPowerOf2(constant_rhs)) return true;
-      return false;
-    }
-    return false;
-  }
-
-  int MinorKeyForKnownInt() {
-    if (!specialized_on_rhs_) return 0;
-    if (constant_rhs_ <= 10) return constant_rhs_ + 1;
-    ASSERT(IsPowerOf2(constant_rhs_));
-    int key = 12;
-    int d = constant_rhs_;
-    while ((d & 1) == 0) {
-      key++;
-      d >>= 1;
-    }
-    return key;
-  }
-
-  const char* GetName() {
-    switch (op_) {
-      case Token::ADD: return "GenericBinaryOpStub_ADD";
-      case Token::SUB: return "GenericBinaryOpStub_SUB";
-      case Token::MUL: return "GenericBinaryOpStub_MUL";
-      case Token::DIV: return "GenericBinaryOpStub_DIV";
-      case Token::MOD: return "GenericBinaryOpStub_MOD";
-      case Token::BIT_OR: return "GenericBinaryOpStub_BIT_OR";
-      case Token::BIT_AND: return "GenericBinaryOpStub_BIT_AND";
-      case Token::BIT_XOR: return "GenericBinaryOpStub_BIT_XOR";
-      case Token::SAR: return "GenericBinaryOpStub_SAR";
-      case Token::SHL: return "GenericBinaryOpStub_SHL";
-      case Token::SHR: return "GenericBinaryOpStub_SHR";
-      default:         return "GenericBinaryOpStub";
-    }
-  }
-
-#ifdef DEBUG
-  void Print() {
-    if (!specialized_on_rhs_) {
-      PrintF("GenericBinaryOpStub (%s)\n", Token::String(op_));
-    } else {
-      PrintF("GenericBinaryOpStub (%s by %d)\n",
-             Token::String(op_),
-             constant_rhs_);
-    }
-  }
-#endif
-};
-
-
 void CodeGenerator::GenericBinaryOperation(Token::Value op,
                                            OverwriteMode overwrite_mode,
                                            int constant_rhs) {
@@ -1156,11 +1036,7 @@
   // We call with 0 args because there are 0 on the stack.
   CompareStub stub(cc, strict);
   frame_->CallStub(&stub, 0);
-
-  Result result = allocator_->Allocate(r0);
-  ASSERT(result.is_valid());
-  __ cmp(result.reg(), Operand(0));
-  result.Unuse();
+  __ cmp(r0, Operand(0));
   exit.Jump();
 
   // Do smi comparisons by pointer comparison.
@@ -1236,28 +1112,6 @@
 }
 
 
-void CodeGenerator::VisitAndSpill(Statement* statement) {
-  ASSERT(in_spilled_code());
-  set_in_spilled_code(false);
-  Visit(statement);
-  if (frame_ != NULL) {
-    frame_->SpillAll();
-    }
-  set_in_spilled_code(true);
-}
-
-
-void CodeGenerator::VisitStatementsAndSpill(ZoneList<Statement*>* statements) {
-  ASSERT(in_spilled_code());
-  set_in_spilled_code(false);
-  VisitStatements(statements);
-  if (frame_ != NULL) {
-    frame_->SpillAll();
-  }
-  set_in_spilled_code(true);
-}
-
-
 void CodeGenerator::VisitStatements(ZoneList<Statement*>* statements) {
 #ifdef DEBUG
   int original_height = frame_->height();
@@ -1854,7 +1708,6 @@
 #ifdef DEBUG
   int original_height = frame_->height();
 #endif
-  ASSERT(!in_spilled_code());
   VirtualFrame::SpilledScope spilled_scope;
   Comment cmnt(masm_, "[ ForInStatement");
   CodeForStatementPosition(node);
@@ -1892,9 +1745,8 @@
 
   primitive.Bind();
   frame_->EmitPush(r0);
-  Result arg_count = allocator_->Allocate(r0);
-  ASSERT(arg_count.is_valid());
-  __ mov(arg_count.reg(), Operand(0));
+  Result arg_count(r0);
+  __ mov(r0, Operand(0));
   frame_->InvokeBuiltin(Builtins::TO_OBJECT, CALL_JS, &arg_count, 1);
 
   jsobject.Bind();
@@ -1975,15 +1827,10 @@
   __ ldr(r0, frame_->ElementAt(4));  // push enumerable
   frame_->EmitPush(r0);
   frame_->EmitPush(r3);  // push entry
-  Result arg_count_register = allocator_->Allocate(r0);
-  ASSERT(arg_count_register.is_valid());
-  __ mov(arg_count_register.reg(), Operand(1));
-  Result result = frame_->InvokeBuiltin(Builtins::FILTER_KEY,
-                                        CALL_JS,
-                                        &arg_count_register,
-                                        2);
-  __ mov(r3, Operand(result.reg()));
-  result.Unuse();
+  Result arg_count_reg(r0);
+  __ mov(r0, Operand(1));
+  frame_->InvokeBuiltin(Builtins::FILTER_KEY, CALL_JS, &arg_count_reg, 2);
+  __ mov(r3, Operand(r0));
 
   // If the property has been removed while iterating, we just skip it.
   __ cmp(r3, Operand(Factory::null_value()));
@@ -2576,9 +2423,8 @@
   // Load the global object.
   LoadGlobal();
   // Setup the name register.
-  Result name = allocator_->Allocate(r2);
-  ASSERT(name.is_valid());  // We are in spilled code.
-  __ mov(name.reg(), Operand(slot->var()->name()));
+  Result name(r2);
+  __ mov(r2, Operand(slot->var()->name()));
   // Call IC stub.
   if (typeof_state == INSIDE_TYPEOF) {
     frame_->CallCodeObject(ic, RelocInfo::CODE_TARGET, &name, 0);
@@ -2912,16 +2758,14 @@
 #ifdef DEBUG
   int original_height = frame_->height();
 #endif
-  ASSERT(!in_spilled_code());
   VirtualFrame::SpilledScope spilled_scope;
   // Call runtime routine to allocate the catch extension object and
   // assign the exception value to the catch variable.
   Comment cmnt(masm_, "[ CatchExtensionObject");
   LoadAndSpill(node->key());
   LoadAndSpill(node->value());
-  Result result =
-      frame_->CallRuntime(Runtime::kCreateCatchExtensionObject, 2);
-  frame_->EmitPush(result.reg());
+  frame_->CallRuntime(Runtime::kCreateCatchExtensionObject, 2);
+  frame_->EmitPush(r0);
   ASSERT(frame_->height() == original_height + 1);
 }
 
@@ -3261,24 +3105,22 @@
   }
 
   // r0: the number of arguments.
-  Result num_args = allocator_->Allocate(r0);
-  ASSERT(num_args.is_valid());
-  __ mov(num_args.reg(), Operand(arg_count));
+  Result num_args(r0);
+  __ mov(r0, Operand(arg_count));
 
   // Load the function into r1 as per calling convention.
-  Result function = allocator_->Allocate(r1);
-  ASSERT(function.is_valid());
-  __ ldr(function.reg(), frame_->ElementAt(arg_count + 1));
+  Result function(r1);
+  __ ldr(r1, frame_->ElementAt(arg_count + 1));
 
   // Call the construct call builtin that handles allocation and
   // constructor invocation.
   CodeForSourcePosition(node->position());
   Handle<Code> ic(Builtins::builtin(Builtins::JSConstructCall));
-  Result result = frame_->CallCodeObject(ic,
-                                         RelocInfo::CONSTRUCT_CALL,
-                                         &num_args,
-                                         &function,
-                                         arg_count + 1);
+  frame_->CallCodeObject(ic,
+                         RelocInfo::CONSTRUCT_CALL,
+                         &num_args,
+                         &function,
+                         arg_count + 1);
 
   // Discard old TOS value and push r0 on the stack (same as Pop(), push(r0)).
   __ str(r0, frame_->Top());
@@ -3621,9 +3463,8 @@
     if (property != NULL) {
       LoadAndSpill(property->obj());
       LoadAndSpill(property->key());
-      Result arg_count = allocator_->Allocate(r0);
-      ASSERT(arg_count.is_valid());
-      __ mov(arg_count.reg(), Operand(1));  // not counting receiver
+      Result arg_count(r0);
+      __ mov(r0, Operand(1));  // not counting receiver
       frame_->InvokeBuiltin(Builtins::DELETE, CALL_JS, &arg_count, 2);
 
     } else if (variable != NULL) {
@@ -3632,9 +3473,8 @@
         LoadGlobal();
         __ mov(r0, Operand(variable->name()));
         frame_->EmitPush(r0);
-        Result arg_count = allocator_->Allocate(r0);
-        ASSERT(arg_count.is_valid());
-        __ mov(arg_count.reg(), Operand(1));  // not counting receiver
+        Result arg_count(r0);
+        __ mov(r0, Operand(1));  // not counting receiver
         frame_->InvokeBuiltin(Builtins::DELETE, CALL_JS, &arg_count, 2);
 
       } else if (slot != NULL && slot->type() == Slot::LOOKUP) {
@@ -3647,9 +3487,8 @@
         frame_->EmitPush(r0);
         __ mov(r0, Operand(variable->name()));
         frame_->EmitPush(r0);
-        Result arg_count = allocator_->Allocate(r0);
-        ASSERT(arg_count.is_valid());
-        __ mov(arg_count.reg(), Operand(1));  // not counting receiver
+        Result arg_count(r0);
+        __ mov(r0, Operand(1));  // not counting receiver
         frame_->InvokeBuiltin(Builtins::DELETE, CALL_JS, &arg_count, 2);
 
       } else {
@@ -3700,9 +3539,8 @@
         smi_label.Branch(eq);
 
         frame_->EmitPush(r0);
-        Result arg_count = allocator_->Allocate(r0);
-        ASSERT(arg_count.is_valid());
-        __ mov(arg_count.reg(), Operand(0));  // not counting receiver
+        Result arg_count(r0);
+        __ mov(r0, Operand(0));  // not counting receiver
         frame_->InvokeBuiltin(Builtins::BIT_NOT, CALL_JS, &arg_count, 1);
 
         continue_label.Jump();
@@ -3725,9 +3563,8 @@
         __ tst(r0, Operand(kSmiTagMask));
         continue_label.Branch(eq);
         frame_->EmitPush(r0);
-        Result arg_count = allocator_->Allocate(r0);
-        ASSERT(arg_count.is_valid());
-        __ mov(arg_count.reg(), Operand(0));  // not counting receiver
+        Result arg_count(r0);
+        __ mov(r0, Operand(0));  // not counting receiver
         frame_->InvokeBuiltin(Builtins::TO_NUMBER, CALL_JS, &arg_count, 1);
         continue_label.Bind();
         break;
@@ -3813,9 +3650,8 @@
     {
       // Convert the operand to a number.
       frame_->EmitPush(r0);
-      Result arg_count = allocator_->Allocate(r0);
-      ASSERT(arg_count.is_valid());
-      __ mov(arg_count.reg(), Operand(0));
+      Result arg_count(r0);
+      __ mov(r0, Operand(0));
       frame_->InvokeBuiltin(Builtins::TO_NUMBER, CALL_JS, &arg_count, 1);
     }
     if (is_postfix) {
@@ -4192,14 +4028,10 @@
     case Token::IN: {
       LoadAndSpill(left);
       LoadAndSpill(right);
-      Result arg_count = allocator_->Allocate(r0);
-      ASSERT(arg_count.is_valid());
-      __ mov(arg_count.reg(), Operand(1));  // not counting receiver
-      Result result = frame_->InvokeBuiltin(Builtins::IN,
-                                            CALL_JS,
-                                            &arg_count,
-                                            2);
-      frame_->EmitPush(result.reg());
+      Result arg_count(r0);
+      __ mov(r0, Operand(1));  // not counting receiver
+      frame_->InvokeBuiltin(Builtins::IN, CALL_JS, &arg_count, 2);
+      frame_->EmitPush(r0);
       break;
     }
 
@@ -4207,9 +4039,9 @@
       LoadAndSpill(left);
       LoadAndSpill(right);
       InstanceofStub stub;
-      Result result = frame_->CallStub(&stub, 2);
+      frame_->CallStub(&stub, 2);
       // At this point if instanceof succeeded then r0 == 0.
-      __ tst(result.reg(), Operand(result.reg()));
+      __ tst(r0, Operand(r0));
       cc_reg_ = eq;
       break;
     }
@@ -4248,17 +4080,7 @@
 }
 
 
-void Reference::GetValueAndSpill(TypeofState typeof_state) {
-  ASSERT(cgen_->in_spilled_code());
-  cgen_->set_in_spilled_code(false);
-  GetValue(typeof_state);
-  cgen_->frame()->SpillAll();
-  cgen_->set_in_spilled_code(true);
-}
-
-
 void Reference::GetValue(TypeofState typeof_state) {
-  ASSERT(!cgen_->in_spilled_code());
   ASSERT(cgen_->HasValidEntryRegisters());
   ASSERT(!is_illegal());
   ASSERT(!cgen_->has_cc());
@@ -4289,15 +4111,14 @@
       Variable* var = expression_->AsVariableProxy()->AsVariable();
       Handle<Code> ic(Builtins::builtin(Builtins::LoadIC_Initialize));
       // Setup the name register.
-      Result name_reg = cgen_->allocator()->Allocate(r2);
-      ASSERT(name_reg.is_valid());
-      __ mov(name_reg.reg(), Operand(name));
+      Result name_reg(r2);
+      __ mov(r2, Operand(name));
       ASSERT(var == NULL || var->is_global());
       RelocInfo::Mode rmode = (var == NULL)
                             ? RelocInfo::CODE_TARGET
                             : RelocInfo::CODE_TARGET_CONTEXT;
-      Result answer = frame->CallCodeObject(ic, rmode, &name_reg, 0);
-      frame->EmitPush(answer.reg());
+      frame->CallCodeObject(ic, rmode, &name_reg, 0);
+      frame->EmitPush(r0);
       break;
     }
 
@@ -4316,8 +4137,8 @@
       RelocInfo::Mode rmode = (var == NULL)
                             ? RelocInfo::CODE_TARGET
                             : RelocInfo::CODE_TARGET_CONTEXT;
-      Result answer = frame->CallCodeObject(ic, rmode, 0);
-      frame->EmitPush(answer.reg());
+      frame->CallCodeObject(ic, rmode, 0);
+      frame->EmitPush(r0);
       break;
     }
 
@@ -4426,20 +4247,18 @@
       Handle<Code> ic(Builtins::builtin(Builtins::StoreIC_Initialize));
       Handle<String> name(GetName());
 
-      Result value = cgen_->allocator()->Allocate(r0);
-      ASSERT(value.is_valid());
-      frame->EmitPop(value.reg());
+      Result value(r0);
+      frame->EmitPop(r0);
 
       // Setup the name register.
-      Result property_name = cgen_->allocator()->Allocate(r2);
-      ASSERT(property_name.is_valid());
-      __ mov(property_name.reg(), Operand(name));
-      Result answer = frame->CallCodeObject(ic,
-                                            RelocInfo::CODE_TARGET,
-                                            &value,
-                                            &property_name,
-                                            0);
-      frame->EmitPush(answer.reg());
+      Result property_name(r2);
+      __ mov(r2, Operand(name));
+      frame->CallCodeObject(ic,
+                            RelocInfo::CODE_TARGET,
+                            &value,
+                            &property_name,
+                            0);
+      frame->EmitPush(r0);
       break;
     }
 
@@ -4452,12 +4271,10 @@
       // Call IC code.
       Handle<Code> ic(Builtins::builtin(Builtins::KeyedStoreIC_Initialize));
       // TODO(1222589): Make the IC grab the values from the stack.
-      Result value = cgen_->allocator()->Allocate(r0);
-      ASSERT(value.is_valid());
-      frame->EmitPop(value.reg());  // value
-      Result result =
-          frame->CallCodeObject(ic, RelocInfo::CODE_TARGET, &value, 0);
-      frame->EmitPush(result.reg());
+      Result value(r0);
+      frame->EmitPop(r0);  // value
+      frame->CallCodeObject(ic, RelocInfo::CODE_TARGET, &value, 0);
+      frame->EmitPush(r0);
       break;
     }
 
diff --git a/src/arm/codegen-arm.h b/src/arm/codegen-arm.h
index 42c5908..80d1d56 100644
--- a/src/arm/codegen-arm.h
+++ b/src/arm/codegen-arm.h
@@ -183,9 +183,6 @@
 
   void AddDeferred(DeferredCode* code) { deferred_.Add(code); }
 
-  bool in_spilled_code() const { return in_spilled_code_; }
-  void set_in_spilled_code(bool flag) { in_spilled_code_ = flag; }
-
   static const int kUnknownIntValue = -1;
 
  private:
@@ -222,11 +219,11 @@
   // reach the end of the statement (ie, it does not exit via break,
   // continue, return, or throw).  This function is used temporarily while
   // the code generator is being transformed.
-  void VisitAndSpill(Statement* statement);
+  inline void VisitAndSpill(Statement* statement);
 
   // Visit a list of statements and then spill the virtual frame if control
   // flow can reach the end of the list.
-  void VisitStatementsAndSpill(ZoneList<Statement*>* statements);
+  inline void VisitStatementsAndSpill(ZoneList<Statement*>* statements);
 
   // Main code generation function
   void GenCode(FunctionLiteral* fun);
@@ -263,17 +260,17 @@
   // Generate code to push the value of an expression on top of the frame
   // and then spill the frame fully to memory.  This function is used
   // temporarily while the code generator is being transformed.
-  void LoadAndSpill(Expression* expression,
-                    TypeofState typeof_state = NOT_INSIDE_TYPEOF);
+  inline void LoadAndSpill(Expression* expression,
+                           TypeofState typeof_state = NOT_INSIDE_TYPEOF);
 
   // Call LoadCondition and then spill the virtual frame unless control flow
   // cannot reach the end of the expression (ie, by emitting only
   // unconditional jumps to the control targets).
-  void LoadConditionAndSpill(Expression* expression,
-                             TypeofState typeof_state,
-                             JumpTarget* true_target,
-                             JumpTarget* false_target,
-                             bool force_control);
+  inline void LoadConditionAndSpill(Expression* expression,
+                                    TypeofState typeof_state,
+                                    JumpTarget* true_target,
+                                    JumpTarget* false_target,
+                                    bool force_control);
 
   // Read a value from a slot and leave it on top of the expression stack.
   void LoadFromSlot(Slot* slot, TypeofState typeof_state);
@@ -405,12 +402,6 @@
   // to some unlinking code).
   bool function_return_is_shadowed_;
 
-  // True when we are in code that expects the virtual frame to be fully
-  // spilled.  Some virtual frame function are disabled in DEBUG builds when
-  // called from spilled code, because they do not leave the virtual frame
-  // in a spilled state.
-  bool in_spilled_code_;
-
   static InlineRuntimeLUT kInlineRuntimeLUT[];
 
   friend class VirtualFrame;
@@ -421,6 +412,96 @@
 };
 
 
+class GenericBinaryOpStub : public CodeStub {
+ public:
+  GenericBinaryOpStub(Token::Value op,
+                      OverwriteMode mode,
+                      int constant_rhs = CodeGenerator::kUnknownIntValue)
+      : op_(op),
+        mode_(mode),
+        constant_rhs_(constant_rhs),
+        specialized_on_rhs_(RhsIsOneWeWantToOptimizeFor(op, constant_rhs)) { }
+
+ private:
+  Token::Value op_;
+  OverwriteMode mode_;
+  int constant_rhs_;
+  bool specialized_on_rhs_;
+
+  static const int kMaxKnownRhs = 0x40000000;
+
+  // Minor key encoding in 16 bits.
+  class ModeBits: public BitField<OverwriteMode, 0, 2> {};
+  class OpBits: public BitField<Token::Value, 2, 6> {};
+  class KnownIntBits: public BitField<int, 8, 8> {};
+
+  Major MajorKey() { return GenericBinaryOp; }
+  int MinorKey() {
+    // Encode the parameters in a unique 16 bit value.
+    return OpBits::encode(op_)
+           | ModeBits::encode(mode_)
+           | KnownIntBits::encode(MinorKeyForKnownInt());
+  }
+
+  void Generate(MacroAssembler* masm);
+  void HandleNonSmiBitwiseOp(MacroAssembler* masm);
+
+  static bool RhsIsOneWeWantToOptimizeFor(Token::Value op, int constant_rhs) {
+    if (constant_rhs == CodeGenerator::kUnknownIntValue) return false;
+    if (op == Token::DIV) return constant_rhs >= 2 && constant_rhs <= 3;
+    if (op == Token::MOD) {
+      if (constant_rhs <= 1) return false;
+      if (constant_rhs <= 10) return true;
+      if (constant_rhs <= kMaxKnownRhs && IsPowerOf2(constant_rhs)) return true;
+      return false;
+    }
+    return false;
+  }
+
+  int MinorKeyForKnownInt() {
+    if (!specialized_on_rhs_) return 0;
+    if (constant_rhs_ <= 10) return constant_rhs_ + 1;
+    ASSERT(IsPowerOf2(constant_rhs_));
+    int key = 12;
+    int d = constant_rhs_;
+    while ((d & 1) == 0) {
+      key++;
+      d >>= 1;
+    }
+    return key;
+  }
+
+  const char* GetName() {
+    switch (op_) {
+      case Token::ADD: return "GenericBinaryOpStub_ADD";
+      case Token::SUB: return "GenericBinaryOpStub_SUB";
+      case Token::MUL: return "GenericBinaryOpStub_MUL";
+      case Token::DIV: return "GenericBinaryOpStub_DIV";
+      case Token::MOD: return "GenericBinaryOpStub_MOD";
+      case Token::BIT_OR: return "GenericBinaryOpStub_BIT_OR";
+      case Token::BIT_AND: return "GenericBinaryOpStub_BIT_AND";
+      case Token::BIT_XOR: return "GenericBinaryOpStub_BIT_XOR";
+      case Token::SAR: return "GenericBinaryOpStub_SAR";
+      case Token::SHL: return "GenericBinaryOpStub_SHL";
+      case Token::SHR: return "GenericBinaryOpStub_SHR";
+      default:         return "GenericBinaryOpStub";
+    }
+  }
+
+#ifdef DEBUG
+  void Print() {
+    if (!specialized_on_rhs_) {
+      PrintF("GenericBinaryOpStub (%s)\n", Token::String(op_));
+    } else {
+      PrintF("GenericBinaryOpStub (%s by %d)\n",
+             Token::String(op_),
+             constant_rhs_);
+    }
+  }
+#endif
+};
+
+
 } }  // namespace v8::internal
 
 #endif  // V8_ARM_CODEGEN_ARM_H_
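
The minor key encoding above packs the known right-hand constant into a few
bits: small constants map to rhs + 1, and powers of two map to 12 plus their
log2. A standalone re-implementation, useful as a worked example (not code
from this patch):

    #include <cassert>

    static int MinorKeyForKnownInt(int rhs, bool specialized) {
      if (!specialized) return 0;
      if (rhs <= 10) return rhs + 1;
      int key = 12;  // Powers of two get 12 plus the number of trailing zeros.
      while ((rhs & 1) == 0) {
        key++;
        rhs >>= 1;
      }
      return key;
    }

    int main() {
      assert(MinorKeyForKnownInt(3, true) == 4);    // 2..10 -> rhs + 1
      assert(MinorKeyForKnownInt(16, true) == 16);  // 2^4 -> 12 + 4
      assert(MinorKeyForKnownInt(7, false) == 0);   // not specialized
      return 0;
    }
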
diff --git a/src/arm/jump-target-arm.cc b/src/arm/jump-target-arm.cc
index a925c51..3315f83 100644
--- a/src/arm/jump-target-arm.cc
+++ b/src/arm/jump-target-arm.cc
@@ -47,23 +47,29 @@
   ASSERT(cgen()->HasValidEntryRegisters());
 
   if (is_bound()) {
-    // Backward jump.  There is an expected frame to merge to.
+    // Backward jump.  There is already a frame expectation at the target.
     ASSERT(direction_ == BIDIRECTIONAL);
-    cgen()->frame()->PrepareMergeTo(entry_frame_);
     cgen()->frame()->MergeTo(entry_frame_);
     cgen()->DeleteFrame();
-    __ jmp(&entry_label_);
   } else {
-    // Preconfigured entry frame is not used on ARM.
-    ASSERT(entry_frame_ == NULL);
-    // Forward jump.  The current frame is added to the end of the list
-    // of frames reaching the target block and a jump to the merge code
-    // is emitted.
-    AddReachingFrame(cgen()->frame());
-    RegisterFile empty;
-    cgen()->SetFrame(NULL, &empty);
-    __ jmp(&merge_labels_.last());
+    // Use the current frame as the expected one at the target if necessary.
+    if (entry_frame_ == NULL) {
+      entry_frame_ = cgen()->frame();
+      RegisterFile empty;
+      cgen()->SetFrame(NULL, &empty);
+    } else {
+      cgen()->frame()->MergeTo(entry_frame_);
+      cgen()->DeleteFrame();
+    }
+
+    // The predicate is_linked() should be made true.  Its implementation
+    // detects the presence of a frame pointer in the reaching_frames_ list.
+    if (!is_linked()) {
+      reaching_frames_.Add(NULL);
+      ASSERT(is_linked());
+    }
   }
+  __ jmp(&entry_label_);
 }
 
 
@@ -74,56 +80,21 @@
     ASSERT(direction_ == BIDIRECTIONAL);
     // Backward branch.  We have an expected frame to merge to on the
     // backward edge.
-
-    // Swap the current frame for a copy (we do the swapping to get
-    // the off-frame registers off the fall through) to use for the
-    // branch.
-    VirtualFrame* fall_through_frame = cgen()->frame();
-    VirtualFrame* branch_frame = new VirtualFrame(fall_through_frame);
-    RegisterFile non_frame_registers;
-    cgen()->SetFrame(branch_frame, &non_frame_registers);
-
-    // Check if we can avoid merge code.
-    cgen()->frame()->PrepareMergeTo(entry_frame_);
-    if (cgen()->frame()->Equals(entry_frame_)) {
-      // Branch right in to the block.
-      cgen()->DeleteFrame();
-      __ b(cc, &entry_label_);
-      cgen()->SetFrame(fall_through_frame, &non_frame_registers);
-      return;
-    }
-
-    // Check if we can reuse existing merge code.
-    for (int i = 0; i < reaching_frames_.length(); i++) {
-      if (reaching_frames_[i] != NULL &&
-          cgen()->frame()->Equals(reaching_frames_[i])) {
-        // Branch to the merge code.
-        cgen()->DeleteFrame();
-        __ b(cc, &merge_labels_[i]);
-        cgen()->SetFrame(fall_through_frame, &non_frame_registers);
-        return;
-      }
-    }
-
-    // To emit the merge code here, we negate the condition and branch
-    // around the merge code on the fall through path.
-    Label original_fall_through;
-    __ b(NegateCondition(cc), &original_fall_through);
     cgen()->frame()->MergeTo(entry_frame_);
-    cgen()->DeleteFrame();
-    __ b(&entry_label_);
-    cgen()->SetFrame(fall_through_frame, &non_frame_registers);
-    __ bind(&original_fall_through);
-
   } else {
-    // Preconfigured entry frame is not used on ARM.
-    ASSERT(entry_frame_ == NULL);
-    // Forward branch.  A copy of the current frame is added to the end
-    // of the list of frames reaching the target block and a branch to
-    // the merge code is emitted.
-    AddReachingFrame(new VirtualFrame(cgen()->frame()));
-    __ b(cc, &merge_labels_.last());
+    // Clone the current frame to use as the expected one at the target if
+    // necessary.
+    if (entry_frame_ == NULL) {
+      entry_frame_ = new VirtualFrame(cgen()->frame());
+    }
+    // The predicate is_linked() should be made true.  Its implementation
+    // detects the presence of a frame pointer in the reaching_frames_ list.
+    if (!is_linked()) {
+      reaching_frames_.Add(NULL);
+      ASSERT(is_linked());
+    }
   }
+  __ b(cc, &entry_label_);
 }
 
 
@@ -139,13 +110,19 @@
   ASSERT(cgen()->HasValidEntryRegisters());
   ASSERT(!is_linked());
 
-  cgen()->frame()->SpillAll();
+  // Calls are always 'forward' so we use a copy of the current frame (plus
+  // one for a return address) as the expected frame.
+  ASSERT(entry_frame_ == NULL);
   VirtualFrame* target_frame = new VirtualFrame(cgen()->frame());
   target_frame->Adjust(1);
-  // We do not expect a call with a preconfigured entry frame.
-  ASSERT(entry_frame_ == NULL);
-  AddReachingFrame(target_frame);
-  __ bl(&merge_labels_.last());
+  entry_frame_ = target_frame;
+
+  // The predicate is_linked() should now be made true.  Its implementation
+  // detects the presence of a frame pointer in the reaching_frames_ list.
+  reaching_frames_.Add(NULL);
+  ASSERT(is_linked());
+
+  __ bl(&entry_label_);
 }
 
 
@@ -156,168 +133,105 @@
   // block.
   ASSERT(!cgen()->has_valid_frame() || cgen()->HasValidEntryRegisters());
 
-  if (direction_ == FORWARD_ONLY) {
-    // A simple case: no forward jumps and no possible backward jumps.
-    if (!is_linked()) {
-      // The stack pointer can be floating above the top of the
-      // virtual frame before the bind.  Afterward, it should not.
-      ASSERT(cgen()->has_valid_frame());
-      VirtualFrame* frame = cgen()->frame();
-      int difference = frame->stack_pointer_ - (frame->element_count() - 1);
-      if (difference > 0) {
-        frame->stack_pointer_ -= difference;
-        __ add(sp, sp, Operand(difference * kPointerSize));
-      }
-      __ bind(&entry_label_);
-      return;
-    }
-
-    // Another simple case: no fall through, a single forward jump,
-    // and no possible backward jumps.
-    if (!cgen()->has_valid_frame() && reaching_frames_.length() == 1) {
-      // Pick up the only reaching frame, take ownership of it, and
-      // use it for the block about to be emitted.
-      VirtualFrame* frame = reaching_frames_[0];
-      RegisterFile empty;
-      cgen()->SetFrame(frame, &empty);
-      reaching_frames_[0] = NULL;
-      __ bind(&merge_labels_[0]);
-
-      // The stack pointer can be floating above the top of the
-      // virtual frame before the bind.  Afterward, it should not.
-      int difference = frame->stack_pointer_ - (frame->element_count() - 1);
-      if (difference > 0) {
-        frame->stack_pointer_ -= difference;
-        __ add(sp, sp, Operand(difference * kPointerSize));
-      }
-      __ bind(&entry_label_);
-      return;
-    }
-  }
-
-  // If there is a current frame, record it as the fall-through.  It
-  // is owned by the reaching frames for now.
-  bool had_fall_through = false;
   if (cgen()->has_valid_frame()) {
-    had_fall_through = true;
-    AddReachingFrame(cgen()->frame());  // Return value ignored.
-    RegisterFile empty;
-    cgen()->SetFrame(NULL, &empty);
-  }
-
-  // Compute the frame to use for entry to the block.
-  if (entry_frame_ == NULL) {
-    ComputeEntryFrame();
-  }
-
-  // Some moves required to merge to an expected frame require purely
-  // frame state changes, and do not require any code generation.
-  // Perform those first to increase the possibility of finding equal
-  // frames below.
-  for (int i = 0; i < reaching_frames_.length(); i++) {
-    if (reaching_frames_[i] != NULL) {
-      reaching_frames_[i]->PrepareMergeTo(entry_frame_);
+    // If there is a current frame we can use it on the fall through.
+    if (entry_frame_ == NULL) {
+      entry_frame_ = new VirtualFrame(cgen()->frame());
+    } else {
+      ASSERT(cgen()->frame()->Equals(entry_frame_));
     }
-  }
-
-  if (is_linked()) {
-    // There were forward jumps.  Handle merging the reaching frames
-    // and possible fall through to the entry frame.
-
-    // Loop over the (non-null) reaching frames and process any that
-    // need merge code.  Iterate backwards through the list to handle
-    // the fall-through frame first.  Set frames that will be
-    // processed after 'i' to NULL if we want to avoid processing
-    // them.
-    for (int i = reaching_frames_.length() - 1; i >= 0; i--) {
-      VirtualFrame* frame = reaching_frames_[i];
-
-      if (frame != NULL) {
-        // Does the frame (probably) need merge code?
-        if (!frame->Equals(entry_frame_)) {
-          // We could have a valid frame as the fall through to the
-          // binding site or as the fall through from a previous merge
-          // code block.  Jump around the code we are about to
-          // generate.
-          if (cgen()->has_valid_frame()) {
-            cgen()->DeleteFrame();
-            __ b(&entry_label_);
-          }
-          // Pick up the frame for this block.  Assume ownership if
-          // there cannot be backward jumps.
-          RegisterFile empty;
-          if (direction_ == BIDIRECTIONAL) {
-            cgen()->SetFrame(new VirtualFrame(frame), &empty);
-          } else {
-            cgen()->SetFrame(frame, &empty);
-            reaching_frames_[i] = NULL;
-          }
-          __ bind(&merge_labels_[i]);
-
-          // Loop over the remaining (non-null) reaching frames,
-          // looking for any that can share merge code with this one.
-          for (int j = 0; j < i; j++) {
-            VirtualFrame* other = reaching_frames_[j];
-            if (other != NULL && other->Equals(cgen()->frame())) {
-              // Set the reaching frame element to null to avoid
-              // processing it later, and then bind its entry label.
-              reaching_frames_[j] = NULL;
-              __ bind(&merge_labels_[j]);
-            }
-          }
-
-          // Emit the merge code.
-          cgen()->frame()->MergeTo(entry_frame_);
-        } else if (i == reaching_frames_.length() - 1 && had_fall_through) {
-          // If this is the fall through, and it didn't need merge
-          // code, we need to pick up the frame so we can jump around
-          // subsequent merge blocks if necessary.
-          RegisterFile empty;
-          cgen()->SetFrame(frame, &empty);
-          reaching_frames_[i] = NULL;
-        }
-      }
-    }
-
-    // The code generator may not have a current frame if there was no
-    // fall through and none of the reaching frames needed merging.
-    // In that case, clone the entry frame as the current frame.
-    if (!cgen()->has_valid_frame()) {
-      RegisterFile empty;
-      cgen()->SetFrame(new VirtualFrame(entry_frame_), &empty);
-    }
-
-    // There may be unprocessed reaching frames that did not need
-    // merge code.  They will have unbound merge labels.  Bind their
-    // merge labels to be the same as the entry label and deallocate
-    // them.
-    for (int i = 0; i < reaching_frames_.length(); i++) {
-      if (!merge_labels_[i].is_bound()) {
-        reaching_frames_[i] = NULL;
-        __ bind(&merge_labels_[i]);
-      }
-    }
-
-    // There are non-NULL reaching frames with bound labels for each
-    // merge block, but only on backward targets.
   } else {
-    // There were no forward jumps.  There must be a current frame and
-    // this must be a bidirectional target.
-    ASSERT(reaching_frames_.length() == 1);
-    ASSERT(reaching_frames_[0] != NULL);
-    ASSERT(direction_ == BIDIRECTIONAL);
-
-    // Use a copy of the reaching frame so the original can be saved
-    // for possible reuse as a backward merge block.
+    // If there is no current frame we must have an entry frame which we can
+    // copy.
+    ASSERT(entry_frame_ != NULL);
     RegisterFile empty;
-    cgen()->SetFrame(new VirtualFrame(reaching_frames_[0]), &empty);
-    __ bind(&merge_labels_[0]);
-    cgen()->frame()->MergeTo(entry_frame_);
+    cgen()->SetFrame(new VirtualFrame(entry_frame_), &empty);
+  }
+
+  // The predicate is_linked() should be made false.  Its implementation
+  // detects the presence (or absence) of frame pointers in the
+  // reaching_frames_ list.  If we inserted a bogus frame to make
+  // is_linked() true, remove it now.
+  if (is_linked()) {
+    reaching_frames_.Clear();
   }
 
   __ bind(&entry_label_);
 }
 
+
+void BreakTarget::Jump() {
+  // On ARM we do not currently emit merge code for jumps, so we need to do
+  // it explicitly here.  The only merging necessary is to drop extra
+  // statement state from the stack.
+  ASSERT(cgen()->has_valid_frame());
+  int count = cgen()->frame()->height() - expected_height_;
+  cgen()->frame()->Drop(count);
+  DoJump();
+}
+
+
+void BreakTarget::Jump(Result* arg) {
+  // On ARM we do not currently emit merge code for jumps, so we need to do
+  // it explicitly here.  The only merging necessary is to drop extra
+  // statement state from the stack.
+  ASSERT(cgen()->has_valid_frame());
+  int count = cgen()->frame()->height() - expected_height_;
+  cgen()->frame()->Drop(count);
+  cgen()->frame()->Push(arg);
+  DoJump();
+}
+
+
+void BreakTarget::Bind() {
+#ifdef DEBUG
+  // All the forward-reaching frames should have been adjusted at the
+  // jumps to this target.
+  for (int i = 0; i < reaching_frames_.length(); i++) {
+    ASSERT(reaching_frames_[i] == NULL ||
+           reaching_frames_[i]->height() == expected_height_);
+  }
+#endif
+  // Drop leftover statement state from the frame before merging, even
+  // on the fall through.  This is so we can bind the return target
+  // with state on the frame.
+  if (cgen()->has_valid_frame()) {
+    int count = cgen()->frame()->height() - expected_height_;
+    // On ARM we do not currently emit merge code at binding sites, so we need
+    // to do it explicitly here.  The only merging necessary is to drop extra
+    // statement state from the stack.
+    cgen()->frame()->Drop(count);
+  }
+
+  DoBind();
+}
+
+
+void BreakTarget::Bind(Result* arg) {
+#ifdef DEBUG
+  // All the forward-reaching frames should have been adjusted at the
+  // jumps to this target.
+  for (int i = 0; i < reaching_frames_.length(); i++) {
+    ASSERT(reaching_frames_[i] == NULL ||
+           reaching_frames_[i]->height() == expected_height_ + 1);
+  }
+#endif
+  // Drop leftover statement state from the frame before merging, even
+  // on the fall through.  This is so we can bind the return target
+  // with state on the frame.
+  if (cgen()->has_valid_frame()) {
+    int count = cgen()->frame()->height() - expected_height_;
+    // On ARM we do not currently emit merge code at binding sites, so we need
+    // to do it explicitly here.  The only merging necessary is to drop extra
+    // statement state from the stack.
+    cgen()->frame()->ForgetElements(count);
+    cgen()->frame()->Push(arg);
+  }
+  DoBind();
+  *arg = cgen()->frame()->Pop();
+}
+
+
 #undef __
 
 
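The BreakTarget::Jump()/Bind() methods above maintain one invariant:
statement state pushed above the target's expected frame height is dropped
before control transfers. A minimal sketch of that bookkeeping
(illustrative, not the real VirtualFrame):

    struct Frame {
      int height;
      explicit Frame(int h) : height(h) {}
      void Drop(int n) { height -= n; }
    };

    // Before jumping to a break target, drop whatever the statement pushed
    // above the height the target expects.
    static void AdjustForJump(Frame* frame, int expected_height) {
      int count = frame->height - expected_height;  // Never negative here.
      frame->Drop(count);
    }
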
diff --git a/src/arm/virtual-frame-arm.cc b/src/arm/virtual-frame-arm.cc
index 3d0ada7..d3dabf8 100644
--- a/src/arm/virtual-frame-arm.cc
+++ b/src/arm/virtual-frame-arm.cc
@@ -76,72 +76,23 @@
 
 
 void VirtualFrame::MergeTo(VirtualFrame* expected) {
-  Comment cmnt(masm(), "[ Merge frame");
-  // We should always be merging the code generator's current frame to an
-  // expected frame.
-  ASSERT(cgen()->frame() == this);
-
-  // Adjust the stack pointer upward (toward the top of the virtual
-  // frame) if necessary.
-  if (stack_pointer_ < expected->stack_pointer_) {
-    int difference = expected->stack_pointer_ - stack_pointer_;
-    stack_pointer_ = expected->stack_pointer_;
-    __ sub(sp, sp, Operand(difference * kPointerSize));
-  }
-
-  MergeMoveRegistersToMemory(expected);
-  MergeMoveRegistersToRegisters(expected);
-  MergeMoveMemoryToRegisters(expected);
-
-  // Fix any sync bit problems from the bottom-up, stopping when we
-  // hit the stack pointer or the top of the frame if the stack
-  // pointer is floating above the frame.
-  int limit = Min(static_cast<int>(stack_pointer_), element_count() - 1);
-  for (int i = 0; i <= limit; i++) {
-    FrameElement source = elements_[i];
-    FrameElement target = expected->elements_[i];
-    if (source.is_synced() && !target.is_synced()) {
-      elements_[i].clear_sync();
-    } else if (!source.is_synced() && target.is_synced()) {
-      SyncElementAt(i);
-    }
-  }
-
-  // Adjust the stack point downard if necessary.
-  if (stack_pointer_ > expected->stack_pointer_) {
-    int difference = stack_pointer_ - expected->stack_pointer_;
-    stack_pointer_ = expected->stack_pointer_;
-    __ add(sp, sp, Operand(difference * kPointerSize));
-  }
-
-  // At this point, the frames should be identical.
+  // ARM frames are currently always in memory.
   ASSERT(Equals(expected));
 }
 
 
 void VirtualFrame::MergeMoveRegistersToMemory(VirtualFrame* expected) {
-  ASSERT(stack_pointer_ >= expected->stack_pointer_);
-
-  // Move registers, constants, and copies to memory.  Perform moves
-  // from the top downward in the frame in order to leave the backing
-  // stores of copies in registers.
-  // On ARM, all elements are in memory.
-
-#ifdef DEBUG
-  int start = Min(static_cast<int>(stack_pointer_), element_count() - 1);
-  for (int i = start; i >= 0; i--) {
-    ASSERT(elements_[i].is_memory());
-    ASSERT(expected->elements_[i].is_memory());
-  }
-#endif
+  UNREACHABLE();
 }
 
 
 void VirtualFrame::MergeMoveRegistersToRegisters(VirtualFrame* expected) {
+  UNREACHABLE();
 }
 
 
 void VirtualFrame::MergeMoveMemoryToRegisters(VirtualFrame* expected) {
+  UNREACHABLE();
 }
 
 
@@ -235,76 +186,62 @@
 }
 
 
-Result VirtualFrame::RawCallStub(CodeStub* stub) {
+void VirtualFrame::RawCallStub(CodeStub* stub) {
   ASSERT(cgen()->HasValidEntryRegisters());
   __ CallStub(stub);
-  Result result = cgen()->allocator()->Allocate(r0);
-  ASSERT(result.is_valid());
-  return result;
 }
 
 
-Result VirtualFrame::CallStub(CodeStub* stub, Result* arg) {
+void VirtualFrame::CallStub(CodeStub* stub, Result* arg) {
   PrepareForCall(0, 0);
   arg->Unuse();
-  return RawCallStub(stub);
+  RawCallStub(stub);
 }
 
 
-Result VirtualFrame::CallStub(CodeStub* stub, Result* arg0, Result* arg1) {
+void VirtualFrame::CallStub(CodeStub* stub, Result* arg0, Result* arg1) {
   PrepareForCall(0, 0);
   arg0->Unuse();
   arg1->Unuse();
-  return RawCallStub(stub);
+  RawCallStub(stub);
 }
 
 
-Result VirtualFrame::CallRuntime(Runtime::Function* f, int arg_count) {
+void VirtualFrame::CallRuntime(Runtime::Function* f, int arg_count) {
   PrepareForCall(arg_count, arg_count);
   ASSERT(cgen()->HasValidEntryRegisters());
   __ CallRuntime(f, arg_count);
-  Result result = cgen()->allocator()->Allocate(r0);
-  ASSERT(result.is_valid());
-  return result;
 }
 
 
-Result VirtualFrame::CallRuntime(Runtime::FunctionId id, int arg_count) {
+void VirtualFrame::CallRuntime(Runtime::FunctionId id, int arg_count) {
   PrepareForCall(arg_count, arg_count);
   ASSERT(cgen()->HasValidEntryRegisters());
   __ CallRuntime(id, arg_count);
-  Result result = cgen()->allocator()->Allocate(r0);
-  ASSERT(result.is_valid());
-  return result;
 }
 
 
-Result VirtualFrame::InvokeBuiltin(Builtins::JavaScript id,
-                                   InvokeJSFlags flags,
-                                   Result* arg_count_register,
-                                   int arg_count) {
+void VirtualFrame::InvokeBuiltin(Builtins::JavaScript id,
+                                 InvokeJSFlags flags,
+                                 Result* arg_count_register,
+                                 int arg_count) {
   ASSERT(arg_count_register->reg().is(r0));
   PrepareForCall(arg_count, arg_count);
   arg_count_register->Unuse();
   __ InvokeBuiltin(id, flags);
-  Result result = cgen()->allocator()->Allocate(r0);
-  return result;
 }
 
 
-Result VirtualFrame::RawCallCodeObject(Handle<Code> code,
-                                       RelocInfo::Mode rmode) {
+void VirtualFrame::RawCallCodeObject(Handle<Code> code,
+                                     RelocInfo::Mode rmode) {
   ASSERT(cgen()->HasValidEntryRegisters());
   __ Call(code, rmode);
-  Result result = cgen()->allocator()->Allocate(r0);
-  ASSERT(result.is_valid());
-  return result;
 }
 
 
-Result VirtualFrame::CallCodeObject(Handle<Code> code,
-                                    RelocInfo::Mode rmode,
-                                    int dropped_args) {
+void VirtualFrame::CallCodeObject(Handle<Code> code,
+                                  RelocInfo::Mode rmode,
+                                  int dropped_args) {
   int spilled_args = 0;
   switch (code->kind()) {
     case Code::CALL_IC:
@@ -325,14 +262,14 @@
       break;
   }
   PrepareForCall(spilled_args, dropped_args);
-  return RawCallCodeObject(code, rmode);
+  RawCallCodeObject(code, rmode);
 }
 
 
-Result VirtualFrame::CallCodeObject(Handle<Code> code,
-                                    RelocInfo::Mode rmode,
-                                    Result* arg,
-                                    int dropped_args) {
+void VirtualFrame::CallCodeObject(Handle<Code> code,
+                                  RelocInfo::Mode rmode,
+                                  Result* arg,
+                                  int dropped_args) {
   int spilled_args = 0;
   switch (code->kind()) {
     case Code::LOAD_IC:
@@ -353,15 +290,15 @@
   }
   PrepareForCall(spilled_args, dropped_args);
   arg->Unuse();
-  return RawCallCodeObject(code, rmode);
+  RawCallCodeObject(code, rmode);
 }
 
 
-Result VirtualFrame::CallCodeObject(Handle<Code> code,
-                                    RelocInfo::Mode rmode,
-                                    Result* arg0,
-                                    Result* arg1,
-                                    int dropped_args) {
+void VirtualFrame::CallCodeObject(Handle<Code> code,
+                                  RelocInfo::Mode rmode,
+                                  Result* arg0,
+                                  Result* arg1,
+                                  int dropped_args) {
   int spilled_args = 1;
   switch (code->kind()) {
     case Code::STORE_IC:
@@ -385,11 +322,12 @@
   PrepareForCall(spilled_args, dropped_args);
   arg0->Unuse();
   arg1->Unuse();
-  return RawCallCodeObject(code, rmode);
+  RawCallCodeObject(code, rmode);
 }
 
 
 void VirtualFrame::Drop(int count) {
+  ASSERT(count >= 0);
   ASSERT(height() >= count);
   int num_virtual_elements = (element_count() - 1) - stack_pointer_;
 
diff --git a/src/arm/virtual-frame-arm.h b/src/arm/virtual-frame-arm.h
index 2f36f10..457478d 100644
--- a/src/arm/virtual-frame-arm.h
+++ b/src/arm/virtual-frame-arm.h
@@ -52,20 +52,7 @@
   // generator is being transformed.
   class SpilledScope BASE_EMBEDDED {
    public:
-    SpilledScope() : previous_state_(cgen()->in_spilled_code()) {
-      ASSERT(cgen()->has_valid_frame());
-      cgen()->frame()->SpillAll();
-      cgen()->set_in_spilled_code(true);
-    }
-
-    ~SpilledScope() {
-      cgen()->set_in_spilled_code(previous_state_);
-    }
-
-   private:
-    bool previous_state_;
-
-    CodeGenerator* cgen() { return CodeGeneratorScope::Current(); }
+    SpilledScope() {}
   };
 
   // An illegal index into the virtual frame.
@@ -125,12 +112,14 @@
     ASSERT(count >= 0);
     ASSERT(stack_pointer_ == element_count() - 1);
     stack_pointer_ -= count;
-    ForgetElements(count);
+    // On ARM, all elements are in memory, so there is no extra bookkeeping
+    // (registers, copies, etc.) beyond dropping the elements.
+    elements_.Rewind(stack_pointer_ + 1);
   }
 
-  // Forget count elements from the top of the frame without adjusting
-  // the stack pointer downward.  This is used, for example, before
-  // merging frames at break, continue, and return targets.
+  // Forget count elements from the top of the frame and adjust the stack
+  // pointer downward.  This is used, for example, before merging frames at
+  // break, continue, and return targets.
   void ForgetElements(int count);
 
   // Spill all values from the frame to memory.
@@ -294,46 +283,46 @@
 
   // Call stub given the number of arguments it expects on (and
   // removes from) the stack.
-  Result CallStub(CodeStub* stub, int arg_count) {
+  void CallStub(CodeStub* stub, int arg_count) {
     PrepareForCall(arg_count, arg_count);
-    return RawCallStub(stub);
+    RawCallStub(stub);
   }
 
   // Call stub that expects its argument in r0.  The argument is given
   // as a result which must be the register r0.
-  Result CallStub(CodeStub* stub, Result* arg);
+  void CallStub(CodeStub* stub, Result* arg);
 
   // Call stub that expects its arguments in r1 and r0.  The arguments
   // are given as results which must be the appropriate registers.
-  Result CallStub(CodeStub* stub, Result* arg0, Result* arg1);
+  void CallStub(CodeStub* stub, Result* arg0, Result* arg1);
 
   // Call runtime given the number of arguments expected on (and
   // removed from) the stack.
-  Result CallRuntime(Runtime::Function* f, int arg_count);
-  Result CallRuntime(Runtime::FunctionId id, int arg_count);
+  void CallRuntime(Runtime::Function* f, int arg_count);
+  void CallRuntime(Runtime::FunctionId id, int arg_count);
 
   // Invoke builtin given the number of arguments it expects on (and
   // removes from) the stack.
-  Result InvokeBuiltin(Builtins::JavaScript id,
-                       InvokeJSFlags flag,
-                       Result* arg_count_register,
-                       int arg_count);
+  void InvokeBuiltin(Builtins::JavaScript id,
+                     InvokeJSFlags flag,
+                     Result* arg_count_register,
+                     int arg_count);
 
   // Call into an IC stub given the number of arguments it removes
   // from the stack.  Register arguments are passed as results and
   // consumed by the call.
-  Result CallCodeObject(Handle<Code> ic,
-                        RelocInfo::Mode rmode,
-                        int dropped_args);
-  Result CallCodeObject(Handle<Code> ic,
-                        RelocInfo::Mode rmode,
-                        Result* arg,
-                        int dropped_args);
-  Result CallCodeObject(Handle<Code> ic,
-                        RelocInfo::Mode rmode,
-                        Result* arg0,
-                        Result* arg1,
-                        int dropped_args);
+  void CallCodeObject(Handle<Code> ic,
+                      RelocInfo::Mode rmode,
+                      int dropped_args);
+  void CallCodeObject(Handle<Code> ic,
+                      RelocInfo::Mode rmode,
+                      Result* arg,
+                      int dropped_args);
+  void CallCodeObject(Handle<Code> ic,
+                      RelocInfo::Mode rmode,
+                      Result* arg0,
+                      Result* arg1,
+                      int dropped_args);
 
   // Drop a number of elements from the top of the expression stack.  May
   // emit code to affect the physical frame.  Does not clobber any registers
@@ -517,11 +506,11 @@
 
   // Call a code stub that has already been prepared for calling (via
   // PrepareForCall).
-  Result RawCallStub(CodeStub* stub);
+  void RawCallStub(CodeStub* stub);
 
   // Calls a code object which has already been prepared for calling
   // (via PrepareForCall).
-  Result RawCallCodeObject(Handle<Code> code, RelocInfo::Mode rmode);
+  void RawCallCodeObject(Handle<Code> code, RelocInfo::Mode rmode);
 
   bool Equals(VirtualFrame* other);
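Since ARM frame elements always live in memory, the register-tracking Result values these call helpers used to return carried no information, and dropping them simplifies every call site.  A minimal sketch of a caller under the void-returning API (EmitBinaryOpSketch and the runtime id are illustrative, not part of this patch; frame_->EmitPush is assumed to exist as in the surrounding port):

    // Assumes the two operands have already been pushed on the frame.
    void CodeGenerator::EmitBinaryOpSketch() {
      frame_->CallRuntime(Runtime::kNumberAdd, 2);  // pops both arguments
      // No Result to allocate or Unuse(); the value is in r0 by convention.
      frame_->EmitPush(r0);
    }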
 
diff --git a/src/cfg.cc b/src/cfg.cc
index bad1441..d2dff52 100644
--- a/src/cfg.cc
+++ b/src/cfg.cc
@@ -42,8 +42,10 @@
 CfgGlobals::CfgGlobals(FunctionLiteral* fun)
     : global_fun_(fun),
       global_exit_(new ExitNode()),
+      nowhere_(new Nowhere()),
 #ifdef DEBUG
       node_counter_(0),
+      temp_counter_(0),
 #endif
       previous_(top_) {
   top_ = this;
@@ -58,6 +60,12 @@
   if (fun->scope()->num_heap_slots() > 0) {
     BAILOUT("function has context slots");
   }
+  if (fun->scope()->num_stack_slots() > kBitsPerPointer) {
+    BAILOUT("function has too many locals");
+  }
+  if (fun->scope()->num_parameters() > kBitsPerPointer - 1) {
+    BAILOUT("function has too many parameters");
+  }
   if (fun->scope()->arguments() != NULL) {
     BAILOUT("function uses .arguments");
   }
@@ -67,17 +75,20 @@
     BAILOUT("empty function body");
   }
 
-  StatementBuilder builder;
+  StatementCfgBuilder builder;
   builder.VisitStatements(body);
-  Cfg* cfg = builder.cfg();
-  if (cfg == NULL) {
+  Cfg* graph = builder.graph();
+  if (graph == NULL) {
     BAILOUT("unsupported statement type");
   }
-  if (cfg->has_exit()) {
+  if (graph->is_empty()) {
+    BAILOUT("function body produces empty cfg");
+  }
+  if (graph->has_exit()) {
     BAILOUT("control path without explicit return");
   }
-  cfg->PrependEntryNode();
-  return cfg;
+  graph->PrependEntryNode();
+  return graph;
 }
 
 #undef BAILOUT
@@ -90,8 +101,10 @@
 
 
 void Cfg::Append(Instruction* instr) {
-  ASSERT(has_exit());
-  ASSERT(!is_empty());
+  ASSERT(is_empty() || has_exit());
+  if (is_empty()) {
+    entry_ = exit_ = new InstructionBlock();
+  }
   InstructionBlock::cast(exit_)->Append(instr);
 }
 
@@ -104,6 +117,27 @@
 }
 
 
+void Cfg::Concatenate(Cfg* other) {
+  ASSERT(is_empty() || has_exit());
+  if (other->is_empty()) return;
+
+  if (is_empty()) {
+    entry_ = other->entry();
+    exit_ = other->exit();
+  } else {
+    // Both fragments are nonempty, and this one has an available exit.
+    // Destructively glue the fragments together.
+    InstructionBlock* first = InstructionBlock::cast(exit_);
+    InstructionBlock* second = InstructionBlock::cast(other->entry());
+    first->instructions()->AddAll(*second->instructions());
+    if (second->successor() != NULL) {
+      first->set_successor(second->successor());
+      exit_ = other->exit();
+    }
+  }
+}
+
+
 void InstructionBlock::Unmark() {
   if (is_marked_) {
     is_marked_ = false;
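Concatenate destructively splices two open fragments: the second fragment's first block is emptied into the first fragment's exit block, and the exit only advances when the second fragment's block had a successor.  A sketch of how builder code composes fragments (source and addend stand in for Values obtained elsewhere):

    Cfg* left = new Cfg();                       // empty open fragment
    Location* temp = new TempLocation();
    left->Append(new MoveInstr(temp, source));   // Append creates the block lazily
    Cfg* right = new Cfg();
    right->Append(new BinaryOpInstr(new TempLocation(), Token::ADD,
                                    temp, addend));
    left->Concatenate(right);                    // left now ends with the add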
@@ -166,11 +200,45 @@
 }
 
 
+void ZeroOperandInstruction::FastAllocate(TempLocation* temp) {
+  temp->set_where(TempLocation::STACK);
+}
+
+
+void OneOperandInstruction::FastAllocate(TempLocation* temp) {
+  temp->set_where((temp == value_)
+                  ? TempLocation::ACCUMULATOR
+                  : TempLocation::STACK);
+}
+
+
+void TwoOperandInstruction::FastAllocate(TempLocation* temp) {
+  temp->set_where((temp == value0_ || temp == value1_)
+                  ? TempLocation::ACCUMULATOR
+                  : TempLocation::STACK);
+}
+
+
+void PositionInstr::Compile(MacroAssembler* masm) {
+  if (FLAG_debug_info && pos_ != RelocInfo::kNoPosition) {
+    masm->RecordStatementPosition(pos_);
+    masm->RecordPosition(pos_);
+  }
+}
+
+
+void MoveInstr::Compile(MacroAssembler* masm) {
+  location()->Move(masm, value());
+}
+
+
 // The expression builder should not be used for declarations or statements.
-void ExpressionBuilder::VisitDeclaration(Declaration* decl) { UNREACHABLE(); }
+void ExpressionCfgBuilder::VisitDeclaration(Declaration* decl) {
+  UNREACHABLE();
+}
 
 #define DEFINE_VISIT(type)                                              \
-  void ExpressionBuilder::Visit##type(type* stmt) { UNREACHABLE(); }
+  void ExpressionCfgBuilder::Visit##type(type* stmt) { UNREACHABLE(); }
 STATEMENT_NODE_LIST(DEFINE_VISIT)
 #undef DEFINE_VISIT
 
@@ -178,35 +246,32 @@
 // Macros (temporarily) handling unsupported expression types.
 #define BAILOUT(reason)                         \
   do {                                          \
-    value_ = NULL;                              \
+    graph_ = NULL;                              \
     return;                                     \
   } while (false)
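The do { ... } while (false) wrapper is the usual idiom for making a multi-statement macro parse as exactly one statement; a sketch of the pitfall it avoids (VisitChildren is a made-up stand-in):

    if (precondition)
      BAILOUT("reason");   // expands to a single statement, so the else
    else                   // below still binds to this if; an unwrapped
      VisitChildren();     // { ... } would leave a stray ';' before 'else'
                           // and fail to compile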
 
-#define CHECK_BAILOUT()                         \
-  if (value_ == NULL) { return; } else {}
-
-void ExpressionBuilder::VisitFunctionLiteral(FunctionLiteral* expr) {
+void ExpressionCfgBuilder::VisitFunctionLiteral(FunctionLiteral* expr) {
   BAILOUT("FunctionLiteral");
 }
 
 
-void ExpressionBuilder::VisitFunctionBoilerplateLiteral(
+void ExpressionCfgBuilder::VisitFunctionBoilerplateLiteral(
     FunctionBoilerplateLiteral* expr) {
   BAILOUT("FunctionBoilerplateLiteral");
 }
 
 
-void ExpressionBuilder::VisitConditional(Conditional* expr) {
+void ExpressionCfgBuilder::VisitConditional(Conditional* expr) {
   BAILOUT("Conditional");
 }
 
 
-void ExpressionBuilder::VisitSlot(Slot* expr) {
+void ExpressionCfgBuilder::VisitSlot(Slot* expr) {
   BAILOUT("Slot");
 }
 
 
-void ExpressionBuilder::VisitVariableProxy(VariableProxy* expr) {
+void ExpressionCfgBuilder::VisitVariableProxy(VariableProxy* expr) {
   Expression* rewrite = expr->var()->rewrite();
   if (rewrite == NULL || rewrite->AsSlot() == NULL) {
     BAILOUT("unsupported variable (not a slot)");
@@ -215,201 +280,332 @@
   if (slot->type() != Slot::PARAMETER && slot->type() != Slot::LOCAL) {
     BAILOUT("unsupported slot type (not a parameter or local)");
   }
+  // Ignore the passed destination.
   value_ = new SlotLocation(slot->type(), slot->index());
 }
 
 
-void ExpressionBuilder::VisitLiteral(Literal* expr) {
+void ExpressionCfgBuilder::VisitLiteral(Literal* expr) {
+  // Ignore the passed destination.
   value_ = new Constant(expr->handle());
 }
 
 
-void ExpressionBuilder::VisitRegExpLiteral(RegExpLiteral* expr) {
+void ExpressionCfgBuilder::VisitRegExpLiteral(RegExpLiteral* expr) {
   BAILOUT("RegExpLiteral");
 }
 
 
-void ExpressionBuilder::VisitObjectLiteral(ObjectLiteral* expr) {
+void ExpressionCfgBuilder::VisitObjectLiteral(ObjectLiteral* expr) {
   BAILOUT("ObjectLiteral");
 }
 
 
-void ExpressionBuilder::VisitArrayLiteral(ArrayLiteral* expr) {
+void ExpressionCfgBuilder::VisitArrayLiteral(ArrayLiteral* expr) {
   BAILOUT("ArrayLiteral");
 }
 
 
-void ExpressionBuilder::VisitCatchExtensionObject(CatchExtensionObject* expr) {
+void ExpressionCfgBuilder::VisitCatchExtensionObject(
+    CatchExtensionObject* expr) {
   BAILOUT("CatchExtensionObject");
 }
 
 
-void ExpressionBuilder::VisitAssignment(Assignment* expr) {
-  BAILOUT("Assignment");
+void ExpressionCfgBuilder::VisitAssignment(Assignment* expr) {
+  if (expr->op() != Token::ASSIGN && expr->op() != Token::INIT_VAR) {
+    BAILOUT("unsupported compound assignment");
+  }
+  Expression* lhs = expr->target();
+  if (lhs->AsProperty() != NULL) {
+    BAILOUT("unsupported property assignment");
+  }
+
+  Variable* var = lhs->AsVariableProxy()->AsVariable();
+  if (var == NULL) {
+    BAILOUT("unsupported invalid left-hand side");
+  }
+  if (var->is_global()) {
+    BAILOUT("unsupported global variable");
+  }
+  Slot* slot = var->slot();
+  ASSERT(slot != NULL);
+  if (slot->type() != Slot::PARAMETER && slot->type() != Slot::LOCAL) {
+    BAILOUT("unsupported slot lhs (not a parameter or local)");
+  }
+
+  // Parameter and local slot assignments.
+  ExpressionCfgBuilder builder;
+  SlotLocation* loc = new SlotLocation(slot->type(), slot->index());
+  builder.Build(expr->value(), loc);
+  if (builder.graph() == NULL) {
+    BAILOUT("unsupported expression in assignment");
+  }
+  // If the expression did not come back in the slot location, append
+  // a move to the CFG.
+  graph_ = builder.graph();
+  if (builder.value() != loc) {
+    graph()->Append(new MoveInstr(loc, builder.value()));
+  }
+  // Record the assignment.
+  assigned_vars_.AddElement(loc);
+  // Ignore the destination passed to us.
+  value_ = loc;
 }
 
 
-void ExpressionBuilder::VisitThrow(Throw* expr) {
+void ExpressionCfgBuilder::VisitThrow(Throw* expr) {
   BAILOUT("Throw");
 }
 
 
-void ExpressionBuilder::VisitProperty(Property* expr) {
-  BAILOUT("Property");
+void ExpressionCfgBuilder::VisitProperty(Property* expr) {
+  ExpressionCfgBuilder object, key;
+  object.Build(expr->obj(), NULL);
+  if (object.graph() == NULL) {
+    BAILOUT("unsupported object subexpression in propload");
+  }
+  key.Build(expr->key(), NULL);
+  if (key.graph() == NULL) {
+    BAILOUT("unsupported key subexpression in propload");
+  }
+
+  if (destination_ == NULL) destination_ = new TempLocation();
+
+  graph_ = object.graph();
+  // Insert a move to a fresh temporary if the object value is in a slot
+  // that's assigned in the key.
+  Location* temp = NULL;
+  if (object.value()->is_slot() &&
+      key.assigned_vars()->Contains(SlotLocation::cast(object.value()))) {
+    temp = new TempLocation();
+    graph()->Append(new MoveInstr(temp, object.value()));
+  }
+  graph()->Concatenate(key.graph());
+  graph()->Append(new PropLoadInstr(destination_,
+                                    temp == NULL ? object.value() : temp,
+                                    key.value()));
+
+  assigned_vars_ = *object.assigned_vars();
+  assigned_vars()->Union(key.assigned_vars());
+
+  value_ = destination_;
 }
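The fresh temporary above is what keeps evaluation order correct when the key writes to the object's slot.  For an expression like a[a = b] (handled by the builders above) with a and b in locals 0 and 1, the emitted fragment would print roughly as follows; the old value of a is saved in Temp 0 before the key's assignment runs:

    Move              (Temp 0, Slot (LOCAL, 0))
    Move              (Slot (LOCAL, 0), Slot (LOCAL, 1))
    PropLoad          (Temp 1, Temp 0, Slot (LOCAL, 0))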
 
 
-void ExpressionBuilder::VisitCall(Call* expr) {
+void ExpressionCfgBuilder::VisitCall(Call* expr) {
   BAILOUT("Call");
 }
 
 
-void ExpressionBuilder::VisitCallEval(CallEval* expr) {
+void ExpressionCfgBuilder::VisitCallEval(CallEval* expr) {
   BAILOUT("CallEval");
 }
 
 
-void ExpressionBuilder::VisitCallNew(CallNew* expr) {
+void ExpressionCfgBuilder::VisitCallNew(CallNew* expr) {
   BAILOUT("CallNew");
 }
 
 
-void ExpressionBuilder::VisitCallRuntime(CallRuntime* expr) {
+void ExpressionCfgBuilder::VisitCallRuntime(CallRuntime* expr) {
   BAILOUT("CallRuntime");
 }
 
 
-void ExpressionBuilder::VisitUnaryOperation(UnaryOperation* expr) {
+void ExpressionCfgBuilder::VisitUnaryOperation(UnaryOperation* expr) {
   BAILOUT("UnaryOperation");
 }
 
 
-void ExpressionBuilder::VisitCountOperation(CountOperation* expr) {
+void ExpressionCfgBuilder::VisitCountOperation(CountOperation* expr) {
   BAILOUT("CountOperation");
 }
 
 
-void ExpressionBuilder::VisitBinaryOperation(BinaryOperation* expr) {
-  BAILOUT("BinaryOperation");
+void ExpressionCfgBuilder::VisitBinaryOperation(BinaryOperation* expr) {
+  Token::Value op = expr->op();
+  switch (op) {
+    case Token::COMMA:
+    case Token::OR:
+    case Token::AND:
+      BAILOUT("unsupported binary operation");
+
+    case Token::BIT_OR:
+    case Token::BIT_XOR:
+    case Token::BIT_AND:
+    case Token::SHL:
+    case Token::SAR:
+    case Token::SHR:
+    case Token::ADD:
+    case Token::SUB:
+    case Token::MUL:
+    case Token::DIV:
+    case Token::MOD: {
+      ExpressionCfgBuilder left, right;
+      left.Build(expr->left(), NULL);
+      if (left.graph() == NULL) {
+        BAILOUT("unsupported left subexpression in binop");
+      }
+      right.Build(expr->right(), NULL);
+      if (right.graph() == NULL) {
+        BAILOUT("unsupported right subexpression in binop");
+      }
+
+      if (destination_ == NULL) destination_ = new TempLocation();
+
+      graph_ = left.graph();
+      // Insert a move to a fresh temporary if the left value is in a
+      // slot that's assigned on the right.
+      Location* temp = NULL;
+      if (left.value()->is_slot() &&
+          right.assigned_vars()->Contains(SlotLocation::cast(left.value()))) {
+        temp = new TempLocation();
+        graph()->Append(new MoveInstr(temp, left.value()));
+      }
+      graph()->Concatenate(right.graph());
+      graph()->Append(new BinaryOpInstr(destination_, op,
+                                        temp == NULL ? left.value() : temp,
+                                        right.value()));
+
+      assigned_vars_ = *left.assigned_vars();
+      assigned_vars()->Union(right.assigned_vars());
+
+      value_ = destination_;
+      return;
+    }
+
+    default:
+      UNREACHABLE();
+  }
 }
 
 
-void ExpressionBuilder::VisitCompareOperation(CompareOperation* expr) {
+void ExpressionCfgBuilder::VisitCompareOperation(CompareOperation* expr) {
   BAILOUT("CompareOperation");
 }
 
 
-void ExpressionBuilder::VisitThisFunction(ThisFunction* expr) {
+void ExpressionCfgBuilder::VisitThisFunction(ThisFunction* expr) {
   BAILOUT("ThisFunction");
 }
 
 #undef BAILOUT
-#undef CHECK_BAILOUT
 
 
 // Macros (temporarily) handling unsupported statement types.
 #define BAILOUT(reason)                         \
   do {                                          \
-    cfg_ = NULL;                                \
+    graph_ = NULL;                              \
     return;                                     \
   } while (false)
 
 #define CHECK_BAILOUT()                         \
-  if (cfg_ == NULL) { return; } else {}
+  if (graph() == NULL) { return; } else {}
 
-void StatementBuilder::VisitStatements(ZoneList<Statement*>* stmts) {
+void StatementCfgBuilder::VisitStatements(ZoneList<Statement*>* stmts) {
   for (int i = 0, len = stmts->length(); i < len; i++) {
     Visit(stmts->at(i));
     CHECK_BAILOUT();
-    if (!cfg_->has_exit()) return;
+    if (!graph()->has_exit()) return;
   }
 }
 
 
 // The statement builder should not be used for declarations or expressions.
-void StatementBuilder::VisitDeclaration(Declaration* decl) { UNREACHABLE(); }
+void StatementCfgBuilder::VisitDeclaration(Declaration* decl) { UNREACHABLE(); }
 
 #define DEFINE_VISIT(type)                                      \
-  void StatementBuilder::Visit##type(type* expr) { UNREACHABLE(); }
+  void StatementCfgBuilder::Visit##type(type* expr) { UNREACHABLE(); }
 EXPRESSION_NODE_LIST(DEFINE_VISIT)
 #undef DEFINE_VISIT
 
 
-void StatementBuilder::VisitBlock(Block* stmt) {
+void StatementCfgBuilder::VisitBlock(Block* stmt) {
   VisitStatements(stmt->statements());
 }
 
 
-void StatementBuilder::VisitExpressionStatement(ExpressionStatement* stmt) {
-  BAILOUT("ExpressionStatement");
+void StatementCfgBuilder::VisitExpressionStatement(ExpressionStatement* stmt) {
+  ExpressionCfgBuilder builder;
+  builder.Build(stmt->expression(), CfgGlobals::current()->nowhere());
+  if (builder.graph() == NULL) {
+    BAILOUT("unsupported expression in expression statement");
+  }
+  graph()->Append(new PositionInstr(stmt->statement_pos()));
+  graph()->Concatenate(builder.graph());
 }
 
 
-void StatementBuilder::VisitEmptyStatement(EmptyStatement* stmt) {
+void StatementCfgBuilder::VisitEmptyStatement(EmptyStatement* stmt) {
   // Nothing to do.
 }
 
 
-void StatementBuilder::VisitIfStatement(IfStatement* stmt) {
+void StatementCfgBuilder::VisitIfStatement(IfStatement* stmt) {
   BAILOUT("IfStatement");
 }
 
 
-void StatementBuilder::VisitContinueStatement(ContinueStatement* stmt) {
+void StatementCfgBuilder::VisitContinueStatement(ContinueStatement* stmt) {
   BAILOUT("ContinueStatement");
 }
 
 
-void StatementBuilder::VisitBreakStatement(BreakStatement* stmt) {
+void StatementCfgBuilder::VisitBreakStatement(BreakStatement* stmt) {
   BAILOUT("BreakStatement");
 }
 
 
-void StatementBuilder::VisitReturnStatement(ReturnStatement* stmt) {
-  ExpressionBuilder builder;
-  builder.Visit(stmt->expression());
-  Value* value = builder.value();
-  if (value == NULL) BAILOUT("unsupported expression type");
-  cfg_->AppendReturnInstruction(value);
+void StatementCfgBuilder::VisitReturnStatement(ReturnStatement* stmt) {
+  ExpressionCfgBuilder builder;
+  builder.Build(stmt->expression(), NULL);
+  if (builder.graph() == NULL) {
+    BAILOUT("unsupported expression in return statement");
+  }
+
+  graph()->Append(new PositionInstr(stmt->statement_pos()));
+  graph()->Concatenate(builder.graph());
+  graph()->AppendReturnInstruction(builder.value());
 }
 
 
-void StatementBuilder::VisitWithEnterStatement(WithEnterStatement* stmt) {
+void StatementCfgBuilder::VisitWithEnterStatement(WithEnterStatement* stmt) {
   BAILOUT("WithEnterStatement");
 }
 
 
-void StatementBuilder::VisitWithExitStatement(WithExitStatement* stmt) {
+void StatementCfgBuilder::VisitWithExitStatement(WithExitStatement* stmt) {
   BAILOUT("WithExitStatement");
 }
 
 
-void StatementBuilder::VisitSwitchStatement(SwitchStatement* stmt) {
+void StatementCfgBuilder::VisitSwitchStatement(SwitchStatement* stmt) {
   BAILOUT("SwitchStatement");
 }
 
 
-void StatementBuilder::VisitLoopStatement(LoopStatement* stmt) {
+void StatementCfgBuilder::VisitLoopStatement(LoopStatement* stmt) {
   BAILOUT("LoopStatement");
 }
 
 
-void StatementBuilder::VisitForInStatement(ForInStatement* stmt) {
+void StatementCfgBuilder::VisitForInStatement(ForInStatement* stmt) {
   BAILOUT("ForInStatement");
 }
 
 
-void StatementBuilder::VisitTryCatch(TryCatch* stmt) {
+void StatementCfgBuilder::VisitTryCatch(TryCatch* stmt) {
   BAILOUT("TryCatch");
 }
 
 
-void StatementBuilder::VisitTryFinally(TryFinally* stmt) {
+void StatementCfgBuilder::VisitTryFinally(TryFinally* stmt) {
   BAILOUT("TryFinally");
 }
 
 
-void StatementBuilder::VisitDebuggerStatement(DebuggerStatement* stmt) {
+void StatementCfgBuilder::VisitDebuggerStatement(DebuggerStatement* stmt) {
   BAILOUT("DebuggerStatement");
 }
 
@@ -424,20 +620,24 @@
 
 
 void Constant::Print() {
-  PrintF("Constant(");
+  PrintF("Constant ");
   handle_->Print();
-  PrintF(")");
+}
+
+
+void Nowhere::Print() {
+  PrintF("Nowhere");
 }
 
 
 void SlotLocation::Print() {
-  PrintF("Slot(");
+  PrintF("Slot ");
   switch (type_) {
     case Slot::PARAMETER:
-      PrintF("PARAMETER, %d)", index_);
+      PrintF("(PARAMETER, %d)", index_);
       break;
     case Slot::LOCAL:
-      PrintF("LOCAL, %d)", index_);
+      PrintF("(LOCAL, %d)", index_);
       break;
     default:
       UNREACHABLE();
@@ -445,9 +645,87 @@
 }
 
 
-void ReturnInstr::Print() {
-  PrintF("Return ");
+void TempLocation::Print() {
+  PrintF("Temp %d", number());
+}
+
+
+void OneOperandInstruction::Print() {
+  PrintF("(");
+  location()->Print();
+  PrintF(", ");
   value_->Print();
+  PrintF(")");
+}
+
+
+void TwoOperandInstruction::Print() {
+  PrintF("(");
+  location()->Print();
+  PrintF(", ");
+  value0_->Print();
+  PrintF(", ");
+  value1_->Print();
+  PrintF(")");
+}
+
+
+void MoveInstr::Print() {
+  PrintF("Move              ");
+  OneOperandInstruction::Print();
+  PrintF("\n");
+}
+
+
+void PropLoadInstr::Print() {
+  PrintF("PropLoad          ");
+  TwoOperandInstruction::Print();
+  PrintF("\n");
+}
+
+
+void BinaryOpInstr::Print() {
+  switch (op()) {
+    case Token::OR:
+      // Two-character operator name.
+      PrintF("BinaryOp[OR]      ");
+      break;
+    case Token::AND:
+    case Token::SHL:
+    case Token::SAR:
+    case Token::SHR:
+    case Token::ADD:
+    case Token::SUB:
+    case Token::MUL:
+    case Token::DIV:
+    case Token::MOD:
+      // Three-character operator names.
+      PrintF("BinaryOp[%s]     ", Token::Name(op()));
+      break;
+    case Token::COMMA:
+      // Five-character operator name.
+      PrintF("BinaryOp[COMMA]   ");
+      break;
+    case Token::BIT_OR:
+      // Six-character operator name.
+      PrintF("BinaryOp[BIT_OR]  ");
+      break;
+    case Token::BIT_XOR:
+    case Token::BIT_AND:
+      // Seven-character operator names.
+      PrintF("BinaryOp[%s] ", Token::Name(op()));
+      break;
+    default:
+      UNREACHABLE();
+  }
+  TwoOperandInstruction::Print();
+  PrintF("\n");
+}
+
+
+void ReturnInstr::Print() {
+  PrintF("Return            ");
+  OneOperandInstruction::Print();
   PrintF("\n");
 }
 
@@ -459,7 +737,7 @@
     for (int i = 0, len = instructions_.length(); i < len; i++) {
       instructions_[i]->Print();
     }
-    PrintF("Goto L%d\n\n", successor_->number());
+    PrintF("Goto              L%d\n\n", successor_->number());
     successor_->Print();
   }
 }
diff --git a/src/cfg.h b/src/cfg.h
index fb732dd..0eb0f92 100644
--- a/src/cfg.h
+++ b/src/cfg.h
@@ -34,6 +34,50 @@
 namespace internal {
 
 class ExitNode;
+class Location;
+
+// Translate a source AST into a control-flow graph (CFG).  The CFG contains
+// single-entry, single-exit blocks of straight-line instructions and
+// administrative nodes.
+//
+// Instructions are described by the following grammar.
+//
+// <Instruction> ::=
+//     Move <Location> <Value>
+//   | PropLoad <Location> <Value> <Value>
+//   | BinaryOp <Location> Token::Value <Value> <Value>
+//   | Return Nowhere <Value>
+//   | Position <Int>
+//
+// Values are trivial expressions:
+//
+// <Value> ::= Constant | <Location>
+//
+// Locations are storable values ('lvalues').  They can be slots,
+// compiler-generated temporaries, or the special location 'Nowhere'
+// indicating that no value is needed.
+//
+// <Location> ::=
+//     SlotLocation Slot::Type <Index>
+//   | TempLocation
+//   | Nowhere
+
+
+// Administrative nodes: There are several types of 'administrative' nodes
+// that do not contain instructions and do not necessarily have a single
+// predecessor and a single successor.
+//
+// EntryNode: there is a distinguished entry node that has no predecessors
+// and a single successor.
+//
+// ExitNode: there is a distinguished exit node that has arbitrarily many
+// predecessors and no successor.
+//
+// JoinNode: join nodes have multiple predecessors and a single successor.
+//
+// BranchNode: branch nodes have a single predecessor and multiple
+// successors.
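As a hand-worked instance of this grammar (temp and label numbers illustrative, constant formatting approximate), a body like { return a.b + 2; } with parameter a builds to a single instruction block:

    PropLoad          (Temp 0, Slot (PARAMETER, 0), Constant 'b')
    BinaryOp[ADD]     (Temp 1, Temp 0, Constant 2)
    Return            (Nowhere, Temp 1)
    Goto              L1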
+
 
 // A convenient class to keep 'global' values when building a CFG.  Since
 // CFG construction can be invoked recursively, CFG globals are stacked.
@@ -48,40 +92,64 @@
     return top_;
   }
 
+  // The function currently being compiled.
   FunctionLiteral* fun() { return global_fun_; }
 
+  // The shared global exit node for all exits from the function.
   ExitNode* exit() { return global_exit_; }
 
+  // The shared 'nowhere' location, used when a value is not needed.
+  Location* nowhere() { return nowhere_; }
+
 #ifdef DEBUG
-  int next_number() { return node_counter_++; }
+  int next_node_number() { return node_counter_++; }
+  int next_temp_number() { return temp_counter_++; }
 #endif
 
  private:
   static CfgGlobals* top_;
-
-  // Function literal currently compiling.
   FunctionLiteral* global_fun_;
-
-  // Shared global exit node for all returns from the same function.
   ExitNode* global_exit_;
+  Location* nowhere_;
 
 #ifdef DEBUG
-  // Used to number nodes when printing.
+  // Used to number nodes and temporaries when printing.
   int node_counter_;
+  int temp_counter_;
 #endif
 
   CfgGlobals* previous_;
 };
 
 
-// Values appear in instructions.  They represent trivial source
-// expressions: ones with no side effects and that do not require code to be
-// generated.
+class SlotLocation;
+
+// Values represent trivial source expressions: ones with no side effects
+// and that do not require code to be generated.
 class Value : public ZoneObject {
  public:
   virtual ~Value() {}
 
-  virtual void ToRegister(MacroAssembler* masm, Register reg) = 0;
+  // Predicates:
+
+  virtual bool is_temporary() { return false; }
+  virtual bool is_slot() { return false; }
+  virtual bool is_constant() { return false; }
+
+  // True if the value is a temporary allocated to the stack in
+  // fast-compilation mode.
+  virtual bool is_on_stack() { return false; }
+
+  // Support for fast-compilation mode:
+
+  // Move the value into a register.
+  virtual void Get(MacroAssembler* masm, Register reg) = 0;
+
+  // Push the value on the stack.
+  virtual void Push(MacroAssembler* masm) = 0;
+
+  // Move the value into a slot location.
+  virtual void MoveToSlot(MacroAssembler* masm, SlotLocation* loc) = 0;
 
 #ifdef DEBUG
   virtual void Print() = 0;
@@ -94,9 +162,22 @@
  public:
   explicit Constant(Handle<Object> handle) : handle_(handle) {}
 
-  virtual ~Constant() {}
+  // Cast accessor.
+  static Constant* cast(Value* value) {
+    ASSERT(value->is_constant());
+    return reinterpret_cast<Constant*>(value);
+  }
 
-  void ToRegister(MacroAssembler* masm, Register reg);
+  // Accessors.
+  Handle<Object> handle() { return handle_; }
+
+  // Predicates.
+  bool is_constant() { return true; }
+
+  // Support for fast-compilation mode.
+  void Get(MacroAssembler* masm, Register reg);
+  void Push(MacroAssembler* masm);
+  void MoveToSlot(MacroAssembler* masm, SlotLocation* loc);
 
 #ifdef DEBUG
   void Print();
@@ -112,7 +193,26 @@
  public:
   virtual ~Location() {}
 
-  virtual void ToRegister(MacroAssembler* masm, Register reg) = 0;
+  // Static factory function returning the singleton nowhere location.
+  static Location* Nowhere() {
+    return CfgGlobals::current()->nowhere();
+  }
+
+  // Support for fast-compilation mode:
+
+  // Assumes temporaries have been allocated.
+  virtual void Get(MacroAssembler* masm, Register reg) = 0;
+
+  // Store the value in a register to the location.  Assumes temporaries
+  // have been allocated.
+  virtual void Set(MacroAssembler* masm, Register reg) = 0;
+
+  // Assumes temporaries have been allocated, and if the value is a
+  // temporary it was not allocated to the stack.
+  virtual void Push(MacroAssembler* masm) = 0;
+
+  // Emit code to move a value into this location.
+  virtual void Move(MacroAssembler* masm, Value* value) = 0;
 
 #ifdef DEBUG
   virtual void Print() = 0;
@@ -120,13 +220,55 @@
 };
 
 
+// Nowhere is a special (singleton) location that indicates the value of a
+// computation is not needed (though its side effects are).
+class Nowhere : public Location {
+ public:
+  // We should not try to emit code to read Nowhere.
+  void Get(MacroAssembler* masm, Register reg) { UNREACHABLE(); }
+  void Push(MacroAssembler* masm) { UNREACHABLE(); }
+  void MoveToSlot(MacroAssembler* masm, SlotLocation* loc) { UNREACHABLE(); }
+
+  // Setting Nowhere is ignored.
+  void Set(MacroAssembler* masm, Register reg) {}
+  void Move(MacroAssembler* masm, Value* value) {}
+
+#ifdef DEBUG
+  void Print();
+#endif
+
+ private:
+  Nowhere() {}
+
+  friend class CfgGlobals;
+};
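Nowhere's constructor is private and only CfgGlobals may build one, so every consumer shares a single instance per CFG construction; a usage sketch:

    Location* a = CfgGlobals::current()->nowhere();
    Location* b = Location::Nowhere();  // static accessor, same object
    ASSERT(a == b);
    // Writes to it are dropped: compiling a Move into Nowhere emits nothing.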
+
+
 // SlotLocations represent parameters and stack-allocated (i.e.,
 // non-context) local variables.
 class SlotLocation : public Location {
  public:
   SlotLocation(Slot::Type type, int index) : type_(type), index_(index) {}
 
-  void ToRegister(MacroAssembler* masm, Register reg);
+  // Cast accessor.
+  static SlotLocation* cast(Value* value) {
+    ASSERT(value->is_slot());
+    return reinterpret_cast<SlotLocation*>(value);
+  }
+
+  // Accessors.
+  Slot::Type type() { return type_; }
+  int index() { return index_; }
+
+  // Predicates.
+  bool is_slot() { return true; }
+
+  // Support for fast-compilation mode.
+  void Get(MacroAssembler* masm, Register reg);
+  void Set(MacroAssembler* masm, Register reg);
+  void Push(MacroAssembler* masm);
+  void Move(MacroAssembler* masm, Value* value);
+  void MoveToSlot(MacroAssembler* masm, SlotLocation* loc);
 
 #ifdef DEBUG
   void Print();
@@ -138,28 +280,241 @@
 };
 
 
+// TempLocations represent compiler-generated temporaries.  They are
+// allocated to registers or memory either before code generation (in the
+// optimized-for-speed compiler) or on the fly during code generation (in
+// the optimized-for-space compiler).
+class TempLocation : public Location {
+ public:
+  // Fast-compilation mode allocation decisions.
+  enum Where {
+    NOT_ALLOCATED,  // Not yet allocated.
+    ACCUMULATOR,    // Allocated to the dedicated accumulator register.
+    STACK           // Allocated to the stack.
+  };
+
+  TempLocation() : where_(NOT_ALLOCATED) {
+#ifdef DEBUG
+    number_ = -1;
+#endif
+  }
+
+  // Cast accessor.
+  static TempLocation* cast(Value* value) {
+    ASSERT(value->is_temporary());
+    return reinterpret_cast<TempLocation*>(value);
+  }
+
+  // Accessors.
+  Where where() { return where_; }
+  void set_where(Where where) {
+    ASSERT(where_ == TempLocation::NOT_ALLOCATED);
+    where_ = where;
+  }
+
+  // Predicates.
+  bool is_on_stack() { return where_ == STACK; }
+  bool is_temporary() { return true; }
+
+  // Support for fast-compilation mode.  Assume the temp has been allocated.
+  void Get(MacroAssembler* masm, Register reg);
+  void Set(MacroAssembler* masm, Register reg);
+  void Push(MacroAssembler* masm);
+  void Move(MacroAssembler* masm, Value* value);
+  void MoveToSlot(MacroAssembler* masm, SlotLocation* loc);
+
+#ifdef DEBUG
+  int number() {
+    if (number_ == -1) number_ = CfgGlobals::current()->next_temp_number();
+    return number_;
+  }
+
+  void Print();
+#endif
+
+ private:
+  Where where_;
+
+#ifdef DEBUG
+  int number_;
+#endif
+};
+
+
 // Instructions are computations.  They represent non-trivial source
 // expressions: typically ones that have side effects and require code to
 // be generated.
 class Instruction : public ZoneObject {
  public:
-  virtual ~Instruction() {}
+  // Accessors.
+  Location* location() { return location_; }
+  void set_location(Location* location) { location_ = location; }
 
+  // Support for fast-compilation mode:
+
+  // Emit code to perform the instruction.
   virtual void Compile(MacroAssembler* masm) = 0;
 
+  // Allocate a temporary which is the result of the immediate predecessor
+  // instruction.  It is allocated to the accumulator register if it is used
+  // as an operand to this instruction, otherwise to the stack.
+  virtual void FastAllocate(TempLocation* temp) = 0;
+
 #ifdef DEBUG
   virtual void Print() = 0;
 #endif
+
+ protected:
+  // Every instruction has a location where its result is stored (which may
+  // be Nowhere).
+  explicit Instruction(Location* location) : location_(location) {}
+
+  virtual ~Instruction() {}
+
+  Location* location_;
 };
 
 
-// Return a value.
-class ReturnInstr : public Instruction {
+// Base class of instructions that have no input operands.
+class ZeroOperandInstruction : public Instruction {
  public:
-  explicit ReturnInstr(Value* value) : value_(value) {}
+  // Support for fast-compilation mode:
+  virtual void Compile(MacroAssembler* masm) = 0;
+  void FastAllocate(TempLocation* temp);
 
-  virtual ~ReturnInstr() {}
+#ifdef DEBUG
+  // Printing support: print the operands (nothing).
+  virtual void Print() {}
+#endif
 
+ protected:
+  explicit ZeroOperandInstruction(Location* loc) : Instruction(loc) {}
+};
+
+
+// Base class of instructions that have a single input operand.
+class OneOperandInstruction : public Instruction {
+ public:
+  // Support for fast-compilation mode:
+  virtual void Compile(MacroAssembler* masm) = 0;
+  void FastAllocate(TempLocation* temp);
+
+#ifdef DEBUG
+  // Printing support: print the operands.
+  virtual void Print();
+#endif
+
+ protected:
+  OneOperandInstruction(Location* loc, Value* value)
+      : Instruction(loc), value_(value) {
+  }
+
+  Value* value_;
+};
+
+
+// Base class of instructions that have two input operands.
+class TwoOperandInstruction : public Instruction {
+ public:
+  // Support for fast-compilation mode:
+  virtual void Compile(MacroAssembler* masm) = 0;
+  void FastAllocate(TempLocation* temp);
+
+#ifdef DEBUG
+  // Printing support: print the operands.
+  virtual void Print();
+#endif
+
+ protected:
+  TwoOperandInstruction(Location* loc, Value* value0, Value* value1)
+      : Instruction(loc), value0_(value0), value1_(value1) {
+  }
+
+  Value* value0_;
+  Value* value1_;
+};
+
+
+// A phantom instruction that indicates the start of a statement.  It
+// causes the statement position to be recorded in the relocation
+// information but generates no code.
+class PositionInstr : public ZeroOperandInstruction {
+ public:
+  explicit PositionInstr(int pos)
+      : ZeroOperandInstruction(CfgGlobals::current()->nowhere()), pos_(pos) {
+  }
+
+  // Support for fast-compilation mode.
+  void Compile(MacroAssembler* masm);
+
+  // This should not be called.  The last instruction of the previous
+  // statement should not have a temporary as its location.
+  void FastAllocate(TempLocation* temp) { UNREACHABLE(); }
+
+#ifdef DEBUG
+  // Printing support.  Print nothing.
+  void Print() {}
+#endif
+
+ private:
+  int pos_;
+};
+
+
+// Move a value to a location.
+class MoveInstr : public OneOperandInstruction {
+ public:
+  MoveInstr(Location* loc, Value* value)
+      : OneOperandInstruction(loc, value) {
+  }
+
+  // Accessors.
+  Value* value() { return value_; }
+
+  // Support for fast-compilation mode.
+  void Compile(MacroAssembler* masm);
+
+#ifdef DEBUG
+  // Printing support.
+  void Print();
+#endif
+};
+
+
+// Load a property from a receiver, leaving the result in a location.
+class PropLoadInstr : public TwoOperandInstruction {
+ public:
+  PropLoadInstr(Location* loc, Value* object, Value* key)
+      : TwoOperandInstruction(loc, object, key) {
+  }
+
+  // Accessors.
+  Value* object() { return value0_; }
+  Value* key() { return value1_; }
+
+  // Support for fast-compilation mode.
+  void Compile(MacroAssembler* masm);
+
+#ifdef DEBUG
+  void Print();
+#endif
+};
+
+
+// Perform a (non-short-circuited) binary operation on a pair of values,
+// leaving the result in a location.
+class BinaryOpInstr : public TwoOperandInstruction {
+ public:
+  BinaryOpInstr(Location* loc, Token::Value op, Value* left, Value* right)
+      : TwoOperandInstruction(loc, left, right), op_(op) {
+  }
+
+  // Accessors.
+  Value* left() { return value0_; }
+  Value* right() { return value1_; }
+  Token::Value op() { return op_; }
+
+  // Support for fast-compilation mode.
   void Compile(MacroAssembler* masm);
 
 #ifdef DEBUG
@@ -167,13 +522,36 @@
 #endif
 
  private:
-  Value* value_;
+  Token::Value op_;
 };
 
 
-// Nodes make up control-flow graphs.  They contain single-entry,
-// single-exit blocks of instructions and administrative nodes making up the
-// graph structure.
+// Return a value.  Has the side effect of moving its value into the return
+// value register.  Can only occur as the last instruction in an instruction
+// block, and implies that the block is closed (cannot have instructions
+// appended or graph fragments concatenated to the end) and that the block's
+// successor is the global exit node for the current function.
+class ReturnInstr : public OneOperandInstruction {
+ public:
+  explicit ReturnInstr(Value* value)
+      : OneOperandInstruction(CfgGlobals::current()->nowhere(), value) {
+  }
+
+  virtual ~ReturnInstr() {}
+
+  // Accessors.
+  Value* value() { return value_; }
+
+  // Support for fast-compilation mode.
+  void Compile(MacroAssembler* masm);
+
+#ifdef DEBUG
+  void Print();
+#endif
+};
+
+
+// Nodes make up control-flow graphs.
 class CfgNode : public ZoneObject {
  public:
   CfgNode() : is_marked_(false) {
@@ -184,17 +562,26 @@
 
   virtual ~CfgNode() {}
 
+  // Because CFGs contain cycles, nodes support marking during traversal
+  // (e.g., for printing or compilation).  The traversal functions will mark
+  // unmarked nodes and backtrack if they encounter a marked one.  After a
+  // traversal, the graph should be explicitly unmarked by calling Unmark on
+  // the entry node.
   bool is_marked() { return is_marked_; }
-
-  virtual bool is_block() { return false; }
-
   virtual void Unmark() = 0;
 
+  // Predicates:
+
+  // True if the node is an instruction block.
+  virtual bool is_block() { return false; }
+
+  // Support for fast-compilation mode.  Emit the instructions or control
+  // flow represented by the node.
   virtual void Compile(MacroAssembler* masm) = 0;
 
 #ifdef DEBUG
   int number() {
-    if (number_ == -1) number_ = CfgGlobals::current()->next_number();
+    if (number_ == -1) number_ = CfgGlobals::current()->next_node_number();
     return number_;
   }
 
@@ -217,22 +604,30 @@
 
   virtual ~InstructionBlock() {}
 
+  void Unmark();
+
+  // Cast accessor.
   static InstructionBlock* cast(CfgNode* node) {
     ASSERT(node->is_block());
     return reinterpret_cast<InstructionBlock*>(node);
   }
 
+  bool is_block() { return true; }
+
+  // Accessors.
+  CfgNode* successor() { return successor_; }
+
   void set_successor(CfgNode* succ) {
     ASSERT(successor_ == NULL);
     successor_ = succ;
   }
 
-  bool is_block() { return true; }
+  ZoneList<Instruction*>* instructions() { return &instructions_; }
 
-  void Unmark();
-
+  // Support for fast-compilation mode.
   void Compile(MacroAssembler* masm);
 
+  // Add an instruction to the end of the block.
   void Append(Instruction* instr) { instructions_.Add(instr); }
 
 #ifdef DEBUG
@@ -245,9 +640,7 @@
 };
 
 
-// The CFG for a function has a distinguished entry node.  It has no
-// predecessors and a single successor.  The successor is the block
-// containing the function's first instruction.
+// An entry node (one per function).
 class EntryNode : public CfgNode {
  public:
   explicit EntryNode(InstructionBlock* succ) : successor_(succ) {}
@@ -256,6 +649,7 @@
 
   void Unmark();
 
+  // Support for fast-compilation mode.
   void Compile(MacroAssembler* masm);
 
 #ifdef DEBUG
@@ -267,9 +661,7 @@
 };
 
 
-// The CFG for a function has a distinguished exit node.  It has no
-// successor and arbitrarily many predecessors.  The predecessors are all
-// the blocks returning from the function.
+// An exit node (one per function).
 class ExitNode : public CfgNode {
  public:
   ExitNode() {}
@@ -278,6 +670,7 @@
 
   void Unmark();
 
+  // Support for fast-compilation mode.
   void Compile(MacroAssembler* masm);
 
 #ifdef DEBUG
@@ -286,28 +679,36 @@
 };
 
 
-// A CFG consists of a linked structure of nodes.  It has a single entry
-// node and optionally an exit node.  There is a distinguished global exit
-// node that is used as the successor of all blocks that return from the
-// function.
+// A CFG consists of a linked structure of nodes.  Nodes are linked by
+// pointing to their successors, always beginning with a (single) entry node
+// (not necessarily of type EntryNode).  If it is still possible to add
+// nodes to the end of the graph (i.e., there is a (single) path that does
+// not end with the global exit node), then the CFG has an exit node as
+// well.
 //
-// Fragments of control-flow graphs, produced when traversing the statements
-// and expressions in the source AST, are represented by the same class.
-// They have instruction blocks as both their entry and exit (if there is
-// one).  Instructions can always be prepended or appended to fragments, and
-// fragments can always be concatenated.
+// The empty CFG is represented by a NULL entry and a NULL exit.
 //
-// A singleton CFG fragment (i.e., with only one node) has the same node as
-// both entry and exit (if the exit is available).
+// We use the term 'open fragment' to mean a CFG whose entry and exits are
+// both instruction blocks.  It is always possible to add instructions and
+// nodes to the beginning or end of an open fragment.
+//
+// We use the term 'closed fragment' to mean a CFG whose entry is an
+// instruction block and whose exit is NULL (all paths go to the global
+// exit).
+//
+// We use the term 'fragment' to refer to a CFG that is known to be an open
+// or closed fragment.
 class Cfg : public ZoneObject {
  public:
-  // Create a singleton CFG fragment.
-  explicit Cfg(InstructionBlock* block) : entry_(block), exit_(block) {}
+  // Create an empty CFG fragment.
+  Cfg() : entry_(NULL), exit_(NULL) {}
 
-  // Build the CFG for a function.
+  // Build the CFG for a function.  The returned CFG begins with an
+  // EntryNode and all paths end with the ExitNode.
   static Cfg* Build();
 
-  // The entry and exit nodes.
+  // The entry and exit nodes of the CFG (not necessarily EntryNode and
+  // ExitNode).
   CfgNode* entry() { return entry_; }
   CfgNode* exit() { return exit_; }
 
@@ -318,18 +719,21 @@
   // concatenated to).
   bool has_exit() { return exit_ != NULL; }
 
-  // Add an entry node to a CFG fragment.  It is no longer a fragment
-  // (instructions cannot be prepended).
+  // Add an EntryNode to a CFG fragment.  It is no longer a fragment
+  // (instructions can no longer be prepended).
   void PrependEntryNode();
 
-  // Append an instruction to the end of a CFG fragment.  Assumes it has an
-  // available exit.
+  // Append an instruction to the end of an open fragment.
   void Append(Instruction* instr);
 
-  // Appends a return instruction to the end of a CFG fragment.  It no
-  // longer has an available exit node.
+  // Append a return instruction to the end of an open fragment, making it
+  // a closed fragment (the exit block's successor becomes the global exit
+  // node).
   void AppendReturnInstruction(Value* value);
 
+  // Glue another CFG fragment to the end of this (open) fragment.
+  void Concatenate(Cfg* other);
+
+  // Support for compilation.  Compile the entire CFG.
   Handle<Code> Compile(Handle<Script> script);
 
 #ifdef DEBUG
@@ -344,12 +748,85 @@
 };
 
 
-// An Expression Builder traverses a trivial expression and returns a value.
-class ExpressionBuilder : public AstVisitor {
+// An implementation of a set of locations (currently only slot locations).
+// Most of the operations are destructive.
+class LocationSet BASE_EMBEDDED {
  public:
-  ExpressionBuilder() : value_(new Constant(Handle<Object>::null())) {}
+  // Construct an empty location set.
+  LocationSet() : parameters_(0), locals_(0) {}
 
+  // Raw accessors.
+  uintptr_t parameters() { return parameters_; }
+  uintptr_t locals() { return locals_; }
+
+  // Make this the empty set.
+  void Empty() {
+    parameters_ = locals_ = 0;
+  }
+
+  // Insert an element.
+  void AddElement(SlotLocation* location) {
+    if (location->type() == Slot::PARAMETER) {
+      // Parameter indexes begin with -1 ('this').
+      ASSERT(location->index() < kBitsPerPointer - 1);
+      parameters_ |= (static_cast<uintptr_t>(1) << (location->index() + 1));
+    } else {
+      ASSERT(location->type() == Slot::LOCAL);
+      ASSERT(location->index() < kBitsPerPointer);
+      locals_ |= (static_cast<uintptr_t>(1) << location->index());
+    }
+  }
+
+  // (Destructively) compute the union with another set.
+  void Union(LocationSet* other) {
+    parameters_ |= other->parameters();
+    locals_ |= other->locals();
+  }
+
+  bool Contains(SlotLocation* location) {
+    if (location->type() == Slot::PARAMETER) {
+      ASSERT(location->index() < kBitsPerPointer - 1);
+      return (parameters_ &
+              (static_cast<uintptr_t>(1) << (location->index() + 1))) != 0;
+    } else {
+      ASSERT(location->type() == Slot::LOCAL);
+      ASSERT(location->index() < kBitsPerPointer);
+      return (locals_ &
+              (static_cast<uintptr_t>(1) << location->index())) != 0;
+    }
+  }
+
+ private:
+  uintptr_t parameters_;
+  uintptr_t locals_;
+};
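LocationSet packs membership into two word-wide bitmasks, which is why Cfg::Build in cfg.cc bails out above kBitsPerPointer locals and parameters; a usage sketch with invented slot indexes:

    LocationSet assigned;
    assigned.AddElement(new SlotLocation(Slot::LOCAL, 3));
    LocationSet other;
    other.AddElement(new SlotLocation(Slot::PARAMETER, -1));  // receiver: bit 0
    assigned.Union(&other);                                   // destructive
    ASSERT(assigned.Contains(new SlotLocation(Slot::LOCAL, 3)));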
+
+
+// An ExpressionCfgBuilder traverses an expression and returns an open CFG
+// fragment (currently a possibly empty list of instructions represented by
+// a singleton instruction block) and the expression's value.
+//
+// Failure to build the CFG is indicated by a NULL CFG.
+class ExpressionCfgBuilder : public AstVisitor {
+ public:
+  ExpressionCfgBuilder() : destination_(NULL), value_(NULL), graph_(NULL) {}
+
+  // Result accessors.
   Value* value() { return value_; }
+  Cfg* graph() { return graph_; }
+  LocationSet* assigned_vars() { return &assigned_vars_; }
+
+  // Build the CFG for an expression and remember its value.  The
+  // destination is a hint for where the value should go; it may be
+  // ignored.  NULL indicates no preference.
+  //
+  // Concretely, if the expression needs to generate a temporary for its
+  // value, it should use the passed destination or generate one if NULL.
+  void Build(Expression* expr, Location* destination) {
+    value_ = NULL;
+    graph_ = new Cfg();
+    assigned_vars_.Empty();
+    destination_ = destination;
+    Visit(expr);
+  }
 
   // AST node visitors.
 #define DECLARE_VISIT(type) void Visit##type(type* node);
@@ -357,16 +834,24 @@
 #undef DECLARE_VISIT
 
  private:
+  // State for the visitor.  Input parameter:
+  Location* destination_;
+
+  // Output parameters:
   Value* value_;
+  Cfg* graph_;
+  LocationSet assigned_vars_;
 };
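Consumers of ExpressionCfgBuilder follow a build/check/splice protocol, as the Visit methods in cfg.cc above demonstrate; schematically, inside a visitor like those (the error path here is a stand-in for the caller's own bailout handling):

    ExpressionCfgBuilder builder;
    builder.Build(expr, NULL);              // NULL: no destination preference
    if (builder.graph() == NULL) return;    // unsupported subexpression
    graph()->Concatenate(builder.graph());  // splice its instructions in
    Value* result = builder.value();        // only meaningful on success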
 
 
-// A StatementBuilder traverses a statement and returns a CFG.
-class StatementBuilder : public AstVisitor {
+// A StatementCfgBuilder maintains a CFG fragment accumulator.  When it
+// visits a statement, it concatenates the CFG for the statement to the end
+// of the accumulator.
+class StatementCfgBuilder : public AstVisitor {
  public:
-  StatementBuilder() : cfg_(new Cfg(new InstructionBlock())) {}
+  StatementCfgBuilder() : graph_(new Cfg()) {}
 
-  Cfg* cfg() { return cfg_; }
+  Cfg* graph() { return graph_; }
 
   void VisitStatements(ZoneList<Statement*>* stmts);
 
@@ -376,7 +861,8 @@
 #undef DECLARE_VISIT
 
  private:
-  Cfg* cfg_;
+  // State for the visitor.  Input/output parameter:
+  Cfg* graph_;
 };
 
 
diff --git a/src/d8.cc b/src/d8.cc
index e02c80a..be3615b 100644
--- a/src/d8.cc
+++ b/src/d8.cc
@@ -146,19 +146,22 @@
 
 
 Handle<Value> Shell::Print(const Arguments& args) {
-  bool first = true;
+  Handle<Value> val = Write(args);
+  printf("\n");
+  return val;
+}
+
+
+Handle<Value> Shell::Write(const Arguments& args) {
   for (int i = 0; i < args.Length(); i++) {
     HandleScope handle_scope;
-    if (first) {
-      first = false;
-    } else {
+    if (i != 0) {
       printf(" ");
     }
     v8::String::Utf8Value str(args[i]);
     const char* cstr = ToCString(str);
     printf("%s", cstr);
   }
-  printf("\n");
   return Undefined();
 }
 
@@ -399,6 +402,7 @@
   HandleScope scope;
   Handle<ObjectTemplate> global_template = ObjectTemplate::New();
   global_template->Set(String::New("print"), FunctionTemplate::New(Print));
+  global_template->Set(String::New("write"), FunctionTemplate::New(Write));
   global_template->Set(String::New("read"), FunctionTemplate::New(Read));
   global_template->Set(String::New("load"), FunctionTemplate::New(Load));
   global_template->Set(String::New("quit"), FunctionTemplate::New(Quit));
@@ -588,6 +592,8 @@
   Handle<ObjectTemplate> global_template = ObjectTemplate::New();
   global_template->Set(String::New("print"),
                        FunctionTemplate::New(Shell::Print));
+  global_template->Set(String::New("write"),
+                       FunctionTemplate::New(Shell::Write));
   global_template->Set(String::New("read"),
                        FunctionTemplate::New(Shell::Read));
   global_template->Set(String::New("load"),
diff --git a/src/d8.h b/src/d8.h
index 092e3a3..1bcc879 100644
--- a/src/d8.h
+++ b/src/d8.h
@@ -138,6 +138,7 @@
 #endif
 
   static Handle<Value> Print(const Arguments& args);
+  static Handle<Value> Write(const Arguments& args);
   static Handle<Value> Yield(const Arguments& args);
   static Handle<Value> Quit(const Arguments& args);
   static Handle<Value> Version(const Arguments& args);
diff --git a/src/heap.cc b/src/heap.cc
index d813ed1..8ea1136 100644
--- a/src/heap.cc
+++ b/src/heap.cc
@@ -74,7 +74,7 @@
 int Heap::old_generation_size_ = 128*MB;
 int Heap::initial_semispace_size_ = 128*KB;
 #else
-int Heap::semispace_size_  = 8*MB;
+int Heap::semispace_size_  = 4*MB;
 int Heap::old_generation_size_ = 512*MB;
 int Heap::initial_semispace_size_ = 512*KB;
 #endif
@@ -641,11 +641,11 @@
 
   if (new_space_.Capacity() < new_space_.MaximumCapacity() &&
       survived_since_last_expansion_ > new_space_.Capacity()) {
-    // Double the size of new space if there is room to grow and enough
+    // Grow the size of new space if there is room to grow and enough
     // data has survived scavenge since the last expansion.
-    // TODO(1240712): NewSpace::Double has a return value which is
+    // TODO(1240712): NewSpace::Grow has a return value which is
     // ignored here.
-    new_space_.Double();
+    new_space_.Grow();
     survived_since_last_expansion_ = 0;
   }
 
@@ -1669,7 +1669,7 @@
   int length = end - start;
 
   // If the resulting string is small make a sub string.
-  if (end - start <= String::kMinNonFlatLength) {
+  if (length <= String::kMinNonFlatLength) {
     return Heap::AllocateSubString(buffer, start, end);
   }
 
diff --git a/src/ia32/builtins-ia32.cc b/src/ia32/builtins-ia32.cc
index 3cafd90..a70a9d2 100644
--- a/src/ia32/builtins-ia32.cc
+++ b/src/ia32/builtins-ia32.cc
@@ -140,7 +140,7 @@
     ExternalReference new_space_allocation_limit =
         ExternalReference::new_space_allocation_limit_address();
     __ cmp(edi, Operand::StaticVariable(new_space_allocation_limit));
-    __ j(greater_equal, &rt_call);
+    __ j(above_equal, &rt_call);
     // Allocated the JSObject, now initialize the fields.
     // eax: initial map
     // ebx: JSObject
@@ -175,8 +175,8 @@
     __ or_(Operand(ebx), Immediate(kHeapObjectTag));
     __ mov(Operand::StaticVariable(new_space_allocation_top), edi);
 
-    // Check if a properties array should be setup and allocate one if needed.
-    // Otherwise initialize the properties to the empty_fixed_array as well.
+    // Check if a non-empty properties array is needed.
+    // Allocate and initialize a FixedArray if it is.
     // eax: initial map
     // ebx: JSObject
     // edi: start of next object
@@ -184,21 +184,19 @@
     __ movzx_b(ecx, FieldOperand(eax, Map::kInObjectPropertiesOffset));
     // Calculate unused properties past the end of the in-object properties.
     __ sub(edx, Operand(ecx));
-    __ test(edx, Operand(edx));
     // Done if no extra properties are to be allocated.
     __ j(zero, &allocated);
 
     // Scale the number of elements by pointer size and add the header for
     // FixedArrays to the start of the next object calculation from above.
-    // eax: initial map
     // ebx: JSObject
     // edi: start of next object (will be start of FixedArray)
     // edx: number of elements in properties array
     ASSERT(Heap::MaxObjectSizeInPagedSpace() >
            (FixedArray::kHeaderSize + 255*kPointerSize));
-    __ lea(ecx, Operand(edi, edx, times_4, FixedArray::kHeaderSize));
+    __ lea(ecx, Operand(edi, edx, times_pointer_size, FixedArray::kHeaderSize));
     __ cmp(ecx, Operand::StaticVariable(new_space_allocation_limit));
-    __ j(greater_equal, &undo_allocation);
+    __ j(above_equal, &undo_allocation);
     __ mov(Operand::StaticVariable(new_space_allocation_top), ecx);
 
     // Initialize the FixedArray.
@@ -223,7 +221,7 @@
       __ add(Operand(eax), Immediate(kPointerSize));
       __ bind(&entry);
       __ cmp(eax, Operand(ecx));
-      __ j(less, &loop);
+      __ j(below, &loop);
     }
 
     // Store the initialized FixedArray into the properties field of
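
The greater_equal/less to above_equal/below switches in this hunk are correctness fixes, not cleanups: allocation top and limit are addresses and must be compared as unsigned values. A small illustration with hypothetical addresses straddling 0x80000000:

    #include <stdint.h>

    // The new allocation top is past the limit, so the runtime call must
    // be taken; a signed compare would miss the overflow.
    bool DemonstrateUnsignedCompare() {
      uint32_t new_top = 0x80001000u;  // hypothetical addresses straddling
      uint32_t limit   = 0x7ffff000u;  // the 0x80000000 boundary
      bool above_equal = new_top >= limit;                          // true
      bool greater_equal =
          (int32_t)new_top >= (int32_t)limit;    // false: overflow missed
      return above_equal && !greater_equal;      // true: conditions differ
    }
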
diff --git a/src/ia32/cfg-ia32.cc b/src/ia32/cfg-ia32.cc
index 01ee5e6..58985a5 100644
--- a/src/ia32/cfg-ia32.cc
+++ b/src/ia32/cfg-ia32.cc
@@ -29,6 +29,7 @@
 
 #include "cfg.h"
 #include "codegen-inl.h"
+#include "codegen-ia32.h"
 #include "macro-assembler-ia32.h"
 
 namespace v8 {
@@ -42,6 +43,14 @@
   {
     Comment cmt(masm, "[ InstructionBlock");
     for (int i = 0, len = instructions_.length(); i < len; i++) {
+      // If the location of the current instruction is a temp, then the
+      // instruction cannot be in tail position in the block.  Allocate the
+      // temp based on peeking ahead to the next instruction.
+      Instruction* instr = instructions_[i];
+      Location* loc = instr->location();
+      if (loc->is_temporary()) {
+        instructions_[i+1]->FastAllocate(TempLocation::cast(loc));
+      }
       instructions_[i]->Compile(masm);
     }
   }
@@ -79,6 +88,7 @@
   }
   successor_->Compile(masm);
   if (FLAG_check_stack) {
+    Comment cmnt(masm, "[ Deferred Stack Check");
     __ bind(&deferred_enter);
     StackCheckStub stub;
     __ CallStub(&stub);
@@ -103,31 +113,199 @@
 }
 
 
-void ReturnInstr::Compile(MacroAssembler* masm) {
-  Comment cmnt(masm, "[ ReturnInstr");
-  value_->ToRegister(masm, eax);
+void PropLoadInstr::Compile(MacroAssembler* masm) {
+  // The key should not be on the stack---if it is a compiler-generated
+  // temporary it is in the accumulator.
+  ASSERT(!key()->is_on_stack());
+
+  Comment cmnt(masm, "[ Load from Property");
+  // If the key is known at compile-time we may be able to use a load IC.
+  bool is_keyed_load = true;
+  if (key()->is_constant()) {
+    // Still use the keyed load IC if the key can be parsed as an integer,
+    // so that we hit the case that handles [] on string objects.
+    Handle<Object> key_val = Constant::cast(key())->handle();
+    uint32_t ignored;
+    if (key_val->IsSymbol() &&
+        !String::cast(*key_val)->AsArrayIndex(&ignored)) {
+      is_keyed_load = false;
+    }
+  }
+
+  if (!object()->is_on_stack()) object()->Push(masm);
+  // A test eax instruction after the call indicates to the IC code that it
+  // was inlined.  Ensure there is not one here.
+  if (is_keyed_load) {
+    key()->Push(masm);
+    Handle<Code> ic(Builtins::builtin(Builtins::KeyedLoadIC_Initialize));
+    __ call(ic, RelocInfo::CODE_TARGET);
+    __ pop(ebx);  // Discard key.
+  } else {
+    key()->Get(masm, ecx);
+    Handle<Code> ic(Builtins::builtin(Builtins::LoadIC_Initialize));
+    __ call(ic, RelocInfo::CODE_TARGET);
+  }
+  __ pop(ebx);  // Discard receiver.
+  location()->Set(masm, eax);
 }
 
 
-void Constant::ToRegister(MacroAssembler* masm, Register reg) {
+void BinaryOpInstr::Compile(MacroAssembler* masm) {
+  // The right-hand value should not be on the stack---if it is a
+  // compiler-generated temporary it is in the accumulator.
+  ASSERT(!right()->is_on_stack());
+
+  Comment cmnt(masm, "[ BinaryOpInstr");
+  // We can overwrite one of the operands if it is a temporary.
+  OverwriteMode mode = NO_OVERWRITE;
+  if (left()->is_temporary()) {
+    mode = OVERWRITE_LEFT;
+  } else if (right()->is_temporary()) {
+    mode = OVERWRITE_RIGHT;
+  }
+
+  // Push both operands and call the specialized stub.
+  if (!left()->is_on_stack()) left()->Push(masm);
+  right()->Push(masm);
+  GenericBinaryOpStub stub(op(), mode, SMI_CODE_IN_STUB);
+  __ CallStub(&stub);
+  location()->Set(masm, eax);
+}
+
+
+void ReturnInstr::Compile(MacroAssembler* masm) {
+  // The location should be 'Effect'.  As a side effect, move the value to
+  // the accumulator.
+  Comment cmnt(masm, "[ ReturnInstr");
+  value_->Get(masm, eax);
+}
+
+
+void Constant::Get(MacroAssembler* masm, Register reg) {
   __ mov(reg, Immediate(handle_));
 }
 
 
-void SlotLocation::ToRegister(MacroAssembler* masm, Register reg) {
-  switch (type_) {
+void Constant::Push(MacroAssembler* masm) {
+  __ push(Immediate(handle_));
+}
+
+
+static Operand ToOperand(SlotLocation* loc) {
+  switch (loc->type()) {
     case Slot::PARAMETER: {
       int count = CfgGlobals::current()->fun()->scope()->num_parameters();
-      __ mov(reg, Operand(ebp, (1 + count - index_) * kPointerSize));
-      break;
+      return Operand(ebp, (1 + count - loc->index()) * kPointerSize);
     }
     case Slot::LOCAL: {
       const int kOffset = JavaScriptFrameConstants::kLocal0Offset;
-      __ mov(reg, Operand(ebp, kOffset - index_ * kPointerSize));
-      break;
+      return Operand(ebp, kOffset - loc->index() * kPointerSize);
     }
     default:
       UNREACHABLE();
+      return Operand(eax);
+  }
+}
+
+
+void Constant::MoveToSlot(MacroAssembler* masm, SlotLocation* loc) {
+  __ mov(ToOperand(loc), Immediate(handle_));
+}
+
+
+void SlotLocation::Get(MacroAssembler* masm, Register reg) {
+  __ mov(reg, ToOperand(this));
+}
+
+
+void SlotLocation::Set(MacroAssembler* masm, Register reg) {
+  __ mov(ToOperand(this), reg);
+}
+
+
+void SlotLocation::Push(MacroAssembler* masm) {
+  __ push(ToOperand(this));
+}
+
+
+void SlotLocation::Move(MacroAssembler* masm, Value* value) {
+  // We dispatch to the value because in some cases (temp or constant)
+  // we can use a single instruction.
+  value->MoveToSlot(masm, this);
+}
+
+
+void SlotLocation::MoveToSlot(MacroAssembler* masm, SlotLocation* loc) {
+  // The accumulator is not live across a MoveInstr.
+  __ mov(eax, ToOperand(this));
+  __ mov(ToOperand(loc), eax);
+}
+
+
+void TempLocation::Get(MacroAssembler* masm, Register reg) {
+  switch (where_) {
+    case ACCUMULATOR:
+      if (!reg.is(eax)) __ mov(reg, eax);
+      break;
+    case STACK:
+      __ pop(reg);
+      break;
+    case NOT_ALLOCATED:
+      UNREACHABLE();
+  }
+}
+
+
+void TempLocation::Set(MacroAssembler* masm, Register reg) {
+  switch (where_) {
+    case ACCUMULATOR:
+      if (!reg.is(eax)) __ mov(eax, reg);
+      break;
+    case STACK:
+      __ push(reg);
+      break;
+    case NOT_ALLOCATED:
+      UNREACHABLE();
+  }
+}
+
+
+void TempLocation::Push(MacroAssembler* masm) {
+  switch (where_) {
+    case ACCUMULATOR:
+      __ push(eax);
+      break;
+    case STACK:
+    case NOT_ALLOCATED:
+      UNREACHABLE();
+  }
+}
+
+
+void TempLocation::Move(MacroAssembler* masm, Value* value) {
+  switch (where_) {
+    case ACCUMULATOR:
+      value->Get(masm, eax);
+      break;
+    case STACK:
+      value->Push(masm);
+      break;
+    case NOT_ALLOCATED:
+      UNREACHABLE();
+  }
+}
+
+
+void TempLocation::MoveToSlot(MacroAssembler* masm, SlotLocation* loc) {
+  switch (where_) {
+    case ACCUMULATOR:
+      __ mov(ToOperand(loc), eax);
+      break;
+    case STACK:
+      __ pop(ToOperand(loc));
+      break;
+    case NOT_ALLOCATED:
+      UNREACHABLE();
   }
 }
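
Taken together, the Move()/MoveToSlot() pairs above form a small double dispatch: the destination location forwards to the source value, so each source kind can emit its cheapest move. A simplified sketch (hypothetical stand-ins, not the real class hierarchy):

    struct SlotLocation;

    struct Value {
      // Each concrete value (Constant, SlotLocation, TempLocation) knows
      // the cheapest way to move itself into a stack slot.
      virtual void MoveToSlot(SlotLocation* dst) = 0;
    };

    struct SlotLocation {
      void Move(Value* src) { src->MoveToSlot(this); }  // dispatch on source
    };

    // Constant::MoveToSlot is a single 'mov [slot], imm'; SlotLocation's
    // goes through eax; TempLocation's uses eax or a 'pop', as above.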
 
diff --git a/src/ia32/codegen-ia32.cc b/src/ia32/codegen-ia32.cc
index c99bc6f..9542b16 100644
--- a/src/ia32/codegen-ia32.cc
+++ b/src/ia32/codegen-ia32.cc
@@ -754,9 +754,9 @@
  public:
   // Code pattern for loading a floating point value. Input value must
   // be either a smi or a heap number object (fp value). Requirements:
-  // operand on TOS+1. Returns operand as floating point number on FPU
-  // stack.
-  static void LoadFloatOperand(MacroAssembler* masm, Register scratch);
+  // operand in the register 'number'. Returns the operand as a floating
+  // point number on the FPU stack.
+  static void LoadFloatOperand(MacroAssembler* masm, Register number);
   // Code pattern for loading floating point values. Input values must
   // be either smi or heap number objects (fp values). Requirements:
   // operand_1 on TOS+1 , operand_2 on TOS+2; Returns operands as
@@ -778,57 +778,6 @@
 };
 
 
-// Flag that indicates whether or not the code that handles smi arguments
-// should be placed in the stub, inlined, or omitted entirely.
-enum GenericBinaryFlags {
-  SMI_CODE_IN_STUB,
-  SMI_CODE_INLINED
-};
-
-
-class GenericBinaryOpStub: public CodeStub {
- public:
-  GenericBinaryOpStub(Token::Value op,
-                      OverwriteMode mode,
-                      GenericBinaryFlags flags)
-      : op_(op), mode_(mode), flags_(flags) {
-    ASSERT(OpBits::is_valid(Token::NUM_TOKENS));
-  }
-
-  void GenerateSmiCode(MacroAssembler* masm, Label* slow);
-
- private:
-  Token::Value op_;
-  OverwriteMode mode_;
-  GenericBinaryFlags flags_;
-
-  const char* GetName();
-
-#ifdef DEBUG
-  void Print() {
-    PrintF("GenericBinaryOpStub (op %s), (mode %d, flags %d)\n",
-           Token::String(op_),
-           static_cast<int>(mode_),
-           static_cast<int>(flags_));
-  }
-#endif
-
-  // Minor key encoding in 16 bits FOOOOOOOOOOOOOMM.
-  class ModeBits: public BitField<OverwriteMode, 0, 2> {};
-  class OpBits: public BitField<Token::Value, 2, 13> {};
-  class FlagBits: public BitField<GenericBinaryFlags, 15, 1> {};
-
-  Major MajorKey() { return GenericBinaryOp; }
-  int MinorKey() {
-    // Encode the parameters in a unique 16 bit value.
-    return OpBits::encode(op_)
-           | ModeBits::encode(mode_)
-           | FlagBits::encode(flags_);
-  }
-  void Generate(MacroAssembler* masm);
-};
-
-
 const char* GenericBinaryOpStub::GetName() {
   switch (op_) {
     case Token::ADD: return "GenericBinaryOpStub_ADD";
@@ -5215,8 +5164,11 @@
   }
 
   // Go slow case if argument to operation is out of range.
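+  // fnstsw_ax writes the FPU status word into ax, so reserve eax in the
+  // register allocator (spilling any value cached there) while the FPU
+  // flags are transferred to the CPU flags via sahf.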
+  Result eax_reg = allocator_->Allocate(eax);
+  ASSERT(eax_reg.is_valid());
   __ fnstsw_ax();
   __ sahf();
+  eax_reg.Unuse();
   call_runtime.Branch(parity_even, not_taken);
 
   // Allocate heap number for result if possible.
@@ -7015,19 +6967,19 @@
 
 
 void FloatingPointHelper::LoadFloatOperand(MacroAssembler* masm,
-                                           Register scratch) {
+                                           Register number) {
   Label load_smi, done;
 
-  __ test(scratch, Immediate(kSmiTagMask));
+  __ test(number, Immediate(kSmiTagMask));
   __ j(zero, &load_smi, not_taken);
-  __ fld_d(FieldOperand(scratch, HeapNumber::kValueOffset));
+  __ fld_d(FieldOperand(number, HeapNumber::kValueOffset));
   __ jmp(&done);
 
   __ bind(&load_smi);
-  __ sar(scratch, kSmiTagSize);
-  __ push(scratch);
+  __ sar(number, kSmiTagSize);
+  __ push(number);
   __ fild_s(Operand(esp, 0));
-  __ pop(scratch);
+  __ pop(number);
 
   __ bind(&done);
 }
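
For reference, the smi branch above relies on the ia32 tagging scheme: a smi keeps the integer in the upper 31 bits over a zero tag bit, so shifting right by kSmiTagSize (one bit) untags it before fild_s converts it on the FPU stack. The arithmetic as a standalone sketch:

    #include <stdint.h>

    // ia32 smi encoding: value shifted left one bit, tag bit 0.
    int32_t SmiEncode(int32_t value) { return value << 1; }
    int32_t SmiDecode(int32_t smi) { return smi >> 1; }  // matches 'sar reg, 1'
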
diff --git a/src/ia32/codegen-ia32.h b/src/ia32/codegen-ia32.h
index 2abe422..1d0cc8b 100644
--- a/src/ia32/codegen-ia32.h
+++ b/src/ia32/codegen-ia32.h
@@ -609,6 +609,57 @@
 };
 
 
+// Flag that indicates whether or not the code that handles smi arguments
+// should be placed in the stub, inlined, or omitted entirely.
+enum GenericBinaryFlags {
+  SMI_CODE_IN_STUB,
+  SMI_CODE_INLINED
+};
+
+
+class GenericBinaryOpStub: public CodeStub {
+ public:
+  GenericBinaryOpStub(Token::Value op,
+                      OverwriteMode mode,
+                      GenericBinaryFlags flags)
+      : op_(op), mode_(mode), flags_(flags) {
+    ASSERT(OpBits::is_valid(Token::NUM_TOKENS));
+  }
+
+  void GenerateSmiCode(MacroAssembler* masm, Label* slow);
+
+ private:
+  Token::Value op_;
+  OverwriteMode mode_;
+  GenericBinaryFlags flags_;
+
+  const char* GetName();
+
+#ifdef DEBUG
+  void Print() {
+    PrintF("GenericBinaryOpStub (op %s), (mode %d, flags %d)\n",
+           Token::String(op_),
+           static_cast<int>(mode_),
+           static_cast<int>(flags_));
+  }
+#endif
+
+  // Minor key encoding in 16 bits FOOOOOOOOOOOOOMM.
+  class ModeBits: public BitField<OverwriteMode, 0, 2> {};
+  class OpBits: public BitField<Token::Value, 2, 13> {};
+  class FlagBits: public BitField<GenericBinaryFlags, 15, 1> {};
+
+  Major MajorKey() { return GenericBinaryOp; }
+  int MinorKey() {
+    // Encode the parameters in a unique 16 bit value.
+    return OpBits::encode(op_)
+           | ModeBits::encode(mode_)
+           | FlagBits::encode(flags_);
+  }
+  void Generate(MacroAssembler* masm);
+};
+
+
 } }  // namespace v8::internal
 
 #endif  // V8_IA32_CODEGEN_IA32_H_
diff --git a/src/ia32/cpu-ia32.cc b/src/ia32/cpu-ia32.cc
index 82a5565..2107ad9 100644
--- a/src/ia32/cpu-ia32.cc
+++ b/src/ia32/cpu-ia32.cc
@@ -27,6 +27,10 @@
 
 // CPU specific code for ia32 independent of OS goes here.
 
+#ifdef __GNUC__
+#include "third_party/valgrind/valgrind.h"
+#endif
+
 #include "v8.h"
 
 #include "cpu.h"
@@ -49,6 +53,15 @@
 
   // If flushing of the instruction cache becomes necessary Windows has the
   // API function FlushInstructionCache.
+
+  // By default, valgrind only checks the stack for writes that might need to
+  // invalidate already cached translated code.  This leads to random
+  // instability because code patches or moves sometimes go unnoticed.  One
+  // solution is to run valgrind with --smc-check=all, but this comes at a big
+  // performance cost.  We can notify valgrind to invalidate its cache.
+#ifdef VALGRIND_DISCARD_TRANSLATIONS
+  VALGRIND_DISCARD_TRANSLATIONS(start, size);
+#endif
 }
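
VALGRIND_DISCARD_TRANSLATIONS is valgrind's client request for exactly this; it compiles to nothing under NVALGRIND, and the #ifdef guard above also covers builds where the macro is unavailable. A minimal standalone usage sketch (PatchAndNotify and the patching step are hypothetical):

    #include <stddef.h>
    #include "third_party/valgrind/valgrind.h"

    void PatchAndNotify(void* start, size_t size) {
      // ... rewrite generated code in [start, start + size) in place ...
    #ifdef VALGRIND_DISCARD_TRANSLATIONS
      // Tell valgrind that any cached translations of the range are stale.
      VALGRIND_DISCARD_TRANSLATIONS(start, size);
    #endif
    }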
 
 
diff --git a/src/ia32/jump-target-ia32.cc b/src/ia32/jump-target-ia32.cc
index 587fb2d..c3f2bc1 100644
--- a/src/ia32/jump-target-ia32.cc
+++ b/src/ia32/jump-target-ia32.cc
@@ -362,6 +362,70 @@
   __ bind(&entry_label_);
 }
 
+
+void BreakTarget::Jump() {
+  // Drop leftover statement state from the frame before merging, without
+  // emitting code.
+  ASSERT(cgen()->has_valid_frame());
+  int count = cgen()->frame()->height() - expected_height_;
+  cgen()->frame()->ForgetElements(count);
+  DoJump();
+}
+
+
+void BreakTarget::Jump(Result* arg) {
+  // Drop leftover statement state from the frame before merging, without
+  // emitting code.
+  ASSERT(cgen()->has_valid_frame());
+  int count = cgen()->frame()->height() - expected_height_;
+  cgen()->frame()->ForgetElements(count);
+  cgen()->frame()->Push(arg);
+  DoJump();
+}
+
+
+void BreakTarget::Bind() {
+#ifdef DEBUG
+  // All the forward-reaching frames should have been adjusted at the
+  // jumps to this target.
+  for (int i = 0; i < reaching_frames_.length(); i++) {
+    ASSERT(reaching_frames_[i] == NULL ||
+           reaching_frames_[i]->height() == expected_height_);
+  }
+#endif
+  // Drop leftover statement state from the frame before merging, even on
+  // the fall through.  This is so we can bind the return target with state
+  // on the frame.
+  if (cgen()->has_valid_frame()) {
+    int count = cgen()->frame()->height() - expected_height_;
+    cgen()->frame()->ForgetElements(count);
+  }
+  DoBind();
+}
+
+
+void BreakTarget::Bind(Result* arg) {
+#ifdef DEBUG
+  // All the forward-reaching frames should have been adjusted at the
+  // jumps to this target.
+  for (int i = 0; i < reaching_frames_.length(); i++) {
+    ASSERT(reaching_frames_[i] == NULL ||
+           reaching_frames_[i]->height() == expected_height_ + 1);
+  }
+#endif
+  // Drop leftover statement state from the frame before merging, even on
+  // the fall through.  This is so we can bind the return target with state
+  // on the frame.
+  if (cgen()->has_valid_frame()) {
+    int count = cgen()->frame()->height() - expected_height_;
+    cgen()->frame()->ForgetElements(count);
+    cgen()->frame()->Push(arg);
+  }
+  DoBind();
+  *arg = cgen()->frame()->Pop();
+}
+
+
 #undef __
 
 
diff --git a/src/ia32/macro-assembler-ia32.cc b/src/ia32/macro-assembler-ia32.cc
index de0ef8e..7782aa9 100644
--- a/src/ia32/macro-assembler-ia32.cc
+++ b/src/ia32/macro-assembler-ia32.cc
@@ -54,41 +54,47 @@
                               Register scratch) {
   Label fast;
 
-  // Compute the page address from the heap object pointer, leave it
-  // in 'object'.
+  // Compute the page start address from the heap object pointer, and reuse
+  // the 'object' register for it.
   masm->and_(object, ~Page::kPageAlignmentMask);
+  Register page_start = object;
 
-  // Compute the bit addr in the remembered set, leave it in "addr".
-  masm->sub(addr, Operand(object));
+  // Compute the bit address in the remembered set, i.e. the index of the
+  // pointer within the page.  Reuse 'addr' as 'pointer_offset'.
+  masm->sub(addr, Operand(page_start));
   masm->shr(addr, kObjectAlignmentBits);
+  Register pointer_offset = addr;
 
   // If the bit offset lies beyond the normal remembered set range, it is in
   // the extra remembered set area of a large object.
-  masm->cmp(addr, Page::kPageSize / kPointerSize);
+  masm->cmp(pointer_offset, Page::kPageSize / kPointerSize);
   masm->j(less, &fast);
 
-  // Adjust 'addr' to be relative to the start of the extra remembered set
-  // and the page address in 'object' to be the address of the extra
-  // remembered set.
-  masm->sub(Operand(addr), Immediate(Page::kPageSize / kPointerSize));
-  // Load the array length into 'scratch' and multiply by four to get the
-  // size in bytes of the elements.
-  masm->mov(scratch, Operand(object, Page::kObjectStartOffset
-                                     + FixedArray::kLengthOffset));
-  masm->shl(scratch, kObjectAlignmentBits);
-  // Add the page header, array header, and array body size to the page
-  // address.
-  masm->add(Operand(object), Immediate(Page::kObjectStartOffset
-                                       + FixedArray::kHeaderSize));
-  masm->add(object, Operand(scratch));
+  // Adjust 'page_start' so that addressing using 'pointer_offset' hits the
+  // extra remembered set after the large object.
 
+  // Find the length of the large object (FixedArray).
+  masm->mov(scratch, Operand(page_start, Page::kObjectStartOffset
+                                         + FixedArray::kLengthOffset));
+  Register array_length = scratch;
+
+  // Extra remembered set starts right after the large object (a FixedArray), at
+  //   page_start + kObjectStartOffset + objectSize
+  // where objectSize is FixedArray::kHeaderSize + kPointerSize * array_length.
+  // Add the delta between the end of the normal RSet and the start of the
+  // extra RSet to 'page_start', so that addressing the bit using
+  // 'pointer_offset' hits the extra RSet words.
+  masm->lea(page_start,
+            Operand(page_start, array_length, times_pointer_size,
+                    Page::kObjectStartOffset + FixedArray::kHeaderSize
+                        - Page::kRSetEndOffset));
 
   // NOTE: For now, we use the bit-test-and-set (bts) x86 instruction
   // to limit code size. We should probably evaluate this decision by
   // measuring the performance of an equivalent implementation using
   // "simpler" instructions
   masm->bind(&fast);
-  masm->bts(Operand(object, 0), addr);
+  masm->bts(Operand(page_start, Page::kRSetOffset), pointer_offset);
 }
 
 
diff --git a/src/ia32/regexp-macro-assembler-ia32.cc b/src/ia32/regexp-macro-assembler-ia32.cc
index 2129fd1..a49c1f5 100644
--- a/src/ia32/regexp-macro-assembler-ia32.cc
+++ b/src/ia32/regexp-macro-assembler-ia32.cc
@@ -752,7 +752,7 @@
 
   // Preempt-code
   if (check_preempt_label_.is_linked()) {
-    __ bind(&check_preempt_label_);
+    SafeCallTarget(&check_preempt_label_);
 
     __ push(backtrack_stackpointer());
     __ push(edi);
@@ -772,7 +772,7 @@
 
   // Backtrack stack overflow code.
   if (stack_overflow_label_.is_linked()) {
-    __ bind(&stack_overflow_label_);
+    SafeCallTarget(&stack_overflow_label_);
     // Reached if the backtrack-stack limit has been hit.
 
     Label grow_failed;
@@ -1249,17 +1249,19 @@
 
 
 void RegExpMacroAssemblerIA32::SafeCall(Label* to) {
-  Label return_to;
-  __ push(Immediate::CodeRelativeOffset(&return_to));
-  __ jmp(to);
-  __ bind(&return_to);
+  __ call(to);
 }
 
 
 void RegExpMacroAssemblerIA32::SafeReturn() {
-  __ pop(ebx);
-  __ add(Operand(ebx), Immediate(masm_->CodeObject()));
-  __ jmp(Operand(ebx));
+  __ add(Operand(esp, 0), Immediate(masm_->CodeObject()));
+  __ ret(0);
+}
+
+
+void RegExpMacroAssemblerIA32::SafeCallTarget(Label* name) {
+  __ bind(name);
+  __ sub(Operand(esp, 0), Immediate(masm_->CodeObject()));
 }
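
The SafeCall/SafeReturn/SafeCallTarget trio keeps return addresses GC-safe: a plain call leaves an absolute address into the code object on the stack, which would dangle if a GC moved the code. Rebasing it to a code-relative offset at the call target, and back to an absolute address just before the ret, closes that window. Schematically:

    // SafeCall:        call target            ; pushes absolute return address
    // SafeCallTarget:  sub [esp], CodeObject  ; address -> offset (GC-safe)
    //   ... backtrack code runs; a GC may move the code object ...
    // SafeReturn:      add [esp], CodeObject  ; offset -> (possibly new) address
    //                  ret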
 
 
diff --git a/src/ia32/regexp-macro-assembler-ia32.h b/src/ia32/regexp-macro-assembler-ia32.h
index a06700a..c3d9155 100644
--- a/src/ia32/regexp-macro-assembler-ia32.h
+++ b/src/ia32/regexp-macro-assembler-ia32.h
@@ -227,6 +227,7 @@
   // is GC-safe (i.e., doesn't leave absolute code addresses on the stack)
   inline void SafeCall(Label* to);
   inline void SafeReturn();
+  inline void SafeCallTarget(Label* name);
 
   // Pushes the value of a register on the backtrack stack. Decrements the
   // stack pointer (ecx) by a word size and stores the register's value there.
diff --git a/src/ia32/virtual-frame-ia32.cc b/src/ia32/virtual-frame-ia32.cc
index 0854636..1b8232f 100644
--- a/src/ia32/virtual-frame-ia32.cc
+++ b/src/ia32/virtual-frame-ia32.cc
@@ -965,6 +965,7 @@
 
 
 void VirtualFrame::Drop(int count) {
+  ASSERT(count >= 0);
   ASSERT(height() >= count);
   int num_virtual_elements = (element_count() - 1) - stack_pointer_;
 
diff --git a/src/jump-target.cc b/src/jump-target.cc
index 8168dd0..3782f92 100644
--- a/src/jump-target.cc
+++ b/src/jump-target.cc
@@ -323,25 +323,6 @@
 }
 
 
-void BreakTarget::Jump() {
-  ASSERT(cgen()->has_valid_frame());
-
-  // Drop leftover statement state from the frame before merging.
-  cgen()->frame()->ForgetElements(cgen()->frame()->height() - expected_height_);
-  DoJump();
-}
-
-
-void BreakTarget::Jump(Result* arg) {
-  ASSERT(cgen()->has_valid_frame());
-
-  // Drop leftover statement state from the frame before merging.
-  cgen()->frame()->ForgetElements(cgen()->frame()->height() - expected_height_);
-  cgen()->frame()->Push(arg);
-  DoJump();
-}
-
-
 void BreakTarget::Branch(Condition cc, Hint hint) {
   ASSERT(cgen()->has_valid_frame());
 
@@ -362,48 +343,6 @@
 }
 
 
-void BreakTarget::Bind() {
-#ifdef DEBUG
-  // All the forward-reaching frames should have been adjusted at the
-  // jumps to this target.
-  for (int i = 0; i < reaching_frames_.length(); i++) {
-    ASSERT(reaching_frames_[i] == NULL ||
-           reaching_frames_[i]->height() == expected_height_);
-  }
-#endif
-  // Drop leftover statement state from the frame before merging, even
-  // on the fall through.  This is so we can bind the return target
-  // with state on the frame.
-  if (cgen()->has_valid_frame()) {
-    int count = cgen()->frame()->height() - expected_height_;
-    cgen()->frame()->ForgetElements(count);
-  }
-  DoBind();
-}
-
-
-void BreakTarget::Bind(Result* arg) {
-#ifdef DEBUG
-  // All the forward-reaching frames should have been adjusted at the
-  // jumps to this target.
-  for (int i = 0; i < reaching_frames_.length(); i++) {
-    ASSERT(reaching_frames_[i] == NULL ||
-           reaching_frames_[i]->height() == expected_height_ + 1);
-  }
-#endif
-  // Drop leftover statement state from the frame before merging, even
-  // on the fall through.  This is so we can bind the return target
-  // with state on the frame.
-  if (cgen()->has_valid_frame()) {
-    int count = cgen()->frame()->height() - expected_height_;
-    cgen()->frame()->ForgetElements(count);
-    cgen()->frame()->Push(arg);
-  }
-  DoBind();
-  *arg = cgen()->frame()->Pop();
-}
-
-
 // -------------------------------------------------------------------------
 // ShadowTarget implementation.
 
diff --git a/src/log.cc b/src/log.cc
index 44e5c32..5680820 100644
--- a/src/log.cc
+++ b/src/log.cc
@@ -1151,8 +1151,8 @@
               break;
             case 't': {
               // %t expands to the current time in milliseconds.
-              uint32_t time = static_cast<uint32_t>(OS::TimeCurrentMillis());
-              stream.Add("%u", time);
+              double time = OS::TimeCurrentMillis();
+              stream.Add("%.0f", FmtElm(time));
               break;
             }
             case '%':
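
The removed cast was the bug behind the %t expansion: OS::TimeCurrentMillis() returns milliseconds since the epoch as a double, which stopped fitting in 32 bits long ago. Rough magnitudes:

    #include <stdio.h>

    int main() {
      double now_ms = 1.25e12;  // roughly mid-2009 in ms since the epoch
      // UINT32_MAX is 4294967295 (~4.3e9), so a uint32_t cast overflows
      // (formally undefined for an out-of-range double in C++).  Printing
      // the double with "%.0f" keeps the full value instead.
      printf("%.0f\n", now_ms);
      return 0;
    }
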
diff --git a/src/objects.h b/src/objects.h
index 78e6ef6..03f0f3d 100644
--- a/src/objects.h
+++ b/src/objects.h
@@ -4038,8 +4038,8 @@
   // Layout description
 #if V8_HOST_ARCH_64_BIT
   // Optimizations expect buffer to be located at same offset as a ConsString's
-  // first substring. In 64 bit mode we have room for the size before the
-  // buffer.
+  // first substring. In 64 bit mode we have room for the start offset before
+  // the buffer.
   static const int kStartOffset = String::kSize;
   static const int kBufferOffset = kStartOffset + kIntSize;
   static const int kSize = kBufferOffset + kPointerSize;
diff --git a/src/platform-macos.cc b/src/platform-macos.cc
index 445f588..c081064 100644
--- a/src/platform-macos.cc
+++ b/src/platform-macos.cc
@@ -515,35 +515,31 @@
         thread_state_flavor_t flavor = x86_THREAD_STATE64;
         x86_thread_state64_t state;
         mach_msg_type_number_t count = x86_THREAD_STATE64_COUNT;
+#if __DARWIN_UNIX03
+#define REGISTER_FIELD(name) __r ## name
+#else
+#define REGISTER_FIELD(name) r ## name
+#endif  // __DARWIN_UNIX03
 #elif V8_HOST_ARCH_IA32
         thread_state_flavor_t flavor = i386_THREAD_STATE;
         i386_thread_state_t state;
         mach_msg_type_number_t count = i386_THREAD_STATE_COUNT;
+#if __DARWIN_UNIX03
+#define REGISTER_FIELD(name) __e ## name
+#else
+#define REGISTER_FIELD(name) e ## name
+#endif  // __DARWIN_UNIX03
 #else
 #error Unsupported Mac OS X host architecture.
-#endif  // V8_TARGET_ARCH_IA32
+#endif  // V8_HOST_ARCH
+
         if (thread_get_state(profiled_thread_,
                              flavor,
                              reinterpret_cast<natural_t*>(&state),
                              &count) == KERN_SUCCESS) {
-#if V8_HOST_ARCH_X64
-          UNIMPLEMENTED();
-          sample.pc = 0;
-          sample.sp = 0;
-          sample.fp = 0;
-#elif V8_HOST_ARCH_IA32
-#if __DARWIN_UNIX03
-          sample.pc = state.__eip;
-          sample.sp = state.__esp;
-          sample.fp = state.__ebp;
-#else  // !__DARWIN_UNIX03
-          sample.pc = state.eip;
-          sample.sp = state.esp;
-          sample.fp = state.ebp;
-#endif  // __DARWIN_UNIX03
-#else
-#error Unsupported Mac OS X host architecture.
-#endif  // V8_HOST_ARCH_IA32
+          sample.pc = state.REGISTER_FIELD(ip);
+          sample.sp = state.REGISTER_FIELD(sp);
+          sample.fp = state.REGISTER_FIELD(bp);
           sampler_->SampleStack(&sample);
         }
         thread_resume(profiled_thread_);
@@ -560,6 +556,8 @@
   }
 };
 
+#undef REGISTER_FIELD
+
 
 // Entry point for sampler thread.
 static void* SamplerEntry(void* arg) {
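
REGISTER_FIELD collapses the per-architecture, per-ABI register naming (__eip vs eip, __rip vs rip) into a single token-pasting macro, so the sampling code reads the same on both hosts. A toy expansion check:

    // Toy stand-in showing the expansion under __DARWIN_UNIX03 on ia32.
    #define REGISTER_FIELD(name) __e ## name
    struct ToyState { unsigned __eip, __esp, __ebp; };
    unsigned PcOf(const ToyState& s) {
      return s.REGISTER_FIELD(ip);  // preprocesses to s.__eip
    }
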
diff --git a/src/spaces-inl.h b/src/spaces-inl.h
index 0b4315c..da72497 100644
--- a/src/spaces-inl.h
+++ b/src/spaces-inl.h
@@ -127,20 +127,19 @@
 
   if (rset_address >= page->RSetEnd()) {
     // We have a large object page, and the remembered set address is actually
-    // past the end of the object.  The address of the remembered set in this
-    // case is the extra remembered set start address at the address of the
-    // end of the object:
+    // past the end of the object.
+
+    // The first part of the remembered set is still located at the start of
+    // the page, but anything after kRSetEndOffset must be relocated to after
+    // the large object, i.e. after
     //   (page->ObjectAreaStart() + object size)
-    // plus the offset of the computed remembered set address from the start
-    // of the object:
-    //   (rset_address - page->ObjectAreaStart()).
-    // Ie, we can just add the object size.
-    // In the X64 architecture, the remembered set ends before the object start,
-    // so we need to add an additional offset, from rset end to object start
+    // We do that by adding the difference between the normal RSet's end and
+    // the object's end.
     ASSERT(HeapObject::FromAddress(address)->IsFixedArray());
-    rset_address += kObjectStartOffset - kRSetEndOffset +
+    int fixedarray_length =
         FixedArray::SizeFor(Memory::int_at(page->ObjectAreaStart()
                                            + Array::kLengthOffset));
+    rset_address += kObjectStartOffset - kRSetEndOffset + fixedarray_length;
   }
   return rset_address;
 }
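
In other words, the bit index computed for the address is kept and only the base is shifted, from the normal RSet at the page start to the extra RSet behind the array:

    // rset_address' = rset_address + (object_end - normal_rset_end), where
    //   object_end      = page + kObjectStartOffset + FixedArray::SizeFor(len)
    //   normal_rset_end = page + kRSetEndOffset
    // so the tail of the bitmap is addressed right after the large object.
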
diff --git a/src/spaces.cc b/src/spaces.cc
index 4f8119f..6283bba 100644
--- a/src/spaces.cc
+++ b/src/spaces.cc
@@ -963,13 +963,13 @@
 }
 
 
-bool NewSpace::Double() {
-  ASSERT(capacity_ <= maximum_capacity_ / 2);
+bool NewSpace::Grow() {
+  ASSERT(capacity_ < maximum_capacity_);
   // TODO(1240712): Failure to double the from space can result in
   // semispaces of different sizes.  In the event of that failure, the
   // to space doubling should be rolled back before returning false.
-  if (!to_space_.Double() || !from_space_.Double()) return false;
-  capacity_ *= 2;
+  if (!to_space_.Grow() || !from_space_.Grow()) return false;
+  capacity_ = to_space_.Capacity() + from_space_.Capacity();
   allocation_info_.limit = to_space_.high();
   ASSERT_SEMISPACE_ALLOCATION_INFO(allocation_info_, to_space_);
   return true;
@@ -1074,11 +1074,16 @@
 }
 
 
-bool SemiSpace::Double() {
-  if (!MemoryAllocator::CommitBlock(high(), capacity_, executable())) {
+bool SemiSpace::Grow() {
+  // Commit 50% extra space but only up to maximum capacity.
+  int extra = capacity_ / 2;
+  if (capacity_ + extra > maximum_capacity_) {
+    extra = maximum_capacity_ - capacity_;
+  }
+  if (!MemoryAllocator::CommitBlock(high(), extra, executable())) {
     return false;
   }
-  capacity_ *= 2;
+  capacity_ += extra;
   return true;
 }
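
Growing by 50% commits memory more gradually than doubling, and the clamp guarantees the last step lands exactly on the maximum. Assuming the non-Android sizes from the heap.cc hunk above (512K initial, 4M semispace_size_ as the maximum), the committed capacity steps through:

    // Standalone sketch of the clamped 50% growth step.
    int FinalCapacity() {
      int capacity = 512 * 1024;                     // initial semispace size
      const int maximum_capacity = 4 * 1024 * 1024;  // assumed maximum
      while (capacity < maximum_capacity) {
        int extra = capacity / 2;
        if (capacity + extra > maximum_capacity) {
          extra = maximum_capacity - capacity;
        }
        capacity += extra;  // 768K, 1152K, 1728K, 2592K, 3888K, then 4096K
      }
      return capacity;  // exactly maximum_capacity
    }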
 
diff --git a/src/spaces.h b/src/spaces.h
index 57e7c1f..4df3388 100644
--- a/src/spaces.h
+++ b/src/spaces.h
@@ -997,11 +997,11 @@
   // True if the space has been set up but not torn down.
   bool HasBeenSetup() { return start_ != NULL; }
 
-  // Double the size of the semispace by committing extra virtual memory.
+  // Grow the size of the semispace by committing extra virtual memory.
   // Assumes that the caller has checked that the semispace has not reached
   // its maximum capacity (and thus there is space available in the reserved
   // address range to grow).
-  bool Double();
+  bool Grow();
 
   // Returns the start address of the space.
   Address low() { return start_; }
@@ -1040,6 +1040,9 @@
   virtual void Verify();
 #endif
 
+  // Returns the current capacity of the semispace.
+  int Capacity() { return capacity_; }
+
  private:
   // The current and maximum capacity of the space.
   int capacity_;
@@ -1131,9 +1134,9 @@
   // Flip the pair of spaces.
   void Flip();
 
-  // Doubles the capacity of the semispaces.  Assumes that they are not at
+  // Grow the capacity of the semispaces.  Assumes that they are not at
   // their maximum capacity.  Returns a flag indicating success or failure.
-  bool Double();
+  bool Grow();
 
   // True if the address or object lies in the address range of either
   // semispace (not necessarily below the allocation pointer).
diff --git a/src/third_party/valgrind/valgrind.h b/src/third_party/valgrind/valgrind.h
new file mode 100644
index 0000000..47f369b
--- /dev/null
+++ b/src/third_party/valgrind/valgrind.h
@@ -0,0 +1,3924 @@
+/* -*- c -*-
+   ----------------------------------------------------------------
+
+   Notice that the following BSD-style license applies to this one
+   file (valgrind.h) only.  The rest of Valgrind is licensed under the
+   terms of the GNU General Public License, version 2, unless
+   otherwise indicated.  See the COPYING file in the source
+   distribution for details.
+
+   ----------------------------------------------------------------
+
+   This file is part of Valgrind, a dynamic binary instrumentation
+   framework.
+
+   Copyright (C) 2000-2007 Julian Seward.  All rights reserved.
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   1. Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+
+   2. The origin of this software must not be misrepresented; you must 
+      not claim that you wrote the original software.  If you use this 
+      software in a product, an acknowledgment in the product 
+      documentation would be appreciated but is not required.
+
+   3. Altered source versions must be plainly marked as such, and must
+      not be misrepresented as being the original software.
+
+   4. The name of the author may not be used to endorse or promote 
+      products derived from this software without specific prior written 
+      permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
+   OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+   WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+   DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+   DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+   GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+   WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   ----------------------------------------------------------------
+
+   Notice that the above BSD-style license applies to this one file
+   (valgrind.h) only.  The entire rest of Valgrind is licensed under
+   the terms of the GNU General Public License, version 2.  See the
+   COPYING file in the source distribution for details.
+
+   ---------------------------------------------------------------- 
+*/
+
+
+/* This file is for inclusion into client (your!) code.
+
+   You can use these macros to manipulate and query Valgrind's 
+   execution inside your own programs.
+
+   The resulting executables will still run without Valgrind, just a
+   little bit more slowly than they otherwise would, but otherwise
+   unchanged.  When not running on valgrind, each client request
+   consumes very few (eg. 7) instructions, so the resulting performance
+   loss is negligible unless you plan to execute client requests
+   millions of times per second.  Nevertheless, if that is still a
+   problem, you can compile with the NVALGRIND symbol defined (gcc
+   -DNVALGRIND) so that client requests are not even compiled in.  */
+
+#ifndef __VALGRIND_H
+#define __VALGRIND_H
+
+#include <stdarg.h>
+
+/* Nb: this file might be included in a file compiled with -ansi.  So
+   we can't use C++ style "//" comments nor the "asm" keyword (instead
+   use "__asm__"). */
+
+/* Derive some tags indicating what the target platform is.  Note
+   that in this file we're using the compiler's CPP symbols for
+   identifying architectures, which are different to the ones we use
+   within the rest of Valgrind.  Note, __powerpc__ is active for both
+   32 and 64-bit PPC, whereas __powerpc64__ is only active for the
+   latter (on Linux, that is). */
+#undef PLAT_x86_linux
+#undef PLAT_amd64_linux
+#undef PLAT_ppc32_linux
+#undef PLAT_ppc64_linux
+#undef PLAT_ppc32_aix5
+#undef PLAT_ppc64_aix5
+
+#if !defined(_AIX) && defined(__i386__)
+#  define PLAT_x86_linux 1
+#elif !defined(_AIX) && defined(__x86_64__)
+#  define PLAT_amd64_linux 1
+#elif !defined(_AIX) && defined(__powerpc__) && !defined(__powerpc64__)
+#  define PLAT_ppc32_linux 1
+#elif !defined(_AIX) && defined(__powerpc__) && defined(__powerpc64__)
+#  define PLAT_ppc64_linux 1
+#elif defined(_AIX) && defined(__64BIT__)
+#  define PLAT_ppc64_aix5 1
+#elif defined(_AIX) && !defined(__64BIT__)
+#  define PLAT_ppc32_aix5 1
+#endif
+
+
+/* If we're not compiling for our target platform, don't generate
+   any inline asms.  */
+#if !defined(PLAT_x86_linux) && !defined(PLAT_amd64_linux) \
+    && !defined(PLAT_ppc32_linux) && !defined(PLAT_ppc64_linux) \
+    && !defined(PLAT_ppc32_aix5) && !defined(PLAT_ppc64_aix5)
+#  if !defined(NVALGRIND)
+#    define NVALGRIND 1
+#  endif
+#endif
+
+
+/* ------------------------------------------------------------------ */
+/* ARCHITECTURE SPECIFICS for SPECIAL INSTRUCTIONS.  There is nothing */
+/* in here of use to end-users -- skip to the next section.           */
+/* ------------------------------------------------------------------ */
+
+#if defined(NVALGRIND)
+
+/* Define NVALGRIND to completely remove the Valgrind magic sequence
+   from the compiled code (analogous to NDEBUG's effects on
+   assert()) */
+#define VALGRIND_DO_CLIENT_REQUEST(                               \
+        _zzq_rlval, _zzq_default, _zzq_request,                   \
+        _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5)    \
+   {                                                              \
+      (_zzq_rlval) = (_zzq_default);                              \
+   }
+
+#else  /* ! NVALGRIND */
+
+/* The following defines the magic code sequences which the JITter
+   spots and handles magically.  Don't look too closely at them as
+   they will rot your brain.
+
+   The assembly code sequences for all architectures is in this one
+   file.  This is because this file must be stand-alone, and we don't
+   want to have multiple files.
+
+   For VALGRIND_DO_CLIENT_REQUEST, we must ensure that the default
+   value gets put in the return slot, so that everything works when
+   this is executed not under Valgrind.  Args are passed in a memory
+   block, and so there's no intrinsic limit to the number that could
+   be passed, but it's currently five.
+   
+   The macro args are: 
+      _zzq_rlval    result lvalue
+      _zzq_default  default value (result returned when running on real CPU)
+      _zzq_request  request code
+      _zzq_arg1..5  request params
+
+   The other two macros are used to support function wrapping, and are
+   a lot simpler.  VALGRIND_GET_NR_CONTEXT returns the value of the
+   guest's NRADDR pseudo-register and whatever other information is
+   needed to safely run the call original from the wrapper: on
+   ppc64-linux, the R2 value at the divert point is also needed.  This
+   information is abstracted into a user-visible type, OrigFn.
+
+   VALGRIND_CALL_NOREDIR_* behaves the same as the following on the
+   guest, but guarantees that the branch instruction will not be
+   redirected: x86: call *%eax, amd64: call *%rax, ppc32/ppc64:
+   branch-and-link-to-r11.  VALGRIND_CALL_NOREDIR is just text, not a
+   complete inline asm, since it needs to be combined with more magic
+   inline asm stuff to be useful.
+*/
+
+/* ------------------------- x86-linux ------------------------- */
+
+#if defined(PLAT_x86_linux)
+
+typedef
+   struct { 
+      unsigned int nraddr; /* where's the code? */
+   }
+   OrigFn;
+
+#define __SPECIAL_INSTRUCTION_PREAMBLE                            \
+                     "roll $3,  %%edi ; roll $13, %%edi\n\t"      \
+                     "roll $29, %%edi ; roll $19, %%edi\n\t"
+
+#define VALGRIND_DO_CLIENT_REQUEST(                               \
+        _zzq_rlval, _zzq_default, _zzq_request,                   \
+        _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5)    \
+  { volatile unsigned int _zzq_args[6];                           \
+    volatile unsigned int _zzq_result;                            \
+    _zzq_args[0] = (unsigned int)(_zzq_request);                  \
+    _zzq_args[1] = (unsigned int)(_zzq_arg1);                     \
+    _zzq_args[2] = (unsigned int)(_zzq_arg2);                     \
+    _zzq_args[3] = (unsigned int)(_zzq_arg3);                     \
+    _zzq_args[4] = (unsigned int)(_zzq_arg4);                     \
+    _zzq_args[5] = (unsigned int)(_zzq_arg5);                     \
+    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE               \
+                     /* %EDX = client_request ( %EAX ) */         \
+                     "xchgl %%ebx,%%ebx"                          \
+                     : "=d" (_zzq_result)                         \
+                     : "a" (&_zzq_args[0]), "0" (_zzq_default)    \
+                     : "cc", "memory"                             \
+                    );                                            \
+    _zzq_rlval = _zzq_result;                                     \
+  }
+
+#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval)                       \
+  { volatile OrigFn* _zzq_orig = &(_zzq_rlval);                   \
+    volatile unsigned int __addr;                                 \
+    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE               \
+                     /* %EAX = guest_NRADDR */                    \
+                     "xchgl %%ecx,%%ecx"                          \
+                     : "=a" (__addr)                              \
+                     :                                            \
+                     : "cc", "memory"                             \
+                    );                                            \
+    _zzq_orig->nraddr = __addr;                                   \
+  }
+
+#define VALGRIND_CALL_NOREDIR_EAX                                 \
+                     __SPECIAL_INSTRUCTION_PREAMBLE               \
+                     /* call-noredir *%EAX */                     \
+                     "xchgl %%edx,%%edx\n\t"
+#endif /* PLAT_x86_linux */
+
+/* ------------------------ amd64-linux ------------------------ */
+
+#if defined(PLAT_amd64_linux)
+
+typedef
+   struct { 
+      unsigned long long int nraddr; /* where's the code? */
+   }
+   OrigFn;
+
+#define __SPECIAL_INSTRUCTION_PREAMBLE                            \
+                     "rolq $3,  %%rdi ; rolq $13, %%rdi\n\t"      \
+                     "rolq $61, %%rdi ; rolq $51, %%rdi\n\t"
+
+#define VALGRIND_DO_CLIENT_REQUEST(                               \
+        _zzq_rlval, _zzq_default, _zzq_request,                   \
+        _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5)    \
+  { volatile unsigned long long int _zzq_args[6];                 \
+    volatile unsigned long long int _zzq_result;                  \
+    _zzq_args[0] = (unsigned long long int)(_zzq_request);        \
+    _zzq_args[1] = (unsigned long long int)(_zzq_arg1);           \
+    _zzq_args[2] = (unsigned long long int)(_zzq_arg2);           \
+    _zzq_args[3] = (unsigned long long int)(_zzq_arg3);           \
+    _zzq_args[4] = (unsigned long long int)(_zzq_arg4);           \
+    _zzq_args[5] = (unsigned long long int)(_zzq_arg5);           \
+    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE               \
+                     /* %RDX = client_request ( %RAX ) */         \
+                     "xchgq %%rbx,%%rbx"                          \
+                     : "=d" (_zzq_result)                         \
+                     : "a" (&_zzq_args[0]), "0" (_zzq_default)    \
+                     : "cc", "memory"                             \
+                    );                                            \
+    _zzq_rlval = _zzq_result;                                     \
+  }
+
+#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval)                       \
+  { volatile OrigFn* _zzq_orig = &(_zzq_rlval);                   \
+    volatile unsigned long long int __addr;                       \
+    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE               \
+                     /* %RAX = guest_NRADDR */                    \
+                     "xchgq %%rcx,%%rcx"                          \
+                     : "=a" (__addr)                              \
+                     :                                            \
+                     : "cc", "memory"                             \
+                    );                                            \
+    _zzq_orig->nraddr = __addr;                                   \
+  }
+
+#define VALGRIND_CALL_NOREDIR_RAX                                 \
+                     __SPECIAL_INSTRUCTION_PREAMBLE               \
+                     /* call-noredir *%RAX */                     \
+                     "xchgq %%rdx,%%rdx\n\t"
+#endif /* PLAT_amd64_linux */
+
+/* ------------------------ ppc32-linux ------------------------ */
+
+#if defined(PLAT_ppc32_linux)
+
+typedef
+   struct { 
+      unsigned int nraddr; /* where's the code? */
+   }
+   OrigFn;
+
+#define __SPECIAL_INSTRUCTION_PREAMBLE                            \
+                     "rlwinm 0,0,3,0,0  ; rlwinm 0,0,13,0,0\n\t"  \
+                     "rlwinm 0,0,29,0,0 ; rlwinm 0,0,19,0,0\n\t"
+
+#define VALGRIND_DO_CLIENT_REQUEST(                               \
+        _zzq_rlval, _zzq_default, _zzq_request,                   \
+        _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5)    \
+                                                                  \
+  {          unsigned int  _zzq_args[6];                          \
+             unsigned int  _zzq_result;                           \
+             unsigned int* _zzq_ptr;                              \
+    _zzq_args[0] = (unsigned int)(_zzq_request);                  \
+    _zzq_args[1] = (unsigned int)(_zzq_arg1);                     \
+    _zzq_args[2] = (unsigned int)(_zzq_arg2);                     \
+    _zzq_args[3] = (unsigned int)(_zzq_arg3);                     \
+    _zzq_args[4] = (unsigned int)(_zzq_arg4);                     \
+    _zzq_args[5] = (unsigned int)(_zzq_arg5);                     \
+    _zzq_ptr = _zzq_args;                                         \
+    __asm__ volatile("mr 3,%1\n\t" /*default*/                    \
+                     "mr 4,%2\n\t" /*ptr*/                        \
+                     __SPECIAL_INSTRUCTION_PREAMBLE               \
+                     /* %R3 = client_request ( %R4 ) */           \
+                     "or 1,1,1\n\t"                               \
+                     "mr %0,3"     /*result*/                     \
+                     : "=b" (_zzq_result)                         \
+                     : "b" (_zzq_default), "b" (_zzq_ptr)         \
+                     : "cc", "memory", "r3", "r4");               \
+    _zzq_rlval = _zzq_result;                                     \
+  }
+
+#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval)                       \
+  { volatile OrigFn* _zzq_orig = &(_zzq_rlval);                   \
+    unsigned int __addr;                                          \
+    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE               \
+                     /* %R3 = guest_NRADDR */                     \
+                     "or 2,2,2\n\t"                               \
+                     "mr %0,3"                                    \
+                     : "=b" (__addr)                              \
+                     :                                            \
+                     : "cc", "memory", "r3"                       \
+                    );                                            \
+    _zzq_orig->nraddr = __addr;                                   \
+  }
+
+#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                   \
+                     __SPECIAL_INSTRUCTION_PREAMBLE               \
+                     /* branch-and-link-to-noredir *%R11 */       \
+                     "or 3,3,3\n\t"
+#endif /* PLAT_ppc32_linux */
+
+/* ------------------------ ppc64-linux ------------------------ */
+
+#if defined(PLAT_ppc64_linux)
+
+typedef
+   struct { 
+      unsigned long long int nraddr; /* where's the code? */
+      unsigned long long int r2;  /* what tocptr do we need? */
+   }
+   OrigFn;
+
+#define __SPECIAL_INSTRUCTION_PREAMBLE                            \
+                     "rotldi 0,0,3  ; rotldi 0,0,13\n\t"          \
+                     "rotldi 0,0,61 ; rotldi 0,0,51\n\t"
+
+#define VALGRIND_DO_CLIENT_REQUEST(                               \
+        _zzq_rlval, _zzq_default, _zzq_request,                   \
+        _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5)    \
+                                                                  \
+  {          unsigned long long int  _zzq_args[6];                \
+    register unsigned long long int  _zzq_result __asm__("r3");   \
+    register unsigned long long int* _zzq_ptr __asm__("r4");      \
+    _zzq_args[0] = (unsigned long long int)(_zzq_request);        \
+    _zzq_args[1] = (unsigned long long int)(_zzq_arg1);           \
+    _zzq_args[2] = (unsigned long long int)(_zzq_arg2);           \
+    _zzq_args[3] = (unsigned long long int)(_zzq_arg3);           \
+    _zzq_args[4] = (unsigned long long int)(_zzq_arg4);           \
+    _zzq_args[5] = (unsigned long long int)(_zzq_arg5);           \
+    _zzq_ptr = _zzq_args;                                         \
+    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE               \
+                     /* %R3 = client_request ( %R4 ) */           \
+                     "or 1,1,1"                                   \
+                     : "=r" (_zzq_result)                         \
+                     : "0" (_zzq_default), "r" (_zzq_ptr)         \
+                     : "cc", "memory");                           \
+    _zzq_rlval = _zzq_result;                                     \
+  }
+
+#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval)                       \
+  { volatile OrigFn* _zzq_orig = &(_zzq_rlval);                   \
+    register unsigned long long int __addr __asm__("r3");         \
+    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE               \
+                     /* %R3 = guest_NRADDR */                     \
+                     "or 2,2,2"                                   \
+                     : "=r" (__addr)                              \
+                     :                                            \
+                     : "cc", "memory"                             \
+                    );                                            \
+    _zzq_orig->nraddr = __addr;                                   \
+    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE               \
+                     /* %R3 = guest_NRADDR_GPR2 */                \
+                     "or 4,4,4"                                   \
+                     : "=r" (__addr)                              \
+                     :                                            \
+                     : "cc", "memory"                             \
+                    );                                            \
+    _zzq_orig->r2 = __addr;                                       \
+  }
+
+#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                   \
+                     __SPECIAL_INSTRUCTION_PREAMBLE               \
+                     /* branch-and-link-to-noredir *%R11 */       \
+                     "or 3,3,3\n\t"
+
+#endif /* PLAT_ppc64_linux */
+
+/* ------------------------ ppc32-aix5 ------------------------- */
+
+#if defined(PLAT_ppc32_aix5)
+
+typedef
+   struct { 
+      unsigned int nraddr; /* where's the code? */
+      unsigned int r2;  /* what tocptr do we need? */
+   }
+   OrigFn;
+
+#define __SPECIAL_INSTRUCTION_PREAMBLE                            \
+                     "rlwinm 0,0,3,0,0  ; rlwinm 0,0,13,0,0\n\t"  \
+                     "rlwinm 0,0,29,0,0 ; rlwinm 0,0,19,0,0\n\t"
+
+#define VALGRIND_DO_CLIENT_REQUEST(                               \
+        _zzq_rlval, _zzq_default, _zzq_request,                   \
+        _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5)    \
+                                                                  \
+  {          unsigned int  _zzq_args[7];                          \
+    register unsigned int  _zzq_result;                           \
+    register unsigned int* _zzq_ptr;                              \
+    _zzq_args[0] = (unsigned int)(_zzq_request);                  \
+    _zzq_args[1] = (unsigned int)(_zzq_arg1);                     \
+    _zzq_args[2] = (unsigned int)(_zzq_arg2);                     \
+    _zzq_args[3] = (unsigned int)(_zzq_arg3);                     \
+    _zzq_args[4] = (unsigned int)(_zzq_arg4);                     \
+    _zzq_args[5] = (unsigned int)(_zzq_arg5);                     \
+    _zzq_args[6] = (unsigned int)(_zzq_default);                  \
+    _zzq_ptr = _zzq_args;                                         \
+    __asm__ volatile("mr 4,%1\n\t"                                \
+                     "lwz 3, 24(4)\n\t"                           \
+                     __SPECIAL_INSTRUCTION_PREAMBLE               \
+                     /* %R3 = client_request ( %R4 ) */           \
+                     "or 1,1,1\n\t"                               \
+                     "mr %0,3"                                    \
+                     : "=b" (_zzq_result)                         \
+                     : "b" (_zzq_ptr)                             \
+                     : "r3", "r4", "cc", "memory");               \
+    _zzq_rlval = _zzq_result;                                     \
+  }
+
+#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval)                       \
+  { volatile OrigFn* _zzq_orig = &(_zzq_rlval);                   \
+    register unsigned int __addr;                                 \
+    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE               \
+                     /* %R3 = guest_NRADDR */                     \
+                     "or 2,2,2\n\t"                               \
+                     "mr %0,3"                                    \
+                     : "=b" (__addr)                              \
+                     :                                            \
+                     : "r3", "cc", "memory"                       \
+                    );                                            \
+    _zzq_orig->nraddr = __addr;                                   \
+    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE               \
+                     /* %R3 = guest_NRADDR_GPR2 */                \
+                     "or 4,4,4\n\t"                               \
+                     "mr %0,3"                                    \
+                     : "=b" (__addr)                              \
+                     :                                            \
+                     : "r3", "cc", "memory"                       \
+                    );                                            \
+    _zzq_orig->r2 = __addr;                                       \
+  }
+
+#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                   \
+                     __SPECIAL_INSTRUCTION_PREAMBLE               \
+                     /* branch-and-link-to-noredir *%R11 */       \
+                     "or 3,3,3\n\t"
+
+#endif /* PLAT_ppc32_aix5 */
+
+/* ------------------------ ppc64-aix5 ------------------------- */
+
+#if defined(PLAT_ppc64_aix5)
+
+typedef
+   struct { 
+      unsigned long long int nraddr; /* where's the code? */
+      unsigned long long int r2;  /* what tocptr do we need? */
+   }
+   OrigFn;
+
+#define __SPECIAL_INSTRUCTION_PREAMBLE                            \
+                     "rotldi 0,0,3  ; rotldi 0,0,13\n\t"          \
+                     "rotldi 0,0,61 ; rotldi 0,0,51\n\t"
+
+#define VALGRIND_DO_CLIENT_REQUEST(                               \
+        _zzq_rlval, _zzq_default, _zzq_request,                   \
+        _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5)    \
+                                                                  \
+  {          unsigned long long int  _zzq_args[7];                \
+    register unsigned long long int  _zzq_result;                 \
+    register unsigned long long int* _zzq_ptr;                    \
+    _zzq_args[0] = (unsigned long long int)(_zzq_request);        \
+    _zzq_args[1] = (unsigned long long int)(_zzq_arg1);           \
+    _zzq_args[2] = (unsigned long long int)(_zzq_arg2);           \
+    _zzq_args[3] = (unsigned long long int)(_zzq_arg3);           \
+    _zzq_args[4] = (unsigned long long int)(_zzq_arg4);           \
+    _zzq_args[5] = (unsigned long long int)(_zzq_arg5);           \
+    _zzq_args[6] = (unsigned long long int)(_zzq_default);        \
+    _zzq_ptr = _zzq_args;                                         \
+    __asm__ volatile("mr 4,%1\n\t"                                \
+                     "ld 3, 48(4)\n\t"                            \
+                     __SPECIAL_INSTRUCTION_PREAMBLE               \
+                     /* %R3 = client_request ( %R4 ) */           \
+                     "or 1,1,1\n\t"                               \
+                     "mr %0,3"                                    \
+                     : "=b" (_zzq_result)                         \
+                     : "b" (_zzq_ptr)                             \
+                     : "r3", "r4", "cc", "memory");               \
+    _zzq_rlval = _zzq_result;                                     \
+  }
+
+#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval)                       \
+  { volatile OrigFn* _zzq_orig = &(_zzq_rlval);                   \
+    register unsigned long long int __addr;                       \
+    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE               \
+                     /* %R3 = guest_NRADDR */                     \
+                     "or 2,2,2\n\t"                               \
+                     "mr %0,3"                                    \
+                     : "=b" (__addr)                              \
+                     :                                            \
+                     : "r3", "cc", "memory"                       \
+                    );                                            \
+    _zzq_orig->nraddr = __addr;                                   \
+    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE               \
+                     /* %R3 = guest_NRADDR_GPR2 */                \
+                     "or 4,4,4\n\t"                               \
+                     "mr %0,3"                                    \
+                     : "=b" (__addr)                              \
+                     :                                            \
+                     : "r3", "cc", "memory"                       \
+                    );                                            \
+    _zzq_orig->r2 = __addr;                                       \
+  }
+
+#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                   \
+                     __SPECIAL_INSTRUCTION_PREAMBLE               \
+                     /* branch-and-link-to-noredir *%R11 */       \
+                     "or 3,3,3\n\t"
+
+#endif /* PLAT_ppc64_aix5 */
+
+/* Insert assembly code for other platforms here... */
+
+#endif /* NVALGRIND */
+
+
+/* ------------------------------------------------------------------ */
+/* PLATFORM SPECIFICS for FUNCTION WRAPPING.  This is all very        */
+/* ugly.  It's the least-worst tradeoff I can think of.               */
+/* ------------------------------------------------------------------ */
+
+/* This section defines magic (a.k.a. appalling-hack) macros for
+   making guaranteed-no-redirection calls, so as to get from function
+   wrappers to the functions they are wrapping.  The whole point is to
+   construct standard call sequences, but to do the call itself with a
+   special no-redirect call pseudo-instruction that the JIT
+   understands and handles specially.  This section is long and
+   repetitious, and I can't see a way to make it shorter.
+
+   The naming scheme is as follows:
+
+      CALL_FN_{W,v}_{v,W,WW,WWW,WWWW,5W,6W,7W,etc}
+
+   'W' stands for "word" and 'v' for "void".  Hence there are
+   different macros for calling functions of arity 0, 1, 2, 3, 4,
+   etc., and, for each, the possibility of returning a word-typed
+   result or no result.
+*/
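+
+/* For example, CALL_FN_W_WW(lval, fn, a1, a2) calls an arity-2
+   function and puts its word-sized result in lval, while
+   CALL_FN_v_W(fn, a1) calls an arity-1 function and discards the
+   result. */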
+
+/* Use these to write the name of your wrapper.  NOTE: duplicates
+   VG_WRAP_FUNCTION_Z{U,Z} in pub_tool_redir.h. */
+
+#define I_WRAP_SONAME_FNNAME_ZU(soname,fnname)                    \
+   _vgwZU_##soname##_##fnname
+
+#define I_WRAP_SONAME_FNNAME_ZZ(soname,fnname)                    \
+   _vgwZZ_##soname##_##fnname
+
+/* Use this macro from within a wrapper function to collect the
+   context (address and possibly other info) of the original function.
+   Once you have that you can then use it in one of the CALL_FN_
+   macros.  The type of the argument _lval is OrigFn. */
+#define VALGRIND_GET_ORIG_FN(_lval)  VALGRIND_GET_NR_CONTEXT(_lval)
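+
+/* A minimal usage sketch (illustrative only: the soname "libfoo.so.0",
+   the function "foo" and its signature are hypothetical).
+   VALGRIND_GET_ORIG_FN captures the original function; CALL_FN_W_W
+   then calls it without being redirected back to this wrapper:
+
+   int I_WRAP_SONAME_FNNAME_ZU(libfooZdsoZd0, foo) ( int x )
+   {
+      int    r;
+      OrigFn fn;
+      VALGRIND_GET_ORIG_FN(fn);
+      CALL_FN_W_W(r, fn, x);
+      return r;
+   }
+
+   Here "libfooZdsoZd0" is the Z-encoded form of "libfoo.so.0" ('Zd'
+   encodes '.'); the ZU variant expects a Z-encoded soname and an
+   unencoded function name. */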
+
+/* Derivatives of the main macros below, for calling functions
+   returning void. */
+
+#define CALL_FN_v_v(fnptr)                                        \
+   do { volatile unsigned long _junk;                             \
+        CALL_FN_W_v(_junk,fnptr); } while (0)
+
+#define CALL_FN_v_W(fnptr, arg1)                                  \
+   do { volatile unsigned long _junk;                             \
+        CALL_FN_W_W(_junk,fnptr,arg1); } while (0)
+
+#define CALL_FN_v_WW(fnptr, arg1,arg2)                            \
+   do { volatile unsigned long _junk;                             \
+        CALL_FN_W_WW(_junk,fnptr,arg1,arg2); } while (0)
+
+#define CALL_FN_v_WWW(fnptr, arg1,arg2,arg3)                      \
+   do { volatile unsigned long _junk;                             \
+        CALL_FN_W_WWW(_junk,fnptr,arg1,arg2,arg3); } while (0)
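+
+/* So, for instance, CALL_FN_v_W(fnptr, x) simply expands to a
+   CALL_FN_W_W whose result lands in a local _junk variable and is
+   ignored; only the word-returning variants need per-platform asm. */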
+
+/* ------------------------- x86-linux ------------------------- */
+
+#if defined(PLAT_x86_linux)
+
+/* These regs are trashed by the hidden call.  No need to mention eax,
+   as gcc can already see that; mentioning it also causes gcc to bomb. */
+#define __CALLER_SAVED_REGS /*"eax"*/ "ecx", "edx"
+
+/* These CALL_FN_ macros assume that on x86-linux, sizeof(unsigned
+   long) == 4. */
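+
+/* Sketch of the mechanism, assuming the standard cdecl convention:
+   %eax arrives holding &_argvec[0].  Each macro pushes the args right
+   to left, loads the target address (_argvec[0]) into %eax, performs
+   the no-redirect call, and then pops the args again with an addl to
+   %esp; the result comes back in %eax, as for any cdecl call.
+   CALL_FN_W_WW, for example, pushes arg2 then arg1 and pops 8 bytes
+   afterwards ("addl $8, %%esp"). */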
+
+#define CALL_FN_W_v(lval, orig)                                   \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[1];                          \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      __asm__ volatile(                                           \
+         "movl (%%eax), %%eax\n\t"  /* target->%eax */            \
+         VALGRIND_CALL_NOREDIR_EAX                                \
+         : /*out*/   "=a" (_res)                                  \
+         : /*in*/    "a" (&_argvec[0])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_W(lval, orig, arg1)                             \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[2];                          \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      __asm__ volatile(                                           \
+         "pushl 4(%%eax)\n\t"                                     \
+         "movl (%%eax), %%eax\n\t"  /* target->%eax */            \
+         VALGRIND_CALL_NOREDIR_EAX                                \
+         "addl $4, %%esp\n"                                       \
+         : /*out*/   "=a" (_res)                                  \
+         : /*in*/    "a" (&_argvec[0])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_WW(lval, orig, arg1,arg2)                       \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3];                          \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      _argvec[2] = (unsigned long)(arg2);                         \
+      __asm__ volatile(                                           \
+         "pushl 8(%%eax)\n\t"                                     \
+         "pushl 4(%%eax)\n\t"                                     \
+         "movl (%%eax), %%eax\n\t"  /* target->%eax */            \
+         VALGRIND_CALL_NOREDIR_EAX                                \
+         "addl $8, %%esp\n"                                       \
+         : /*out*/   "=a" (_res)                                  \
+         : /*in*/    "a" (&_argvec[0])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3)                 \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[4];                          \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      _argvec[2] = (unsigned long)(arg2);                         \
+      _argvec[3] = (unsigned long)(arg3);                         \
+      __asm__ volatile(                                           \
+         "pushl 12(%%eax)\n\t"                                    \
+         "pushl 8(%%eax)\n\t"                                     \
+         "pushl 4(%%eax)\n\t"                                     \
+         "movl (%%eax), %%eax\n\t"  /* target->%eax */            \
+         VALGRIND_CALL_NOREDIR_EAX                                \
+         "addl $12, %%esp\n"                                      \
+         : /*out*/   "=a" (_res)                                  \
+         : /*in*/    "a" (&_argvec[0])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4)           \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[5];                          \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      _argvec[2] = (unsigned long)(arg2);                         \
+      _argvec[3] = (unsigned long)(arg3);                         \
+      _argvec[4] = (unsigned long)(arg4);                         \
+      __asm__ volatile(                                           \
+         "pushl 16(%%eax)\n\t"                                    \
+         "pushl 12(%%eax)\n\t"                                    \
+         "pushl 8(%%eax)\n\t"                                     \
+         "pushl 4(%%eax)\n\t"                                     \
+         "movl (%%eax), %%eax\n\t"  /* target->%eax */            \
+         VALGRIND_CALL_NOREDIR_EAX                                \
+         "addl $16, %%esp\n"                                      \
+         : /*out*/   "=a" (_res)                                  \
+         : /*in*/    "a" (&_argvec[0])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5)        \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[6];                          \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      _argvec[2] = (unsigned long)(arg2);                         \
+      _argvec[3] = (unsigned long)(arg3);                         \
+      _argvec[4] = (unsigned long)(arg4);                         \
+      _argvec[5] = (unsigned long)(arg5);                         \
+      __asm__ volatile(                                           \
+         "pushl 20(%%eax)\n\t"                                    \
+         "pushl 16(%%eax)\n\t"                                    \
+         "pushl 12(%%eax)\n\t"                                    \
+         "pushl 8(%%eax)\n\t"                                     \
+         "pushl 4(%%eax)\n\t"                                     \
+         "movl (%%eax), %%eax\n\t"  /* target->%eax */            \
+         VALGRIND_CALL_NOREDIR_EAX                                \
+         "addl $20, %%esp\n"                                      \
+         : /*out*/   "=a" (_res)                                  \
+         : /*in*/    "a" (&_argvec[0])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6)   \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[7];                          \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      _argvec[2] = (unsigned long)(arg2);                         \
+      _argvec[3] = (unsigned long)(arg3);                         \
+      _argvec[4] = (unsigned long)(arg4);                         \
+      _argvec[5] = (unsigned long)(arg5);                         \
+      _argvec[6] = (unsigned long)(arg6);                         \
+      __asm__ volatile(                                           \
+         "pushl 24(%%eax)\n\t"                                    \
+         "pushl 20(%%eax)\n\t"                                    \
+         "pushl 16(%%eax)\n\t"                                    \
+         "pushl 12(%%eax)\n\t"                                    \
+         "pushl 8(%%eax)\n\t"                                     \
+         "pushl 4(%%eax)\n\t"                                     \
+         "movl (%%eax), %%eax\n\t"  /* target->%eax */            \
+         VALGRIND_CALL_NOREDIR_EAX                                \
+         "addl $24, %%esp\n"                                      \
+         : /*out*/   "=a" (_res)                                  \
+         : /*in*/    "a" (&_argvec[0])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
+                                 arg7)                            \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[8];                          \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      _argvec[2] = (unsigned long)(arg2);                         \
+      _argvec[3] = (unsigned long)(arg3);                         \
+      _argvec[4] = (unsigned long)(arg4);                         \
+      _argvec[5] = (unsigned long)(arg5);                         \
+      _argvec[6] = (unsigned long)(arg6);                         \
+      _argvec[7] = (unsigned long)(arg7);                         \
+      __asm__ volatile(                                           \
+         "pushl 28(%%eax)\n\t"                                    \
+         "pushl 24(%%eax)\n\t"                                    \
+         "pushl 20(%%eax)\n\t"                                    \
+         "pushl 16(%%eax)\n\t"                                    \
+         "pushl 12(%%eax)\n\t"                                    \
+         "pushl 8(%%eax)\n\t"                                     \
+         "pushl 4(%%eax)\n\t"                                     \
+         "movl (%%eax), %%eax\n\t"  /* target->%eax */            \
+         VALGRIND_CALL_NOREDIR_EAX                                \
+         "addl $28, %%esp\n"                                      \
+         : /*out*/   "=a" (_res)                                  \
+         : /*in*/    "a" (&_argvec[0])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
+                                 arg7,arg8)                       \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[9];                          \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      _argvec[2] = (unsigned long)(arg2);                         \
+      _argvec[3] = (unsigned long)(arg3);                         \
+      _argvec[4] = (unsigned long)(arg4);                         \
+      _argvec[5] = (unsigned long)(arg5);                         \
+      _argvec[6] = (unsigned long)(arg6);                         \
+      _argvec[7] = (unsigned long)(arg7);                         \
+      _argvec[8] = (unsigned long)(arg8);                         \
+      __asm__ volatile(                                           \
+         "pushl 32(%%eax)\n\t"                                    \
+         "pushl 28(%%eax)\n\t"                                    \
+         "pushl 24(%%eax)\n\t"                                    \
+         "pushl 20(%%eax)\n\t"                                    \
+         "pushl 16(%%eax)\n\t"                                    \
+         "pushl 12(%%eax)\n\t"                                    \
+         "pushl 8(%%eax)\n\t"                                     \
+         "pushl 4(%%eax)\n\t"                                     \
+         "movl (%%eax), %%eax\n\t"  /* target->%eax */            \
+         VALGRIND_CALL_NOREDIR_EAX                                \
+         "addl $32, %%esp\n"                                      \
+         : /*out*/   "=a" (_res)                                  \
+         : /*in*/    "a" (&_argvec[0])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
+                                 arg7,arg8,arg9)                  \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[10];                         \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      _argvec[2] = (unsigned long)(arg2);                         \
+      _argvec[3] = (unsigned long)(arg3);                         \
+      _argvec[4] = (unsigned long)(arg4);                         \
+      _argvec[5] = (unsigned long)(arg5);                         \
+      _argvec[6] = (unsigned long)(arg6);                         \
+      _argvec[7] = (unsigned long)(arg7);                         \
+      _argvec[8] = (unsigned long)(arg8);                         \
+      _argvec[9] = (unsigned long)(arg9);                         \
+      __asm__ volatile(                                           \
+         "pushl 36(%%eax)\n\t"                                    \
+         "pushl 32(%%eax)\n\t"                                    \
+         "pushl 28(%%eax)\n\t"                                    \
+         "pushl 24(%%eax)\n\t"                                    \
+         "pushl 20(%%eax)\n\t"                                    \
+         "pushl 16(%%eax)\n\t"                                    \
+         "pushl 12(%%eax)\n\t"                                    \
+         "pushl 8(%%eax)\n\t"                                     \
+         "pushl 4(%%eax)\n\t"                                     \
+         "movl (%%eax), %%eax\n\t"  /* target->%eax */            \
+         VALGRIND_CALL_NOREDIR_EAX                                \
+         "addl $36, %%esp\n"                                      \
+         : /*out*/   "=a" (_res)                                  \
+         : /*in*/    "a" (&_argvec[0])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
+                                  arg7,arg8,arg9,arg10)           \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[11];                         \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      _argvec[2] = (unsigned long)(arg2);                         \
+      _argvec[3] = (unsigned long)(arg3);                         \
+      _argvec[4] = (unsigned long)(arg4);                         \
+      _argvec[5] = (unsigned long)(arg5);                         \
+      _argvec[6] = (unsigned long)(arg6);                         \
+      _argvec[7] = (unsigned long)(arg7);                         \
+      _argvec[8] = (unsigned long)(arg8);                         \
+      _argvec[9] = (unsigned long)(arg9);                         \
+      _argvec[10] = (unsigned long)(arg10);                       \
+      __asm__ volatile(                                           \
+         "pushl 40(%%eax)\n\t"                                    \
+         "pushl 36(%%eax)\n\t"                                    \
+         "pushl 32(%%eax)\n\t"                                    \
+         "pushl 28(%%eax)\n\t"                                    \
+         "pushl 24(%%eax)\n\t"                                    \
+         "pushl 20(%%eax)\n\t"                                    \
+         "pushl 16(%%eax)\n\t"                                    \
+         "pushl 12(%%eax)\n\t"                                    \
+         "pushl 8(%%eax)\n\t"                                     \
+         "pushl 4(%%eax)\n\t"                                     \
+         "movl (%%eax), %%eax\n\t"  /* target->%eax */            \
+         VALGRIND_CALL_NOREDIR_EAX                                \
+         "addl $40, %%esp\n"                                      \
+         : /*out*/   "=a" (_res)                                  \
+         : /*in*/    "a" (&_argvec[0])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,       \
+                                  arg6,arg7,arg8,arg9,arg10,      \
+                                  arg11)                          \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[12];                         \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      _argvec[2] = (unsigned long)(arg2);                         \
+      _argvec[3] = (unsigned long)(arg3);                         \
+      _argvec[4] = (unsigned long)(arg4);                         \
+      _argvec[5] = (unsigned long)(arg5);                         \
+      _argvec[6] = (unsigned long)(arg6);                         \
+      _argvec[7] = (unsigned long)(arg7);                         \
+      _argvec[8] = (unsigned long)(arg8);                         \
+      _argvec[9] = (unsigned long)(arg9);                         \
+      _argvec[10] = (unsigned long)(arg10);                       \
+      _argvec[11] = (unsigned long)(arg11);                       \
+      __asm__ volatile(                                           \
+         "pushl 44(%%eax)\n\t"                                    \
+         "pushl 40(%%eax)\n\t"                                    \
+         "pushl 36(%%eax)\n\t"                                    \
+         "pushl 32(%%eax)\n\t"                                    \
+         "pushl 28(%%eax)\n\t"                                    \
+         "pushl 24(%%eax)\n\t"                                    \
+         "pushl 20(%%eax)\n\t"                                    \
+         "pushl 16(%%eax)\n\t"                                    \
+         "pushl 12(%%eax)\n\t"                                    \
+         "pushl 8(%%eax)\n\t"                                     \
+         "pushl 4(%%eax)\n\t"                                     \
+         "movl (%%eax), %%eax\n\t"  /* target->%eax */            \
+         VALGRIND_CALL_NOREDIR_EAX                                \
+         "addl $44, %%esp\n"                                      \
+         : /*out*/   "=a" (_res)                                  \
+         : /*in*/    "a" (&_argvec[0])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,       \
+                                  arg6,arg7,arg8,arg9,arg10,      \
+                                  arg11,arg12)                    \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[13];                         \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      _argvec[2] = (unsigned long)(arg2);                         \
+      _argvec[3] = (unsigned long)(arg3);                         \
+      _argvec[4] = (unsigned long)(arg4);                         \
+      _argvec[5] = (unsigned long)(arg5);                         \
+      _argvec[6] = (unsigned long)(arg6);                         \
+      _argvec[7] = (unsigned long)(arg7);                         \
+      _argvec[8] = (unsigned long)(arg8);                         \
+      _argvec[9] = (unsigned long)(arg9);                         \
+      _argvec[10] = (unsigned long)(arg10);                       \
+      _argvec[11] = (unsigned long)(arg11);                       \
+      _argvec[12] = (unsigned long)(arg12);                       \
+      __asm__ volatile(                                           \
+         "pushl 48(%%eax)\n\t"                                    \
+         "pushl 44(%%eax)\n\t"                                    \
+         "pushl 40(%%eax)\n\t"                                    \
+         "pushl 36(%%eax)\n\t"                                    \
+         "pushl 32(%%eax)\n\t"                                    \
+         "pushl 28(%%eax)\n\t"                                    \
+         "pushl 24(%%eax)\n\t"                                    \
+         "pushl 20(%%eax)\n\t"                                    \
+         "pushl 16(%%eax)\n\t"                                    \
+         "pushl 12(%%eax)\n\t"                                    \
+         "pushl 8(%%eax)\n\t"                                     \
+         "pushl 4(%%eax)\n\t"                                     \
+         "movl (%%eax), %%eax\n\t"  /* target->%eax */            \
+         VALGRIND_CALL_NOREDIR_EAX                                \
+         "addl $48, %%esp\n"                                      \
+         : /*out*/   "=a" (_res)                                  \
+         : /*in*/    "a" (&_argvec[0])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#endif /* PLAT_x86_linux */
+
+/* ------------------------ amd64-linux ------------------------ */
+
+#if defined(PLAT_amd64_linux)
+
+/* ARGREGS: rdi rsi rdx rcx r8 r9 (the rest on stack in R-to-L order) */
+
+/* These regs are trashed by the hidden call. */
+#define __CALLER_SAVED_REGS /*"rax",*/ "rcx", "rdx", "rsi",       \
+                            "rdi", "r8", "r9", "r10", "r11"
+
+/* These CALL_FN_ macros assume that on amd64-linux, sizeof(unsigned
+   long) == 8. */
+
+/* NB 9 Sept 07.  There is a nasty kludge here in all these CALL_FN_
+   macros.  In order not to trash the stack redzone, we need to drop
+   %rsp by 128 before the hidden call, and restore afterwards.  The
+   nastiness is that it is only by luck that the stack still appears
+   to be unwindable during the hidden call - since then the behaviour
+   of any routine using this macro does not match what the CFI data
+   says.  Sigh.
+
+   Why is this important?  Imagine that a wrapper has a
+   stack-allocated local, and passes a pointer to it to the hidden
+   call.  Because gcc does not know about the hidden call, it may
+   allocate that local in the redzone.  Unfortunately the hidden call
+   may then trash it before the wrapper comes to use it.  So we must
+   step clear of the redzone, for the duration of the hidden call, to
+   make it safe.
+
+   Probably the same problem afflicts the other redzone-style ABIs too
+   (ppc64-linux, ppc32-aix5, ppc64-aix5); but for those, the stack is
+   self-describing (none of this CFI nonsense), so at least messing
+   with the stack pointer doesn't risk making the stack
+   non-unwindable. */
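+
+/* To illustrate the hazard (hypothetical wrapper; the soname and
+   function name are made up):
+
+   int I_WRAP_SONAME_FNNAME_ZU(libfooZdsoZd0, bar) ( void )
+   {
+      int    local;
+      int    r;
+      OrigFn fn;
+      VALGRIND_GET_ORIG_FN(fn);
+      CALL_FN_W_W(r, fn, (unsigned long)&local);
+      return r + local;
+   }
+
+   gcc may place 'local' in the redzone below %rsp.  The hidden call
+   pushes a return address (and possibly stacked args) below %rsp,
+   which would overwrite 'local' while it is still live -- hence the
+   "subq $128,%%rsp" / "addq $128,%%rsp" pair around every hidden
+   call below. */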
+
+#define CALL_FN_W_v(lval, orig)                                   \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[1];                          \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      __asm__ volatile(                                           \
+         "subq $128,%%rsp\n\t"                                    \
+         "movq (%%rax), %%rax\n\t"  /* target->%rax */            \
+         VALGRIND_CALL_NOREDIR_RAX                                \
+         "addq $128,%%rsp\n\t"                                    \
+         : /*out*/   "=a" (_res)                                  \
+         : /*in*/    "a" (&_argvec[0])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_W(lval, orig, arg1)                             \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[2];                          \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      __asm__ volatile(                                           \
+         "subq $128,%%rsp\n\t"                                    \
+         "movq 8(%%rax), %%rdi\n\t"                               \
+         "movq (%%rax), %%rax\n\t"  /* target->%rax */            \
+         VALGRIND_CALL_NOREDIR_RAX                                \
+         "addq $128,%%rsp\n\t"                                    \
+         : /*out*/   "=a" (_res)                                  \
+         : /*in*/    "a" (&_argvec[0])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_WW(lval, orig, arg1,arg2)                       \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3];                          \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      _argvec[2] = (unsigned long)(arg2);                         \
+      __asm__ volatile(                                           \
+         "subq $128,%%rsp\n\t"                                    \
+         "movq 16(%%rax), %%rsi\n\t"                              \
+         "movq 8(%%rax), %%rdi\n\t"                               \
+         "movq (%%rax), %%rax\n\t"  /* target->%rax */            \
+         VALGRIND_CALL_NOREDIR_RAX                                \
+         "addq $128,%%rsp\n\t"                                    \
+         : /*out*/   "=a" (_res)                                  \
+         : /*in*/    "a" (&_argvec[0])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3)                 \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[4];                          \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      _argvec[2] = (unsigned long)(arg2);                         \
+      _argvec[3] = (unsigned long)(arg3);                         \
+      __asm__ volatile(                                           \
+         "subq $128,%%rsp\n\t"                                    \
+         "movq 24(%%rax), %%rdx\n\t"                              \
+         "movq 16(%%rax), %%rsi\n\t"                              \
+         "movq 8(%%rax), %%rdi\n\t"                               \
+         "movq (%%rax), %%rax\n\t"  /* target->%rax */            \
+         VALGRIND_CALL_NOREDIR_RAX                                \
+         "addq $128,%%rsp\n\t"                                    \
+         : /*out*/   "=a" (_res)                                  \
+         : /*in*/    "a" (&_argvec[0])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4)           \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[5];                          \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      _argvec[2] = (unsigned long)(arg2);                         \
+      _argvec[3] = (unsigned long)(arg3);                         \
+      _argvec[4] = (unsigned long)(arg4);                         \
+      __asm__ volatile(                                           \
+         "subq $128,%%rsp\n\t"                                    \
+         "movq 32(%%rax), %%rcx\n\t"                              \
+         "movq 24(%%rax), %%rdx\n\t"                              \
+         "movq 16(%%rax), %%rsi\n\t"                              \
+         "movq 8(%%rax), %%rdi\n\t"                               \
+         "movq (%%rax), %%rax\n\t"  /* target->%rax */            \
+         VALGRIND_CALL_NOREDIR_RAX                                \
+         "addq $128,%%rsp\n\t"                                    \
+         : /*out*/   "=a" (_res)                                  \
+         : /*in*/    "a" (&_argvec[0])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5)        \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[6];                          \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      _argvec[2] = (unsigned long)(arg2);                         \
+      _argvec[3] = (unsigned long)(arg3);                         \
+      _argvec[4] = (unsigned long)(arg4);                         \
+      _argvec[5] = (unsigned long)(arg5);                         \
+      __asm__ volatile(                                           \
+         "subq $128,%%rsp\n\t"                                    \
+         "movq 40(%%rax), %%r8\n\t"                               \
+         "movq 32(%%rax), %%rcx\n\t"                              \
+         "movq 24(%%rax), %%rdx\n\t"                              \
+         "movq 16(%%rax), %%rsi\n\t"                              \
+         "movq 8(%%rax), %%rdi\n\t"                               \
+         "movq (%%rax), %%rax\n\t"  /* target->%rax */            \
+         VALGRIND_CALL_NOREDIR_RAX                                \
+         "addq $128,%%rsp\n\t"                                    \
+         : /*out*/   "=a" (_res)                                  \
+         : /*in*/    "a" (&_argvec[0])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6)   \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[7];                          \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      _argvec[2] = (unsigned long)(arg2);                         \
+      _argvec[3] = (unsigned long)(arg3);                         \
+      _argvec[4] = (unsigned long)(arg4);                         \
+      _argvec[5] = (unsigned long)(arg5);                         \
+      _argvec[6] = (unsigned long)(arg6);                         \
+      __asm__ volatile(                                           \
+         "subq $128,%%rsp\n\t"                                    \
+         "movq 48(%%rax), %%r9\n\t"                               \
+         "movq 40(%%rax), %%r8\n\t"                               \
+         "movq 32(%%rax), %%rcx\n\t"                              \
+         "movq 24(%%rax), %%rdx\n\t"                              \
+         "movq 16(%%rax), %%rsi\n\t"                              \
+         "movq 8(%%rax), %%rdi\n\t"                               \
+         "movq (%%rax), %%rax\n\t"  /* target->%rax */            \
+         "addq $128,%%rsp\n\t"                                    \
+         VALGRIND_CALL_NOREDIR_RAX                                \
+         : /*out*/   "=a" (_res)                                  \
+         : /*in*/    "a" (&_argvec[0])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
+                                 arg7)                            \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[8];                          \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      _argvec[2] = (unsigned long)(arg2);                         \
+      _argvec[3] = (unsigned long)(arg3);                         \
+      _argvec[4] = (unsigned long)(arg4);                         \
+      _argvec[5] = (unsigned long)(arg5);                         \
+      _argvec[6] = (unsigned long)(arg6);                         \
+      _argvec[7] = (unsigned long)(arg7);                         \
+      __asm__ volatile(                                           \
+         "subq $128,%%rsp\n\t"                                    \
+         "pushq 56(%%rax)\n\t"                                    \
+         "movq 48(%%rax), %%r9\n\t"                               \
+         "movq 40(%%rax), %%r8\n\t"                               \
+         "movq 32(%%rax), %%rcx\n\t"                              \
+         "movq 24(%%rax), %%rdx\n\t"                              \
+         "movq 16(%%rax), %%rsi\n\t"                              \
+         "movq 8(%%rax), %%rdi\n\t"                               \
+         "movq (%%rax), %%rax\n\t"  /* target->%rax */            \
+         VALGRIND_CALL_NOREDIR_RAX                                \
+         "addq $8, %%rsp\n"                                       \
+         "addq $128,%%rsp\n\t"                                    \
+         : /*out*/   "=a" (_res)                                  \
+         : /*in*/    "a" (&_argvec[0])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
+                                 arg7,arg8)                       \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[9];                          \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      _argvec[2] = (unsigned long)(arg2);                         \
+      _argvec[3] = (unsigned long)(arg3);                         \
+      _argvec[4] = (unsigned long)(arg4);                         \
+      _argvec[5] = (unsigned long)(arg5);                         \
+      _argvec[6] = (unsigned long)(arg6);                         \
+      _argvec[7] = (unsigned long)(arg7);                         \
+      _argvec[8] = (unsigned long)(arg8);                         \
+      __asm__ volatile(                                           \
+         "subq $128,%%rsp\n\t"                                    \
+         "pushq 64(%%rax)\n\t"                                    \
+         "pushq 56(%%rax)\n\t"                                    \
+         "movq 48(%%rax), %%r9\n\t"                               \
+         "movq 40(%%rax), %%r8\n\t"                               \
+         "movq 32(%%rax), %%rcx\n\t"                              \
+         "movq 24(%%rax), %%rdx\n\t"                              \
+         "movq 16(%%rax), %%rsi\n\t"                              \
+         "movq 8(%%rax), %%rdi\n\t"                               \
+         "movq (%%rax), %%rax\n\t"  /* target->%rax */            \
+         VALGRIND_CALL_NOREDIR_RAX                                \
+         "addq $16, %%rsp\n"                                      \
+         "addq $128,%%rsp\n\t"                                    \
+         : /*out*/   "=a" (_res)                                  \
+         : /*in*/    "a" (&_argvec[0])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
+                                 arg7,arg8,arg9)                  \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[10];                         \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      _argvec[2] = (unsigned long)(arg2);                         \
+      _argvec[3] = (unsigned long)(arg3);                         \
+      _argvec[4] = (unsigned long)(arg4);                         \
+      _argvec[5] = (unsigned long)(arg5);                         \
+      _argvec[6] = (unsigned long)(arg6);                         \
+      _argvec[7] = (unsigned long)(arg7);                         \
+      _argvec[8] = (unsigned long)(arg8);                         \
+      _argvec[9] = (unsigned long)(arg9);                         \
+      __asm__ volatile(                                           \
+         "subq $128,%%rsp\n\t"                                    \
+         "pushq 72(%%rax)\n\t"                                    \
+         "pushq 64(%%rax)\n\t"                                    \
+         "pushq 56(%%rax)\n\t"                                    \
+         "movq 48(%%rax), %%r9\n\t"                               \
+         "movq 40(%%rax), %%r8\n\t"                               \
+         "movq 32(%%rax), %%rcx\n\t"                              \
+         "movq 24(%%rax), %%rdx\n\t"                              \
+         "movq 16(%%rax), %%rsi\n\t"                              \
+         "movq 8(%%rax), %%rdi\n\t"                               \
+         "movq (%%rax), %%rax\n\t"  /* target->%rax */            \
+         VALGRIND_CALL_NOREDIR_RAX                                \
+         "addq $24, %%rsp\n"                                      \
+         "addq $128,%%rsp\n\t"                                    \
+         : /*out*/   "=a" (_res)                                  \
+         : /*in*/    "a" (&_argvec[0])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
+                                  arg7,arg8,arg9,arg10)           \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[11];                         \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      _argvec[2] = (unsigned long)(arg2);                         \
+      _argvec[3] = (unsigned long)(arg3);                         \
+      _argvec[4] = (unsigned long)(arg4);                         \
+      _argvec[5] = (unsigned long)(arg5);                         \
+      _argvec[6] = (unsigned long)(arg6);                         \
+      _argvec[7] = (unsigned long)(arg7);                         \
+      _argvec[8] = (unsigned long)(arg8);                         \
+      _argvec[9] = (unsigned long)(arg9);                         \
+      _argvec[10] = (unsigned long)(arg10);                       \
+      __asm__ volatile(                                           \
+         "subq $128,%%rsp\n\t"                                    \
+         "pushq 80(%%rax)\n\t"                                    \
+         "pushq 72(%%rax)\n\t"                                    \
+         "pushq 64(%%rax)\n\t"                                    \
+         "pushq 56(%%rax)\n\t"                                    \
+         "movq 48(%%rax), %%r9\n\t"                               \
+         "movq 40(%%rax), %%r8\n\t"                               \
+         "movq 32(%%rax), %%rcx\n\t"                              \
+         "movq 24(%%rax), %%rdx\n\t"                              \
+         "movq 16(%%rax), %%rsi\n\t"                              \
+         "movq 8(%%rax), %%rdi\n\t"                               \
+         "movq (%%rax), %%rax\n\t"  /* target->%rax */            \
+         VALGRIND_CALL_NOREDIR_RAX                                \
+         "addq $32, %%rsp\n"                                      \
+         "addq $128,%%rsp\n\t"                                    \
+         : /*out*/   "=a" (_res)                                  \
+         : /*in*/    "a" (&_argvec[0])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
+                                  arg7,arg8,arg9,arg10,arg11)     \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[12];                         \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      _argvec[2] = (unsigned long)(arg2);                         \
+      _argvec[3] = (unsigned long)(arg3);                         \
+      _argvec[4] = (unsigned long)(arg4);                         \
+      _argvec[5] = (unsigned long)(arg5);                         \
+      _argvec[6] = (unsigned long)(arg6);                         \
+      _argvec[7] = (unsigned long)(arg7);                         \
+      _argvec[8] = (unsigned long)(arg8);                         \
+      _argvec[9] = (unsigned long)(arg9);                         \
+      _argvec[10] = (unsigned long)(arg10);                       \
+      _argvec[11] = (unsigned long)(arg11);                       \
+      __asm__ volatile(                                           \
+         "subq $128,%%rsp\n\t"                                    \
+         "pushq 88(%%rax)\n\t"                                    \
+         "pushq 80(%%rax)\n\t"                                    \
+         "pushq 72(%%rax)\n\t"                                    \
+         "pushq 64(%%rax)\n\t"                                    \
+         "pushq 56(%%rax)\n\t"                                    \
+         "movq 48(%%rax), %%r9\n\t"                               \
+         "movq 40(%%rax), %%r8\n\t"                               \
+         "movq 32(%%rax), %%rcx\n\t"                              \
+         "movq 24(%%rax), %%rdx\n\t"                              \
+         "movq 16(%%rax), %%rsi\n\t"                              \
+         "movq 8(%%rax), %%rdi\n\t"                               \
+         "movq (%%rax), %%rax\n\t"  /* target->%rax */            \
+         VALGRIND_CALL_NOREDIR_RAX                                \
+         "addq $40, %%rsp\n"                                      \
+         "addq $128,%%rsp\n\t"                                    \
+         : /*out*/   "=a" (_res)                                  \
+         : /*in*/    "a" (&_argvec[0])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
+                                arg7,arg8,arg9,arg10,arg11,arg12) \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[13];                         \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      _argvec[2] = (unsigned long)(arg2);                         \
+      _argvec[3] = (unsigned long)(arg3);                         \
+      _argvec[4] = (unsigned long)(arg4);                         \
+      _argvec[5] = (unsigned long)(arg5);                         \
+      _argvec[6] = (unsigned long)(arg6);                         \
+      _argvec[7] = (unsigned long)(arg7);                         \
+      _argvec[8] = (unsigned long)(arg8);                         \
+      _argvec[9] = (unsigned long)(arg9);                         \
+      _argvec[10] = (unsigned long)(arg10);                       \
+      _argvec[11] = (unsigned long)(arg11);                       \
+      _argvec[12] = (unsigned long)(arg12);                       \
+      __asm__ volatile(                                           \
+         "subq $128,%%rsp\n\t"                                    \
+         "pushq 96(%%rax)\n\t"                                    \
+         "pushq 88(%%rax)\n\t"                                    \
+         "pushq 80(%%rax)\n\t"                                    \
+         "pushq 72(%%rax)\n\t"                                    \
+         "pushq 64(%%rax)\n\t"                                    \
+         "pushq 56(%%rax)\n\t"                                    \
+         "movq 48(%%rax), %%r9\n\t"                               \
+         "movq 40(%%rax), %%r8\n\t"                               \
+         "movq 32(%%rax), %%rcx\n\t"                              \
+         "movq 24(%%rax), %%rdx\n\t"                              \
+         "movq 16(%%rax), %%rsi\n\t"                              \
+         "movq 8(%%rax), %%rdi\n\t"                               \
+         "movq (%%rax), %%rax\n\t"  /* target->%rax */            \
+         VALGRIND_CALL_NOREDIR_RAX                                \
+         "addq $48, %%rsp\n"                                      \
+         "addq $128,%%rsp\n\t"                                    \
+         : /*out*/   "=a" (_res)                                  \
+         : /*in*/    "a" (&_argvec[0])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
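+
+/* NB: the "subq $128,%%rsp" / "addq $128,%%rsp" pairs above step over
+   the 128-byte red zone which the amd64-linux ABI allows code to use
+   below the stack pointer; because the call is hidden from the
+   compiler, data it keeps in that zone must not be trashed by the
+   pushes of the stack-passed arguments.  The "addq $40,%%rsp" (resp.
+   "addq $48,%%rsp") pops the five (resp. six) pushed argument words
+   once the call has returned. */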
+
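+/* A minimal usage sketch: the CALL_FN_W_* macros are intended to be
+   invoked from inside a function wrapper built with the wrapping
+   macros defined earlier in this header.  The wrapped function "foo"
+   and its soname are purely illustrative:
+
+      int I_WRAP_SONAME_FN_ZU(libcZdsoZa,foo) ( int x )
+      {
+         int    result;
+         OrigFn fn;
+         VALGRIND_GET_ORIG_FN(fn);
+         CALL_FN_W_W(result, fn, x);
+         return result;
+      }
+
+   Here libcZdsoZa is the Z-encoded form of "libc.so.*"; the wrapper
+   obtains the original function in fn and calls it without being
+   re-redirected by Valgrind. */
+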
+#endif /* PLAT_amd64_linux */
+
+/* ------------------------ ppc32-linux ------------------------ */
+
+#if defined(PLAT_ppc32_linux)
+
+/* These test functions are useful for seeing how arguments beyond the
+   eighth end up on the stack:
+
+   extern int f9  ( int,int,int,int,int,int,int,int,int );
+   extern int f10 ( int,int,int,int,int,int,int,int,int,int );
+   extern int f11 ( int,int,int,int,int,int,int,int,int,int,int );
+   extern int f12 ( int,int,int,int,int,int,int,int,int,int,int,int );
+
+   int g9 ( void ) {
+      return f9(11,22,33,44,55,66,77,88,99);
+   }
+   int g10 ( void ) {
+      return f10(11,22,33,44,55,66,77,88,99,110);
+   }
+   int g11 ( void ) {
+      return f11(11,22,33,44,55,66,77,88,99,110,121);
+   }
+   int g12 ( void ) {
+      return f12(11,22,33,44,55,66,77,88,99,110,121,132);
+   }
+*/
+
+/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (remaining args go on the stack) */
+
+/* These regs are trashed by the hidden call. */
+#define __CALLER_SAVED_REGS                                       \
+   "lr", "ctr", "xer",                                            \
+   "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7",        \
+   "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",   \
+   "r11", "r12", "r13"
+
+/* These CALL_FN_ macros assume that on ppc32-linux,
+   sizeof(unsigned long) == 4. */
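+
+/* If desired, that assumption can be checked at compile time with a
+   negative-array-size trick; the typedef name below is arbitrary and
+   only illustrative:
+
+      typedef char vg_ppc32_ulong_is_4_bytes
+                   [sizeof(unsigned long) == 4 ? 1 : -1];
+*/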
+
+#define CALL_FN_W_v(lval, orig)                                   \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[1];                          \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      __asm__ volatile(                                           \
+         "mr 11,%1\n\t"                                           \
+         "lwz 11,0(11)\n\t"  /* target->r11 */                    \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+         "mr %0,3"                                                \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[0])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_W(lval, orig, arg1)                             \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[2];                          \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)arg1;                           \
+      __asm__ volatile(                                           \
+         "mr 11,%1\n\t"                                           \
+         "lwz 3,4(11)\n\t"   /* arg1->r3 */                       \
+         "lwz 11,0(11)\n\t"  /* target->r11 */                    \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+         "mr %0,3"                                                \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[0])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_WW(lval, orig, arg1,arg2)                       \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3];                          \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)arg1;                           \
+      _argvec[2] = (unsigned long)arg2;                           \
+      __asm__ volatile(                                           \
+         "mr 11,%1\n\t"                                           \
+         "lwz 3,4(11)\n\t"   /* arg1->r3 */                       \
+         "lwz 4,8(11)\n\t"                                        \
+         "lwz 11,0(11)\n\t"  /* target->r11 */                    \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+         "mr %0,3"                                                \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[0])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3)                 \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[4];                          \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)arg1;                           \
+      _argvec[2] = (unsigned long)arg2;                           \
+      _argvec[3] = (unsigned long)arg3;                           \
+      __asm__ volatile(                                           \
+         "mr 11,%1\n\t"                                           \
+         "lwz 3,4(11)\n\t"   /* arg1->r3 */                       \
+         "lwz 4,8(11)\n\t"                                        \
+         "lwz 5,12(11)\n\t"                                       \
+         "lwz 11,0(11)\n\t"  /* target->r11 */                    \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+         "mr %0,3"                                                \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[0])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4)           \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[5];                          \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)arg1;                           \
+      _argvec[2] = (unsigned long)arg2;                           \
+      _argvec[3] = (unsigned long)arg3;                           \
+      _argvec[4] = (unsigned long)arg4;                           \
+      __asm__ volatile(                                           \
+         "mr 11,%1\n\t"                                           \
+         "lwz 3,4(11)\n\t"   /* arg1->r3 */                       \
+         "lwz 4,8(11)\n\t"                                        \
+         "lwz 5,12(11)\n\t"                                       \
+         "lwz 6,16(11)\n\t"  /* arg4->r6 */                       \
+         "lwz 11,0(11)\n\t"  /* target->r11 */                    \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+         "mr %0,3"                                                \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[0])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5)        \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[6];                          \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)arg1;                           \
+      _argvec[2] = (unsigned long)arg2;                           \
+      _argvec[3] = (unsigned long)arg3;                           \
+      _argvec[4] = (unsigned long)arg4;                           \
+      _argvec[5] = (unsigned long)arg5;                           \
+      __asm__ volatile(                                           \
+         "mr 11,%1\n\t"                                           \
+         "lwz 3,4(11)\n\t"   /* arg1->r3 */                       \
+         "lwz 4,8(11)\n\t"                                        \
+         "lwz 5,12(11)\n\t"                                       \
+         "lwz 6,16(11)\n\t"  /* arg4->r6 */                       \
+         "lwz 7,20(11)\n\t"                                       \
+         "lwz 11,0(11)\n\t"  /* target->r11 */                    \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+         "mr %0,3"                                                \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[0])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6)   \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[7];                          \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)arg1;                           \
+      _argvec[2] = (unsigned long)arg2;                           \
+      _argvec[3] = (unsigned long)arg3;                           \
+      _argvec[4] = (unsigned long)arg4;                           \
+      _argvec[5] = (unsigned long)arg5;                           \
+      _argvec[6] = (unsigned long)arg6;                           \
+      __asm__ volatile(                                           \
+         "mr 11,%1\n\t"                                           \
+         "lwz 3,4(11)\n\t"   /* arg1->r3 */                       \
+         "lwz 4,8(11)\n\t"                                        \
+         "lwz 5,12(11)\n\t"                                       \
+         "lwz 6,16(11)\n\t"  /* arg4->r6 */                       \
+         "lwz 7,20(11)\n\t"                                       \
+         "lwz 8,24(11)\n\t"                                       \
+         "lwz 11,0(11)\n\t"  /* target->r11 */                    \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+         "mr %0,3"                                                \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[0])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
+                                 arg7)                            \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[8];                          \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)arg1;                           \
+      _argvec[2] = (unsigned long)arg2;                           \
+      _argvec[3] = (unsigned long)arg3;                           \
+      _argvec[4] = (unsigned long)arg4;                           \
+      _argvec[5] = (unsigned long)arg5;                           \
+      _argvec[6] = (unsigned long)arg6;                           \
+      _argvec[7] = (unsigned long)arg7;                           \
+      __asm__ volatile(                                           \
+         "mr 11,%1\n\t"                                           \
+         "lwz 3,4(11)\n\t"   /* arg1->r3 */                       \
+         "lwz 4,8(11)\n\t"                                        \
+         "lwz 5,12(11)\n\t"                                       \
+         "lwz 6,16(11)\n\t"  /* arg4->r6 */                       \
+         "lwz 7,20(11)\n\t"                                       \
+         "lwz 8,24(11)\n\t"                                       \
+         "lwz 9,28(11)\n\t"                                       \
+         "lwz 11,0(11)\n\t"  /* target->r11 */                    \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+         "mr %0,3"                                                \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[0])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
+                                 arg7,arg8)                       \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[9];                          \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)arg1;                           \
+      _argvec[2] = (unsigned long)arg2;                           \
+      _argvec[3] = (unsigned long)arg3;                           \
+      _argvec[4] = (unsigned long)arg4;                           \
+      _argvec[5] = (unsigned long)arg5;                           \
+      _argvec[6] = (unsigned long)arg6;                           \
+      _argvec[7] = (unsigned long)arg7;                           \
+      _argvec[8] = (unsigned long)arg8;                           \
+      __asm__ volatile(                                           \
+         "mr 11,%1\n\t"                                           \
+         "lwz 3,4(11)\n\t"   /* arg1->r3 */                       \
+         "lwz 4,8(11)\n\t"                                        \
+         "lwz 5,12(11)\n\t"                                       \
+         "lwz 6,16(11)\n\t"  /* arg4->r6 */                       \
+         "lwz 7,20(11)\n\t"                                       \
+         "lwz 8,24(11)\n\t"                                       \
+         "lwz 9,28(11)\n\t"                                       \
+         "lwz 10,32(11)\n\t" /* arg8->r10 */                      \
+         "lwz 11,0(11)\n\t"  /* target->r11 */                    \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+         "mr %0,3"                                                \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[0])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
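+/* From CALL_FN_W_9W onwards the first eight args still travel in
+   r3..r10; each arg beyond the eighth is copied through r3 into the
+   freshly extended stack frame (hence the "addi 1,1,-16" and the
+   stores at 8(1) upwards), which is where the ppc32 ABI expects
+   stack-passed arguments to live. */
+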
+#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
+                                 arg7,arg8,arg9)                  \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[10];                         \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)arg1;                           \
+      _argvec[2] = (unsigned long)arg2;                           \
+      _argvec[3] = (unsigned long)arg3;                           \
+      _argvec[4] = (unsigned long)arg4;                           \
+      _argvec[5] = (unsigned long)arg5;                           \
+      _argvec[6] = (unsigned long)arg6;                           \
+      _argvec[7] = (unsigned long)arg7;                           \
+      _argvec[8] = (unsigned long)arg8;                           \
+      _argvec[9] = (unsigned long)arg9;                           \
+      __asm__ volatile(                                           \
+         "mr 11,%1\n\t"                                           \
+         "addi 1,1,-16\n\t"                                       \
+         /* arg9 */                                               \
+         "lwz 3,36(11)\n\t"                                       \
+         "stw 3,8(1)\n\t"                                         \
+         /* args1-8 */                                            \
+         "lwz 3,4(11)\n\t"   /* arg1->r3 */                       \
+         "lwz 4,8(11)\n\t"                                        \
+         "lwz 5,12(11)\n\t"                                       \
+         "lwz 6,16(11)\n\t"  /* arg4->r6 */                       \
+         "lwz 7,20(11)\n\t"                                       \
+         "lwz 8,24(11)\n\t"                                       \
+         "lwz 9,28(11)\n\t"                                       \
+         "lwz 10,32(11)\n\t" /* arg8->r10 */                      \
+         "lwz 11,0(11)\n\t"  /* target->r11 */                    \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+         "addi 1,1,16\n\t"                                        \
+         "mr %0,3"                                                \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[0])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
+                                  arg7,arg8,arg9,arg10)           \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[11];                         \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)arg1;                           \
+      _argvec[2] = (unsigned long)arg2;                           \
+      _argvec[3] = (unsigned long)arg3;                           \
+      _argvec[4] = (unsigned long)arg4;                           \
+      _argvec[5] = (unsigned long)arg5;                           \
+      _argvec[6] = (unsigned long)arg6;                           \
+      _argvec[7] = (unsigned long)arg7;                           \
+      _argvec[8] = (unsigned long)arg8;                           \
+      _argvec[9] = (unsigned long)arg9;                           \
+      _argvec[10] = (unsigned long)arg10;                         \
+      __asm__ volatile(                                           \
+         "mr 11,%1\n\t"                                           \
+         "addi 1,1,-16\n\t"                                       \
+         /* arg10 */                                              \
+         "lwz 3,40(11)\n\t"                                       \
+         "stw 3,12(1)\n\t"                                        \
+         /* arg9 */                                               \
+         "lwz 3,36(11)\n\t"                                       \
+         "stw 3,8(1)\n\t"                                         \
+         /* args1-8 */                                            \
+         "lwz 3,4(11)\n\t"   /* arg1->r3 */                       \
+         "lwz 4,8(11)\n\t"                                        \
+         "lwz 5,12(11)\n\t"                                       \
+         "lwz 6,16(11)\n\t"  /* arg4->r6 */                       \
+         "lwz 7,20(11)\n\t"                                       \
+         "lwz 8,24(11)\n\t"                                       \
+         "lwz 9,28(11)\n\t"                                       \
+         "lwz 10,32(11)\n\t" /* arg8->r10 */                      \
+         "lwz 11,0(11)\n\t"  /* target->r11 */                    \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+         "addi 1,1,16\n\t"                                        \
+         "mr %0,3"                                                \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[0])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
+                                  arg7,arg8,arg9,arg10,arg11)     \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[12];                         \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)arg1;                           \
+      _argvec[2] = (unsigned long)arg2;                           \
+      _argvec[3] = (unsigned long)arg3;                           \
+      _argvec[4] = (unsigned long)arg4;                           \
+      _argvec[5] = (unsigned long)arg5;                           \
+      _argvec[6] = (unsigned long)arg6;                           \
+      _argvec[7] = (unsigned long)arg7;                           \
+      _argvec[8] = (unsigned long)arg8;                           \
+      _argvec[9] = (unsigned long)arg9;                           \
+      _argvec[10] = (unsigned long)arg10;                         \
+      _argvec[11] = (unsigned long)arg11;                         \
+      __asm__ volatile(                                           \
+         "mr 11,%1\n\t"                                           \
+         "addi 1,1,-32\n\t"                                       \
+         /* arg11 */                                              \
+         "lwz 3,44(11)\n\t"                                       \
+         "stw 3,16(1)\n\t"                                        \
+         /* arg10 */                                              \
+         "lwz 3,40(11)\n\t"                                       \
+         "stw 3,12(1)\n\t"                                        \
+         /* arg9 */                                               \
+         "lwz 3,36(11)\n\t"                                       \
+         "stw 3,8(1)\n\t"                                         \
+         /* args1-8 */                                            \
+         "lwz 3,4(11)\n\t"   /* arg1->r3 */                       \
+         "lwz 4,8(11)\n\t"                                        \
+         "lwz 5,12(11)\n\t"                                       \
+         "lwz 6,16(11)\n\t"  /* arg4->r6 */                       \
+         "lwz 7,20(11)\n\t"                                       \
+         "lwz 8,24(11)\n\t"                                       \
+         "lwz 9,28(11)\n\t"                                       \
+         "lwz 10,32(11)\n\t" /* arg8->r10 */                      \
+         "lwz 11,0(11)\n\t"  /* target->r11 */                    \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+         "addi 1,1,32\n\t"                                        \
+         "mr %0,3"                                                \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[0])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
+                                arg7,arg8,arg9,arg10,arg11,arg12) \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[13];                         \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)arg1;                           \
+      _argvec[2] = (unsigned long)arg2;                           \
+      _argvec[3] = (unsigned long)arg3;                           \
+      _argvec[4] = (unsigned long)arg4;                           \
+      _argvec[5] = (unsigned long)arg5;                           \
+      _argvec[6] = (unsigned long)arg6;                           \
+      _argvec[7] = (unsigned long)arg7;                           \
+      _argvec[8] = (unsigned long)arg8;                           \
+      _argvec[9] = (unsigned long)arg9;                           \
+      _argvec[10] = (unsigned long)arg10;                         \
+      _argvec[11] = (unsigned long)arg11;                         \
+      _argvec[12] = (unsigned long)arg12;                         \
+      __asm__ volatile(                                           \
+         "mr 11,%1\n\t"                                           \
+         "addi 1,1,-32\n\t"                                       \
+         /* arg12 */                                              \
+         "lwz 3,48(11)\n\t"                                       \
+         "stw 3,20(1)\n\t"                                        \
+         /* arg11 */                                              \
+         "lwz 3,44(11)\n\t"                                       \
+         "stw 3,16(1)\n\t"                                        \
+         /* arg10 */                                              \
+         "lwz 3,40(11)\n\t"                                       \
+         "stw 3,12(1)\n\t"                                        \
+         /* arg9 */                                               \
+         "lwz 3,36(11)\n\t"                                       \
+         "stw 3,8(1)\n\t"                                         \
+         /* args1-8 */                                            \
+         "lwz 3,4(11)\n\t"   /* arg1->r3 */                       \
+         "lwz 4,8(11)\n\t"                                        \
+         "lwz 5,12(11)\n\t"                                       \
+         "lwz 6,16(11)\n\t"  /* arg4->r6 */                       \
+         "lwz 7,20(11)\n\t"                                       \
+         "lwz 8,24(11)\n\t"                                       \
+         "lwz 9,28(11)\n\t"                                       \
+         "lwz 10,32(11)\n\t" /* arg8->r10 */                      \
+         "lwz 11,0(11)\n\t"  /* target->r11 */                    \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+         "addi 1,1,32\n\t"                                        \
+         "mr %0,3"                                                \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[0])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#endif /* PLAT_ppc32_linux */
+
+/* ------------------------ ppc64-linux ------------------------ */
+
+#if defined(PLAT_ppc64_linux)
+
+/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (remaining args go on the stack) */
+
+/* These regs are trashed by the hidden call. */
+#define __CALLER_SAVED_REGS                                       \
+   "lr", "ctr", "xer",                                            \
+   "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7",        \
+   "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",   \
+   "r11", "r12", "r13"
+
+/* These CALL_FN_ macros assume that on ppc64-linux, sizeof(unsigned
+   long) == 8. */
+
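+/* On ppc64-linux a code address is reached via a function descriptor,
+   so each OrigFn carries both the entry address (nraddr) and the
+   callee's TOC pointer (r2).  The layout used below: _argvec[1] holds
+   the callee's r2, _argvec[2] the entry address, and the args follow
+   from _argvec[3].  The asm is handed &_argvec[2]; it saves the
+   caller's r2 at -16(r11) (that is, into _argvec[0]), installs the
+   callee's TOC from -8(r11), and restores r2 after the call. */
+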
+#define CALL_FN_W_v(lval, orig)                                   \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3+0];                        \
+      volatile unsigned long _res;                                \
+      /* _argvec[0] holds current r2 across the call */           \
+      _argvec[1] = (unsigned long)_orig.r2;                       \
+      _argvec[2] = (unsigned long)_orig.nraddr;                   \
+      __asm__ volatile(                                           \
+         "mr 11,%1\n\t"                                           \
+         "std 2,-16(11)\n\t"  /* save tocptr */                   \
+         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
+         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+         "mr 11,%1\n\t"                                           \
+         "mr %0,3\n\t"                                            \
+         "ld 2,-16(11)" /* restore tocptr */                      \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[2])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_W(lval, orig, arg1)                             \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3+1];                        \
+      volatile unsigned long _res;                                \
+      /* _argvec[0] holds current r2 across the call */           \
+      _argvec[1]   = (unsigned long)_orig.r2;                     \
+      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+      _argvec[2+1] = (unsigned long)arg1;                         \
+      __asm__ volatile(                                           \
+         "mr 11,%1\n\t"                                           \
+         "std 2,-16(11)\n\t"  /* save tocptr */                   \
+         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
+         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
+         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+         "mr 11,%1\n\t"                                           \
+         "mr %0,3\n\t"                                            \
+         "ld 2,-16(11)" /* restore tocptr */                      \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[2])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_WW(lval, orig, arg1,arg2)                       \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3+2];                        \
+      volatile unsigned long _res;                                \
+      /* _argvec[0] holds current r2 across the call */           \
+      _argvec[1]   = (unsigned long)_orig.r2;                     \
+      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+      _argvec[2+1] = (unsigned long)arg1;                         \
+      _argvec[2+2] = (unsigned long)arg2;                         \
+      __asm__ volatile(                                           \
+         "mr 11,%1\n\t"                                           \
+         "std 2,-16(11)\n\t"  /* save tocptr */                   \
+         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
+         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
+         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
+         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+         "mr 11,%1\n\t"                                           \
+         "mr %0,3\n\t"                                            \
+         "ld 2,-16(11)" /* restore tocptr */                      \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[2])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3)                 \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3+3];                        \
+      volatile unsigned long _res;                                \
+      /* _argvec[0] holds current r2 across the call */           \
+      _argvec[1]   = (unsigned long)_orig.r2;                     \
+      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+      _argvec[2+1] = (unsigned long)arg1;                         \
+      _argvec[2+2] = (unsigned long)arg2;                         \
+      _argvec[2+3] = (unsigned long)arg3;                         \
+      __asm__ volatile(                                           \
+         "mr 11,%1\n\t"                                           \
+         "std 2,-16(11)\n\t"  /* save tocptr */                   \
+         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
+         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
+         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
+         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
+         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+         "mr 11,%1\n\t"                                           \
+         "mr %0,3\n\t"                                            \
+         "ld 2,-16(11)" /* restore tocptr */                      \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[2])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4)           \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3+4];                        \
+      volatile unsigned long _res;                                \
+      /* _argvec[0] holds current r2 across the call */           \
+      _argvec[1]   = (unsigned long)_orig.r2;                     \
+      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+      _argvec[2+1] = (unsigned long)arg1;                         \
+      _argvec[2+2] = (unsigned long)arg2;                         \
+      _argvec[2+3] = (unsigned long)arg3;                         \
+      _argvec[2+4] = (unsigned long)arg4;                         \
+      __asm__ volatile(                                           \
+         "mr 11,%1\n\t"                                           \
+         "std 2,-16(11)\n\t"  /* save tocptr */                   \
+         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
+         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
+         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
+         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
+         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
+         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+         "mr 11,%1\n\t"                                           \
+         "mr %0,3\n\t"                                            \
+         "ld 2,-16(11)" /* restore tocptr */                      \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[2])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5)        \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3+5];                        \
+      volatile unsigned long _res;                                \
+      /* _argvec[0] holds current r2 across the call */           \
+      _argvec[1]   = (unsigned long)_orig.r2;                     \
+      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+      _argvec[2+1] = (unsigned long)arg1;                         \
+      _argvec[2+2] = (unsigned long)arg2;                         \
+      _argvec[2+3] = (unsigned long)arg3;                         \
+      _argvec[2+4] = (unsigned long)arg4;                         \
+      _argvec[2+5] = (unsigned long)arg5;                         \
+      __asm__ volatile(                                           \
+         "mr 11,%1\n\t"                                           \
+         "std 2,-16(11)\n\t"  /* save tocptr */                   \
+         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
+         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
+         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
+         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
+         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
+         "ld   7, 40(11)\n\t" /* arg5->r7 */                      \
+         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+         "mr 11,%1\n\t"                                           \
+         "mr %0,3\n\t"                                            \
+         "ld 2,-16(11)" /* restore tocptr */                      \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[2])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6)   \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3+6];                        \
+      volatile unsigned long _res;                                \
+      /* _argvec[0] holds current r2 across the call */           \
+      _argvec[1]   = (unsigned long)_orig.r2;                     \
+      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+      _argvec[2+1] = (unsigned long)arg1;                         \
+      _argvec[2+2] = (unsigned long)arg2;                         \
+      _argvec[2+3] = (unsigned long)arg3;                         \
+      _argvec[2+4] = (unsigned long)arg4;                         \
+      _argvec[2+5] = (unsigned long)arg5;                         \
+      _argvec[2+6] = (unsigned long)arg6;                         \
+      __asm__ volatile(                                           \
+         "mr 11,%1\n\t"                                           \
+         "std 2,-16(11)\n\t"  /* save tocptr */                   \
+         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
+         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
+         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
+         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
+         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
+         "ld   7, 40(11)\n\t" /* arg5->r7 */                      \
+         "ld   8, 48(11)\n\t" /* arg6->r8 */                      \
+         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+         "mr 11,%1\n\t"                                           \
+         "mr %0,3\n\t"                                            \
+         "ld 2,-16(11)" /* restore tocptr */                      \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[2])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
+                                 arg7)                            \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3+7];                        \
+      volatile unsigned long _res;                                \
+      /* _argvec[0] holds current r2 across the call */           \
+      _argvec[1]   = (unsigned long)_orig.r2;                     \
+      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+      _argvec[2+1] = (unsigned long)arg1;                         \
+      _argvec[2+2] = (unsigned long)arg2;                         \
+      _argvec[2+3] = (unsigned long)arg3;                         \
+      _argvec[2+4] = (unsigned long)arg4;                         \
+      _argvec[2+5] = (unsigned long)arg5;                         \
+      _argvec[2+6] = (unsigned long)arg6;                         \
+      _argvec[2+7] = (unsigned long)arg7;                         \
+      __asm__ volatile(                                           \
+         "mr 11,%1\n\t"                                           \
+         "std 2,-16(11)\n\t"  /* save tocptr */                   \
+         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
+         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
+         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
+         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
+         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
+         "ld   7, 40(11)\n\t" /* arg5->r7 */                      \
+         "ld   8, 48(11)\n\t" /* arg6->r8 */                      \
+         "ld   9, 56(11)\n\t" /* arg7->r9 */                      \
+         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+         "mr 11,%1\n\t"                                           \
+         "mr %0,3\n\t"                                            \
+         "ld 2,-16(11)" /* restore tocptr */                      \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[2])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
+                                 arg7,arg8)                       \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3+8];                        \
+      volatile unsigned long _res;                                \
+      /* _argvec[0] holds current r2 across the call */           \
+      _argvec[1]   = (unsigned long)_orig.r2;                     \
+      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+      _argvec[2+1] = (unsigned long)arg1;                         \
+      _argvec[2+2] = (unsigned long)arg2;                         \
+      _argvec[2+3] = (unsigned long)arg3;                         \
+      _argvec[2+4] = (unsigned long)arg4;                         \
+      _argvec[2+5] = (unsigned long)arg5;                         \
+      _argvec[2+6] = (unsigned long)arg6;                         \
+      _argvec[2+7] = (unsigned long)arg7;                         \
+      _argvec[2+8] = (unsigned long)arg8;                         \
+      __asm__ volatile(                                           \
+         "mr 11,%1\n\t"                                           \
+         "std 2,-16(11)\n\t"  /* save tocptr */                   \
+         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
+         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
+         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
+         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
+         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
+         "ld   7, 40(11)\n\t" /* arg5->r7 */                      \
+         "ld   8, 48(11)\n\t" /* arg6->r8 */                      \
+         "ld   9, 56(11)\n\t" /* arg7->r9 */                      \
+         "ld  10, 64(11)\n\t" /* arg8->r10 */                     \
+         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+         "mr 11,%1\n\t"                                           \
+         "mr %0,3\n\t"                                            \
+         "ld 2,-16(11)" /* restore tocptr */                      \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[2])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
+                                 arg7,arg8,arg9)                  \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3+9];                        \
+      volatile unsigned long _res;                                \
+      /* _argvec[0] holds current r2 across the call */           \
+      _argvec[1]   = (unsigned long)_orig.r2;                     \
+      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+      _argvec[2+1] = (unsigned long)arg1;                         \
+      _argvec[2+2] = (unsigned long)arg2;                         \
+      _argvec[2+3] = (unsigned long)arg3;                         \
+      _argvec[2+4] = (unsigned long)arg4;                         \
+      _argvec[2+5] = (unsigned long)arg5;                         \
+      _argvec[2+6] = (unsigned long)arg6;                         \
+      _argvec[2+7] = (unsigned long)arg7;                         \
+      _argvec[2+8] = (unsigned long)arg8;                         \
+      _argvec[2+9] = (unsigned long)arg9;                         \
+      __asm__ volatile(                                           \
+         "mr 11,%1\n\t"                                           \
+         "std 2,-16(11)\n\t"  /* save tocptr */                   \
+         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
+         "addi 1,1,-128\n\t"  /* expand stack frame */            \
+         /* arg9 */                                               \
+         "ld  3,72(11)\n\t"                                       \
+         "std 3,112(1)\n\t"                                       \
+         /* args1-8 */                                            \
+         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
+         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
+         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
+         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
+         "ld   7, 40(11)\n\t" /* arg5->r7 */                      \
+         "ld   8, 48(11)\n\t" /* arg6->r8 */                      \
+         "ld   9, 56(11)\n\t" /* arg7->r9 */                      \
+         "ld  10, 64(11)\n\t" /* arg8->r10 */                     \
+         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+         "mr 11,%1\n\t"                                           \
+         "mr %0,3\n\t"                                            \
+         "ld 2,-16(11)\n\t" /* restore tocptr */                  \
+         "addi 1,1,128"     /* restore frame */                   \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[2])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
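+/* In CALL_FN_W_9W above and the larger variants below, the stores at
+   112(1), 120(1), ... follow the ppc64 ELF ABI stack layout: a
+   48-byte fixed frame header plus an 8*8-byte register-argument save
+   area, so the first stack-passed arg (arg9) lands at offset
+   48 + 64 = 112 in the newly extended frame. */
+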
+#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
+                                  arg7,arg8,arg9,arg10)           \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3+10];                       \
+      volatile unsigned long _res;                                \
+      /* _argvec[0] holds current r2 across the call */           \
+      _argvec[1]   = (unsigned long)_orig.r2;                     \
+      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+      _argvec[2+1] = (unsigned long)arg1;                         \
+      _argvec[2+2] = (unsigned long)arg2;                         \
+      _argvec[2+3] = (unsigned long)arg3;                         \
+      _argvec[2+4] = (unsigned long)arg4;                         \
+      _argvec[2+5] = (unsigned long)arg5;                         \
+      _argvec[2+6] = (unsigned long)arg6;                         \
+      _argvec[2+7] = (unsigned long)arg7;                         \
+      _argvec[2+8] = (unsigned long)arg8;                         \
+      _argvec[2+9] = (unsigned long)arg9;                         \
+      _argvec[2+10] = (unsigned long)arg10;                       \
+      __asm__ volatile(                                           \
+         "mr 11,%1\n\t"                                           \
+         "std 2,-16(11)\n\t"  /* save tocptr */                   \
+         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
+         "addi 1,1,-128\n\t"  /* expand stack frame */            \
+         /* arg10 */                                              \
+         "ld  3,80(11)\n\t"                                       \
+         "std 3,120(1)\n\t"                                       \
+         /* arg9 */                                               \
+         "ld  3,72(11)\n\t"                                       \
+         "std 3,112(1)\n\t"                                       \
+         /* args1-8 */                                            \
+         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
+         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
+         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
+         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
+         "ld   7, 40(11)\n\t" /* arg5->r7 */                      \
+         "ld   8, 48(11)\n\t" /* arg6->r8 */                      \
+         "ld   9, 56(11)\n\t" /* arg7->r9 */                      \
+         "ld  10, 64(11)\n\t" /* arg8->r10 */                     \
+         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+         "mr 11,%1\n\t"                                           \
+         "mr %0,3\n\t"                                            \
+         "ld 2,-16(11)\n\t" /* restore tocptr */                  \
+         "addi 1,1,128"     /* restore frame */                   \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[2])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
+                                  arg7,arg8,arg9,arg10,arg11)     \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3+11];                       \
+      volatile unsigned long _res;                                \
+      /* _argvec[0] holds current r2 across the call */           \
+      _argvec[1]   = (unsigned long)_orig.r2;                     \
+      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+      _argvec[2+1] = (unsigned long)arg1;                         \
+      _argvec[2+2] = (unsigned long)arg2;                         \
+      _argvec[2+3] = (unsigned long)arg3;                         \
+      _argvec[2+4] = (unsigned long)arg4;                         \
+      _argvec[2+5] = (unsigned long)arg5;                         \
+      _argvec[2+6] = (unsigned long)arg6;                         \
+      _argvec[2+7] = (unsigned long)arg7;                         \
+      _argvec[2+8] = (unsigned long)arg8;                         \
+      _argvec[2+9] = (unsigned long)arg9;                         \
+      _argvec[2+10] = (unsigned long)arg10;                       \
+      _argvec[2+11] = (unsigned long)arg11;                       \
+      __asm__ volatile(                                           \
+         "mr 11,%1\n\t"                                           \
+         "std 2,-16(11)\n\t"  /* save tocptr */                   \
+         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
+         "addi 1,1,-144\n\t"  /* expand stack frame */            \
+         /* arg11 */                                              \
+         "ld  3,88(11)\n\t"                                       \
+         "std 3,128(1)\n\t"                                       \
+         /* arg10 */                                              \
+         "ld  3,80(11)\n\t"                                       \
+         "std 3,120(1)\n\t"                                       \
+         /* arg9 */                                               \
+         "ld  3,72(11)\n\t"                                       \
+         "std 3,112(1)\n\t"                                       \
+         /* args1-8 */                                            \
+         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
+         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
+         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
+         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
+         "ld   7, 40(11)\n\t" /* arg5->r7 */                      \
+         "ld   8, 48(11)\n\t" /* arg6->r8 */                      \
+         "ld   9, 56(11)\n\t" /* arg7->r9 */                      \
+         "ld  10, 64(11)\n\t" /* arg8->r10 */                     \
+         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+         "mr 11,%1\n\t"                                           \
+         "mr %0,3\n\t"                                            \
+         "ld 2,-16(11)\n\t" /* restore tocptr */                  \
+         "addi 1,1,144"     /* restore frame */                   \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[2])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
+                                arg7,arg8,arg9,arg10,arg11,arg12) \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3+12];                       \
+      volatile unsigned long _res;                                \
+      /* _argvec[0] holds current r2 across the call */           \
+      _argvec[1]   = (unsigned long)_orig.r2;                     \
+      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+      _argvec[2+1] = (unsigned long)arg1;                         \
+      _argvec[2+2] = (unsigned long)arg2;                         \
+      _argvec[2+3] = (unsigned long)arg3;                         \
+      _argvec[2+4] = (unsigned long)arg4;                         \
+      _argvec[2+5] = (unsigned long)arg5;                         \
+      _argvec[2+6] = (unsigned long)arg6;                         \
+      _argvec[2+7] = (unsigned long)arg7;                         \
+      _argvec[2+8] = (unsigned long)arg8;                         \
+      _argvec[2+9] = (unsigned long)arg9;                         \
+      _argvec[2+10] = (unsigned long)arg10;                       \
+      _argvec[2+11] = (unsigned long)arg11;                       \
+      _argvec[2+12] = (unsigned long)arg12;                       \
+      __asm__ volatile(                                           \
+         "mr 11,%1\n\t"                                           \
+         "std 2,-16(11)\n\t"  /* save tocptr */                   \
+         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
+         "addi 1,1,-144\n\t"  /* expand stack frame */            \
+         /* arg12 */                                              \
+         "ld  3,96(11)\n\t"                                       \
+         "std 3,136(1)\n\t"                                       \
+         /* arg11 */                                              \
+         "ld  3,88(11)\n\t"                                       \
+         "std 3,128(1)\n\t"                                       \
+         /* arg10 */                                              \
+         "ld  3,80(11)\n\t"                                       \
+         "std 3,120(1)\n\t"                                       \
+         /* arg9 */                                               \
+         "ld  3,72(11)\n\t"                                       \
+         "std 3,112(1)\n\t"                                       \
+         /* args1-8 */                                            \
+         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
+         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
+         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
+         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
+         "ld   7, 40(11)\n\t" /* arg5->r7 */                      \
+         "ld   8, 48(11)\n\t" /* arg6->r8 */                      \
+         "ld   9, 56(11)\n\t" /* arg7->r9 */                      \
+         "ld  10, 64(11)\n\t" /* arg8->r10 */                     \
+         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+         "mr 11,%1\n\t"                                           \
+         "mr %0,3\n\t"                                            \
+         "ld 2,-16(11)\n\t" /* restore tocptr */                  \
+         "addi 1,1,144"     /* restore frame */                   \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[2])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#endif /* PLAT_ppc64_linux */
+
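+/* Editor's note -- an illustrative usage sketch, not part of the
+   original header.  The CALL_FN_W_* families in these platform blocks
+   are intended to be called from a function wrapper built with the
+   z-encoding naming macros and VALGRIND_GET_ORIG_FN defined earlier in
+   this file, along the lines of:
+
+      int I_WRAP_SONAME_FN_ZU(libcZdsoZa,foo)(int x)
+      {
+         int    r;
+         OrigFn fn;
+         VALGRIND_GET_ORIG_FN(fn);   // fetch the real foo's entry info
+         CALL_FN_W_W(r, fn, x);      // word result, one word argument
+         return r;
+      }
+
+   which wraps foo in any shared object matching "libc.so*".  The macro
+   expands to the platform-specific call sequence shown above and below. */
+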
+/* ------------------------ ppc32-aix5 ------------------------- */
+
+#if defined(PLAT_ppc32_aix5)
+
+/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */
+
+/* These regs are trashed by the hidden call. */
+#define __CALLER_SAVED_REGS                                       \
+   "lr", "ctr", "xer",                                            \
+   "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7",        \
+   "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",   \
+   "r11", "r12", "r13"
+
+/* Expand the stack frame, copying enough info that unwinding
+   still works.  Trashes r3. */
+
+#define VG_EXPAND_FRAME_BY_trashes_r3(_n_fr)                      \
+         "addi 1,1,-" #_n_fr "\n\t"                               \
+         "lwz  3," #_n_fr "(1)\n\t"                               \
+         "stw  3,0(1)\n\t"
+
+#define VG_CONTRACT_FRAME_BY(_n_fr)                               \
+         "addi 1,1," #_n_fr "\n\t"
+
+/* These CALL_FN_ macros assume that on ppc32-aix5, sizeof(unsigned
+   long) == 4. */
+
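+/* Editor's note: with 4-byte longs, argN sits at offset 4*N from the
+   argvec base in r11, which is why arg1 is fetched from 4(11), arg2
+   from 8(11), and so on in the loads below. */
+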
+#define CALL_FN_W_v(lval, orig)                                   \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3+0];                        \
+      volatile unsigned long _res;                                \
+      /* _argvec[0] holds current r2 across the call */           \
+      _argvec[1] = (unsigned long)_orig.r2;                       \
+      _argvec[2] = (unsigned long)_orig.nraddr;                   \
+      __asm__ volatile(                                           \
+         "mr 11,%1\n\t"                                           \
+         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
+         "stw  2,-8(11)\n\t"  /* save tocptr */                   \
+         "lwz  2,-4(11)\n\t"  /* use nraddr's tocptr */           \
+         "lwz 11, 0(11)\n\t"  /* target->r11 */                   \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+         "mr 11,%1\n\t"                                           \
+         "mr %0,3\n\t"                                            \
+         "lwz 2,-8(11)\n\t" /* restore tocptr */                  \
+         VG_CONTRACT_FRAME_BY(512)                                \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[2])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
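+/* Editor's note: every variant in this block follows the same skeleton:
+   copy the argvec pointer into r11, open a 512-byte guard frame, save
+   the caller's TOC at -8(11) and install the callee's from -4(11), load
+   the target address from 0(11), branch via the no-redirect sequence,
+   then restore the TOC and contract the frame. */
+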
+#define CALL_FN_W_W(lval, orig, arg1)                             \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3+1];                        \
+      volatile unsigned long _res;                                \
+      /* _argvec[0] holds current r2 across the call */           \
+      _argvec[1]   = (unsigned long)_orig.r2;                     \
+      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+      _argvec[2+1] = (unsigned long)arg1;                         \
+      __asm__ volatile(                                           \
+         "mr 11,%1\n\t"                                           \
+         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
+         "stw  2,-8(11)\n\t"  /* save tocptr */                   \
+         "lwz  2,-4(11)\n\t"  /* use nraddr's tocptr */           \
+         "lwz  3, 4(11)\n\t"  /* arg1->r3 */                      \
+         "lwz 11, 0(11)\n\t"  /* target->r11 */                   \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+         "mr 11,%1\n\t"                                           \
+         "mr %0,3\n\t"                                            \
+         "lwz 2,-8(11)\n\t" /* restore tocptr */                  \
+         VG_CONTRACT_FRAME_BY(512)                                \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[2])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_WW(lval, orig, arg1,arg2)                       \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3+2];                        \
+      volatile unsigned long _res;                                \
+      /* _argvec[0] holds current r2 across the call */           \
+      _argvec[1]   = (unsigned long)_orig.r2;                     \
+      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+      _argvec[2+1] = (unsigned long)arg1;                         \
+      _argvec[2+2] = (unsigned long)arg2;                         \
+      __asm__ volatile(                                           \
+         "mr 11,%1\n\t"                                           \
+         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
+         "stw  2,-8(11)\n\t"  /* save tocptr */                   \
+         "lwz  2,-4(11)\n\t"  /* use nraddr's tocptr */           \
+         "lwz  3, 4(11)\n\t"  /* arg1->r3 */                      \
+         "lwz  4, 8(11)\n\t"  /* arg2->r4 */                      \
+         "lwz 11, 0(11)\n\t"  /* target->r11 */                   \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+         "mr 11,%1\n\t"                                           \
+         "mr %0,3\n\t"                                            \
+         "lwz 2,-8(11)\n\t" /* restore tocptr */                  \
+         VG_CONTRACT_FRAME_BY(512)                                \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[2])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3)                 \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3+3];                        \
+      volatile unsigned long _res;                                \
+      /* _argvec[0] holds current r2 across the call */           \
+      _argvec[1]   = (unsigned long)_orig.r2;                     \
+      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+      _argvec[2+1] = (unsigned long)arg1;                         \
+      _argvec[2+2] = (unsigned long)arg2;                         \
+      _argvec[2+3] = (unsigned long)arg3;                         \
+      __asm__ volatile(                                           \
+         "mr 11,%1\n\t"                                           \
+         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
+         "stw  2,-8(11)\n\t"  /* save tocptr */                   \
+         "lwz  2,-4(11)\n\t"  /* use nraddr's tocptr */           \
+         "lwz  3, 4(11)\n\t"  /* arg1->r3 */                      \
+         "lwz  4, 8(11)\n\t"  /* arg2->r4 */                      \
+         "lwz  5, 12(11)\n\t" /* arg3->r5 */                      \
+         "lwz 11, 0(11)\n\t"  /* target->r11 */                   \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+         "mr 11,%1\n\t"                                           \
+         "mr %0,3\n\t"                                            \
+         "lwz 2,-8(11)\n\t" /* restore tocptr */                  \
+         VG_CONTRACT_FRAME_BY(512)                                \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[2])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4)           \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3+4];                        \
+      volatile unsigned long _res;                                \
+      /* _argvec[0] holds current r2 across the call */           \
+      _argvec[1]   = (unsigned long)_orig.r2;                     \
+      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+      _argvec[2+1] = (unsigned long)arg1;                         \
+      _argvec[2+2] = (unsigned long)arg2;                         \
+      _argvec[2+3] = (unsigned long)arg3;                         \
+      _argvec[2+4] = (unsigned long)arg4;                         \
+      __asm__ volatile(                                           \
+         "mr 11,%1\n\t"                                           \
+         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
+         "stw  2,-8(11)\n\t"  /* save tocptr */                   \
+         "lwz  2,-4(11)\n\t"  /* use nraddr's tocptr */           \
+         "lwz  3, 4(11)\n\t"  /* arg1->r3 */                      \
+         "lwz  4, 8(11)\n\t"  /* arg2->r4 */                      \
+         "lwz  5, 12(11)\n\t" /* arg3->r5 */                      \
+         "lwz  6, 16(11)\n\t" /* arg4->r6 */                      \
+         "lwz 11, 0(11)\n\t"  /* target->r11 */                   \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+         "mr 11,%1\n\t"                                           \
+         "mr %0,3\n\t"                                            \
+         "lwz 2,-8(11)\n\t" /* restore tocptr */                  \
+         VG_CONTRACT_FRAME_BY(512)                                \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[2])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5)        \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3+5];                        \
+      volatile unsigned long _res;                                \
+      /* _argvec[0] holds current r2 across the call */           \
+      _argvec[1]   = (unsigned long)_orig.r2;                     \
+      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+      _argvec[2+1] = (unsigned long)arg1;                         \
+      _argvec[2+2] = (unsigned long)arg2;                         \
+      _argvec[2+3] = (unsigned long)arg3;                         \
+      _argvec[2+4] = (unsigned long)arg4;                         \
+      _argvec[2+5] = (unsigned long)arg5;                         \
+      __asm__ volatile(                                           \
+         "mr 11,%1\n\t"                                           \
+         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
+         "stw  2,-8(11)\n\t"  /* save tocptr */                   \
+         "lwz  2,-4(11)\n\t"  /* use nraddr's tocptr */           \
+         "lwz  3, 4(11)\n\t"  /* arg1->r3 */                      \
+         "lwz  4, 8(11)\n\t" /* arg2->r4 */                       \
+         "lwz  5, 12(11)\n\t" /* arg3->r5 */                      \
+         "lwz  6, 16(11)\n\t" /* arg4->r6 */                      \
+         "lwz  7, 20(11)\n\t" /* arg5->r7 */                      \
+         "lwz 11, 0(11)\n\t"  /* target->r11 */                   \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+         "mr 11,%1\n\t"                                           \
+         "mr %0,3\n\t"                                            \
+         "lwz 2,-8(11)\n\t" /* restore tocptr */                  \
+         VG_CONTRACT_FRAME_BY(512)                                \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[2])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6)   \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3+6];                        \
+      volatile unsigned long _res;                                \
+      /* _argvec[0] holds current r2 across the call */           \
+      _argvec[1]   = (unsigned long)_orig.r2;                     \
+      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+      _argvec[2+1] = (unsigned long)arg1;                         \
+      _argvec[2+2] = (unsigned long)arg2;                         \
+      _argvec[2+3] = (unsigned long)arg3;                         \
+      _argvec[2+4] = (unsigned long)arg4;                         \
+      _argvec[2+5] = (unsigned long)arg5;                         \
+      _argvec[2+6] = (unsigned long)arg6;                         \
+      __asm__ volatile(                                           \
+         "mr 11,%1\n\t"                                           \
+         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
+         "stw  2,-8(11)\n\t"  /* save tocptr */                   \
+         "lwz  2,-4(11)\n\t"  /* use nraddr's tocptr */           \
+         "lwz  3, 4(11)\n\t"  /* arg1->r3 */                      \
+         "lwz  4, 8(11)\n\t"  /* arg2->r4 */                      \
+         "lwz  5, 12(11)\n\t" /* arg3->r5 */                      \
+         "lwz  6, 16(11)\n\t" /* arg4->r6 */                      \
+         "lwz  7, 20(11)\n\t" /* arg5->r7 */                      \
+         "lwz  8, 24(11)\n\t" /* arg6->r8 */                      \
+         "lwz 11, 0(11)\n\t"  /* target->r11 */                   \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+         "mr 11,%1\n\t"                                           \
+         "mr %0,3\n\t"                                            \
+         "lwz 2,-8(11)\n\t" /* restore tocptr */                  \
+         VG_CONTRACT_FRAME_BY(512)                                \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[2])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
+                                 arg7)                            \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3+7];                        \
+      volatile unsigned long _res;                                \
+      /* _argvec[0] holds current r2 across the call */           \
+      _argvec[1]   = (unsigned long)_orig.r2;                     \
+      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+      _argvec[2+1] = (unsigned long)arg1;                         \
+      _argvec[2+2] = (unsigned long)arg2;                         \
+      _argvec[2+3] = (unsigned long)arg3;                         \
+      _argvec[2+4] = (unsigned long)arg4;                         \
+      _argvec[2+5] = (unsigned long)arg5;                         \
+      _argvec[2+6] = (unsigned long)arg6;                         \
+      _argvec[2+7] = (unsigned long)arg7;                         \
+      __asm__ volatile(                                           \
+         "mr 11,%1\n\t"                                           \
+         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
+         "stw  2,-8(11)\n\t"  /* save tocptr */                   \
+         "lwz  2,-4(11)\n\t"  /* use nraddr's tocptr */           \
+         "lwz  3, 4(11)\n\t"  /* arg1->r3 */                      \
+         "lwz  4, 8(11)\n\t"  /* arg2->r4 */                      \
+         "lwz  5, 12(11)\n\t" /* arg3->r5 */                      \
+         "lwz  6, 16(11)\n\t" /* arg4->r6 */                      \
+         "lwz  7, 20(11)\n\t" /* arg5->r7 */                      \
+         "lwz  8, 24(11)\n\t" /* arg6->r8 */                      \
+         "lwz  9, 28(11)\n\t" /* arg7->r9 */                      \
+         "lwz 11, 0(11)\n\t"  /* target->r11 */                   \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+         "mr 11,%1\n\t"                                           \
+         "mr %0,3\n\t"                                            \
+         "lwz 2,-8(11)\n\t" /* restore tocptr */                  \
+         VG_CONTRACT_FRAME_BY(512)                                \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[2])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
+                                 arg7,arg8)                       \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3+8];                        \
+      volatile unsigned long _res;                                \
+      /* _argvec[0] holds current r2 across the call */           \
+      _argvec[1]   = (unsigned long)_orig.r2;                     \
+      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+      _argvec[2+1] = (unsigned long)arg1;                         \
+      _argvec[2+2] = (unsigned long)arg2;                         \
+      _argvec[2+3] = (unsigned long)arg3;                         \
+      _argvec[2+4] = (unsigned long)arg4;                         \
+      _argvec[2+5] = (unsigned long)arg5;                         \
+      _argvec[2+6] = (unsigned long)arg6;                         \
+      _argvec[2+7] = (unsigned long)arg7;                         \
+      _argvec[2+8] = (unsigned long)arg8;                         \
+      __asm__ volatile(                                           \
+         "mr 11,%1\n\t"                                           \
+         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
+         "stw  2,-8(11)\n\t"  /* save tocptr */                   \
+         "lwz  2,-4(11)\n\t"  /* use nraddr's tocptr */           \
+         "lwz  3, 4(11)\n\t"  /* arg1->r3 */                      \
+         "lwz  4, 8(11)\n\t"  /* arg2->r4 */                      \
+         "lwz  5, 12(11)\n\t" /* arg3->r5 */                      \
+         "lwz  6, 16(11)\n\t" /* arg4->r6 */                      \
+         "lwz  7, 20(11)\n\t" /* arg5->r7 */                      \
+         "lwz  8, 24(11)\n\t" /* arg6->r8 */                      \
+         "lwz  9, 28(11)\n\t" /* arg7->r9 */                      \
+         "lwz 10, 32(11)\n\t" /* arg8->r10 */                     \
+         "lwz 11, 0(11)\n\t"  /* target->r11 */                   \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+         "mr 11,%1\n\t"                                           \
+         "mr %0,3\n\t"                                            \
+         "lwz 2,-8(11)\n\t" /* restore tocptr */                  \
+         VG_CONTRACT_FRAME_BY(512)                                \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[2])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
+                                 arg7,arg8,arg9)                  \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3+9];                        \
+      volatile unsigned long _res;                                \
+      /* _argvec[0] holds current r2 across the call */           \
+      _argvec[1]   = (unsigned long)_orig.r2;                     \
+      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+      _argvec[2+1] = (unsigned long)arg1;                         \
+      _argvec[2+2] = (unsigned long)arg2;                         \
+      _argvec[2+3] = (unsigned long)arg3;                         \
+      _argvec[2+4] = (unsigned long)arg4;                         \
+      _argvec[2+5] = (unsigned long)arg5;                         \
+      _argvec[2+6] = (unsigned long)arg6;                         \
+      _argvec[2+7] = (unsigned long)arg7;                         \
+      _argvec[2+8] = (unsigned long)arg8;                         \
+      _argvec[2+9] = (unsigned long)arg9;                         \
+      __asm__ volatile(                                           \
+         "mr 11,%1\n\t"                                           \
+         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
+         "stw  2,-8(11)\n\t"  /* save tocptr */                   \
+         "lwz  2,-4(11)\n\t"  /* use nraddr's tocptr */           \
+         VG_EXPAND_FRAME_BY_trashes_r3(64)                        \
+         /* arg9 */                                               \
+         "lwz 3,36(11)\n\t"                                       \
+         "stw 3,56(1)\n\t"                                        \
+         /* args1-8 */                                            \
+         "lwz  3, 4(11)\n\t"  /* arg1->r3 */                      \
+         "lwz  4, 8(11)\n\t"  /* arg2->r4 */                      \
+         "lwz  5, 12(11)\n\t" /* arg3->r5 */                      \
+         "lwz  6, 16(11)\n\t" /* arg4->r6 */                      \
+         "lwz  7, 20(11)\n\t" /* arg5->r7 */                      \
+         "lwz  8, 24(11)\n\t" /* arg6->r8 */                      \
+         "lwz  9, 28(11)\n\t" /* arg7->r9 */                      \
+         "lwz 10, 32(11)\n\t" /* arg8->r10 */                     \
+         "lwz 11, 0(11)\n\t"  /* target->r11 */                   \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+         "mr 11,%1\n\t"                                           \
+         "mr %0,3\n\t"                                            \
+         "lwz 2,-8(11)\n\t" /* restore tocptr */                  \
+         VG_CONTRACT_FRAME_BY(64)                                 \
+         VG_CONTRACT_FRAME_BY(512)                                \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[2])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
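+/* Editor's note: the 32-bit AIX frame reserves a 24-byte linkage area,
+   so the parameter save area starts at 24(1); the words for args 1-8
+   occupy 24(1)..52(1), and in the 9-12 argument variants the ninth
+   argument is therefore spilled to 56(1) inside the extra frame
+   expansion, the tenth to 60(1), and so on. */
+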
+#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
+                                  arg7,arg8,arg9,arg10)           \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3+10];                       \
+      volatile unsigned long _res;                                \
+      /* _argvec[0] holds current r2 across the call */           \
+      _argvec[1]   = (unsigned long)_orig.r2;                     \
+      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+      _argvec[2+1] = (unsigned long)arg1;                         \
+      _argvec[2+2] = (unsigned long)arg2;                         \
+      _argvec[2+3] = (unsigned long)arg3;                         \
+      _argvec[2+4] = (unsigned long)arg4;                         \
+      _argvec[2+5] = (unsigned long)arg5;                         \
+      _argvec[2+6] = (unsigned long)arg6;                         \
+      _argvec[2+7] = (unsigned long)arg7;                         \
+      _argvec[2+8] = (unsigned long)arg8;                         \
+      _argvec[2+9] = (unsigned long)arg9;                         \
+      _argvec[2+10] = (unsigned long)arg10;                       \
+      __asm__ volatile(                                           \
+         "mr 11,%1\n\t"                                           \
+         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
+         "stw  2,-8(11)\n\t"  /* save tocptr */                   \
+         "lwz  2,-4(11)\n\t"  /* use nraddr's tocptr */           \
+         VG_EXPAND_FRAME_BY_trashes_r3(64)                        \
+         /* arg10 */                                              \
+         "lwz 3,40(11)\n\t"                                       \
+         "stw 3,60(1)\n\t"                                        \
+         /* arg9 */                                               \
+         "lwz 3,36(11)\n\t"                                       \
+         "stw 3,56(1)\n\t"                                        \
+         /* args1-8 */                                            \
+         "lwz  3, 4(11)\n\t"  /* arg1->r3 */                      \
+         "lwz  4, 8(11)\n\t"  /* arg2->r4 */                      \
+         "lwz  5, 12(11)\n\t" /* arg3->r5 */                      \
+         "lwz  6, 16(11)\n\t" /* arg4->r6 */                      \
+         "lwz  7, 20(11)\n\t" /* arg5->r7 */                      \
+         "lwz  8, 24(11)\n\t" /* arg6->r8 */                      \
+         "lwz  9, 28(11)\n\t" /* arg7->r9 */                      \
+         "lwz 10, 32(11)\n\t" /* arg8->r10 */                     \
+         "lwz 11, 0(11)\n\t"  /* target->r11 */                   \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+         "mr 11,%1\n\t"                                           \
+         "mr %0,3\n\t"                                            \
+         "lwz 2,-8(11)\n\t" /* restore tocptr */                  \
+         VG_CONTRACT_FRAME_BY(64)                                 \
+         VG_CONTRACT_FRAME_BY(512)                                \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[2])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
+                                  arg7,arg8,arg9,arg10,arg11)     \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3+11];                       \
+      volatile unsigned long _res;                                \
+      /* _argvec[0] holds current r2 across the call */           \
+      _argvec[1]   = (unsigned long)_orig.r2;                     \
+      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+      _argvec[2+1] = (unsigned long)arg1;                         \
+      _argvec[2+2] = (unsigned long)arg2;                         \
+      _argvec[2+3] = (unsigned long)arg3;                         \
+      _argvec[2+4] = (unsigned long)arg4;                         \
+      _argvec[2+5] = (unsigned long)arg5;                         \
+      _argvec[2+6] = (unsigned long)arg6;                         \
+      _argvec[2+7] = (unsigned long)arg7;                         \
+      _argvec[2+8] = (unsigned long)arg8;                         \
+      _argvec[2+9] = (unsigned long)arg9;                         \
+      _argvec[2+10] = (unsigned long)arg10;                       \
+      _argvec[2+11] = (unsigned long)arg11;                       \
+      __asm__ volatile(                                           \
+         "mr 11,%1\n\t"                                           \
+         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
+         "stw  2,-8(11)\n\t"  /* save tocptr */                   \
+         "lwz  2,-4(11)\n\t"  /* use nraddr's tocptr */           \
+         VG_EXPAND_FRAME_BY_trashes_r3(72)                        \
+         /* arg11 */                                              \
+         "lwz 3,44(11)\n\t"                                       \
+         "stw 3,64(1)\n\t"                                        \
+         /* arg10 */                                              \
+         "lwz 3,40(11)\n\t"                                       \
+         "stw 3,60(1)\n\t"                                        \
+         /* arg9 */                                               \
+         "lwz 3,36(11)\n\t"                                       \
+         "stw 3,56(1)\n\t"                                        \
+         /* args1-8 */                                            \
+         "lwz  3, 4(11)\n\t"  /* arg1->r3 */                      \
+         "lwz  4, 8(11)\n\t"  /* arg2->r4 */                      \
+         "lwz  5, 12(11)\n\t" /* arg3->r5 */                      \
+         "lwz  6, 16(11)\n\t" /* arg4->r6 */                      \
+         "lwz  7, 20(11)\n\t" /* arg5->r7 */                      \
+         "lwz  8, 24(11)\n\t" /* arg6->r8 */                      \
+         "lwz  9, 28(11)\n\t" /* arg7->r9 */                      \
+         "lwz 10, 32(11)\n\t" /* arg8->r10 */                     \
+         "lwz 11, 0(11)\n\t"  /* target->r11 */                   \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+         "mr 11,%1\n\t"                                           \
+         "mr %0,3\n\t"                                            \
+         "lwz 2,-8(11)\n\t" /* restore tocptr */                  \
+         VG_CONTRACT_FRAME_BY(72)                                 \
+         VG_CONTRACT_FRAME_BY(512)                                \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[2])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
+                                arg7,arg8,arg9,arg10,arg11,arg12) \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3+12];                       \
+      volatile unsigned long _res;                                \
+      /* _argvec[0] holds current r2 across the call */           \
+      _argvec[1]   = (unsigned long)_orig.r2;                     \
+      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+      _argvec[2+1] = (unsigned long)arg1;                         \
+      _argvec[2+2] = (unsigned long)arg2;                         \
+      _argvec[2+3] = (unsigned long)arg3;                         \
+      _argvec[2+4] = (unsigned long)arg4;                         \
+      _argvec[2+5] = (unsigned long)arg5;                         \
+      _argvec[2+6] = (unsigned long)arg6;                         \
+      _argvec[2+7] = (unsigned long)arg7;                         \
+      _argvec[2+8] = (unsigned long)arg8;                         \
+      _argvec[2+9] = (unsigned long)arg9;                         \
+      _argvec[2+10] = (unsigned long)arg10;                       \
+      _argvec[2+11] = (unsigned long)arg11;                       \
+      _argvec[2+12] = (unsigned long)arg12;                       \
+      __asm__ volatile(                                           \
+         "mr 11,%1\n\t"                                           \
+         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
+         "stw  2,-8(11)\n\t"  /* save tocptr */                   \
+         "lwz  2,-4(11)\n\t"  /* use nraddr's tocptr */           \
+         VG_EXPAND_FRAME_BY_trashes_r3(72)                        \
+         /* arg12 */                                              \
+         "lwz 3,48(11)\n\t"                                       \
+         "stw 3,68(1)\n\t"                                        \
+         /* arg11 */                                              \
+         "lwz 3,44(11)\n\t"                                       \
+         "stw 3,64(1)\n\t"                                        \
+         /* arg10 */                                              \
+         "lwz 3,40(11)\n\t"                                       \
+         "stw 3,60(1)\n\t"                                        \
+         /* arg9 */                                               \
+         "lwz 3,36(11)\n\t"                                       \
+         "stw 3,56(1)\n\t"                                        \
+         /* args1-8 */                                            \
+         "lwz  3, 4(11)\n\t"  /* arg1->r3 */                      \
+         "lwz  4, 8(11)\n\t"  /* arg2->r4 */                      \
+         "lwz  5, 12(11)\n\t" /* arg3->r5 */                      \
+         "lwz  6, 16(11)\n\t" /* arg4->r6 */                      \
+         "lwz  7, 20(11)\n\t" /* arg5->r7 */                      \
+         "lwz  8, 24(11)\n\t" /* arg6->r8 */                      \
+         "lwz  9, 28(11)\n\t" /* arg7->r9 */                      \
+         "lwz 10, 32(11)\n\t" /* arg8->r10 */                     \
+         "lwz 11, 0(11)\n\t"  /* target->r11 */                   \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+         "mr 11,%1\n\t"                                           \
+         "mr %0,3\n\t"                                            \
+         "lwz 2,-8(11)\n\t" /* restore tocptr */                  \
+         VG_CONTRACT_FRAME_BY(72)                                 \
+         VG_CONTRACT_FRAME_BY(512)                                \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[2])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#endif /* PLAT_ppc32_aix5 */
+
+/* ------------------------ ppc64-aix5 ------------------------- */
+
+#if defined(PLAT_ppc64_aix5)
+
+/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */
+
+/* These regs are trashed by the hidden call. */
+#define __CALLER_SAVED_REGS                                       \
+   "lr", "ctr", "xer",                                            \
+   "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7",        \
+   "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",   \
+   "r11", "r12", "r13"
+
+/* Expand the stack frame, copying enough info that unwinding
+   still works.  Trashes r3. */
+
+#define VG_EXPAND_FRAME_BY_trashes_r3(_n_fr)                      \
+         "addi 1,1,-" #_n_fr "\n\t"                               \
+         "ld   3," #_n_fr "(1)\n\t"                               \
+         "std  3,0(1)\n\t"
+
+#define VG_CONTRACT_FRAME_BY(_n_fr)                               \
+         "addi 1,1," #_n_fr "\n\t"
+
+/* These CALL_FN_ macros assume that on ppc64-aix5, sizeof(unsigned
+   long) == 8. */
+
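+/* Editor's note: with 8-byte longs, argN sits at offset 8*N from the
+   argvec base in r11 (arg1 at 8(11), arg2 at 16(11), ...), and the TOC
+   save/restore slots move to -16(11)/-8(11). */
+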
+#define CALL_FN_W_v(lval, orig)                                   \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3+0];                        \
+      volatile unsigned long _res;                                \
+      /* _argvec[0] holds current r2 across the call */           \
+      _argvec[1] = (unsigned long)_orig.r2;                       \
+      _argvec[2] = (unsigned long)_orig.nraddr;                   \
+      __asm__ volatile(                                           \
+         "mr 11,%1\n\t"                                           \
+         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
+         "std  2,-16(11)\n\t" /* save tocptr */                   \
+         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
+         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+         "mr 11,%1\n\t"                                           \
+         "mr %0,3\n\t"                                            \
+         "ld 2,-16(11)\n\t" /* restore tocptr */                  \
+         VG_CONTRACT_FRAME_BY(512)                                \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[2])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_W(lval, orig, arg1)                             \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3+1];                        \
+      volatile unsigned long _res;                                \
+      /* _argvec[0] holds current r2 across the call */           \
+      _argvec[1]   = (unsigned long)_orig.r2;                     \
+      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+      _argvec[2+1] = (unsigned long)arg1;                         \
+      __asm__ volatile(                                           \
+         "mr 11,%1\n\t"                                           \
+         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
+         "std  2,-16(11)\n\t" /* save tocptr */                   \
+         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
+         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
+         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+         "mr 11,%1\n\t"                                           \
+         "mr %0,3\n\t"                                            \
+         "ld 2,-16(11)\n\t" /* restore tocptr */                  \
+         VG_CONTRACT_FRAME_BY(512)                                \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[2])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_WW(lval, orig, arg1,arg2)                       \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3+2];                        \
+      volatile unsigned long _res;                                \
+      /* _argvec[0] holds current r2 across the call */           \
+      _argvec[1]   = (unsigned long)_orig.r2;                     \
+      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+      _argvec[2+1] = (unsigned long)arg1;                         \
+      _argvec[2+2] = (unsigned long)arg2;                         \
+      __asm__ volatile(                                           \
+         "mr 11,%1\n\t"                                           \
+         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
+         "std  2,-16(11)\n\t" /* save tocptr */                   \
+         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
+         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
+         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
+         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+         "mr 11,%1\n\t"                                           \
+         "mr %0,3\n\t"                                            \
+         "ld  2,-16(11)\n\t" /* restore tocptr */                 \
+         VG_CONTRACT_FRAME_BY(512)                                \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[2])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3)                 \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3+3];                        \
+      volatile unsigned long _res;                                \
+      /* _argvec[0] holds current r2 across the call */           \
+      _argvec[1]   = (unsigned long)_orig.r2;                     \
+      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+      _argvec[2+1] = (unsigned long)arg1;                         \
+      _argvec[2+2] = (unsigned long)arg2;                         \
+      _argvec[2+3] = (unsigned long)arg3;                         \
+      __asm__ volatile(                                           \
+         "mr 11,%1\n\t"                                           \
+         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
+         "std  2,-16(11)\n\t" /* save tocptr */                   \
+         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
+         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
+         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
+         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
+         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+         "mr 11,%1\n\t"                                           \
+         "mr %0,3\n\t"                                            \
+         "ld  2,-16(11)\n\t" /* restore tocptr */                 \
+         VG_CONTRACT_FRAME_BY(512)                                \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[2])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4)           \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3+4];                        \
+      volatile unsigned long _res;                                \
+      /* _argvec[0] holds current r2 across the call */           \
+      _argvec[1]   = (unsigned long)_orig.r2;                     \
+      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+      _argvec[2+1] = (unsigned long)arg1;                         \
+      _argvec[2+2] = (unsigned long)arg2;                         \
+      _argvec[2+3] = (unsigned long)arg3;                         \
+      _argvec[2+4] = (unsigned long)arg4;                         \
+      __asm__ volatile(                                           \
+         "mr 11,%1\n\t"                                           \
+         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
+         "std  2,-16(11)\n\t" /* save tocptr */                   \
+         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
+         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
+         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
+         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
+         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
+         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+         "mr 11,%1\n\t"                                           \
+         "mr %0,3\n\t"                                            \
+         "ld  2,-16(11)\n\t" /* restore tocptr */                 \
+         VG_CONTRACT_FRAME_BY(512)                                \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[2])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5)        \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3+5];                        \
+      volatile unsigned long _res;                                \
+      /* _argvec[0] holds current r2 across the call */           \
+      _argvec[1]   = (unsigned long)_orig.r2;                     \
+      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+      _argvec[2+1] = (unsigned long)arg1;                         \
+      _argvec[2+2] = (unsigned long)arg2;                         \
+      _argvec[2+3] = (unsigned long)arg3;                         \
+      _argvec[2+4] = (unsigned long)arg4;                         \
+      _argvec[2+5] = (unsigned long)arg5;                         \
+      __asm__ volatile(                                           \
+         "mr 11,%1\n\t"                                           \
+         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
+         "std  2,-16(11)\n\t" /* save tocptr */                   \
+         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
+         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
+         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
+         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
+         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
+         "ld   7, 40(11)\n\t" /* arg5->r7 */                      \
+         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+         "mr 11,%1\n\t"                                           \
+         "mr %0,3\n\t"                                            \
+         "ld  2,-16(11)\n\t" /* restore tocptr */                 \
+         VG_CONTRACT_FRAME_BY(512)                                \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[2])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6)   \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3+6];                        \
+      volatile unsigned long _res;                                \
+      /* _argvec[0] holds current r2 across the call */           \
+      _argvec[1]   = (unsigned long)_orig.r2;                     \
+      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+      _argvec[2+1] = (unsigned long)arg1;                         \
+      _argvec[2+2] = (unsigned long)arg2;                         \
+      _argvec[2+3] = (unsigned long)arg3;                         \
+      _argvec[2+4] = (unsigned long)arg4;                         \
+      _argvec[2+5] = (unsigned long)arg5;                         \
+      _argvec[2+6] = (unsigned long)arg6;                         \
+      __asm__ volatile(                                           \
+         "mr 11,%1\n\t"                                           \
+         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
+         "std  2,-16(11)\n\t" /* save tocptr */                   \
+         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
+         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
+         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
+         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
+         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
+         "ld   7, 40(11)\n\t" /* arg5->r7 */                      \
+         "ld   8, 48(11)\n\t" /* arg6->r8 */                      \
+         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+         "mr 11,%1\n\t"                                           \
+         "mr %0,3\n\t"                                            \
+         "ld  2,-16(11)\n\t" /* restore tocptr */                 \
+         VG_CONTRACT_FRAME_BY(512)                                \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[2])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
+                                 arg7)                            \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3+7];                        \
+      volatile unsigned long _res;                                \
+      /* _argvec[0] holds current r2 across the call */           \
+      _argvec[1]   = (unsigned long)_orig.r2;                     \
+      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+      _argvec[2+1] = (unsigned long)arg1;                         \
+      _argvec[2+2] = (unsigned long)arg2;                         \
+      _argvec[2+3] = (unsigned long)arg3;                         \
+      _argvec[2+4] = (unsigned long)arg4;                         \
+      _argvec[2+5] = (unsigned long)arg5;                         \
+      _argvec[2+6] = (unsigned long)arg6;                         \
+      _argvec[2+7] = (unsigned long)arg7;                         \
+      __asm__ volatile(                                           \
+         "mr 11,%1\n\t"                                           \
+         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
+         "std  2,-16(11)\n\t" /* save tocptr */                   \
+         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
+         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
+         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
+         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
+         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
+         "ld   7, 40(11)\n\t" /* arg5->r7 */                      \
+         "ld   8, 48(11)\n\t" /* arg6->r8 */                      \
+         "ld   9, 56(11)\n\t" /* arg7->r9 */                      \
+         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+         "mr 11,%1\n\t"                                           \
+         "mr %0,3\n\t"                                            \
+         "ld  2,-16(11)\n\t" /* restore tocptr */                 \
+         VG_CONTRACT_FRAME_BY(512)                                \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[2])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
+                                 arg7,arg8)                       \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3+8];                        \
+      volatile unsigned long _res;                                \
+      /* _argvec[0] holds current r2 across the call */           \
+      _argvec[1]   = (unsigned long)_orig.r2;                     \
+      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+      _argvec[2+1] = (unsigned long)arg1;                         \
+      _argvec[2+2] = (unsigned long)arg2;                         \
+      _argvec[2+3] = (unsigned long)arg3;                         \
+      _argvec[2+4] = (unsigned long)arg4;                         \
+      _argvec[2+5] = (unsigned long)arg5;                         \
+      _argvec[2+6] = (unsigned long)arg6;                         \
+      _argvec[2+7] = (unsigned long)arg7;                         \
+      _argvec[2+8] = (unsigned long)arg8;                         \
+      __asm__ volatile(                                           \
+         "mr 11,%1\n\t"                                           \
+         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
+         "std  2,-16(11)\n\t" /* save tocptr */                   \
+         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
+         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
+         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
+         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
+         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
+         "ld   7, 40(11)\n\t" /* arg5->r7 */                      \
+         "ld   8, 48(11)\n\t" /* arg6->r8 */                      \
+         "ld   9, 56(11)\n\t" /* arg7->r9 */                      \
+         "ld  10, 64(11)\n\t" /* arg8->r10 */                     \
+         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+         "mr 11,%1\n\t"                                           \
+         "mr %0,3\n\t"                                            \
+         "ld  2,-16(11)\n\t" /* restore tocptr */                 \
+         VG_CONTRACT_FRAME_BY(512)                                \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[2])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
+                                 arg7,arg8,arg9)                  \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3+9];                        \
+      volatile unsigned long _res;                                \
+      /* _argvec[0] holds current r2 across the call */           \
+      _argvec[1]   = (unsigned long)_orig.r2;                     \
+      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+      _argvec[2+1] = (unsigned long)arg1;                         \
+      _argvec[2+2] = (unsigned long)arg2;                         \
+      _argvec[2+3] = (unsigned long)arg3;                         \
+      _argvec[2+4] = (unsigned long)arg4;                         \
+      _argvec[2+5] = (unsigned long)arg5;                         \
+      _argvec[2+6] = (unsigned long)arg6;                         \
+      _argvec[2+7] = (unsigned long)arg7;                         \
+      _argvec[2+8] = (unsigned long)arg8;                         \
+      _argvec[2+9] = (unsigned long)arg9;                         \
+      __asm__ volatile(                                           \
+         "mr 11,%1\n\t"                                           \
+         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
+         "std  2,-16(11)\n\t" /* save tocptr */                   \
+         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
+         VG_EXPAND_FRAME_BY_trashes_r3(128)                       \
+         /* arg9 */                                               \
+         "ld  3,72(11)\n\t"                                       \
+         "std 3,112(1)\n\t"                                       \
+         /* args1-8 */                                            \
+         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
+         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
+         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
+         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
+         "ld   7, 40(11)\n\t" /* arg5->r7 */                      \
+         "ld   8, 48(11)\n\t" /* arg6->r8 */                      \
+         "ld   9, 56(11)\n\t" /* arg7->r9 */                      \
+         "ld  10, 64(11)\n\t" /* arg8->r10 */                     \
+         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+         "mr 11,%1\n\t"                                           \
+         "mr %0,3\n\t"                                            \
+         "ld  2,-16(11)\n\t" /* restore tocptr */                 \
+         VG_CONTRACT_FRAME_BY(128)                                \
+         VG_CONTRACT_FRAME_BY(512)                                \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[2])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
+                                  arg7,arg8,arg9,arg10)           \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3+10];                       \
+      volatile unsigned long _res;                                \
+      /* _argvec[0] holds current r2 across the call */           \
+      _argvec[1]   = (unsigned long)_orig.r2;                     \
+      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+      _argvec[2+1] = (unsigned long)arg1;                         \
+      _argvec[2+2] = (unsigned long)arg2;                         \
+      _argvec[2+3] = (unsigned long)arg3;                         \
+      _argvec[2+4] = (unsigned long)arg4;                         \
+      _argvec[2+5] = (unsigned long)arg5;                         \
+      _argvec[2+6] = (unsigned long)arg6;                         \
+      _argvec[2+7] = (unsigned long)arg7;                         \
+      _argvec[2+8] = (unsigned long)arg8;                         \
+      _argvec[2+9] = (unsigned long)arg9;                         \
+      _argvec[2+10] = (unsigned long)arg10;                       \
+      __asm__ volatile(                                           \
+         "mr 11,%1\n\t"                                           \
+         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
+         "std  2,-16(11)\n\t" /* save tocptr */                   \
+         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
+         VG_EXPAND_FRAME_BY_trashes_r3(128)                       \
+         /* arg10 */                                              \
+         "ld  3,80(11)\n\t"                                       \
+         "std 3,120(1)\n\t"                                       \
+         /* arg9 */                                               \
+         "ld  3,72(11)\n\t"                                       \
+         "std 3,112(1)\n\t"                                       \
+         /* args1-8 */                                            \
+         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
+         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
+         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
+         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
+         "ld   7, 40(11)\n\t" /* arg5->r7 */                      \
+         "ld   8, 48(11)\n\t" /* arg6->r8 */                      \
+         "ld   9, 56(11)\n\t" /* arg7->r9 */                      \
+         "ld  10, 64(11)\n\t" /* arg8->r10 */                     \
+         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+         "mr 11,%1\n\t"                                           \
+         "mr %0,3\n\t"                                            \
+         "ld  2,-16(11)\n\t" /* restore tocptr */                 \
+         VG_CONTRACT_FRAME_BY(128)                                \
+         VG_CONTRACT_FRAME_BY(512)                                \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[2])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
+                                  arg7,arg8,arg9,arg10,arg11)     \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3+11];                       \
+      volatile unsigned long _res;                                \
+      /* _argvec[0] holds current r2 across the call */           \
+      _argvec[1]   = (unsigned long)_orig.r2;                     \
+      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+      _argvec[2+1] = (unsigned long)arg1;                         \
+      _argvec[2+2] = (unsigned long)arg2;                         \
+      _argvec[2+3] = (unsigned long)arg3;                         \
+      _argvec[2+4] = (unsigned long)arg4;                         \
+      _argvec[2+5] = (unsigned long)arg5;                         \
+      _argvec[2+6] = (unsigned long)arg6;                         \
+      _argvec[2+7] = (unsigned long)arg7;                         \
+      _argvec[2+8] = (unsigned long)arg8;                         \
+      _argvec[2+9] = (unsigned long)arg9;                         \
+      _argvec[2+10] = (unsigned long)arg10;                       \
+      _argvec[2+11] = (unsigned long)arg11;                       \
+      __asm__ volatile(                                           \
+         "mr 11,%1\n\t"                                           \
+         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
+         "std  2,-16(11)\n\t" /* save tocptr */                   \
+         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
+         VG_EXPAND_FRAME_BY_trashes_r3(144)                       \
+         /* arg11 */                                              \
+         "ld  3,88(11)\n\t"                                       \
+         "std 3,128(1)\n\t"                                       \
+         /* arg10 */                                              \
+         "ld  3,80(11)\n\t"                                       \
+         "std 3,120(1)\n\t"                                       \
+         /* arg9 */                                               \
+         "ld  3,72(11)\n\t"                                       \
+         "std 3,112(1)\n\t"                                       \
+         /* args1-8 */                                            \
+         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
+         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
+         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
+         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
+         "ld   7, 40(11)\n\t" /* arg5->r7 */                      \
+         "ld   8, 48(11)\n\t" /* arg6->r8 */                      \
+         "ld   9, 56(11)\n\t" /* arg7->r9 */                      \
+         "ld  10, 64(11)\n\t" /* arg8->r10 */                     \
+         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+         "mr 11,%1\n\t"                                           \
+         "mr %0,3\n\t"                                            \
+         "ld  2,-16(11)\n\t" /* restore tocptr */                 \
+         VG_CONTRACT_FRAME_BY(144)                                \
+         VG_CONTRACT_FRAME_BY(512)                                \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[2])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
+                                arg7,arg8,arg9,arg10,arg11,arg12) \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3+12];                       \
+      volatile unsigned long _res;                                \
+      /* _argvec[0] holds current r2 across the call */           \
+      _argvec[1]   = (unsigned long)_orig.r2;                     \
+      _argvec[2]   = (unsigned long)_orig.nraddr;                 \
+      _argvec[2+1] = (unsigned long)arg1;                         \
+      _argvec[2+2] = (unsigned long)arg2;                         \
+      _argvec[2+3] = (unsigned long)arg3;                         \
+      _argvec[2+4] = (unsigned long)arg4;                         \
+      _argvec[2+5] = (unsigned long)arg5;                         \
+      _argvec[2+6] = (unsigned long)arg6;                         \
+      _argvec[2+7] = (unsigned long)arg7;                         \
+      _argvec[2+8] = (unsigned long)arg8;                         \
+      _argvec[2+9] = (unsigned long)arg9;                         \
+      _argvec[2+10] = (unsigned long)arg10;                       \
+      _argvec[2+11] = (unsigned long)arg11;                       \
+      _argvec[2+12] = (unsigned long)arg12;                       \
+      __asm__ volatile(                                           \
+         "mr 11,%1\n\t"                                           \
+         VG_EXPAND_FRAME_BY_trashes_r3(512)                       \
+         "std  2,-16(11)\n\t" /* save tocptr */                   \
+         "ld   2,-8(11)\n\t"  /* use nraddr's tocptr */           \
+         VG_EXPAND_FRAME_BY_trashes_r3(144)                       \
+         /* arg12 */                                              \
+         "ld  3,96(11)\n\t"                                       \
+         "std 3,136(1)\n\t"                                       \
+         /* arg11 */                                              \
+         "ld  3,88(11)\n\t"                                       \
+         "std 3,128(1)\n\t"                                       \
+         /* arg10 */                                              \
+         "ld  3,80(11)\n\t"                                       \
+         "std 3,120(1)\n\t"                                       \
+         /* arg9 */                                               \
+         "ld  3,72(11)\n\t"                                       \
+         "std 3,112(1)\n\t"                                       \
+         /* args1-8 */                                            \
+         "ld   3, 8(11)\n\t"  /* arg1->r3 */                      \
+         "ld   4, 16(11)\n\t" /* arg2->r4 */                      \
+         "ld   5, 24(11)\n\t" /* arg3->r5 */                      \
+         "ld   6, 32(11)\n\t" /* arg4->r6 */                      \
+         "ld   7, 40(11)\n\t" /* arg5->r7 */                      \
+         "ld   8, 48(11)\n\t" /* arg6->r8 */                      \
+         "ld   9, 56(11)\n\t" /* arg7->r9 */                      \
+         "ld  10, 64(11)\n\t" /* arg8->r10 */                     \
+         "ld  11, 0(11)\n\t"  /* target->r11 */                   \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11                  \
+         "mr 11,%1\n\t"                                           \
+         "mr %0,3\n\t"                                            \
+         "ld  2,-16(11)\n\t" /* restore tocptr */                 \
+         VG_CONTRACT_FRAME_BY(144)                                \
+         VG_CONTRACT_FRAME_BY(512)                                \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[2])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#endif /* PLAT_ppc64_aix5 */
+
+
+/* ------------------------------------------------------------------ */
+/* ARCHITECTURE INDEPENDENT MACROS for CLIENT REQUESTS.               */
+/*                                                                    */
+/* ------------------------------------------------------------------ */
+
+/* Some request codes.  There are many more of these, but most are not
+   exposed to end-user view.  These are the public ones, all of the
+   form 0x1000 + small_number.
+
+   Core ones are in the range 0x00000000--0x0000ffff.  The non-public
+   ones start at 0x2000.
+*/
+
+/* These macros are used by tools -- they must be public, but don't
+   embed them into other programs. */
+#define VG_USERREQ_TOOL_BASE(a,b) \
+   ((unsigned int)(((a)&0xff) << 24 | ((b)&0xff) << 16))
+#define VG_IS_TOOL_USERREQ(a, b, v) \
+   (VG_USERREQ_TOOL_BASE(a,b) == ((v) & 0xffff0000))
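+
+/* For example (a sketch, not taken from any real tool): a tool whose
+   two-character code is 'F','T' could number its requests as
+
+      enum { VG_USERREQ__FOO_FIRST = VG_USERREQ_TOOL_BASE('F','T'),
+             VG_USERREQ__FOO_SECOND };
+
+   and recognize them with VG_IS_TOOL_USERREQ('F','T', request). */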
+
+/* !! ABIWARNING !! ABIWARNING !! ABIWARNING !! ABIWARNING !! 
+   This enum comprises an ABI exported by Valgrind to programs
+   which use client requests.  DO NOT CHANGE THE ORDER OF THESE
+   ENTRIES, NOR DELETE ANY -- add new ones at the end. */
+typedef
+   enum { VG_USERREQ__RUNNING_ON_VALGRIND  = 0x1001,
+          VG_USERREQ__DISCARD_TRANSLATIONS = 0x1002,
+
+          /* These allow any function to be called from the simulated
+             CPU but run on the real CPU.  Nb: the first arg passed to
+             the function is always the ThreadId of the running
+             thread!  So CLIENT_CALL0 actually requires a 1 arg
+             function, etc. */
+          VG_USERREQ__CLIENT_CALL0 = 0x1101,
+          VG_USERREQ__CLIENT_CALL1 = 0x1102,
+          VG_USERREQ__CLIENT_CALL2 = 0x1103,
+          VG_USERREQ__CLIENT_CALL3 = 0x1104,
+
+          /* Can be useful in regression testing suites -- eg. can
+             send Valgrind's output to /dev/null and still count
+             errors. */
+          VG_USERREQ__COUNT_ERRORS = 0x1201,
+
+          /* These are useful and can be interpreted by any tool that
+             tracks malloc() et al, by using vg_replace_malloc.c. */
+          VG_USERREQ__MALLOCLIKE_BLOCK = 0x1301,
+          VG_USERREQ__FREELIKE_BLOCK   = 0x1302,
+          /* Memory pool support. */
+          VG_USERREQ__CREATE_MEMPOOL   = 0x1303,
+          VG_USERREQ__DESTROY_MEMPOOL  = 0x1304,
+          VG_USERREQ__MEMPOOL_ALLOC    = 0x1305,
+          VG_USERREQ__MEMPOOL_FREE     = 0x1306,
+          VG_USERREQ__MEMPOOL_TRIM     = 0x1307,
+          VG_USERREQ__MOVE_MEMPOOL     = 0x1308,
+          VG_USERREQ__MEMPOOL_CHANGE   = 0x1309,
+          VG_USERREQ__MEMPOOL_EXISTS   = 0x130a,
+
+          /* Allow printfs to valgrind log. */
+          VG_USERREQ__PRINTF           = 0x1401,
+          VG_USERREQ__PRINTF_BACKTRACE = 0x1402,
+
+          /* Stack support. */
+          VG_USERREQ__STACK_REGISTER   = 0x1501,
+          VG_USERREQ__STACK_DEREGISTER = 0x1502,
+          VG_USERREQ__STACK_CHANGE     = 0x1503
+   } Vg_ClientRequest;
+
+#if !defined(__GNUC__)
+#  define __extension__ /* */
+#endif
+
+/* Returns the number of Valgrinds this code is running under.  That
+   is, 0 if running natively, 1 if running under Valgrind, 2 if
+   running under Valgrind which is running under another Valgrind,
+   etc. */
+#define RUNNING_ON_VALGRIND  __extension__                        \
+   ({unsigned int _qzz_res;                                       \
+    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0 /* if not */,          \
+                               VG_USERREQ__RUNNING_ON_VALGRIND,   \
+                               0, 0, 0, 0, 0);                    \
+    _qzz_res;                                                     \
+   })
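+
+/* A typical use is to scale work down when under instrumentation, e.g.
+   (a sketch; the helper name is illustrative):
+
+      if (RUNNING_ON_VALGRIND > 0)
+         use_smaller_test_workload();
+*/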
+
+
+/* Discard translation of code in the range [_qzz_addr .. _qzz_addr +
+   _qzz_len - 1].  Useful if you are debugging a JITter or some such,
+   since it provides a way to make sure valgrind will retranslate the
+   invalidated area.  Returns no value. */
+#define VALGRIND_DISCARD_TRANSLATIONS(_qzz_addr,_qzz_len)         \
+   {unsigned int _qzz_res;                                        \
+    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
+                               VG_USERREQ__DISCARD_TRANSLATIONS,  \
+                               _qzz_addr, _qzz_len, 0, 0, 0);     \
+   }
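+
+/* For instance, a JIT that patches an already-executed code buffer
+   might do the following (a sketch; patch_code, buf and len are
+   illustrative):
+
+      patch_code(buf, len);
+      VALGRIND_DISCARD_TRANSLATIONS(buf, len);
+*/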
+
+
+/* These requests are for getting Valgrind itself to print something,
+   possibly with a backtrace.  This is a really ugly hack. */
+
+#if defined(NVALGRIND)
+
+#  define VALGRIND_PRINTF(...)
+#  define VALGRIND_PRINTF_BACKTRACE(...)
+
+#else /* NVALGRIND */
+
+/* Modern GCC will optimize the static routine out if unused, and the
+   unused attribute suppresses the warnings about it.  */
+static int VALGRIND_PRINTF(const char *format, ...)
+   __attribute__((format(__printf__, 1, 2), __unused__));
+static int
+VALGRIND_PRINTF(const char *format, ...)
+{
+   unsigned long _qzz_res;
+   va_list vargs;
+   va_start(vargs, format);
+   VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, VG_USERREQ__PRINTF,
+                              (unsigned long)format, (unsigned long)vargs, 
+                              0, 0, 0);
+   va_end(vargs);
+   return (int)_qzz_res;
+}
+
+static int VALGRIND_PRINTF_BACKTRACE(const char *format, ...)
+   __attribute__((format(__printf__, 1, 2), __unused__));
+static int
+VALGRIND_PRINTF_BACKTRACE(const char *format, ...)
+{
+   unsigned long _qzz_res;
+   va_list vargs;
+   va_start(vargs, format);
+   VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, VG_USERREQ__PRINTF_BACKTRACE,
+                              (unsigned long)format, (unsigned long)vargs, 
+                              0, 0, 0);
+   va_end(vargs);
+   return (int)_qzz_res;
+}
+
+#endif /* NVALGRIND */
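+
+/* Either way, calls like the following (a sketch) are safe in code that
+   may or may not run under Valgrind; run natively they print nothing:
+
+      VALGRIND_PRINTF("iteration %d\n", i);
+      VALGRIND_PRINTF_BACKTRACE("failure at iteration %d\n", i);
+*/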
+
+
+/* These requests allow control to move from the simulated CPU to the
+   real CPU, calling an arbitrary function.
+   
+   Note that the current ThreadId is inserted as the first argument.
+   So this call:
+
+     VALGRIND_NON_SIMD_CALL2(f, arg1, arg2)
+
+   requires f to have this signature:
+
+     Word f(Word tid, Word arg1, Word arg2)
+
+   where "Word" is a word-sized type.
+
+   Note that these client requests are not entirely reliable.  For example,
+   if the function called through them subsequently calls printf(),
+   there's a high chance Valgrind will crash.  Generally, the prospects of
+   these working are better if the called function does not refer to
+   any global variables, and does not refer to any libc or other functions
+   (printf et al).  Any kind of entanglement with libc or dynamic linking is
+   likely to have a bad outcome, for tricky reasons which we've grappled
+   with a lot in the past.
+*/
+#define VALGRIND_NON_SIMD_CALL0(_qyy_fn)                          \
+   __extension__                                                  \
+   ({unsigned long _qyy_res;                                      \
+    VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */,  \
+                               VG_USERREQ__CLIENT_CALL0,          \
+                               _qyy_fn,                           \
+                               0, 0, 0, 0);                       \
+    _qyy_res;                                                     \
+   })
+
+#define VALGRIND_NON_SIMD_CALL1(_qyy_fn, _qyy_arg1)               \
+   __extension__                                                  \
+   ({unsigned long _qyy_res;                                      \
+    VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */,  \
+                               VG_USERREQ__CLIENT_CALL1,          \
+                               _qyy_fn,                           \
+                               _qyy_arg1, 0, 0, 0);               \
+    _qyy_res;                                                     \
+   })
+
+#define VALGRIND_NON_SIMD_CALL2(_qyy_fn, _qyy_arg1, _qyy_arg2)    \
+   __extension__                                                  \
+   ({unsigned long _qyy_res;                                      \
+    VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */,  \
+                               VG_USERREQ__CLIENT_CALL2,          \
+                               _qyy_fn,                           \
+                               _qyy_arg1, _qyy_arg2, 0, 0);       \
+    _qyy_res;                                                     \
+   })
+
+#define VALGRIND_NON_SIMD_CALL3(_qyy_fn, _qyy_arg1, _qyy_arg2, _qyy_arg3) \
+   __extension__                                                  \
+   ({unsigned long _qyy_res;                                      \
+    VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */,  \
+                               VG_USERREQ__CLIENT_CALL3,          \
+                               _qyy_fn,                           \
+                               _qyy_arg1, _qyy_arg2,              \
+                               _qyy_arg3, 0);                     \
+    _qyy_res;                                                     \
+   })
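+
+/* A sketch of the contract described above: to call a two-argument
+   helper on the real CPU (the function name is illustrative; 'long'
+   stands in for the word-sized type):
+
+      static long add_on_real_cpu(long tid, long a, long b)
+      { return a + b; }
+
+      long r = VALGRIND_NON_SIMD_CALL2(add_on_real_cpu, 40, 2);
+
+   Per the warning above, such a helper should avoid libc and globals. */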
+
+
+/* Counts the number of errors that have been recorded by a tool.  Nb:
+   the tool must record the errors with VG_(maybe_record_error)() or
+   VG_(unique_error)() for them to be counted. */
+#define VALGRIND_COUNT_ERRORS                                     \
+   __extension__                                                  \
+   ({unsigned int _qyy_res;                                       \
+    VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */,  \
+                               VG_USERREQ__COUNT_ERRORS,          \
+                               0, 0, 0, 0, 0);                    \
+    _qyy_res;                                                     \
+   })
+
+/* Mark a block of memory as having been allocated by a malloc()-like
+   function.  `addr' is the start of the usable block (ie. after any
+   redzone); `rzB' is the redzone size if the allocator can apply redzones;
+   use '0' if not.  Adding redzones makes it more likely Valgrind will spot
+   block overruns.  `is_zeroed' indicates if the memory is zeroed, as it is
+   for calloc().  Put it immediately after the point where a block is
+   allocated. 
+   
+   If you're using Memcheck: If you're allocating memory via superblocks,
+   and then handing out small chunks of each superblock, if you don't have
+   redzones on your small blocks, it's worth marking the superblock with
+   VALGRIND_MAKE_MEM_NOACCESS when it's created, so that block overruns are
+   detected.  But if you can put redzones on, it's probably better to not do
+   this, so that messages for small overruns are described in terms of the
+   small block rather than the superblock (but if you have a big overrun
+   that skips over a redzone, you could miss an error this way).  See
+   memcheck/tests/custom_alloc.c for an example.
+
+   WARNING: if your allocator uses malloc() or 'new' to allocate
+   superblocks, rather than mmap() or brk(), this will not work properly --
+   you'll likely get assertion failures during leak detection.  This is
+   because Valgrind doesn't like seeing overlapping heap blocks.  Sorry.
+
+   Nb: block must be freed via a free()-like function specified
+   with VALGRIND_FREELIKE_BLOCK or mismatch errors will occur. */
+#define VALGRIND_MALLOCLIKE_BLOCK(addr, sizeB, rzB, is_zeroed)    \
+   {unsigned int _qzz_res;                                        \
+    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
+                               VG_USERREQ__MALLOCLIKE_BLOCK,      \
+                               addr, sizeB, rzB, is_zeroed, 0);   \
+   }
+
+/* Mark a block of memory as having been freed by a free()-like function.
+   `rzB' is redzone size;  it must match that given to
+   VALGRIND_MALLOCLIKE_BLOCK.  Memory not freed will be detected by the leak
+   checker.  Put it immediately after the point where the block is freed. */
+#define VALGRIND_FREELIKE_BLOCK(addr, rzB)                        \
+   {unsigned int _qzz_res;                                        \
+    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
+                               VG_USERREQ__FREELIKE_BLOCK,        \
+                               addr, rzB, 0, 0, 0);               \
+   }
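+
+/* A sketch of how the pair is used by an allocator that carves blocks
+   out of mmap()'d superblocks (my_alloc, my_free, get_raw_memory,
+   release_raw_memory and RED_ZONE are all illustrative):
+
+      void* my_alloc(size_t n) {
+         char* p = (char*)get_raw_memory(n + 2*RED_ZONE) + RED_ZONE;
+         VALGRIND_MALLOCLIKE_BLOCK(p, n, RED_ZONE, 0);
+         return p;
+      }
+
+      void my_free(void* p) {
+         VALGRIND_FREELIKE_BLOCK(p, RED_ZONE);
+         release_raw_memory((char*)p - RED_ZONE);
+      }
+*/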
+
+/* Create a memory pool. */
+#define VALGRIND_CREATE_MEMPOOL(pool, rzB, is_zeroed)             \
+   {unsigned int _qzz_res;                                        \
+    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
+                               VG_USERREQ__CREATE_MEMPOOL,        \
+                               pool, rzB, is_zeroed, 0, 0);       \
+   }
+
+/* Destroy a memory pool. */
+#define VALGRIND_DESTROY_MEMPOOL(pool)                            \
+   {unsigned int _qzz_res;                                        \
+    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
+                               VG_USERREQ__DESTROY_MEMPOOL,       \
+                               pool, 0, 0, 0, 0);                 \
+   }
+
+/* Associate a piece of memory with a memory pool. */
+#define VALGRIND_MEMPOOL_ALLOC(pool, addr, size)                  \
+   {unsigned int _qzz_res;                                        \
+    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
+                               VG_USERREQ__MEMPOOL_ALLOC,         \
+                               pool, addr, size, 0, 0);           \
+   }
+
+/* Disassociate a piece of memory from a memory pool. */
+#define VALGRIND_MEMPOOL_FREE(pool, addr)                         \
+   {unsigned int _qzz_res;                                        \
+    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
+                               VG_USERREQ__MEMPOOL_FREE,          \
+                               pool, addr, 0, 0, 0);              \
+   }
+
+/* Disassociate any pieces outside a particular range. */
+#define VALGRIND_MEMPOOL_TRIM(pool, addr, size)                   \
+   {unsigned int _qzz_res;                                        \
+    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
+                               VG_USERREQ__MEMPOOL_TRIM,          \
+                               pool, addr, size, 0, 0);           \
+   }
+
+/* Tell Valgrind the pool previously anchored at poolA has moved to
+   anchor address poolB. */
+#define VALGRIND_MOVE_MEMPOOL(poolA, poolB)                       \
+   {unsigned int _qzz_res;                                        \
+    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
+                               VG_USERREQ__MOVE_MEMPOOL,          \
+                               poolA, poolB, 0, 0, 0);            \
+   }
+
+/* Resize and/or move a piece associated with a memory pool. */
+#define VALGRIND_MEMPOOL_CHANGE(pool, addrA, addrB, size)         \
+   {unsigned int _qzz_res;                                        \
+    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
+                               VG_USERREQ__MEMPOOL_CHANGE,        \
+                               pool, addrA, addrB, size, 0);      \
+   }
+
+/* Return 1 if a mempool exists, else 0. */
+#define VALGRIND_MEMPOOL_EXISTS(pool)                             \
+   ({unsigned int _qzz_res;                                       \
+    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
+                               VG_USERREQ__MEMPOOL_EXISTS,        \
+                               pool, 0, 0, 0, 0);                 \
+    _qzz_res;                                                     \
+   })
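+
+/* The pool requests are normally used together across the pool's
+   lifetime, e.g. (a sketch; 'pool' is the pool's anchor address):
+
+      VALGRIND_CREATE_MEMPOOL(pool, 0, 0);
+      VALGRIND_MEMPOOL_ALLOC(pool, obj, size);
+         ... use obj ...
+      VALGRIND_MEMPOOL_FREE(pool, obj);
+      VALGRIND_DESTROY_MEMPOOL(pool);
+*/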
+
+/* Mark a piece of memory as being a stack. Returns a stack id. */
+#define VALGRIND_STACK_REGISTER(start, end)                       \
+   ({unsigned int _qzz_res;                                       \
+    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
+                               VG_USERREQ__STACK_REGISTER,        \
+                               start, end, 0, 0, 0);              \
+    _qzz_res;                                                     \
+   })
+
+/* Unmark the piece of memory associated with a stack id as being a
+   stack. */
+#define VALGRIND_STACK_DEREGISTER(id)                             \
+   {unsigned int _qzz_res;                                        \
+    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
+                               VG_USERREQ__STACK_DEREGISTER,      \
+                               id, 0, 0, 0, 0);                   \
+   }
+
+/* Change the start and end address of the stack id. */
+#define VALGRIND_STACK_CHANGE(id, start, end)                     \
+   {unsigned int _qzz_res;                                        \
+    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
+                               VG_USERREQ__STACK_CHANGE,          \
+                               id, start, end, 0, 0);             \
+   }
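+
+/* A program that runs on its own stacks (e.g. user-level threads or
+   coroutines) would typically bracket each stack's lifetime (a sketch):
+
+      unsigned int id = VALGRIND_STACK_REGISTER(stack_lo, stack_hi);
+         ... run on the new stack ...
+      VALGRIND_STACK_DEREGISTER(id);
+*/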
+
+
+#undef PLAT_x86_linux
+#undef PLAT_amd64_linux
+#undef PLAT_ppc32_linux
+#undef PLAT_ppc64_linux
+#undef PLAT_ppc32_aix5
+#undef PLAT_ppc64_aix5
+
+#endif   /* __VALGRIND_H */
diff --git a/src/version.cc b/src/version.cc
index c5cdbba..b141120 100644
--- a/src/version.cc
+++ b/src/version.cc
@@ -34,8 +34,8 @@
 // cannot be changed without changing the SCons build script.
 #define MAJOR_VERSION     1
 #define MINOR_VERSION     3
-#define BUILD_NUMBER      2
-#define PATCH_LEVEL       2
+#define BUILD_NUMBER      3
+#define PATCH_LEVEL       0
 #define CANDIDATE_VERSION false
 
 // Define SONAME to have the SCons build put a specific SONAME into the
diff --git a/src/x64/assembler-x64.cc b/src/x64/assembler-x64.cc
index 5e70f9e..b4fd678 100644
--- a/src/x64/assembler-x64.cc
+++ b/src/x64/assembler-x64.cc
@@ -1244,6 +1244,26 @@
 }
 
 
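+// Zero-extending loads: movzxbl reads a byte and movzxwl a 16-bit word
+// (encodings 0F B6 /r and 0F B7 /r), zero-extending the value into the
+// 32-bit destination; on x64 a 32-bit write also clears the upper 32
+// bits of the register.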
+void Assembler::movzxbl(Register dst, const Operand& src) {
+  EnsureSpace ensure_space(this);
+  last_pc_ = pc_;
+  emit_optional_rex_32(dst, src);
+  emit(0x0F);
+  emit(0xB6);
+  emit_operand(dst, src);
+}
+
+
+void Assembler::movzxwl(Register dst, const Operand& src) {
+  EnsureSpace ensure_space(this);
+  last_pc_ = pc_;
+  emit_optional_rex_32(dst, src);
+  emit(0x0F);
+  emit(0xB7);
+  emit_operand(dst, src);
+}
+
+
 void Assembler::mul(Register src) {
   EnsureSpace ensure_space(this);
   last_pc_ = pc_;
diff --git a/src/x64/assembler-x64.h b/src/x64/assembler-x64.h
index 4f482f5..015fa68 100644
--- a/src/x64/assembler-x64.h
+++ b/src/x64/assembler-x64.h
@@ -517,6 +517,8 @@
   void movsxlq(Register dst, Register src);
   void movsxlq(Register dst, const Operand& src);
   void movzxbq(Register dst, const Operand& src);
+  void movzxbl(Register dst, const Operand& src);
+  void movzxwl(Register dst, const Operand& src);
 
   // New x64 instruction to load from an immediate 64-bit pointer into RAX.
   void load_rax(void* ptr, RelocInfo::Mode rmode);
diff --git a/src/x64/builtins-x64.cc b/src/x64/builtins-x64.cc
index 08f8338..087aaff 100644
--- a/src/x64/builtins-x64.cc
+++ b/src/x64/builtins-x64.cc
@@ -503,13 +503,160 @@
   // Try to allocate the object without transitioning into C code. If any of the
   // preconditions is not met, the code bails out to the runtime call.
   Label rt_call, allocated;
+  if (FLAG_inline_new) {
+    Label undo_allocation;
+    // TODO(X64): Enable debugger support, using debug_step_in_fp.
 
-  // TODO(x64): Implement inlined allocation.
+    // Verified that the constructor is a JSFunction.
+    // Load the initial map and verify that it is in fact a map.
+    // rdi: constructor
+    __ movq(rax, FieldOperand(rdi, JSFunction::kPrototypeOrInitialMapOffset));
+    // The smi tag check below catches both NULL and a Smi.
+    __ testl(rax, Immediate(kSmiTagMask));
+    __ j(zero, &rt_call);
+    // rdi: constructor
+    // rax: initial map (if proven valid below)
+    __ CmpObjectType(rax, MAP_TYPE, rbx);
+    __ j(not_equal, &rt_call);
+
+    // Check that the constructor is not constructing a JSFunction (see comments
+    // in Runtime_NewObject in runtime.cc); in that case the initial map's
+    // instance type would be JS_FUNCTION_TYPE.
+    // rdi: constructor
+    // rax: initial map
+    __ CmpInstanceType(rax, JS_FUNCTION_TYPE);
+    __ j(equal, &rt_call);
+
+    // Now allocate the JSObject on the heap.
+    __ movzxbq(rdi, FieldOperand(rax, Map::kInstanceSizeOffset));
+    __ shl(rdi, Immediate(kPointerSizeLog2));
+    // rdi: size of new object
+    // Make sure that the maximum heap object size will never cause us
+    // problems here, because it is always greater than the maximum
+    // instance size that can be represented in a byte.
+    ASSERT(Heap::MaxObjectSizeInPagedSpace() >= (1 << kBitsPerByte));
+    ExternalReference new_space_allocation_top =
+        ExternalReference::new_space_allocation_top_address();
+    __ movq(kScratchRegister, new_space_allocation_top);
+    __ movq(rbx, Operand(kScratchRegister, 0));
+    __ addq(rdi, rbx);  // Calculate new top
+    ExternalReference new_space_allocation_limit =
+        ExternalReference::new_space_allocation_limit_address();
+    __ movq(kScratchRegister, new_space_allocation_limit);
+    __ cmpq(rdi, Operand(kScratchRegister, 0));
+    __ j(above_equal, &rt_call);
+    // Allocated the JSObject, now initialize the fields.
+    // rax: initial map
+    // rbx: JSObject (not HeapObject tagged - the actual address).
+    // rdi: start of next object
+    __ movq(Operand(rbx, JSObject::kMapOffset), rax);
+    __ Move(rcx, Factory::empty_fixed_array());
+    __ movq(Operand(rbx, JSObject::kPropertiesOffset), rcx);
+    __ movq(Operand(rbx, JSObject::kElementsOffset), rcx);
+    // Set extra fields in the newly allocated object.
+    // rax: initial map
+    // rbx: JSObject
+    // rdi: start of next object
+    { Label loop, entry;
+      __ Move(rdx, Factory::undefined_value());
+      __ lea(rcx, Operand(rbx, JSObject::kHeaderSize));
+      __ jmp(&entry);
+      __ bind(&loop);
+      __ movq(Operand(rcx, 0), rdx);
+      __ addq(rcx, Immediate(kPointerSize));
+      __ bind(&entry);
+      __ cmpq(rcx, rdi);
+      __ j(less, &loop);
+    }
+
+    // Mostly done with the JSObject. Add the heap tag and store the new top, so
+    // that we can continue and jump into the continuation code at any time from
+    // now on. Any failures need to undo the setting of the new top, so that the
+    // heap is in a consistent state and verifiable.
+    // rax: initial map
+    // rbx: JSObject
+    // rdi: start of next object
+    __ or_(rbx, Immediate(kHeapObjectTag));
+    __ movq(kScratchRegister, new_space_allocation_top);
+    __ movq(Operand(kScratchRegister, 0), rdi);
+
+    // Check if a non-empty properties array is needed.
+    // Allocate and initialize a FixedArray if it is.
+    // rax: initial map
+    // rbx: JSObject
+    // rdi: start of next object
+    __ movzxbq(rdx, FieldOperand(rax, Map::kUnusedPropertyFieldsOffset));
+    __ movzxbq(rcx, FieldOperand(rax, Map::kInObjectPropertiesOffset));
+    // Calculate unused properties past the end of the in-object properties.
+    __ subq(rdx, rcx);
+    // Done if no extra properties are to be allocated.
+    __ j(zero, &allocated);
+
+    // Scale the number of elements by pointer size and add the header for
+    // FixedArrays to the start of the next object calculation from above.
+    // rbx: JSObject
+    // rdi: start of next object (will be start of FixedArray)
+    // rdx: number of elements in properties array
+    ASSERT(Heap::MaxObjectSizeInPagedSpace() >
+           (FixedArray::kHeaderSize + 255*kPointerSize));
+    __ lea(rax, Operand(rdi, rdx, times_pointer_size, FixedArray::kHeaderSize));
+    __ movq(kScratchRegister, new_space_allocation_limit);
+    __ cmpq(rax, Operand(kScratchRegister, 0));
+    __ j(above_equal, &undo_allocation);
+    __ store_rax(new_space_allocation_top);
+
+    // Initialize the FixedArray.
+    // rbx: JSObject
+    // rdi: FixedArray
+    // rdx: number of elements
+    // rax: start of next object
+    __ Move(rcx, Factory::fixed_array_map());
+    __ movq(Operand(rdi, JSObject::kMapOffset), rcx);  // set up the map
+    __ movl(Operand(rdi, FixedArray::kLengthOffset), rdx);  // and length
+
+    // Initialize the fields to undefined.
+    // rbx: JSObject
+    // rdi: FixedArray
+    // rax: start of next object
+    // rdx: number of elements
+    { Label loop, entry;
+      __ Move(rdx, Factory::undefined_value());
+      __ lea(rcx, Operand(rdi, FixedArray::kHeaderSize));
+      __ jmp(&entry);
+      __ bind(&loop);
+      __ movq(Operand(rcx, 0), rdx);
+      __ addq(rcx, Immediate(kPointerSize));
+      __ bind(&entry);
+      __ cmpq(rcx, rax);
+      __ j(below, &loop);
+    }
+
+    // Store the initialized FixedArray into the properties field of
+    // the JSObject
+    // rbx: JSObject
+    // rdi: FixedArray
+    __ or_(rdi, Immediate(kHeapObjectTag));  // add the heap tag
+    __ movq(FieldOperand(rbx, JSObject::kPropertiesOffset), rdi);
+
+    // Continue with JSObject being successfully allocated
+    // rbx: JSObject
+    __ jmp(&allocated);
+
+    // Undo the setting of the new top so that the heap is verifiable. For
+    // example, the map's unused properties potentially do not match the
+    // allocated object's unused properties.
+    // rbx: JSObject (previous new top)
+    __ bind(&undo_allocation);
+    __ xor_(rbx, Immediate(kHeapObjectTag));  // clear the heap tag
+    __ movq(kScratchRegister, new_space_allocation_top);
+    __ movq(Operand(kScratchRegister, 0), rbx);
+  }
 
   // Allocate the new receiver object using the runtime call.
   // rdi: function (constructor)
   __ bind(&rt_call);
-  // Must restore edi (constructor) before calling runtime.
+  // Must restore rdi (constructor) before calling runtime.
   __ movq(rdi, Operand(rsp, 0));
   __ push(rdi);
   __ CallRuntime(Runtime::kNewObject, 1);
diff --git a/src/x64/cfg-x64.cc b/src/x64/cfg-x64.cc
index 86754ee..8d01ed2 100644
--- a/src/x64/cfg-x64.cc
+++ b/src/x64/cfg-x64.cc
@@ -29,6 +29,7 @@
 
 #include "cfg.h"
 #include "codegen-inl.h"
+#include "codegen-x64.h"
 #include "debug.h"
 #include "macro-assembler-x64.h"
 
@@ -43,6 +44,14 @@
   {
     Comment cmt(masm, "[ InstructionBlock");
     for (int i = 0, len = instructions_.length(); i < len; i++) {
+      // If the location of the current instruction is a temp, then the
+      // instruction cannot be in tail position in the block.  Allocate the
+      // temp based on peeking ahead to the next instruction.
+      Instruction* instr = instructions_[i];
+      Location* loc = instr->location();
+      if (loc->is_temporary()) {
+        instructions_[i+1]->FastAllocate(TempLocation::cast(loc));
+      }
       instructions_[i]->Compile(masm);
     }
   }
@@ -82,6 +91,7 @@
   }
   successor_->Compile(masm);
   if (FLAG_check_stack) {
+    Comment cmnt(masm, "[ Deferred Stack Check");
     __ bind(&deferred_enter);
     StackCheckStub stub;
     __ CallStub(&stub);
@@ -93,7 +103,6 @@
 void ExitNode::Compile(MacroAssembler* masm) {
   ASSERT(!is_marked());
   is_marked_ = true;
-
   Comment cmnt(masm, "[ ExitNode");
   if (FLAG_trace) {
     __ push(rax);
@@ -113,34 +122,202 @@
 }
 
 
-void ReturnInstr::Compile(MacroAssembler* masm) {
-  Comment cmnt(masm, "[ ReturnInstr");
-  value_->ToRegister(masm, rax);
+void PropLoadInstr::Compile(MacroAssembler* masm) {
+  // The key should not be on the stack---if it is a compiler-generated
+  // temporary it is in the accumulator.
+  ASSERT(!key()->is_on_stack());
+
+  Comment cmnt(masm, "[ Load from Property");
+  // If the key is known at compile-time we may be able to use a load IC.
+  bool is_keyed_load = true;
+  if (key()->is_constant()) {
+    // Still use the keyed load IC if the key can be parsed as an integer so
+    // we will get into the case that handles [] on string objects.
+    Handle<Object> key_val = Constant::cast(key())->handle();
+    uint32_t ignored;
+    if (key_val->IsSymbol() &&
+        !String::cast(*key_val)->AsArrayIndex(&ignored)) {
+      is_keyed_load = false;
+    }
+  }
+
+  if (!object()->is_on_stack()) object()->Push(masm);
+  // A test rax instruction after the call indicates to the IC code that it
+  // was inlined.  Ensure there is not one after the call below.
+  if (is_keyed_load) {
+    key()->Push(masm);
+    Handle<Code> ic(Builtins::builtin(Builtins::KeyedLoadIC_Initialize));
+    __ Call(ic, RelocInfo::CODE_TARGET);
+    __ pop(rbx);  // Discard key.
+  } else {
+    key()->Get(masm, rcx);
+    Handle<Code> ic(Builtins::builtin(Builtins::LoadIC_Initialize));
+    __ Call(ic, RelocInfo::CODE_TARGET);
+  }
+  __ pop(rbx);  // Discard receiver.
+  location()->Set(masm, rax);
 }
 
 
-void Constant::ToRegister(MacroAssembler* masm, Register reg) {
+void BinaryOpInstr::Compile(MacroAssembler* masm) {
+  // The right-hand value should not be on the stack---if it is a
+  // compiler-generated temporary it is in the accumulator.
+  ASSERT(!right()->is_on_stack());
+
+  Comment cmnt(masm, "[ BinaryOpInstr");
+  // We can overwrite one of the operands if it is a temporary.
+  OverwriteMode mode = NO_OVERWRITE;
+  if (left()->is_temporary()) {
+    mode = OVERWRITE_LEFT;
+  } else if (right()->is_temporary()) {
+    mode = OVERWRITE_RIGHT;
+  }
+
+  // Push both operands and call the specialized stub.
+  if (!left()->is_on_stack()) left()->Push(masm);
+  right()->Push(masm);
+  GenericBinaryOpStub stub(op(), mode, SMI_CODE_IN_STUB);
+  __ CallStub(&stub);
+  location()->Set(masm, rax);
+}
+
+
+void ReturnInstr::Compile(MacroAssembler* masm) {
+  // The location should be 'Effect'.  As a side effect, move the value to
+  // the accumulator.
+  Comment cmnt(masm, "[ ReturnInstr");
+  value()->Get(masm, rax);
+}
+
+
+void Constant::Get(MacroAssembler* masm, Register reg) {
   __ Move(reg, handle_);
 }
 
 
-void SlotLocation::ToRegister(MacroAssembler* masm, Register reg) {
-  switch (type_) {
+void Constant::Push(MacroAssembler* masm) {
+  __ Push(handle_);
+}
+
+
+static Operand ToOperand(SlotLocation* loc) {
+  switch (loc->type()) {
     case Slot::PARAMETER: {
       int count = CfgGlobals::current()->fun()->scope()->num_parameters();
-      __ movq(reg, Operand(rbp, (1 + count - index_) * kPointerSize));
-      break;
+      return Operand(rbp, (1 + count - loc->index()) * kPointerSize);
     }
     case Slot::LOCAL: {
       const int kOffset = JavaScriptFrameConstants::kLocal0Offset;
-      __ movq(reg, Operand(rbp, kOffset - index_ * kPointerSize));
-      break;
+      return Operand(rbp, kOffset - loc->index() * kPointerSize);
     }
     default:
       UNREACHABLE();
+      return Operand(rax, 0);
   }
 }
 
+
+void Constant::MoveToSlot(MacroAssembler* masm, SlotLocation* loc) {
+  __ Move(ToOperand(loc), handle_);
+}
+
+
+void SlotLocation::Get(MacroAssembler* masm, Register reg) {
+  __ movq(reg, ToOperand(this));
+}
+
+
+void SlotLocation::Set(MacroAssembler* masm, Register reg) {
+  __ movq(ToOperand(this), reg);
+}
+
+
+void SlotLocation::Push(MacroAssembler* masm) {
+  __ push(ToOperand(this));
+}
+
+
+void SlotLocation::Move(MacroAssembler* masm, Value* value) {
+  // We dispatch to the value because in some cases (temp or constant) we
+  // can use special instruction sequences.
+  value->MoveToSlot(masm, this);
+}
+
+
+void SlotLocation::MoveToSlot(MacroAssembler* masm, SlotLocation* loc) {
+  __ movq(kScratchRegister, ToOperand(this));
+  __ movq(ToOperand(loc), kScratchRegister);
+}
+
+
+void TempLocation::Get(MacroAssembler* masm, Register reg) {
+  switch (where_) {
+    case ACCUMULATOR:
+      if (!reg.is(rax)) __ movq(reg, rax);
+      break;
+    case STACK:
+      __ pop(reg);
+      break;
+    case NOT_ALLOCATED:
+      UNREACHABLE();
+  }
+}
+
+
+void TempLocation::Set(MacroAssembler* masm, Register reg) {
+  switch (where_) {
+    case ACCUMULATOR:
+      if (!reg.is(rax)) __ movq(rax, reg);
+      break;
+    case STACK:
+      __ push(reg);
+      break;
+    case NOT_ALLOCATED:
+      UNREACHABLE();
+  }
+}
+
+
+void TempLocation::Push(MacroAssembler* masm) {
+  switch (where_) {
+    case ACCUMULATOR:
+      __ push(rax);
+      break;
+    case STACK:
+    case NOT_ALLOCATED:
+      UNREACHABLE();
+  }
+}
+
+
+void TempLocation::Move(MacroAssembler* masm, Value* value) {
+  switch (where_) {
+    case ACCUMULATOR:
+      value->Get(masm, rax);
+      break;
+    case STACK:
+      value->Push(masm);
+      break;
+    case NOT_ALLOCATED:
+      UNREACHABLE();
+  }
+}
+
+
+void TempLocation::MoveToSlot(MacroAssembler* masm, SlotLocation* loc) {
+  switch (where_) {
+    case ACCUMULATOR:
+      __ movq(ToOperand(loc), rax);
+      break;
+    case STACK:
+      __ pop(ToOperand(loc));
+      break;
+    case NOT_ALLOCATED:
+      UNREACHABLE();
+  }
+}
+
+
 #undef __
 
 } }  // namespace v8::internal
diff --git a/src/x64/codegen-x64.cc b/src/x64/codegen-x64.cc
index d2ac163..58c5b47 100644
--- a/src/x64/codegen-x64.cc
+++ b/src/x64/codegen-x64.cc
@@ -97,6 +97,158 @@
 }
 
 
+// -------------------------------------------------------------------------
+// Deferred code objects
+//
+// These subclasses of DeferredCode add pieces of code to the end of the
+// generated code.  The main body branches to them to handle slow cases,
+// which keeps the slower code out of the hot path.  Many of them call a
+// code stub or a runtime function.
+
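+// The result of src + value is in dst.  It either overflowed or was not
+// smi tagged.  Undo the speculative addition and call the appropriate
+// specialized stub for add.  The result is left in dst.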
+class DeferredInlineSmiAdd: public DeferredCode {
+ public:
+  DeferredInlineSmiAdd(Register dst,
+                       Smi* value,
+                       OverwriteMode overwrite_mode)
+      : dst_(dst), value_(value), overwrite_mode_(overwrite_mode) {
+    set_comment("[ DeferredInlineSmiAdd");
+  }
+
+  virtual void Generate();
+
+ private:
+  Register dst_;
+  Smi* value_;
+  OverwriteMode overwrite_mode_;
+};
+
+
+// The result of value + src is in dst.  It either overflowed or was not
+// smi tagged.  Undo the speculative addition and call the appropriate
+// specialized stub for add.  The result is left in dst.
+class DeferredInlineSmiAddReversed: public DeferredCode {
+ public:
+  DeferredInlineSmiAddReversed(Register dst,
+                               Smi* value,
+                               OverwriteMode overwrite_mode)
+      : dst_(dst), value_(value), overwrite_mode_(overwrite_mode) {
+    set_comment("[ DeferredInlineSmiAddReversed");
+  }
+
+  virtual void Generate();
+
+ private:
+  Register dst_;
+  Smi* value_;
+  OverwriteMode overwrite_mode_;
+};
+
+
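+// The result of src - value is in dst.  It either overflowed or was not
+// smi tagged.  Undo the speculative subtraction and call the
+// appropriate specialized stub for subtract.  The result is left in
+// dst.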
+class DeferredInlineSmiSub: public DeferredCode {
+ public:
+  DeferredInlineSmiSub(Register dst,
+                       Smi* value,
+                       OverwriteMode overwrite_mode)
+      : dst_(dst), value_(value), overwrite_mode_(overwrite_mode) {
+    set_comment("[ DeferredInlineSmiSub");
+  }
+
+  virtual void Generate();
+
+ private:
+  Register dst_;
+  Smi* value_;
+  OverwriteMode overwrite_mode_;
+};
+
+
+// Call the appropriate binary operation stub to compute src op value
+// and leave the result in dst.
+class DeferredInlineSmiOperation: public DeferredCode {
+ public:
+  DeferredInlineSmiOperation(Token::Value op,
+                             Register dst,
+                             Register src,
+                             Smi* value,
+                             OverwriteMode overwrite_mode)
+      : op_(op),
+        dst_(dst),
+        src_(src),
+        value_(value),
+        overwrite_mode_(overwrite_mode) {
+    set_comment("[ DeferredInlineSmiOperation");
+  }
+
+  virtual void Generate();
+
+ private:
+  Token::Value op_;
+  Register dst_;
+  Register src_;
+  Smi* value_;
+  OverwriteMode overwrite_mode_;
+};
+
+
+class FloatingPointHelper : public AllStatic {
+ public:
+  // Code pattern for loading a floating point value.  Input value must
+  // be either a smi or a heap number object (fp value).  Requirements:
+  // operand in the 'number' register, which is clobbered.  Returns the
+  // operand as a floating point number on the FPU stack.
+  static void LoadFloatOperand(MacroAssembler* masm, Register number);
+
+  // Code pattern for loading a floating point value. Input value must
+  // be either a smi or a heap number object (fp value). Requirements:
+  // operand in the src register.  Returns the operand as a floating
+  // point number in the dst XMM register.
+  static void LoadFloatOperand(MacroAssembler* masm,
+                               Register src,
+                               XMMRegister dst);
+
+  // Code pattern for loading floating point values. Input values must
+  // be either smi or heap number objects (fp values). Requirements:
+  // operand_1 on TOS+1, operand_2 on TOS+2.  Returns operands as
+  // floating point numbers in XMM registers.
+  static void LoadFloatOperands(MacroAssembler* masm,
+                                XMMRegister dst1,
+                                XMMRegister dst2);
+
+  // Code pattern for loading floating point values onto the fp stack.
+  // Input values must be either smi or heap number objects (fp values).
+  // Requirements:
+  // Register version: operands in registers lhs and rhs.
+  // Stack version: operands on TOS+1 and TOS+2.
+  // Returns operands as floating point numbers on fp stack.
+  static void LoadFloatOperands(MacroAssembler* masm);
+  static void LoadFloatOperands(MacroAssembler* masm,
+                                Register lhs,
+                                Register rhs);
+
+  // Code pattern for loading a floating point value and converting it
+  // to a 32-bit integer.  Input value must be either a smi or a heap
+  // number object.
+  // Returns the operand as a 32-bit sign-extended integer in a
+  // general-purpose register.
+  static void LoadInt32Operand(MacroAssembler* masm,
+                               const Operand& src,
+                               Register dst);
+
+  // Test if operands are smi or number objects (fp). Requirements:
+  // operand_1 in rax, operand_2 in rdx; falls through on float or smi
+  // operands, jumps to the non_float label otherwise.
+  static void CheckFloatOperands(MacroAssembler* masm,
+                                 Label* non_float);
+
+  // Allocate a heap number in new space with undefined value.
+  // Returns tagged pointer in result, or jumps to need_gc if new space is full.
+  static void AllocateHeapNumber(MacroAssembler* masm,
+                                 Label* need_gc,
+                                 Register scratch,
+                                 Register result);
+};
+
+
 // -----------------------------------------------------------------------------
 // CodeGenerator implementation.
 
@@ -3351,10 +3503,161 @@
 }
 
 
-void CodeGenerator::GenerateFastCharCodeAt(ZoneList<Expression*>* a) {
-  // TODO(X64): Implement this function.
-  // Ignore arguments and return undefined, to signal failure.
-  frame_->Push(Factory::undefined_value());
+void CodeGenerator::GenerateFastCharCodeAt(ZoneList<Expression*>* args) {
+  Comment(masm_, "[ GenerateFastCharCodeAt");
+  ASSERT(args->length() == 2);
+
+  Label slow_case;
+  Label end;
+  Label not_a_flat_string;
+  Label a_cons_string;
+  Label try_again_with_new_string;
+  Label ascii_string;
+  Label got_char_code;
+
+  Load(args->at(0));
+  Load(args->at(1));
+  Result index = frame_->Pop();
+  Result object = frame_->Pop();
+
+  // Get register rcx to use as shift amount later.
+  Result shift_amount;
+  if (object.is_register() && object.reg().is(rcx)) {
+    Result fresh = allocator_->Allocate();
+    shift_amount = object;
+    object = fresh;
+    __ movq(object.reg(), rcx);
+  }
+  if (index.is_register() && index.reg().is(rcx)) {
+    Result fresh = allocator_->Allocate();
+    shift_amount = index;
+    index = fresh;
+    __ movq(index.reg(), rcx);
+  }
+  // There could be references to rcx in the frame.  Allocating rcx will
+  // spill them; otherwise spill them explicitly.
+  if (shift_amount.is_valid()) {
+    frame_->Spill(rcx);
+  } else {
+    shift_amount = allocator()->Allocate(rcx);
+  }
+  ASSERT(shift_amount.is_register());
+  ASSERT(shift_amount.reg().is(rcx));
+  ASSERT(allocator_->count(rcx) == 1);
+
+  // We will mutate the index register and possibly the object register.
+  // The case where they are somehow the same register is handled
+  // because we only mutate them in the case where the receiver is a
+  // heap object and the index is not (no value can be both).
+  object.ToRegister();
+  index.ToRegister();
+  frame_->Spill(object.reg());
+  frame_->Spill(index.reg());
+
+  // We need a single extra temporary register.
+  Result temp = allocator()->Allocate();
+  ASSERT(temp.is_valid());
+
+  // There is no virtual frame effect from here up to the final result
+  // push.
+
+  // If the receiver is a smi, trigger the slow case.
+  ASSERT(kSmiTag == 0);
+  __ testl(object.reg(), Immediate(kSmiTagMask));
+  __ j(zero, &slow_case);
+
+  // If the index is negative or not a smi, trigger the slow case.
+  ASSERT(kSmiTag == 0);
+  __ testl(index.reg(),
+           Immediate(static_cast<int32_t>(kSmiTagMask | 0x80000000U)));
+  __ j(not_zero, &slow_case);
+  // Untag the index.
+  __ sarl(index.reg(), Immediate(kSmiTagSize));
+
+  __ bind(&try_again_with_new_string);
+  // Fetch the instance type of the receiver into rcx.
+  __ movq(rcx, FieldOperand(object.reg(), HeapObject::kMapOffset));
+  __ movzxbl(rcx, FieldOperand(rcx, Map::kInstanceTypeOffset));
+  // If the receiver is not a string trigger the slow case.
+  __ testb(rcx, Immediate(kIsNotStringMask));
+  __ j(not_zero, &slow_case);
+
+  // Here we make assumptions about the tag values and the shifts needed.
+  // See the comment in objects.h.
+  ASSERT(kLongStringTag == 0);
+  ASSERT(kMediumStringTag + String::kLongLengthShift ==
+         String::kMediumLengthShift);
+  ASSERT(kShortStringTag + String::kLongLengthShift ==
+         String::kShortLengthShift);
+  __ and_(rcx, Immediate(kStringSizeMask));
+  __ addq(rcx, Immediate(String::kLongLengthShift));
+  // Fetch the length field into the temporary register.
+  __ movl(temp.reg(), FieldOperand(object.reg(), String::kLengthOffset));
+  __ shrl(temp.reg());  // The shift amount in rcx is an implicit operand.
+  // Check for index out of range.
+  __ cmpl(index.reg(), temp.reg());
+  __ j(greater_equal, &slow_case);
+  // Reload the instance type (into the temp register this time).
+  __ movq(temp.reg(), FieldOperand(object.reg(), HeapObject::kMapOffset));
+  __ movzxbl(temp.reg(), FieldOperand(temp.reg(), Map::kInstanceTypeOffset));
+
+  // We need special handling for non-flat strings.
+  ASSERT(kSeqStringTag == 0);
+  __ testb(temp.reg(), Immediate(kStringRepresentationMask));
+  __ j(not_zero, &not_a_flat_string);
+  // Check for 1-byte or 2-byte string.
+  __ testb(temp.reg(), Immediate(kStringEncodingMask));
+  __ j(not_zero, &ascii_string);
+
+  // 2-byte string.
+  // Load the 2-byte character code into the temp register.
+  __ movzxwl(temp.reg(), FieldOperand(object.reg(),
+                                      index.reg(),
+                                      times_2,
+                                      SeqTwoByteString::kHeaderSize));
+  __ jmp(&got_char_code);
+
+  // ASCII string.
+  __ bind(&ascii_string);
+  // Load the byte into the temp register.
+  __ movzxbl(temp.reg(), FieldOperand(object.reg(),
+                                      index.reg(),
+                                      times_1,
+                                      SeqAsciiString::kHeaderSize));
+  __ bind(&got_char_code);
+  ASSERT(kSmiTag == 0);
+  __ shl(temp.reg(), Immediate(kSmiTagSize));
+  __ jmp(&end);
+
+  // Handle non-flat strings.
+  __ bind(&not_a_flat_string);
+  __ and_(temp.reg(), Immediate(kStringRepresentationMask));
+  __ cmpb(temp.reg(), Immediate(kConsStringTag));
+  __ j(equal, &a_cons_string);
+  __ cmpb(temp.reg(), Immediate(kSlicedStringTag));
+  __ j(not_equal, &slow_case);
+
+  // SlicedString.
+  // Add the offset to the index and trigger the slow case on overflow.
+  __ addl(index.reg(), FieldOperand(object.reg(), SlicedString::kStartOffset));
+  __ j(overflow, &slow_case);
+  // Getting the underlying string is done by running the cons string code.
+
+  // ConsString.
+  __ bind(&a_cons_string);
+  // Get the first of the two strings.  Both sliced and cons strings
+  // store their source string at the same offset.
+  ASSERT(SlicedString::kBufferOffset == ConsString::kFirstOffset);
+  __ movq(object.reg(), FieldOperand(object.reg(), ConsString::kFirstOffset));
+  __ jmp(&try_again_with_new_string);
+
+  __ bind(&slow_case);
+  // Move the undefined value into the result register; pushing undefined
+  // signals failure and makes the caller fall back to the runtime.
+  __ Move(temp.reg(), Factory::undefined_value());
+
+  __ bind(&end);
+  frame_->Push(&temp);
 }
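
The combined index test in GenerateFastCharCodeAt above rejects non-smi
and negative indices with one mask: the low bit catches values that are
not smis (kSmiTag == 0) and bit 31 catches negative smis.  A small
worked sketch of the predicate, assuming the 31-bit smi encoding
(illustrative, not V8 code):

#include <cassert>
#include <cstdint>

const uint32_t kSmiTagMask = 1;  // The low bit of a smi is 0.

// True iff x is a non-negative smi: tag bit and sign bit both clear.
bool IsNonNegativeSmi(uint32_t x) {
  return (x & (kSmiTagMask | 0x80000000u)) == 0;
}

int main() {
  assert(IsNonNegativeSmi(42u << 1));      // smi 42
  assert(!IsNonNegativeSmi(7u));           // tag bit set: not a smi
  assert(!IsNonNegativeSmi(0x80000000u));  // sign bit set: negative
  return 0;
}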
 
 
@@ -3459,11 +3762,58 @@
 
 
 void CodeGenerator::GenerateFastMathOp(MathOp op, ZoneList<Expression*>* args) {
-  // TODO(X64): Use inline floating point in the fast case.
+  JumpTarget done;
+  JumpTarget call_runtime;
   ASSERT(args->length() == 1);
 
-  // Load number.
+  // Load number and duplicate it.
   Load(args->at(0));
+  frame_->Dup();
+
+  // Get the number into an unaliased register and load it onto the
+  // floating point stack still leaving one copy on the frame.
+  Result number = frame_->Pop();
+  number.ToRegister();
+  frame_->Spill(number.reg());
+  FloatingPointHelper::LoadFloatOperand(masm_, number.reg());
+  number.Unuse();
+
+  // Perform the operation on the number.
+  switch (op) {
+    case SIN:
+      __ fsin();
+      break;
+    case COS:
+      __ fcos();
+      break;
+  }
+
+  // Go to the slow case if the argument to the operation is out of range.
+  Result eax_reg = allocator()->Allocate(rax);
+  ASSERT(eax_reg.is_valid());
+  __ fnstsw_ax();
+  __ testl(rax, Immediate(0x0400));  // Bit 10 is condition flag C2.
+  eax_reg.Unuse();
+  call_runtime.Branch(not_zero);
+
+  // Allocate heap number for result if possible.
+  Result scratch = allocator()->Allocate();
+  Result heap_number = allocator()->Allocate();
+  FloatingPointHelper::AllocateHeapNumber(masm_,
+                                          call_runtime.entry_label(),
+                                          scratch.reg(),
+                                          heap_number.reg());
+  scratch.Unuse();
+
+  // Store the result in the allocated heap number.
+  __ fstp_d(FieldOperand(heap_number.reg(), HeapNumber::kValueOffset));
+  // Replace the extra copy of the argument with the result.
+  frame_->SetElementAt(0, &heap_number);
+  done.Jump();
+
+  call_runtime.Bind();
+  // Free ST(0) which was not popped before calling into the runtime.
+  __ ffree(0);
   Result answer;
   switch (op) {
     case SIN:
@@ -3474,6 +3824,7 @@
       break;
   }
   frame_->Push(&answer);
+  done.Bind();
 }
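
For reference, fsin and fcos set the x87 C2 condition flag and leave
the operand unchanged when its magnitude is at least 2^63; the
fnstsw_ax/testl pair above detects that and branches to the runtime.
A sketch of the status-word check (constants per the x87 manual):

#include <cstdint>
#include <cstdio>

// Bit 10 of the x87 status word is condition flag C2 (0x0400 above).
const uint16_t kC2Bit = 1 << 10;

bool OperandOutOfRange(uint16_t fpu_status_word) {
  return (fpu_status_word & kC2Bit) != 0;
}

int main() {
  std::printf("%d\n", OperandOutOfRange(0x0400));  // 1: take slow path
  std::printf("%d\n", OperandOutOfRange(0x0000));  // 0: inline result ok
  return 0;
}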
 
 
@@ -4080,8 +4431,6 @@
 
 
 void CodeGenerator::StoreToSlot(Slot* slot, InitState init_state) {
-  // TODO(X64): Enable more types of slot.
-
   if (slot->type() == Slot::LOOKUP) {
     ASSERT(slot->var()->is_dynamic());
 
@@ -4534,108 +4883,6 @@
 }
 
 
-// Flag that indicates whether or not the code that handles smi arguments
-// should be placed in the stub, inlined, or omitted entirely.
-enum GenericBinaryFlags {
-  SMI_CODE_IN_STUB,
-  SMI_CODE_INLINED
-};
-
-
-class FloatingPointHelper : public AllStatic {
- public:
-  // Code pattern for loading a floating point value. Input value must
-  // be either a smi or a heap number object (fp value). Requirements:
-  // operand in src register. Returns operand as floating point number
-  // in XMM register
-  static void LoadFloatOperand(MacroAssembler* masm,
-                               Register src,
-                               XMMRegister dst);
-  // Code pattern for loading floating point values. Input values must
-  // be either smi or heap number objects (fp values). Requirements:
-  // operand_1 on TOS+1 , operand_2 on TOS+2; Returns operands as
-  // floating point numbers in XMM registers.
-  static void LoadFloatOperands(MacroAssembler* masm,
-                                XMMRegister dst1,
-                                XMMRegister dst2);
-
-  // Code pattern for loading floating point values onto the fp stack.
-  // Input values must be either smi or heap number objects (fp values).
-  // Requirements:
-  // Register version: operands in registers lhs and rhs.
-  // Stack version: operands on TOS+1 and TOS+2.
-  // Returns operands as floating point numbers on fp stack.
-  static void LoadFloatOperands(MacroAssembler* masm);
-  static void LoadFloatOperands(MacroAssembler* masm,
-                                Register lhs,
-                                Register rhs);
-
-  // Code pattern for loading a floating point value and converting it
-  // to a 32 bit integer. Input value must be either a smi or a heap number
-  // object.
-  // Returns operands as 32-bit sign extended integers in a general purpose
-  // registers.
-  static void LoadInt32Operand(MacroAssembler* masm,
-                               const Operand& src,
-                               Register dst);
-
-  // Test if operands are smi or number objects (fp). Requirements:
-  // operand_1 in rax, operand_2 in rdx; falls through on float
-  // operands, jumps to the non_float label otherwise.
-  static void CheckFloatOperands(MacroAssembler* masm,
-                                 Label* non_float);
-  // Allocate a heap number in new space with undefined value.
-  // Returns tagged pointer in result, or jumps to need_gc if new space is full.
-  static void AllocateHeapNumber(MacroAssembler* masm,
-                                 Label* need_gc,
-                                 Register scratch,
-                                 Register result);
-};
-
-
-class GenericBinaryOpStub: public CodeStub {
- public:
-  GenericBinaryOpStub(Token::Value op,
-                      OverwriteMode mode,
-                      GenericBinaryFlags flags)
-      : op_(op), mode_(mode), flags_(flags) {
-    ASSERT(OpBits::is_valid(Token::NUM_TOKENS));
-  }
-
-  void GenerateSmiCode(MacroAssembler* masm, Label* slow);
-
- private:
-  Token::Value op_;
-  OverwriteMode mode_;
-  GenericBinaryFlags flags_;
-
-  const char* GetName();
-
-#ifdef DEBUG
-  void Print() {
-    PrintF("GenericBinaryOpStub (op %s), (mode %d, flags %d)\n",
-           Token::String(op_),
-           static_cast<int>(mode_),
-           static_cast<int>(flags_));
-  }
-#endif
-
-  // Minor key encoding in 16 bits FOOOOOOOOOOOOOMM.
-  class ModeBits: public BitField<OverwriteMode, 0, 2> {};
-  class OpBits: public BitField<Token::Value, 2, 13> {};
-  class FlagBits: public BitField<GenericBinaryFlags, 15, 1> {};
-
-  Major MajorKey() { return GenericBinaryOp; }
-  int MinorKey() {
-    // Encode the parameters in a unique 16 bit value.
-    return OpBits::encode(op_)
-           | ModeBits::encode(mode_)
-           | FlagBits::encode(flags_);
-  }
-  void Generate(MacroAssembler* masm);
-};
-
-
 class DeferredInlineBinaryOperation: public DeferredCode {
  public:
   DeferredInlineBinaryOperation(Token::Value op,
@@ -4822,29 +5069,6 @@
 }
 
 
-
-
-// The result of src + value is in dst.  It either overflowed or was not
-// smi tagged.  Undo the speculative addition and call the appropriate
-// specialized stub for add.  The result is left in dst.
-class DeferredInlineSmiAdd: public DeferredCode {
- public:
-  DeferredInlineSmiAdd(Register dst,
-                       Smi* value,
-                       OverwriteMode overwrite_mode)
-      : dst_(dst), value_(value), overwrite_mode_(overwrite_mode) {
-    set_comment("[ DeferredInlineSmiAdd");
-  }
-
-  virtual void Generate();
-
- private:
-  Register dst_;
-  Smi* value_;
-  OverwriteMode overwrite_mode_;
-};
-
-
 void DeferredInlineSmiAdd::Generate() {
   __ push(dst_);
   __ push(Immediate(value_));
@@ -4854,29 +5078,8 @@
 }
 
 
-// The result of value + src is in dst.  It either overflowed or was not
-// smi tagged.  Undo the speculative addition and call the appropriate
-// specialized stub for add.  The result is left in dst.
-class DeferredInlineSmiAddReversed: public DeferredCode {
- public:
-  DeferredInlineSmiAddReversed(Register dst,
-                               Smi* value,
-                               OverwriteMode overwrite_mode)
-      : dst_(dst), value_(value), overwrite_mode_(overwrite_mode) {
-    set_comment("[ DeferredInlineSmiAddReversed");
-  }
-
-  virtual void Generate();
-
- private:
-  Register dst_;
-  Smi* value_;
-  OverwriteMode overwrite_mode_;
-};
-
-
 void DeferredInlineSmiAddReversed::Generate() {
-  __ push(Immediate(value_));
+  __ push(Immediate(value_));  // Note: sign extended.
   __ push(dst_);
   GenericBinaryOpStub igostub(Token::ADD, overwrite_mode_, SMI_CODE_INLINED);
   __ CallStub(&igostub);
@@ -4884,37 +5087,28 @@
 }
 
 
-// The result of src - value is in dst.  It either overflowed or was not
-// smi tagged.  Undo the speculative subtraction and call the
-// appropriate specialized stub for subtract.  The result is left in
-// dst.
-class DeferredInlineSmiSub: public DeferredCode {
- public:
-  DeferredInlineSmiSub(Register dst,
-                       Smi* value,
-                       OverwriteMode overwrite_mode)
-      : dst_(dst), value_(value), overwrite_mode_(overwrite_mode) {
-    set_comment("[ DeferredInlineSmiSub");
-  }
-
-  virtual void Generate();
-
- private:
-  Register dst_;
-  Smi* value_;
-  OverwriteMode overwrite_mode_;
-};
-
-
 void DeferredInlineSmiSub::Generate() {
   __ push(dst_);
-  __ push(Immediate(value_));
+  __ push(Immediate(value_));  // Note: sign extended.
   GenericBinaryOpStub igostub(Token::SUB, overwrite_mode_, SMI_CODE_INLINED);
   __ CallStub(&igostub);
   if (!dst_.is(rax)) __ movq(dst_, rax);
 }
 
 
+void DeferredInlineSmiOperation::Generate() {
+  __ push(src_);
+  __ push(Immediate(value_));  // Note: sign extended.
+  // For mod we don't generate all the Smi code inline.
+  GenericBinaryOpStub stub(
+      op_,
+      overwrite_mode_,
+      (op_ == Token::MOD) ? SMI_CODE_IN_STUB : SMI_CODE_INLINED);
+  __ CallStub(&stub);
+  if (!dst_.is(rax)) __ movq(dst_, rax);
+}
+
+
 void CodeGenerator::ConstantSmiBinaryOperation(Token::Value op,
                                                Result* operand,
                                                Handle<Object> value,
@@ -4943,6 +5137,7 @@
 
   // Get the literal value.
   Smi* smi_value = Smi::cast(*value);
+  int int_value = smi_value->value();
 
   switch (op) {
     case Token::ADD: {
@@ -4971,7 +5166,162 @@
       frame_->Push(operand);
       break;
     }
-    // TODO(X64): Move other implementations from ia32 to here.
+
+    case Token::SUB: {
+      if (reversed) {
+        Result constant_operand(value);
+        LikelySmiBinaryOperation(op, &constant_operand, operand,
+                                 overwrite_mode);
+      } else {
+        operand->ToRegister();
+        frame_->Spill(operand->reg());
+        DeferredCode* deferred = new DeferredInlineSmiSub(operand->reg(),
+                                                          smi_value,
+                                                          overwrite_mode);
+        __ testl(operand->reg(), Immediate(kSmiTagMask));
+        deferred->Branch(not_zero);
+        // A smi currently fits in a 32-bit Immediate.
+        __ subl(operand->reg(), Immediate(smi_value));
+        Label sub_success;
+        __ j(no_overflow, &sub_success);
+        // Undo the speculative subtraction before taking the slow path.
+        __ addl(operand->reg(), Immediate(smi_value));
+        deferred->Jump();
+        __ bind(&sub_success);
+        deferred->BindExit();
+        frame_->Push(operand);
+      }
+      break;
+    }
+
+    case Token::SAR:
+      if (reversed) {
+        Result constant_operand(value);
+        LikelySmiBinaryOperation(op, &constant_operand, operand,
+                                 overwrite_mode);
+      } else {
+        // Only the least significant 5 bits of the shift value are used.
+        // In the slow case, this masking is done inside the runtime call.
+        int shift_value = int_value & 0x1f;
+        operand->ToRegister();
+        frame_->Spill(operand->reg());
+        DeferredInlineSmiOperation* deferred =
+            new DeferredInlineSmiOperation(op,
+                                           operand->reg(),
+                                           operand->reg(),
+                                           smi_value,
+                                           overwrite_mode);
+        __ testl(operand->reg(), Immediate(kSmiTagMask));
+        deferred->Branch(not_zero);
+        if (shift_value > 0) {
+          __ sarl(operand->reg(), Immediate(shift_value));
+          __ and_(operand->reg(), Immediate(~kSmiTagMask));
+        }
+        deferred->BindExit();
+        frame_->Push(operand);
+      }
+      break;
+
+    case Token::SHR:
+      if (reversed) {
+        Result constant_operand(value);
+        LikelySmiBinaryOperation(op, &constant_operand, operand,
+                                 overwrite_mode);
+      } else {
+        // Only the least significant 5 bits of the shift value are used.
+        // In the slow case, this masking is done inside the runtime call.
+        int shift_value = int_value & 0x1f;
+        operand->ToRegister();
+        Result answer = allocator()->Allocate();
+        ASSERT(answer.is_valid());
+        DeferredInlineSmiOperation* deferred =
+            new DeferredInlineSmiOperation(op,
+                                           answer.reg(),
+                                           operand->reg(),
+                                           smi_value,
+                                           overwrite_mode);
+        __ testl(operand->reg(), Immediate(kSmiTagMask));
+        deferred->Branch(not_zero);
+        __ movl(answer.reg(), operand->reg());
+        __ sarl(answer.reg(), Immediate(kSmiTagSize));
+        __ shrl(answer.reg(), Immediate(shift_value));
+        // A negative smi shifted right by two or more is in the positive
+        // smi range, so the check is only needed for smaller shifts.
+        if (shift_value < 2) {
+          __ testl(answer.reg(), Immediate(0xc0000000));
+          deferred->Branch(not_zero);
+        }
+        operand->Unuse();
+        ASSERT(kSmiTag == 0);
+        ASSERT(kSmiTagSize == 1);
+        __ addl(answer.reg(), answer.reg());
+        deferred->BindExit();
+        frame_->Push(&answer);
+      }
+      break;
+
+    case Token::BIT_OR:
+    case Token::BIT_XOR:
+    case Token::BIT_AND: {
+      operand->ToRegister();
+      frame_->Spill(operand->reg());
+      if (reversed) {
+        // Bit operations with a constant smi are commutative.
+        // We can swap left and right operands with no problem.
+        // Swap left and right overwrite modes.  0->0, 1->2, 2->1.
+        overwrite_mode = static_cast<OverwriteMode>((2 * overwrite_mode) % 3);
+      }
+      DeferredCode* deferred = new DeferredInlineSmiOperation(
+          op, operand->reg(), operand->reg(), smi_value, overwrite_mode);
+      __ testl(operand->reg(), Immediate(kSmiTagMask));
+      deferred->Branch(not_zero);
+      if (op == Token::BIT_AND) {
+        __ and_(operand->reg(), Immediate(smi_value));
+      } else if (op == Token::BIT_XOR) {
+        if (int_value != 0) {
+          __ xor_(operand->reg(), Immediate(smi_value));
+        }
+      } else {
+        ASSERT(op == Token::BIT_OR);
+        if (int_value != 0) {
+          __ or_(operand->reg(), Immediate(smi_value));
+        }
+      }
+      deferred->BindExit();
+      frame_->Push(operand);
+      break;
+    }
+
+    // Generate inline code for mod of powers of 2 and negative powers of 2;
+    // the masking trick is sketched at the end of this hunk.
+    case Token::MOD:
+      if (!reversed &&
+          int_value != 0 &&
+          (IsPowerOf2(int_value) || IsPowerOf2(-int_value))) {
+        operand->ToRegister();
+        frame_->Spill(operand->reg());
+        DeferredCode* deferred = new DeferredInlineSmiOperation(op,
+                                                                operand->reg(),
+                                                                operand->reg(),
+                                                                smi_value,
+                                                                overwrite_mode);
+        // Check for negative or non-Smi left hand side.
+        __ testl(operand->reg(),
+                 Immediate(static_cast<int32_t>(kSmiTagMask | 0x80000000)));
+        deferred->Branch(not_zero);
+        if (int_value < 0) int_value = -int_value;
+        if (int_value == 1) {
+          __ movl(operand->reg(), Immediate(Smi::FromInt(0)));
+        } else {
+          __ and_(operand->reg(), Immediate((int_value << kSmiTagSize) - 1));
+        }
+        deferred->BindExit();
+        frame_->Push(operand);
+        break;  // This break only applies if we generated code for MOD.
+      }
+      // Fall through if we did not find a power of 2 on the right-hand side!
+      // The next case must be the default.
+
     default: {
       Result constant_operand(value);
       if (reversed) {
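
As referenced in the Token::MOD comment above, a sketch of the tagged
mod-by-power-of-two masking, together with the (2 * m) % 3
overwrite-mode swap from the bit-op case.  Assumes kSmiTag == 0 and
kSmiTagSize == 1; illustrative only:

#include <cassert>

const int kSmiTagSize = 1;  // Tagged smi == value << 1, tag bit 0.

// Mask the tagged smi directly: the low tag bit stays 0, so the result
// is already a valid smi.  Matches (int_value << kSmiTagSize) - 1 above.
int TaggedMod(int tagged, int power_of_two) {
  return tagged & ((power_of_two << kSmiTagSize) - 1);
}

int main() {
  int tagged_13 = 13 << kSmiTagSize;
  assert(TaggedMod(tagged_13, 4) == (13 % 4) << kSmiTagSize);
  // Overwrite-mode swap for commuted operands: 0->0, 1->2, 2->1.
  assert((2 * 0) % 3 == 0);
  assert((2 * 1) % 3 == 2);
  assert((2 * 2) % 3 == 1);
  return 0;
}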
@@ -5380,9 +5730,20 @@
         Comment cmnt(masm, "[ Inlined named property load");
         Result receiver = cgen_->frame()->Pop();
         receiver.ToRegister();
-
         Result value = cgen_->allocator()->Allocate();
         ASSERT(value.is_valid());
+        // Cannot use r12 for the receiver: a memory operand based on r12
+        // needs an extra SIB byte (its low bits collide with the SIB
+        // escape in the ModR/M byte), which changes the distance between
+        // a call and a fixup location.
+        if (receiver.reg().is(r12)) {
+          // Swap receiver and value.
+          __ movq(value.reg(), receiver.reg());
+          Result temp = receiver;
+          receiver = value;
+          value = temp;
+          cgen_->frame()->Spill(value.reg());  // r12 may have been shared.
+        }
+
         DeferredReferenceGetNamedValue* deferred =
             new DeferredReferenceGetNamedValue(value.reg(),
                                                receiver.reg(),
@@ -5746,7 +6107,7 @@
   __ and_(rcx, Immediate(kStringSizeMask));
   __ cmpq(rcx, Immediate(kShortStringTag));
   __ j(not_equal, &true_result);  // Empty string is always short.
-  __ movq(rdx, FieldOperand(rax, String::kLengthOffset));
+  __ movl(rdx, FieldOperand(rax, String::kLengthOffset));
   __ shr(rdx, Immediate(String::kShortLengthShift));
   __ j(zero, &false_result);
   __ jmp(&true_result);
@@ -6740,6 +7101,24 @@
 }
 
 
+void FloatingPointHelper::LoadFloatOperand(MacroAssembler* masm,
+                                           Register number) {
+  Label load_smi, done;
+
+  __ testl(number, Immediate(kSmiTagMask));
+  __ j(zero, &load_smi);
+  __ fld_d(FieldOperand(number, HeapNumber::kValueOffset));
+  __ jmp(&done);
+
+  __ bind(&load_smi);
+  __ sarl(number, Immediate(kSmiTagSize));
+  __ push(number);
+  __ fild_s(Operand(rsp, 0));
+  __ pop(number);
+
+  __ bind(&done);
+}
+
 
 void FloatingPointHelper::LoadFloatOperand(MacroAssembler* masm,
                                            Register src,
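
A plain C++ paraphrase of the LoadFloatOperand helper added above, as a
sanity check of its two paths; assumes the 31-bit smi encoding and a
heap object tag of 1 (illustrative, not the real object layout):

#include <cstdint>

const intptr_t kSmiTagMask = 1;
const intptr_t kSmiTagSize = 1;
const intptr_t kHeapObjectTag = 1;

// 'number' is either a tagged smi or a tagged heap number pointer whose
// IEEE double payload sits at value_offset.
double LoadFloatOperand(intptr_t number, int value_offset) {
  if ((number & kSmiTagMask) == 0) {
    // Smi path: untag, then convert (fild_s in the assembly).
    return static_cast<double>(number >> kSmiTagSize);
  }
  // Heap number path: read the stored double (fld_d in the assembly).
  const char* object = reinterpret_cast<const char*>(number - kHeapObjectTag);
  return *reinterpret_cast<const double*>(object + value_offset);
}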
diff --git a/src/x64/codegen-x64.h b/src/x64/codegen-x64.h
index 5014f71..b1c61d8 100644
--- a/src/x64/codegen-x64.h
+++ b/src/x64/codegen-x64.h
@@ -601,6 +601,66 @@
 };
 
 
+// -------------------------------------------------------------------------
+// Code stubs
+//
+// These independent code objects are created once, and used multiple
+// times by generated code to perform common tasks, often the slow
+// case of a JavaScript operation.  They are all subclasses of CodeStub,
+// which is declared in code-stubs.h.
+
+
+// Flag that indicates whether or not the code that handles smi arguments
+// should be placed in the stub, inlined, or omitted entirely.
+enum GenericBinaryFlags {
+  SMI_CODE_IN_STUB,
+  SMI_CODE_INLINED
+};
+
+
+class GenericBinaryOpStub: public CodeStub {
+ public:
+  GenericBinaryOpStub(Token::Value op,
+                      OverwriteMode mode,
+                      GenericBinaryFlags flags)
+      : op_(op), mode_(mode), flags_(flags) {
+    ASSERT(OpBits::is_valid(Token::NUM_TOKENS));
+  }
+
+  void GenerateSmiCode(MacroAssembler* masm, Label* slow);
+
+ private:
+  Token::Value op_;
+  OverwriteMode mode_;
+  GenericBinaryFlags flags_;
+
+  const char* GetName();
+
+#ifdef DEBUG
+  void Print() {
+    PrintF("GenericBinaryOpStub (op %s), (mode %d, flags %d)\n",
+           Token::String(op_),
+           static_cast<int>(mode_),
+           static_cast<int>(flags_));
+  }
+#endif
+
+  // Minor key encoding in 16 bits FOOOOOOOOOOOOOMM.
+  class ModeBits: public BitField<OverwriteMode, 0, 2> {};
+  class OpBits: public BitField<Token::Value, 2, 13> {};
+  class FlagBits: public BitField<GenericBinaryFlags, 15, 1> {};
+
+  Major MajorKey() { return GenericBinaryOp; }
+  int MinorKey() {
+    // Encode the parameters in a unique 16 bit value.
+    return OpBits::encode(op_)
+           | ModeBits::encode(mode_)
+           | FlagBits::encode(flags_);
+  }
+  void Generate(MacroAssembler* masm);
+};
+
+
 } }  // namespace v8::internal
 
 #endif  // V8_X64_CODEGEN_X64_H_
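
A worked example of the 16-bit minor key layout FOOOOOOOOOOOOOMM
declared above: mode in bits 0-1, op in bits 2-14, flag in bit 15.
A sketch with hand-rolled shifts (BitField performs the same masking):

#include <cassert>

int MinorKey(int op, int mode, int flag) {
  return (mode & 0x3) | ((op & 0x1fff) << 2) | ((flag & 0x1) << 15);
}

int main() {
  int key = MinorKey(5, 2, 1);          // e.g. op 5, mode 2, flag 1
  assert((key & 0x3) == 2);             // ModeBits
  assert(((key >> 2) & 0x1fff) == 5);   // OpBits
  assert(((key >> 15) & 0x1) == 1);     // FlagBits
  return 0;
}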
diff --git a/src/x64/ic-x64.cc b/src/x64/ic-x64.cc
index 8659533..86008eb 100644
--- a/src/x64/ic-x64.cc
+++ b/src/x64/ic-x64.cc
@@ -562,13 +562,175 @@
   __ InvokeFunction(rdi, actual, JUMP_FUNCTION);
 }
 
+
+// Defined in ic.cc.
+Object* CallIC_Miss(Arguments args);
+
 void CallIC::GenerateMegamorphic(MacroAssembler* masm, int argc) {
+  // ----------- S t a t e -------------
+  // rsp[0] return address
+  // rsp[8] argument argc
+  // rsp[16] argument argc - 1
+  // ...
+  // rsp[argc * 8] argument 1
+  // rsp[(argc + 1) * 8] argument 0 = receiver
+  // rsp[(argc + 2) * 8] function name
+  // -----------------------------------
+  Label number, non_number, non_string, boolean, probe, miss;
+
+  // Get the receiver of the function from the stack; 1 ~ return address.
+  __ movq(rdx, Operand(rsp, (argc + 1) * kPointerSize));
+  // Get the name of the function from the stack; 2 ~ return address, receiver.
+  __ movq(rcx, Operand(rsp, (argc + 2) * kPointerSize));
+
+  // Probe the stub cache.
+  Code::Flags flags =
+      Code::ComputeFlags(Code::CALL_IC, NOT_IN_LOOP, MONOMORPHIC, NORMAL, argc);
+  StubCache::GenerateProbe(masm, flags, rdx, rcx, rbx, rax);
+
+  // If the stub cache probing failed, the receiver might be a value
+  // object.  For value objects we probe the cache with the map of the
+  // prototype of the corresponding wrapper (e.g. the Number function's
+  // prototype for a smi or heap number).
+  //
+  // Check for number.
+  __ testl(rdx, Immediate(kSmiTagMask));
+  __ j(zero, &number);
+  __ CmpObjectType(rdx, HEAP_NUMBER_TYPE, rbx);
+  __ j(not_equal, &non_number);
+  __ bind(&number);
+  StubCompiler::GenerateLoadGlobalFunctionPrototype(
+      masm, Context::NUMBER_FUNCTION_INDEX, rdx);
+  __ jmp(&probe);
+
+  // Check for string.
+  __ bind(&non_number);
+  __ CmpInstanceType(rbx, FIRST_NONSTRING_TYPE);
+  __ j(above_equal, &non_string);
+  StubCompiler::GenerateLoadGlobalFunctionPrototype(
+      masm, Context::STRING_FUNCTION_INDEX, rdx);
+  __ jmp(&probe);
+
+  // Check for boolean.
+  __ bind(&non_string);
+  __ Cmp(rdx, Factory::true_value());
+  __ j(equal, &boolean);
+  __ Cmp(rdx, Factory::false_value());
+  __ j(not_equal, &miss);
+  __ bind(&boolean);
+  StubCompiler::GenerateLoadGlobalFunctionPrototype(
+      masm, Context::BOOLEAN_FUNCTION_INDEX, rdx);
+
+  // Probe the stub cache for the value object.
+  __ bind(&probe);
+  StubCache::GenerateProbe(masm, flags, rdx, rcx, rbx, no_reg);
+
   // Cache miss: Jump to runtime.
+  __ bind(&miss);
   Generate(masm, argc, ExternalReference(IC_Utility(kCallIC_Miss)));
 }
 
+
+static void GenerateNormalHelper(MacroAssembler* masm,
+                                 int argc,
+                                 bool is_global_object,
+                                 Label* miss) {
+  // Search the dictionary; put the result in register rdx.
+  GenerateDictionaryLoad(masm, miss, rax, rdx, rbx, rcx);
+
+  // Move the result to register rdi and check that it isn't a smi.
+  __ movq(rdi, rdx);
+  __ testl(rdx, Immediate(kSmiTagMask));
+  __ j(zero, miss);
+
+  // Check that the value is a JavaScript function.
+  __ CmpObjectType(rdx, JS_FUNCTION_TYPE, rdx);
+  __ j(not_equal, miss);
+  // Check that the function has been loaded.
+  __ testb(FieldOperand(rdx, Map::kBitField2Offset),
+           Immediate(1 << Map::kNeedsLoading));
+  __ j(not_zero, miss);
+
+  // Patch the receiver with the global proxy if necessary.
+  if (is_global_object) {
+    __ movq(rdx, Operand(rsp, (argc + 1) * kPointerSize));
+    __ movq(rdx, FieldOperand(rdx, GlobalObject::kGlobalReceiverOffset));
+    __ movq(Operand(rsp, (argc + 1) * kPointerSize), rdx);
+  }
+
+  // Invoke the function.
+  ParameterCount actual(argc);
+  __ InvokeFunction(rdi, actual, JUMP_FUNCTION);
+}
+
+
 void CallIC::GenerateNormal(MacroAssembler* masm, int argc) {
+  // ----------- S t a t e -------------
+  // rsp[0] return address
+  // rsp[8] argument argc
+  // rsp[16] argument argc - 1
+  // ...
+  // rsp[argc * 8] argument 1
+  // rsp[(argc + 1) * 8] argument 0 = receiver
+  // rsp[(argc + 2) * 8] function name
+  // -----------------------------------
+
+  Label miss, global_object, non_global_object;
+
+  // Get the receiver of the function from the stack.
+  __ movq(rdx, Operand(rsp, (argc + 1) * kPointerSize));
+  // Get the name of the function from the stack.
+  __ movq(rcx, Operand(rsp, (argc + 2) * kPointerSize));
+
+  // Check that the receiver isn't a smi.
+  __ testl(rdx, Immediate(kSmiTagMask));
+  __ j(zero, &miss);
+
+  // Check that the receiver is a valid JS object.
+  // Because there are so many map and type checks, we do not use
+  // CmpObjectType but load the map and instance type into registers.
+  __ movq(rbx, FieldOperand(rdx, HeapObject::kMapOffset));
+  __ movb(rax, FieldOperand(rbx, Map::kInstanceTypeOffset));
+  __ cmpb(rax, Immediate(FIRST_JS_OBJECT_TYPE));
+  __ j(below, &miss);
+
+  // If this assert fails, we have to check the upper bound too.
+  ASSERT(LAST_TYPE == JS_FUNCTION_TYPE);
+
+  // Check for access to global object.
+  __ cmpb(rax, Immediate(JS_GLOBAL_OBJECT_TYPE));
+  __ j(equal, &global_object);
+  __ cmpb(rax, Immediate(JS_BUILTINS_OBJECT_TYPE));
+  __ j(not_equal, &non_global_object);
+
+  // Accessing global object: Load and invoke.
+  __ bind(&global_object);
+  // Check that the global object does not require access checks.
+  __ movb(rbx, FieldOperand(rbx, Map::kBitFieldOffset));
+  __ testb(rbx, Immediate(1 << Map::kIsAccessCheckNeeded));
+  __ j(not_equal, &miss);
+  GenerateNormalHelper(masm, argc, true, &miss);
+
+  // Accessing non-global object: Check for access to global proxy.
+  Label global_proxy, invoke;
+  __ bind(&non_global_object);
+  __ cmpb(rax, Immediate(JS_GLOBAL_PROXY_TYPE));
+  __ j(equal, &global_proxy);
+  // Check that the non-global, non-global-proxy object does not
+  // require access checks.
+  __ movb(rbx, FieldOperand(rbx, Map::kBitFieldOffset));
+  __ testb(rbx, Immediate(1 << Map::kIsAccessCheckNeeded));
+  __ j(not_equal, &miss);
+  __ bind(&invoke);
+  GenerateNormalHelper(masm, argc, false, &miss);
+
+  // Global object proxy access: Check access rights.
+  __ bind(&global_proxy);
+  __ CheckAccessGlobalProxy(rdx, rax, &miss);
+  __ jmp(&invoke);
+
   // Cache miss: Jump to runtime.
+  __ bind(&miss);
   Generate(masm, argc, ExternalReference(IC_Utility(kCallIC_Miss)));
 }
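
To make the state comments in the two CallIC generators above concrete,
here is the layout for argc == 2 (each slot is 8 bytes):

// rsp[0]   return address
// rsp[8]   argument 2
// rsp[16]  argument 1
// rsp[24]  argument 0 = receiver   ((argc + 1) * 8)
// rsp[32]  function name           ((argc + 2) * 8)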
 
diff --git a/src/x64/jump-target-x64.cc b/src/x64/jump-target-x64.cc
index b804044..dd2f6d6 100644
--- a/src/x64/jump-target-x64.cc
+++ b/src/x64/jump-target-x64.cc
@@ -362,6 +362,70 @@
   __ bind(&entry_label_);
 }
 
+
+void BreakTarget::Jump() {
+  // Drop leftover statement state from the frame before merging, without
+  // emitting code.
+  ASSERT(cgen()->has_valid_frame());
+  int count = cgen()->frame()->height() - expected_height_;
+  cgen()->frame()->ForgetElements(count);
+  DoJump();
+}
+
+
+void BreakTarget::Jump(Result* arg) {
+  // Drop leftover statement state from the frame before merging, without
+  // emitting code.
+  ASSERT(cgen()->has_valid_frame());
+  int count = cgen()->frame()->height() - expected_height_;
+  cgen()->frame()->ForgetElements(count);
+  cgen()->frame()->Push(arg);
+  DoJump();
+}
+
+
+void BreakTarget::Bind() {
+#ifdef DEBUG
+  // All the forward-reaching frames should have been adjusted at the
+  // jumps to this target.
+  for (int i = 0; i < reaching_frames_.length(); i++) {
+    ASSERT(reaching_frames_[i] == NULL ||
+           reaching_frames_[i]->height() == expected_height_);
+  }
+#endif
+  // Drop leftover statement state from the frame before merging, even on
+  // the fall through.  This is so we can bind the return target with state
+  // on the frame.
+  if (cgen()->has_valid_frame()) {
+    int count = cgen()->frame()->height() - expected_height_;
+    cgen()->frame()->ForgetElements(count);
+  }
+  DoBind();
+}
+
+
+void BreakTarget::Bind(Result* arg) {
+#ifdef DEBUG
+  // All the forward-reaching frames should have been adjusted at the
+  // jumps to this target.
+  for (int i = 0; i < reaching_frames_.length(); i++) {
+    ASSERT(reaching_frames_[i] == NULL ||
+           reaching_frames_[i]->height() == expected_height_ + 1);
+  }
+#endif
+  // Drop leftover statement state from the frame before merging, even on
+  // the fall through.  This is so we can bind the return target with state
+  // on the frame.
+  if (cgen()->has_valid_frame()) {
+    int count = cgen()->frame()->height() - expected_height_;
+    cgen()->frame()->ForgetElements(count);
+    cgen()->frame()->Push(arg);
+  }
+  DoBind();
+  *arg = cgen()->frame()->Pop();
+}
+
+
 #undef __
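
A sketch of the frame-height bookkeeping done by the BreakTarget
methods above, with a plain vector standing in for the virtual frame
(illustrative only):

#include <cassert>
#include <vector>

// Mirrors frame()->ForgetElements(count): drop statement temporaries so
// the frame height matches what the break target expects.
void DropToExpectedHeight(std::vector<int>* frame, size_t expected_height) {
  assert(frame->size() >= expected_height);
  frame->resize(expected_height);
}

int main() {
  std::vector<int> frame = {1, 2, 3, 4, 5};  // Height 5 inside a statement.
  DropToExpectedHeight(&frame, 3);           // Target expects height 3.
  assert(frame.size() == 3);
  return 0;
}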
 
 
diff --git a/src/x64/macro-assembler-x64.cc b/src/x64/macro-assembler-x64.cc
index 5e39cb6..2219a5a 100644
--- a/src/x64/macro-assembler-x64.cc
+++ b/src/x64/macro-assembler-x64.cc
@@ -53,43 +53,50 @@
                               Register scratch) {
   Label fast;
 
-  // Compute the page address from the heap object pointer, leave it
-  // in 'object'.
+  // Compute the page start address from the heap object pointer, and reuse
+  // the 'object' register for it.
   ASSERT(is_int32(~Page::kPageAlignmentMask));
   masm->and_(object,
              Immediate(static_cast<int32_t>(~Page::kPageAlignmentMask)));
+  Register page_start = object;
 
-  // Compute the bit addr in the remembered set, leave it in "addr".
-  masm->subq(addr, object);
+  // Compute the pointer's word index within the page, which is also the
+  // index of its bit in the remembered set.  Reuse 'addr' as
+  // pointer_offset.
+  masm->subq(addr, page_start);
   masm->shr(addr, Immediate(kPointerSizeLog2));
+  Register pointer_offset = addr;
 
   // If the bit offset lies beyond the normal remembered set range, it is in
   // the extra remembered set area of a large object.
-  masm->cmpq(addr, Immediate(Page::kPageSize / kPointerSize));
+  masm->cmpq(pointer_offset, Immediate(Page::kPageSize / kPointerSize));
   masm->j(less, &fast);
 
-  // Adjust 'addr' to be relative to the start of the extra remembered set
-  // and the page address in 'object' to be the address of the extra
-  // remembered set.
-  masm->subq(addr, Immediate(Page::kPageSize / kPointerSize));
+  // Adjust 'page_start' so that addressing using 'pointer_offset' hits the
+  // extra remembered set after the large object.
+
   // Load the array length into 'scratch'.
   masm->movl(scratch,
-             Operand(object,
+             Operand(page_start,
                      Page::kObjectStartOffset + FixedArray::kLengthOffset));
-  // Extra remembered set starts right after FixedArray.
-  // Add the page header, array header, and array body size
-  // (length * pointer size) to the page address to find the extra remembered
-  // set start.
-  masm->lea(object,
-            Operand(object, scratch, times_pointer_size,
-                    Page::kObjectStartOffset + FixedArray::kHeaderSize));
+  Register array_length = scratch;
+
+  // The extra remembered set starts right after the large object (a
+  // FixedArray), at
+  //   page_start + kObjectStartOffset + objectSize
+  // where objectSize is FixedArray::kHeaderSize + kPointerSize * array_length.
+  // Add the delta between the end of the normal RSet and the start of the
+  // extra RSet to 'page_start', so that addressing the bit using
+  // 'pointer_offset' hits the extra RSet words.
+  masm->lea(page_start,
+            Operand(page_start, array_length, times_pointer_size,
+                    Page::kObjectStartOffset + FixedArray::kHeaderSize
+                        - Page::kRSetEndOffset));
 
   // NOTE: For now, we use the bit-test-and-set (bts) x86 instruction
   // to limit code size. We should probably evaluate this decision by
   // measuring the performance of an equivalent implementation using
   // "simpler" instructions.
   masm->bind(&fast);
-  masm->bts(Operand(object, Page::kRSetOffset), addr);
+  masm->bts(Operand(page_start, Page::kRSetOffset), pointer_offset);
 }
 
 
@@ -181,7 +188,7 @@
     } else {
       // array access: calculate the destination address in the same manner as
       // KeyedStoreIC::GenerateGeneric.  Multiply a smi by 4 to get an offset
-      // into an array of words.
+      // into an array of pointers.
       lea(dst, Operand(object, dst, times_half_pointer_size,
                        FixedArray::kHeaderSize - kHeapObjectTag));
     }
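
A worked sketch of the remembered-set addressing computed in the helper
above, assuming 8-byte pointers and an illustrative 8K page size;
page_start + kRSetOffset is treated as a bit array indexed by
pointer_offset:

#include <cstdint>
#include <cstdio>

const uintptr_t kPageAlignmentMask = 0x1fff;  // Illustrative 8K pages.
const int kPointerSizeLog2 = 3;               // 8-byte pointers.

// Which bit of the page's remembered set covers 'addr'?
uintptr_t RSetBitIndex(uintptr_t addr) {
  uintptr_t page_start = addr & ~kPageAlignmentMask;  // and_ above
  return (addr - page_start) >> kPointerSizeLog2;     // subq + shr above
}

int main() {
  // 4 pointers into the page starting at 0x10000.
  std::printf("bit %lu\n", (unsigned long) RSetBitIndex(0x10020));  // bit 4
  return 0;
}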
diff --git a/src/x64/virtual-frame-x64.cc b/src/x64/virtual-frame-x64.cc
index 888fdc2..fb911a1 100644
--- a/src/x64/virtual-frame-x64.cc
+++ b/src/x64/virtual-frame-x64.cc
@@ -206,6 +206,7 @@
 
 
 void VirtualFrame::Drop(int count) {
+  ASSERT(count >= 0);
   ASSERT(height() >= count);
   int num_virtual_elements = (element_count() - 1) - stack_pointer_;
 
diff --git a/test/mozilla/mozilla.status b/test/mozilla/mozilla.status
index d427e89..c4628a0 100644
--- a/test/mozilla/mozilla.status
+++ b/test/mozilla/mozilla.status
@@ -810,21 +810,3 @@
 # when the 64-bit port is fully debugged.
 
 js1_2/regexp/regress-9141: FAIL
-js1_5/Regress/regress-211590: CRASH
-js1_5/Regress/regress-303213: PASS || CRASH
-js1_5/extensions/regress-336410-2: CRASH
-js1_5/extensions/regress-336410-1: CRASH
-js1_5/Function/regress-338001: FAIL || CRASH
-js1_5/extensions/regress-371636: CRASH
-# The following failures were added when remembered sets were enabled.
-js1_5/GC/regress-203278-2: FAIL || PASS || CRASH
-js1_5/GC/regress-203278-3: FAIL || PASS
-js1_5/Regress/regress-280769-3: FAIL || PASS
-js1_5/Regress/regress-280769-4: CRASH || TIMEOUT
-js1_5/Regress/regress-290575: CRASH
-js1_5/extensions/regress-365692: FAIL || PASS
-js1_5/Regress/regress-366601: FAIL
-js1_5/Regress/regress-367561-03: CRASH
-js1_5/Regress/regress-367561-01: CRASH || PASS
-ecma/Expressions/11.7.2: CRASH
-