Version 2.2.21

Fix bug in externalizing some ASCII strings (Chromium issue 47824).

Update JSON.stringify to floor the space parameter (issue 753).

Update the Mozilla test expectations to the newest version.

Update the ES5 Conformance Test expectations to the latest version.

Update the V8 benchmark suite.

Provide actual breakpoints locations in response to setBreakpoint
and listBreakpoints requests.

git-svn-id: http://v8.googlecode.com/svn/trunk@4988 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
diff --git a/src/arm/assembler-arm.cc b/src/arm/assembler-arm.cc
index f8d98db..c8170b3 100644
--- a/src/arm/assembler-arm.cc
+++ b/src/arm/assembler-arm.cc
@@ -2112,6 +2112,18 @@
 }
 
 
+
+void Assembler::vsqrt(const DwVfpRegister dst,
+                      const DwVfpRegister src,
+                      const Condition cond) {
+  // cond(31-28) | 11101 (27-23)| D=?(22) | 11 (21-20) | 0001 (19-16) |
+  // Vd(15-12) | 101(11-9) | sz(8)=1 | 11 (7-6) | M(5)=? | 0(4) | Vm(3-0)
+  ASSERT(CpuFeatures::IsEnabled(VFP3));
+  emit(cond | 0xE*B24 | B23 | 0x3*B20 | B16 |
+       dst.code()*B12 | 0x5*B9 | B8 | 3*B6 | src.code());
+}
+
+
 // Pseudo instructions.
 void Assembler::nop(int type) {
   // This is mov rx, rx.
diff --git a/src/arm/assembler-arm.h b/src/arm/assembler-arm.h
index 54b584a..8a4173d 100644
--- a/src/arm/assembler-arm.h
+++ b/src/arm/assembler-arm.h
@@ -988,6 +988,9 @@
             const Condition cond = al);
   void vmrs(const Register dst,
             const Condition cond = al);
+  void vsqrt(const DwVfpRegister dst,
+             const DwVfpRegister src,
+             const Condition cond = al);
 
   // Pseudo instructions
   void nop(int type = 0);
diff --git a/src/arm/codegen-arm.cc b/src/arm/codegen-arm.cc
index b6639ae..f923c09 100644
--- a/src/arm/codegen-arm.cc
+++ b/src/arm/codegen-arm.cc
@@ -4279,22 +4279,147 @@
 }
 
 
-// Generates the Math.pow method - currently just calls runtime.
+// Generates the Math.pow method.
 void CodeGenerator::GenerateMathPow(ZoneList<Expression*>* args) {
   ASSERT(args->length() == 2);
   Load(args->at(0));
   Load(args->at(1));
-  frame_->CallRuntime(Runtime::kMath_pow, 2);
-  frame_->EmitPush(r0);
+
+  if (!CpuFeatures::IsSupported(VFP3)) {
+    frame_->CallRuntime(Runtime::kMath_pow, 2);
+    frame_->EmitPush(r0);
+  } else {
+    CpuFeatures::Scope scope(VFP3);
+    JumpTarget runtime, done;
+    Label not_minus_half, allocate_return;
+
+    Register scratch1 = VirtualFrame::scratch0();
+    Register scratch2 = VirtualFrame::scratch1();
+
+    // Get base and exponent to registers.
+    Register exponent = frame_->PopToRegister();
+    Register base = frame_->PopToRegister(exponent);
+
+    // Set the frame for the runtime jump target. The code below jumps to the
+    // jump target label so the frame needs to be established before that.
+    ASSERT(runtime.entry_frame() == NULL);
+    runtime.set_entry_frame(frame_);
+
+    __ BranchOnSmi(exponent, runtime.entry_label());
+
+    // Special handling of raising to the power of -0.5 and 0.5. First check
+    // that the value is a heap number and that the lower bits (which for both
+    // values are zero).
+    Register heap_number_map = r6;
+    __ LoadRoot(heap_number_map, Heap::kHeapNumberMapRootIndex);
+    __ ldr(scratch1, FieldMemOperand(exponent, HeapObject::kMapOffset));
+    __ ldr(scratch2, FieldMemOperand(exponent, HeapNumber::kMantissaOffset));
+    __ cmp(scratch1, heap_number_map);
+    runtime.Branch(ne);
+    __ tst(scratch2, scratch2);
+    runtime.Branch(ne);
+
+    // Load the e
+    __ ldr(scratch1, FieldMemOperand(exponent, HeapNumber::kExponentOffset));
+
+    // Compare exponent with -0.5.
+    __ cmp(scratch1, Operand(0xbfe00000));
+    __ b(ne, &not_minus_half);
+
+    // Get the double value from the base into vfp register d0.
+    __ ObjectToDoubleVFPRegister(base, d0,
+                                 scratch1, scratch2, heap_number_map, s0,
+                                 runtime.entry_label(),
+                                 AVOID_NANS_AND_INFINITIES);
+
+    // Load 1.0 into d2.
+    __ mov(scratch2, Operand(0x3ff00000));
+    __ mov(scratch1, Operand(0));
+    __ vmov(d2, scratch1, scratch2);
+
+    // Calculate the reciprocal of the square root. 1/sqrt(x) = sqrt(1/x).
+    __ vdiv(d0, d2, d0);
+    __ vsqrt(d0, d0);
+
+    __ b(&allocate_return);
+
+    __ bind(&not_minus_half);
+    // Compare exponent with 0.5.
+    __ cmp(scratch1, Operand(0x3fe00000));
+    runtime.Branch(ne);
+
+      // Get the double value from the base into vfp register d0.
+    __ ObjectToDoubleVFPRegister(base, d0,
+                                 scratch1, scratch2, heap_number_map, s0,
+                                 runtime.entry_label(),
+                                 AVOID_NANS_AND_INFINITIES);
+    __ vsqrt(d0, d0);
+
+    __ bind(&allocate_return);
+    __ AllocateHeapNumberWithValue(
+        base, d0, scratch1, scratch2, heap_number_map, runtime.entry_label());
+    done.Jump();
+
+    runtime.Bind();
+
+    // Push back the arguments again for the runtime call.
+    frame_->EmitPush(base);
+    frame_->EmitPush(exponent);
+    frame_->CallRuntime(Runtime::kMath_pow, 2);
+    __ Move(base, r0);
+
+    done.Bind();
+    frame_->EmitPush(base);
+  }
 }
 
 
-// Generates the Math.sqrt method - currently just calls runtime.
+// Generates the Math.sqrt method.
 void CodeGenerator::GenerateMathSqrt(ZoneList<Expression*>* args) {
   ASSERT(args->length() == 1);
   Load(args->at(0));
-  frame_->CallRuntime(Runtime::kMath_sqrt, 1);
-  frame_->EmitPush(r0);
+
+  if (!CpuFeatures::IsSupported(VFP3)) {
+    frame_->CallRuntime(Runtime::kMath_sqrt, 1);
+    frame_->EmitPush(r0);
+  } else {
+    CpuFeatures::Scope scope(VFP3);
+    JumpTarget runtime, done;
+
+    Register scratch1 = VirtualFrame::scratch0();
+    Register scratch2 = VirtualFrame::scratch1();
+
+    // Get the value from the frame.
+    Register tos = frame_->PopToRegister();
+
+    // Set the frame for the runtime jump target. The code below jumps to the
+    // jump target label so the frame needs to be established before that.
+    ASSERT(runtime.entry_frame() == NULL);
+    runtime.set_entry_frame(frame_);
+
+    Register heap_number_map = r6;
+    __ LoadRoot(heap_number_map, Heap::kHeapNumberMapRootIndex);
+
+    // Get the double value from the heap number into vfp register d0.
+    __ ObjectToDoubleVFPRegister(tos, d0,
+                                 scratch1, scratch2, heap_number_map, s0,
+                                 runtime.entry_label());
+
+    // Calculate the square root of d0 and place result in a heap number object.
+    __ vsqrt(d0, d0);
+    __ AllocateHeapNumberWithValue(
+        tos, d0, scratch1, scratch2, heap_number_map, runtime.entry_label());
+    done.Jump();
+
+    runtime.Bind();
+    // Push back the argument again for the runtime call.
+    frame_->EmitPush(tos);
+    frame_->CallRuntime(Runtime::kMath_sqrt, 1);
+    __ Move(tos, r0);
+
+    done.Bind();
+    frame_->EmitPush(tos);
+  }
 }
 
 
@@ -6257,7 +6382,6 @@
 #undef __
 #define __ ACCESS_MASM(masm)
 
-
 Handle<String> Reference::GetName() {
   ASSERT(type_ == NAMED);
   Property* property = expression_->AsProperty();
@@ -6621,7 +6745,7 @@
   __ bind(&not_special);
   // Count leading zeros.  Uses mantissa for a scratch register on pre-ARM5.
   // Gets the wrong answer for 0, but we already checked for that case above.
-  __ CountLeadingZeros(source_, mantissa, zeros_);
+  __ CountLeadingZeros(zeros_, source_, mantissa);
   // Compute exponent and or it into the exponent register.
   // We use mantissa as a scratch register here.  Use a fudge factor to
   // divide the constant 31 + HeapNumber::kExponentBias, 0x41d, into two parts
@@ -7350,7 +7474,7 @@
 
     // If we have floating point hardware, inline ADD, SUB, MUL, and DIV,
     // using registers d7 and d6 for the double values.
-    if (use_fp_registers) {
+    if (CpuFeatures::IsSupported(VFP3)) {
       CpuFeatures::Scope scope(VFP3);
       __ mov(r7, Operand(rhs, ASR, kSmiTagSize));
       __ vmov(s15, r7);
@@ -7358,8 +7482,12 @@
       __ mov(r7, Operand(lhs, ASR, kSmiTagSize));
       __ vmov(s13, r7);
       __ vcvt_f64_s32(d6, s13);
+      if (!use_fp_registers) {
+        __ vmov(r2, r3, d7);
+        __ vmov(r0, r1, d6);
+      }
     } else {
-      // Write Smi from rhs to r3 and r2 in double format.  r3 is scratch.
+      // Write Smi from rhs to r3 and r2 in double format.  r9 is scratch.
       __ mov(r7, Operand(rhs));
       ConvertToDoubleStub stub1(r3, r2, r7, r9);
       __ push(lr);
@@ -7434,12 +7562,15 @@
     __ AllocateHeapNumber(r5, r4, r7, heap_number_map, &slow);
     }
 
-    if (use_fp_registers) {
+    if (CpuFeatures::IsSupported(VFP3)) {
       CpuFeatures::Scope scope(VFP3);
       // Convert smi in r0 to double in d7.
       __ mov(r7, Operand(r0, ASR, kSmiTagSize));
       __ vmov(s15, r7);
       __ vcvt_f64_s32(d7, s15);
+      if (!use_fp_registers) {
+        __ vmov(r2, r3, d7);
+      }
     } else {
       // Write Smi from r0 to r3 and r2 in double format.
       __ mov(r7, Operand(r0));
@@ -7490,12 +7621,15 @@
     __ AllocateHeapNumber(r5, r4, r7, heap_number_map, &slow);
     }
 
-    if (use_fp_registers) {
+    if (CpuFeatures::IsSupported(VFP3)) {
       CpuFeatures::Scope scope(VFP3);
       // Convert smi in r1 to double in d6.
       __ mov(r7, Operand(r1, ASR, kSmiTagSize));
       __ vmov(s13, r7);
       __ vcvt_f64_s32(d6, s13);
+      if (!use_fp_registers) {
+        __ vmov(r0, r1, d6);
+      }
     } else {
       // Write Smi from r1 to r1 and r0 in double format.
       __ mov(r7, Operand(r1));
@@ -7942,6 +8076,173 @@
 }
 
 
+// This uses versions of the sum-of-digits-to-see-if-a-number-is-divisible-by-3
+// trick.  See http://en.wikipedia.org/wiki/Divisibility_rule
+// Takes the sum of the digits base (mask + 1) repeatedly until we have a
+// number from 0 to mask.  On exit the 'eq' condition flags are set if the
+// answer is exactly the mask.
+void IntegerModStub::DigitSum(MacroAssembler* masm,
+                              Register lhs,
+                              int mask,
+                              int shift,
+                              Label* entry) {
+  ASSERT(mask > 0);
+  ASSERT(mask <= 0xff);  // This ensures we don't need ip to use it.
+  Label loop;
+  __ bind(&loop);
+  __ and_(ip, lhs, Operand(mask));
+  __ add(lhs, ip, Operand(lhs, LSR, shift));
+  __ bind(entry);
+  __ cmp(lhs, Operand(mask));
+  __ b(gt, &loop);
+}
+
+
+void IntegerModStub::DigitSum(MacroAssembler* masm,
+                              Register lhs,
+                              Register scratch,
+                              int mask,
+                              int shift1,
+                              int shift2,
+                              Label* entry) {
+  ASSERT(mask > 0);
+  ASSERT(mask <= 0xff);  // This ensures we don't need ip to use it.
+  Label loop;
+  __ bind(&loop);
+  __ bic(scratch, lhs, Operand(mask));
+  __ and_(ip, lhs, Operand(mask));
+  __ add(lhs, ip, Operand(lhs, LSR, shift1));
+  __ add(lhs, lhs, Operand(scratch, LSR, shift2));
+  __ bind(entry);
+  __ cmp(lhs, Operand(mask));
+  __ b(gt, &loop);
+}
+
+
+// Splits the number into two halves (bottom half has shift bits).  The top
+// half is subtracted from the bottom half.  If the result is negative then
+// rhs is added.
+void IntegerModStub::ModGetInRangeBySubtraction(MacroAssembler* masm,
+                                                Register lhs,
+                                                int shift,
+                                                int rhs) {
+  int mask = (1 << shift) - 1;
+  __ and_(ip, lhs, Operand(mask));
+  __ sub(lhs, ip, Operand(lhs, LSR, shift), SetCC);
+  __ add(lhs, lhs, Operand(rhs), LeaveCC, mi);
+}
+
+
+void IntegerModStub::ModReduce(MacroAssembler* masm,
+                               Register lhs,
+                               int max,
+                               int denominator) {
+  int limit = denominator;
+  while (limit * 2 <= max) limit *= 2;
+  while (limit >= denominator) {
+    __ cmp(lhs, Operand(limit));
+    __ sub(lhs, lhs, Operand(limit), LeaveCC, ge);
+    limit >>= 1;
+  }
+}
+
+
+void IntegerModStub::ModAnswer(MacroAssembler* masm,
+                               Register result,
+                               Register shift_distance,
+                               Register mask_bits,
+                               Register sum_of_digits) {
+  __ add(result, mask_bits, Operand(sum_of_digits, LSL, shift_distance));
+  __ Ret();
+}
+
+
+// See comment for class.
+void IntegerModStub::Generate(MacroAssembler* masm) {
+  __ mov(lhs_, Operand(lhs_, LSR, shift_distance_));
+  __ bic(odd_number_, odd_number_, Operand(1));
+  __ mov(odd_number_, Operand(odd_number_, LSL, 1));
+  // We now have (odd_number_ - 1) * 2 in the register.
+  // Build a switch out of branches instead of data because it avoids
+  // having to teach the assembler about intra-code-object pointers
+  // that are not in relative branch instructions.
+  Label mod3, mod5, mod7, mod9, mod11, mod13, mod15, mod17, mod19;
+  Label mod21, mod23, mod25;
+  { Assembler::BlockConstPoolScope block_const_pool(masm);
+    __ add(pc, pc, Operand(odd_number_));
+    // When you read pc it is always 8 ahead, but when you write it you always
+    // write the actual value.  So we put in two nops to take up the slack.
+    __ nop();
+    __ nop();
+    __ b(&mod3);
+    __ b(&mod5);
+    __ b(&mod7);
+    __ b(&mod9);
+    __ b(&mod11);
+    __ b(&mod13);
+    __ b(&mod15);
+    __ b(&mod17);
+    __ b(&mod19);
+    __ b(&mod21);
+    __ b(&mod23);
+    __ b(&mod25);
+  }
+
+  // For each denominator we find a multiple that is almost only ones
+  // when expressed in binary.  Then we do the sum-of-digits trick for
+  // that number.  If the multiple is not 1 then we have to do a little
+  // more work afterwards to get the answer into the 0-denominator-1
+  // range.
+  DigitSum(masm, lhs_, 3, 2, &mod3);  // 3 = b11.
+  __ sub(lhs_, lhs_, Operand(3), LeaveCC, eq);
+  ModAnswer(masm, result_, shift_distance_, mask_bits_, lhs_);
+
+  DigitSum(masm, lhs_, 0xf, 4, &mod5);  // 5 * 3 = b1111.
+  ModGetInRangeBySubtraction(masm, lhs_, 2, 5);
+  ModAnswer(masm, result_, shift_distance_, mask_bits_, lhs_);
+
+  DigitSum(masm, lhs_, 7, 3, &mod7);  // 7 = b111.
+  __ sub(lhs_, lhs_, Operand(7), LeaveCC, eq);
+  ModAnswer(masm, result_, shift_distance_, mask_bits_, lhs_);
+
+  DigitSum(masm, lhs_, 0x3f, 6, &mod9);  // 7 * 9 = b111111.
+  ModGetInRangeBySubtraction(masm, lhs_, 3, 9);
+  ModAnswer(masm, result_, shift_distance_, mask_bits_, lhs_);
+
+  DigitSum(masm, lhs_, r5, 0x3f, 6, 3, &mod11);  // 5 * 11 = b110111.
+  ModReduce(masm, lhs_, 0x3f, 11);
+  ModAnswer(masm, result_, shift_distance_, mask_bits_, lhs_);
+
+  DigitSum(masm, lhs_, r5, 0xff, 8, 5, &mod13);  // 19 * 13 = b11110111.
+  ModReduce(masm, lhs_, 0xff, 13);
+  ModAnswer(masm, result_, shift_distance_, mask_bits_, lhs_);
+
+  DigitSum(masm, lhs_, 0xf, 4, &mod15);  // 15 = b1111.
+  __ sub(lhs_, lhs_, Operand(15), LeaveCC, eq);
+  ModAnswer(masm, result_, shift_distance_, mask_bits_, lhs_);
+
+  DigitSum(masm, lhs_, 0xff, 8, &mod17);  // 15 * 17 = b11111111.
+  ModGetInRangeBySubtraction(masm, lhs_, 4, 17);
+  ModAnswer(masm, result_, shift_distance_, mask_bits_, lhs_);
+
+  DigitSum(masm, lhs_, r5, 0xff, 8, 5, &mod19);  // 13 * 19 = b11110111.
+  ModReduce(masm, lhs_, 0xff, 19);
+  ModAnswer(masm, result_, shift_distance_, mask_bits_, lhs_);
+
+  DigitSum(masm, lhs_, 0x3f, 6, &mod21);  // 3 * 21 = b111111.
+  ModReduce(masm, lhs_, 0x3f, 21);
+  ModAnswer(masm, result_, shift_distance_, mask_bits_, lhs_);
+
+  DigitSum(masm, lhs_, r5, 0xff, 8, 7, &mod23);  // 11 * 23 = b11111101.
+  ModReduce(masm, lhs_, 0xff, 23);
+  ModAnswer(masm, result_, shift_distance_, mask_bits_, lhs_);
+
+  DigitSum(masm, lhs_, r5, 0x7f, 7, 6, &mod25);  // 5 * 25 = b1111101.
+  ModReduce(masm, lhs_, 0x7f, 25);
+  ModAnswer(masm, result_, shift_distance_, mask_bits_, lhs_);
+}
+
+
 const char* GenericBinaryOpStub::GetName() {
   if (name_ != NULL) return name_;
   const int len = 100;
@@ -8069,7 +8370,7 @@
     case Token::MOD: {
       Label not_smi;
       if (ShouldGenerateSmiCode() && specialized_on_rhs_) {
-        Label smi_is_unsuitable;
+        Label lhs_is_unsuitable;
         __ BranchOnNotSmi(lhs, &not_smi);
         if (IsPowerOf2(constant_rhs_)) {
           if (op_ == Token::MOD) {
@@ -8090,14 +8391,14 @@
             __ eor(rhs, rhs, Operand(0x80000000u), SetCC);
             // Next two instructions are conditional on the answer being -0.
             __ mov(rhs, Operand(Smi::FromInt(constant_rhs_)), LeaveCC, eq);
-            __ b(eq, &smi_is_unsuitable);
+            __ b(eq, &lhs_is_unsuitable);
             // We need to subtract the dividend.  Eg. -3 % 4 == -3.
             __ sub(result, rhs, Operand(Smi::FromInt(constant_rhs_)));
           } else {
             ASSERT(op_ == Token::DIV);
             __ tst(lhs,
                    Operand(0x80000000u | ((constant_rhs_ << kSmiTagSize) - 1)));
-            __ b(ne, &smi_is_unsuitable);  // Go slow on negative or remainder.
+            __ b(ne, &lhs_is_unsuitable);  // Go slow on negative or remainder.
             int shift = 0;
             int d = constant_rhs_;
             while ((d & 1) == 0) {
@@ -8110,7 +8411,7 @@
         } else {
           // Not a power of 2.
           __ tst(lhs, Operand(0x80000000u));
-          __ b(ne, &smi_is_unsuitable);
+          __ b(ne, &lhs_is_unsuitable);
           // Find a fixed point reciprocal of the divisor so we can divide by
           // multiplying.
           double divisor = 1.0 / constant_rhs_;
@@ -8145,7 +8446,7 @@
           // (lhs / rhs) where / indicates integer division.
           if (op_ == Token::DIV) {
             __ cmp(lhs, Operand(scratch, LSL, required_scratch_shift));
-            __ b(ne, &smi_is_unsuitable);  // There was a remainder.
+            __ b(ne, &lhs_is_unsuitable);  // There was a remainder.
             __ mov(result, Operand(scratch2, LSL, kSmiTagSize));
           } else {
             ASSERT(op_ == Token::MOD);
@@ -8153,14 +8454,21 @@
           }
         }
         __ Ret();
-        __ bind(&smi_is_unsuitable);
+        __ bind(&lhs_is_unsuitable);
       } else if (op_ == Token::MOD &&
                  runtime_operands_type_ != BinaryOpIC::HEAP_NUMBERS &&
                  runtime_operands_type_ != BinaryOpIC::STRINGS) {
         // Do generate a bit of smi code for modulus even though the default for
         // modulus is not to do it, but as the ARM processor has no coprocessor
-        // support for modulus checking for smis makes sense.
+        // support for modulus checking for smis makes sense.  We can handle
+        // 1 to 25 times any power of 2.  This covers over half the numbers from
+        // 1 to 100 including all of the first 25.  (Actually the constants < 10
+        // are handled above by reciprocal multiplication.  We only get here for
+        // those cases if the right hand side is not a constant or for cases
+        // like 192 which is 3*2^6 and ends up in the 3 case in the integer mod
+        // stub.)
         Label slow;
+        Label not_power_of_2;
         ASSERT(!ShouldGenerateSmiCode());
         ASSERT(kSmiTag == 0);  // Adjust code below.
         // Check for two positive smis.
@@ -8168,13 +8476,42 @@
         __ tst(smi_test_reg, Operand(0x80000000u | kSmiTagMask));
         __ b(ne, &slow);
         // Check that rhs is a power of two and not zero.
+        Register mask_bits = r3;
         __ sub(scratch, rhs, Operand(1), SetCC);
         __ b(mi, &slow);
-        __ tst(rhs, scratch);
-        __ b(ne, &slow);
+        __ and_(mask_bits, rhs, Operand(scratch), SetCC);
+        __ b(ne, &not_power_of_2);
         // Calculate power of two modulus.
         __ and_(result, lhs, Operand(scratch));
         __ Ret();
+
+        __ bind(&not_power_of_2);
+        __ eor(scratch, scratch, Operand(mask_bits));
+        // At least two bits are set in the modulus.  The high one(s) are in
+        // mask_bits and the low one is scratch + 1.
+        __ and_(mask_bits, scratch, Operand(lhs));
+        Register shift_distance = scratch;
+        scratch = no_reg;
+
+        // The rhs consists of a power of 2 multiplied by some odd number.
+        // The power-of-2 part we handle by putting the corresponding bits
+        // from the lhs in the mask_bits register, and the power in the
+        // shift_distance register.  Shift distance is never 0 due to Smi
+        // tagging.
+        __ CountLeadingZeros(r4, shift_distance, shift_distance);
+        __ rsb(shift_distance, r4, Operand(32));
+
+        // Now we need to find out what the odd number is. The last bit is
+        // always 1.
+        Register odd_number = r4;
+        __ mov(odd_number, Operand(rhs, LSR, shift_distance));
+        __ cmp(odd_number, Operand(25));
+        __ b(gt, &slow);
+
+        IntegerModStub stub(
+            result, shift_distance, odd_number, mask_bits, lhs, r5);
+        __ Jump(stub.GetCode(), RelocInfo::CODE_TARGET);  // Tail call.
+
         __ bind(&slow);
       }
       HandleBinaryOpSlowCases(
diff --git a/src/arm/codegen-arm.h b/src/arm/codegen-arm.h
index 925d267..2d8a935 100644
--- a/src/arm/codegen-arm.h
+++ b/src/arm/codegen-arm.h
@@ -881,6 +881,102 @@
 };
 
 
+// This stub can do a fast mod operation without using fp.
+// It is tail called from the GenericBinaryOpStub and it always
+// returns an answer.  It never causes GC so it doesn't need a real frame.
+//
+// The inputs are always positive Smis.  This is never called
+// where the denominator is a power of 2.  We handle that separately.
+//
+// If we consider the denominator as an odd number multiplied by a power of 2,
+// then:
+// * The exponent (power of 2) is in the shift_distance register.
+// * The odd number is in the odd_number register.  It is always in the range
+//   of 3 to 25.
+// * The bits from the numerator that are to be copied to the answer (there are
+//   shift_distance of them) are in the mask_bits register.
+// * The other bits of the numerator have been shifted down and are in the lhs
+//   register.
+class IntegerModStub : public CodeStub {
+ public:
+  IntegerModStub(Register result,
+                 Register shift_distance,
+                 Register odd_number,
+                 Register mask_bits,
+                 Register lhs,
+                 Register scratch)
+      : result_(result),
+        shift_distance_(shift_distance),
+        odd_number_(odd_number),
+        mask_bits_(mask_bits),
+        lhs_(lhs),
+        scratch_(scratch) {
+    // We don't code these in the minor key, so they should always be the same.
+    // We don't really want to fix that since this stub is rather large and we
+    // don't want many copies of it.
+    ASSERT(shift_distance_.is(r9));
+    ASSERT(odd_number_.is(r4));
+    ASSERT(mask_bits_.is(r3));
+    ASSERT(scratch_.is(r5));
+  }
+
+ private:
+  Register result_;
+  Register shift_distance_;
+  Register odd_number_;
+  Register mask_bits_;
+  Register lhs_;
+  Register scratch_;
+
+  // Minor key encoding in 16 bits.
+  class ResultRegisterBits: public BitField<int, 0, 4> {};
+  class LhsRegisterBits: public BitField<int, 4, 4> {};
+
+  Major MajorKey() { return IntegerMod; }
+  int MinorKey() {
+    // Encode the parameters in a unique 16 bit value.
+    return ResultRegisterBits::encode(result_.code())
+           | LhsRegisterBits::encode(lhs_.code());
+  }
+
+  void Generate(MacroAssembler* masm);
+
+  const char* GetName() { return "IntegerModStub"; }
+
+  // Utility functions.
+  void DigitSum(MacroAssembler* masm,
+                Register lhs,
+                int mask,
+                int shift,
+                Label* entry);
+  void DigitSum(MacroAssembler* masm,
+                Register lhs,
+                Register scratch,
+                int mask,
+                int shift1,
+                int shift2,
+                Label* entry);
+  void ModGetInRangeBySubtraction(MacroAssembler* masm,
+                                  Register lhs,
+                                  int shift,
+                                  int rhs);
+  void ModReduce(MacroAssembler* masm,
+                 Register lhs,
+                 int max,
+                 int denominator);
+  void ModAnswer(MacroAssembler* masm,
+                 Register result,
+                 Register shift_distance,
+                 Register mask_bits,
+                 Register sum_of_digits);
+
+
+#ifdef DEBUG
+  void Print() { PrintF("IntegerModStub\n"); }
+#endif
+};
+
+
 // This stub can convert a signed int32 to a heap number (double).  It does
 // not work for int32s that are in Smi range!  No GC occurs during this stub
 // so you don't have to set up the frame.
diff --git a/src/arm/constants-arm.cc b/src/arm/constants-arm.cc
index 4e186d1..002e4c1 100644
--- a/src/arm/constants-arm.cc
+++ b/src/arm/constants-arm.cc
@@ -85,7 +85,7 @@
 
 const char* VFPRegisters::Name(int reg, bool is_double) {
   ASSERT((0 <= reg) && (reg < kNumVFPRegisters));
-  return names_[reg + is_double ? kNumVFPSingleRegisters : 0];
+  return names_[reg + (is_double ? kNumVFPSingleRegisters : 0)];
 }
 
 
diff --git a/src/arm/disasm-arm.cc b/src/arm/disasm-arm.cc
index 4005369..fb17d45 100644
--- a/src/arm/disasm-arm.cc
+++ b/src/arm/disasm-arm.cc
@@ -1038,7 +1038,8 @@
 // Dd = vmul(Dn, Dm)
 // Dd = vdiv(Dn, Dm)
 // vcmp(Dd, Dm)
-// VMRS
+// vmrs
+// Dd = vsqrt(Dm)
 void Decoder::DecodeTypeVFP(Instr* instr) {
   ASSERT((instr->TypeField() == 7) && (instr->Bit(24) == 0x0) );
   ASSERT(instr->Bits(11, 9) == 0x5);
@@ -1056,6 +1057,8 @@
       } else if (((instr->Opc2Field() == 0x4) || (instr->Opc2Field() == 0x5)) &&
                  (instr->Opc3Field() & 0x1)) {
         DecodeVCMP(instr);
+      } else if (((instr->Opc2Field() == 0x1)) && (instr->Opc3Field() == 0x3)) {
+        Format(instr, "vsqrt.f64'cond 'Dd, 'Dm");
       } else {
         Unknown(instr);  // Not used by V8.
       }
diff --git a/src/arm/ic-arm.cc b/src/arm/ic-arm.cc
index 134dfa3..0af1036 100644
--- a/src/arm/ic-arm.cc
+++ b/src/arm/ic-arm.cc
@@ -1722,7 +1722,7 @@
     // Count leading zeros.
     // Gets the wrong answer for 0, but we already checked for that case above.
     Register zeros = scratch2;
-    __ CountLeadingZeros(ival, scratch1, zeros);
+    __ CountLeadingZeros(zeros, ival, scratch1);
 
     // Compute exponent and or it into the exponent register.
     __ rsb(scratch1,
diff --git a/src/arm/macro-assembler-arm.cc b/src/arm/macro-assembler-arm.cc
index df7565f..630e0b8 100644
--- a/src/arm/macro-assembler-arm.cc
+++ b/src/arm/macro-assembler-arm.cc
@@ -1369,6 +1369,56 @@
 }
 
 
+void MacroAssembler::ObjectToDoubleVFPRegister(Register object,
+                                               DwVfpRegister result,
+                                               Register scratch1,
+                                               Register scratch2,
+                                               Register heap_number_map,
+                                               SwVfpRegister scratch3,
+                                               Label* not_number,
+                                               ObjectToDoubleFlags flags) {
+  Label done;
+  if ((flags & OBJECT_NOT_SMI) == 0) {
+    Label not_smi;
+    BranchOnNotSmi(object, &not_smi);
+    // Remove smi tag and convert to double.
+    mov(scratch1, Operand(object, ASR, kSmiTagSize));
+    vmov(scratch3, scratch1);
+    vcvt_f64_s32(result, scratch3);
+    b(&done);
+    bind(&not_smi);
+  }
+  // Check for heap number and load double value from it.
+  ldr(scratch1, FieldMemOperand(object, HeapObject::kMapOffset));
+  sub(scratch2, object, Operand(kHeapObjectTag));
+  cmp(scratch1, heap_number_map);
+  b(ne, not_number);
+  if ((flags & AVOID_NANS_AND_INFINITIES) != 0) {
+    // If exponent is all ones the number is either a NaN or +/-Infinity.
+    ldr(scratch1, FieldMemOperand(object, HeapNumber::kExponentOffset));
+    Sbfx(scratch1,
+         scratch1,
+         HeapNumber::kExponentShift,
+         HeapNumber::kExponentBits);
+    // All-one value sign extend to -1.
+    cmp(scratch1, Operand(-1));
+    b(eq, not_number);
+  }
+  vldr(result, scratch2, HeapNumber::kValueOffset);
+  bind(&done);
+}
+
+
+void MacroAssembler::SmiToDoubleVFPRegister(Register smi,
+                                            DwVfpRegister value,
+                                            Register scratch1,
+                                            SwVfpRegister scratch2) {
+  mov(scratch1, Operand(smi, ASR, kSmiTagSize));
+  vmov(scratch2, scratch1);
+  vcvt_f64_s32(value, scratch2);
+}
+
+
 void MacroAssembler::GetLeastBitsFromSmi(Register dst,
                                          Register src,
                                          int num_least_bits) {
@@ -1686,14 +1736,31 @@
 }
 
 
-void MacroAssembler::CountLeadingZeros(Register source,
-                                       Register scratch,
-                                       Register zeros) {
+void MacroAssembler::AllocateHeapNumberWithValue(Register result,
+                                                 DwVfpRegister value,
+                                                 Register scratch1,
+                                                 Register scratch2,
+                                                 Register heap_number_map,
+                                                 Label* gc_required) {
+  AllocateHeapNumber(result, scratch1, scratch2, heap_number_map, gc_required);
+  sub(scratch1, result, Operand(kHeapObjectTag));
+  vstr(value, scratch1, HeapNumber::kValueOffset);
+}
+
+
+void MacroAssembler::CountLeadingZeros(Register zeros,   // Answer.
+                                       Register source,  // Input.
+                                       Register scratch) {
+  ASSERT(!zeros.is(source) || !source.is(zeros));
+  ASSERT(!zeros.is(scratch));
+  ASSERT(!scratch.is(ip));
+  ASSERT(!source.is(ip));
+  ASSERT(!zeros.is(ip));
 #ifdef CAN_USE_ARMV5_INSTRUCTIONS
   clz(zeros, source);  // This instruction is only supported after ARM5.
 #else
   mov(zeros, Operand(0));
-  mov(scratch, source);
+  Move(scratch, source);
   // Top 16.
   tst(scratch, Operand(0xffff0000));
   add(zeros, zeros, Operand(16), LeaveCC, eq);
diff --git a/src/arm/macro-assembler-arm.h b/src/arm/macro-assembler-arm.h
index e02a6c8..c3f45a6 100644
--- a/src/arm/macro-assembler-arm.h
+++ b/src/arm/macro-assembler-arm.h
@@ -67,6 +67,17 @@
 };
 
 
+// Flags used for the ObjectToDoubleVFPRegister function.
+enum ObjectToDoubleFlags {
+  // No special flags.
+  NO_OBJECT_TO_DOUBLE_FLAGS = 0,
+  // Object is known to be a non smi.
+  OBJECT_NOT_SMI = 1 << 0,
+  // Don't load NaNs or infinities, branch to the non number case instead.
+  AVOID_NANS_AND_INFINITIES = 1 << 1
+};
+
+
 // MacroAssembler implements a collection of frequently used macros.
 class MacroAssembler: public Assembler {
  public:
@@ -381,6 +392,13 @@
                           Register scratch2,
                           Register heap_number_map,
                           Label* gc_required);
+  void AllocateHeapNumberWithValue(Register result,
+                                   DwVfpRegister value,
+                                   Register scratch1,
+                                   Register scratch2,
+                                   Register heap_number_map,
+                                   Label* gc_required);
+
 
   // ---------------------------------------------------------------------------
   // Support functions.
@@ -469,12 +487,35 @@
                                          Register outHighReg,
                                          Register outLowReg);
 
+  // Load the value of a number object into a VFP double register. If the object
+  // is not a number a jump to the label not_number is performed and the VFP
+  // double register is unchanged.
+  void ObjectToDoubleVFPRegister(
+      Register object,
+      DwVfpRegister value,
+      Register scratch1,
+      Register scratch2,
+      Register heap_number_map,
+      SwVfpRegister scratch3,
+      Label* not_number,
+      ObjectToDoubleFlags flags = NO_OBJECT_TO_DOUBLE_FLAGS);
+
+  // Load the value of a smi object into a VFP double register. The register
+  // scratch1 can be the same register as smi in which case smi will hold the
+  // untagged value afterwards.
+  void SmiToDoubleVFPRegister(Register smi,
+                              DwVfpRegister value,
+                              Register scratch1,
+                              SwVfpRegister scratch2);
+
   // Count leading zeros in a 32 bit word.  On ARM5 and later it uses the clz
   // instruction.  On pre-ARM5 hardware this routine gives the wrong answer
-  // for 0 (31 instead of 32).
-  void CountLeadingZeros(Register source,
-                         Register scratch,
-                         Register zeros);
+  // for 0 (31 instead of 32).  Source and scratch can be the same in which case
+  // the source is clobbered.  Source and zeros can also be the same in which
+  // case scratch should be a different register.
+  void CountLeadingZeros(Register zeros,
+                         Register source,
+                         Register scratch);
 
   // ---------------------------------------------------------------------------
   // Runtime calls
diff --git a/src/arm/simulator-arm.cc b/src/arm/simulator-arm.cc
index 77776c2..6240cd4 100644
--- a/src/arm/simulator-arm.cc
+++ b/src/arm/simulator-arm.cc
@@ -26,6 +26,7 @@
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdlib.h>
+#include <math.h>
 #include <cstdarg>
 #include "v8.h"
 
@@ -2262,7 +2263,8 @@
 // Dd = vmul(Dn, Dm)
 // Dd = vdiv(Dn, Dm)
 // vcmp(Dd, Dm)
-// VMRS
+// vmrs
+// Dd = vsqrt(Dm)
 void Simulator::DecodeTypeVFP(Instr* instr) {
   ASSERT((instr->TypeField() == 7) && (instr->Bit(24) == 0x0) );
   ASSERT(instr->Bits(11, 9) == 0x5);
@@ -2284,6 +2286,11 @@
       } else if (((instr->Opc2Field() == 0x4) || (instr->Opc2Field() == 0x5)) &&
                  (instr->Opc3Field() & 0x1)) {
         DecodeVCMP(instr);
+      } else if (((instr->Opc2Field() == 0x1)) && (instr->Opc3Field() == 0x3)) {
+        // vsqrt
+        double dm_value = get_double_from_d_register(vm);
+        double dd_value = sqrt(dm_value);
+        set_d_register_from_double(vd, dd_value);
       } else {
         UNREACHABLE();  // Not used by V8.
       }