Upgrade to V8 3.6

Merge V8 at 3.6.6.11

Simple merge; only the makefiles needed updating.

Bug: 5688872
Change-Id: Ib38b7ffbcd409585f6cb6fccc59c767029cecc77
diff --git a/src/mips/assembler-mips.cc b/src/mips/assembler-mips.cc
index 28ac557..e01a0ca 100644
--- a/src/mips/assembler-mips.cc
+++ b/src/mips/assembler-mips.cc
@@ -49,11 +49,47 @@
 unsigned CpuFeatures::supported_ = 0;
 unsigned CpuFeatures::found_by_runtime_probing_ = 0;
 
+
+// Get the CPU features enabled by the build.
+// For cross compilation, the preprocessor symbol
+// CAN_USE_FPU_INSTRUCTIONS can be defined to enable FPU
+// instructions when building the snapshot.
+static uint64_t CpuFeaturesImpliedByCompiler() {
+  uint64_t answer = 0;
+#ifdef CAN_USE_FPU_INSTRUCTIONS
+  answer |= 1u << FPU;
+#endif  // def CAN_USE_FPU_INSTRUCTIONS
+
+#ifdef __mips__
+  // If the compiler is allowed to use the FPU, then we can use the FPU in
+  // our code generation too, even when generating snapshots. This won't
+  // work for cross compilation.
+#if defined(__mips_hard_float) && __mips_hard_float != 0
+  answer |= 1u << FPU;
+#endif  // defined(__mips_hard_float) && __mips_hard_float != 0
+#endif  // def __mips__
+
+  return answer;
+}
+
+
 void CpuFeatures::Probe() {
   ASSERT(!initialized_);
 #ifdef DEBUG
   initialized_ = true;
 #endif
+
+  // Get the features implied by the OS and the compiler settings. This is the
+  // minimal set of features which is also allowed for generated code in the
+  // snapshot.
+  supported_ |= OS::CpuFeaturesImpliedByPlatform();
+  supported_ |= CpuFeaturesImpliedByCompiler();
+
+  if (Serializer::enabled()) {
+    // No probing for features if we might serialize (generate snapshot).
+    return;
+  }
+
   // If the compiler is allowed to use fpu then we can use fpu too in our
   // code generation.
 #if !defined(__mips__)
@@ -62,11 +98,7 @@
       supported_ |= 1u << FPU;
   }
 #else
-  if (Serializer::enabled()) {
-    supported_ |= OS::CpuFeaturesImpliedByPlatform();
-    return;  // No features if we might serialize.
-  }
-
+  // Probe for additional features not already known to be available.
   if (OS::MipsCpuHasFeature(FPU)) {
     // This implementation also sets the FPU flags if
     // runtime detection of FPU returns true.
@@ -140,7 +172,8 @@
 // -----------------------------------------------------------------------------
 // Implementation of RelocInfo.
 
-const int RelocInfo::kApplyMask = 1 << RelocInfo::INTERNAL_REFERENCE;
+const int RelocInfo::kApplyMask = RelocInfo::kCodeTargetMask |
+                                  1 << RelocInfo::INTERNAL_REFERENCE;
 
 
 bool RelocInfo::IsCodedSpecially() {
@@ -514,6 +547,19 @@
 }
 
 
+bool Assembler::IsJal(Instr instr) {
+  return GetOpcodeField(instr) == JAL;
+}
+
+bool Assembler::IsJr(Instr instr) {
+  return GetOpcodeField(instr) == SPECIAL && GetFunctionField(instr) == JR;
+}
+
+bool Assembler::IsJalr(Instr instr) {
+  return GetOpcodeField(instr) == SPECIAL && GetFunctionField(instr) == JALR;
+}
+
+
 bool Assembler::IsLui(Instr instr) {
   uint32_t opcode = GetOpcodeField(instr);
   // Checks if the instruction is a load upper immediate.
@@ -907,7 +953,7 @@
 
 
 void Assembler::GenInstrJump(Opcode opcode,
-                              uint32_t address) {
+                             uint32_t address) {
   BlockTrampolinePoolScope block_trampoline_pool(this);
   ASSERT(is_uint26(address));
   Instr instr = opcode | address;
@@ -1080,7 +1126,12 @@
 
 
 void Assembler::j(int32_t target) {
-  ASSERT(is_uint28(target) && ((target & 3) == 0));
+#ifdef DEBUG
+  // Get pc of delay slot.
+  uint32_t ipc = reinterpret_cast<uint32_t>(pc_ + 1 * kInstrSize);
+  bool in_range = ((uint32_t)(ipc^target) >> (kImm26Bits+kImmFieldShift)) == 0;
+  ASSERT(in_range && ((target & 3) == 0));
+#endif
   GenInstrJump(J, target >> 2);
 }
 
@@ -1096,8 +1147,13 @@
 
 
 void Assembler::jal(int32_t target) {
+#ifdef DEBUG
+  // Get pc of delay slot.
+  uint32_t ipc = reinterpret_cast<uint32_t>(pc_ + 1 * kInstrSize);
+  bool in_range = ((uint32_t)(ipc^target) >> (kImm26Bits+kImmFieldShift)) == 0;
+  ASSERT(in_range && ((target & 3) == 0));
+#endif
   positions_recorder()->WriteRecordedPositions();
-  ASSERT(is_uint28(target) && ((target & 3) == 0));
   GenInstrJump(JAL, target >> 2);
 }
 
@@ -1110,6 +1166,32 @@
 }
 
 
+void Assembler::j_or_jr(int32_t target, Register rs) {
+  // Get pc of delay slot.
+  uint32_t ipc = reinterpret_cast<uint32_t>(pc_ + 1 * kInstrSize);
+  bool in_range = ((uint32_t)(ipc^target) >> (kImm26Bits+kImmFieldShift)) == 0;
+
+  if (in_range) {
+    j(target);
+  } else {
+    jr(rs);
+  }
+}
+
+
+void Assembler::jal_or_jalr(int32_t target, Register rs) {
+  // Get pc of delay slot.
+  uint32_t ipc = reinterpret_cast<uint32_t>(pc_ + 1 * kInstrSize);
+  bool in_range = ((uint32_t)(ipc^target) >> (kImm26Bits+kImmFieldShift)) == 0;
+
+  if (in_range) {
+    jal(target);
+  } else {
+    jalr(rs);
+  }
+}
+
+
 //-------Data-processing-instructions---------
 
 // Arithmetic.
@@ -1582,6 +1664,13 @@
   GenInstrRegister(COP1, CFC1, rt, fs);
 }
 
+void Assembler::DoubleAsTwoUInt32(double d, uint32_t* lo, uint32_t* hi) {
+  uint64_t i;
+  memcpy(&i, &d, sizeof(i));
+
+  *lo = i & 0xffffffff;
+  *hi = i >> 32;
+}
 
 // Arithmetic.
 
@@ -1940,10 +2029,15 @@
   }
   if (rinfo.rmode() != RelocInfo::NONE) {
     // Don't record external references unless the heap will be serialized.
-    if (rmode == RelocInfo::EXTERNAL_REFERENCE &&
-        !Serializer::enabled() &&
-        !FLAG_debug_code) {
-      return;
+    if (rmode == RelocInfo::EXTERNAL_REFERENCE) {
+#ifdef DEBUG
+      if (!Serializer::enabled()) {
+        Serializer::TooLateToEnableNow();
+      }
+#endif
+      if (!Serializer::enabled() && !emit_debug_code()) {
+        return;
+      }
     }
     ASSERT(buffer_space() >= kMaxRelocSize);  // Too late to grow buffer here.
     if (rmode == RelocInfo::CODE_TARGET_WITH_ID) {
@@ -2038,30 +2132,142 @@
 }
 
 
+// On MIPS, a target address is stored in a lui/ori instruction pair, each
+// of which loads 16 bits of the 32-bit address into a register.
+// Patching the address must replace both instructions and flush the i-cache.
+//
+// There is an optimization below, which emits a nop when the address
+// fits in just 16 bits. This is unlikely to help; it should be benchmarked
+// and possibly removed.
 void Assembler::set_target_address_at(Address pc, Address target) {
-  // On MIPS we patch the address into lui/ori instruction pair.
-
-  // First check we have an li (lui/ori pair).
   Instr instr2 = instr_at(pc + kInstrSize);
-#ifdef DEBUG
-  Instr instr1 = instr_at(pc);
-
-  // Check we have indeed the result from a li with MustUseReg true.
-  CHECK((GetOpcodeField(instr1) == LUI && GetOpcodeField(instr2) == ORI));
-#endif
-
   uint32_t rt_code = GetRtField(instr2);
   uint32_t* p = reinterpret_cast<uint32_t*>(pc);
   uint32_t itarget = reinterpret_cast<uint32_t>(target);
 
-  // lui rt, high-16.
-  // ori rt rt, low-16.
+#ifdef DEBUG
+  // Check that we have the result of a li macro-instruction (lui/ori pair).
+  Instr instr1 = instr_at(pc);
+  CHECK((GetOpcodeField(instr1) == LUI && GetOpcodeField(instr2) == ORI));
+#endif
+
+  // Must use 2 instructions to ensure patchable code => just use lui and ori.
+  // lui rt, upper-16.
+  // ori rt, rt, lower-16.
   *p = LUI | rt_code | ((itarget & kHiMask) >> kLuiShift);
   *(p+1) = ORI | rt_code | (rt_code << 5) | (itarget & kImm16Mask);
 
-  CPU::FlushICache(pc, 2 * sizeof(int32_t));
+  // The following code is an optimization for the common case of Call()
+  // or Jump(), which loads an address into a register and jumps through it:
+  //     li(t9, address); jalr(t9)    (or jr(t9)).
+  // If the destination address is in the same 256 MB region as the call,
+  // it is faster to do a direct jal or j than to jump through a register,
+  // since that lets the CPU pipeline prefetch the target address. However,
+  // each time the address above is patched, we must also patch the direct
+  // jal/j instruction, and possibly revert to jalr/jr if we now cross a
+  // 256 MB boundary. Note that with the jal/j instructions we no longer
+  // need to load the register, but that code is left in place, since it
+  // makes it easy to revert this process. A further optimization could try
+  // replacing the li sequence with nops.
+  // This optimization can only be applied if the rt code of instr2 is the
+  // register used by the jalr/jr. Finally, we must skip 'jr ra', the MIPS
+  // return sequence, which occasionally lands right after an li().
+
+  Instr instr3 = instr_at(pc + 2 * kInstrSize);
+  uint32_t ipc = reinterpret_cast<uint32_t>(pc + 3 * kInstrSize);
+  bool in_range =
+             ((uint32_t)(ipc ^ itarget) >> (kImm26Bits + kImmFieldShift)) == 0;
+  uint32_t target_field = (uint32_t)(itarget & kJumpAddrMask) >> kImmFieldShift;
+  bool patched_jump = false;
+
+#ifndef ALLOW_JAL_IN_BOUNDARY_REGION
+  // This is a workaround for the 24k core E156 bug (it affects some 34k
+  // cores as well). Since the excluded space is only 64 KB out of 256 MB
+  // (0.02%), we apply the workaround on all cores rather than identify them.
+  if (in_range) {
+    // The 24k core E156 bug has some very specific requirements; we only
+    // check the simplest one: whether the address of the delay slot
+    // instruction is in the first or last 32 KB of the 256 MB segment.
+    uint32_t segment_mask = ((256 * MB) - 1) ^ ((32 * KB) - 1);
+    uint32_t ipc_segment_addr = ipc & segment_mask;
+    if (ipc_segment_addr == 0 || ipc_segment_addr == segment_mask)
+      in_range = false;
+  }
+#endif
+
+  if (IsJalr(instr3)) {
+    // Try to convert JALR to JAL.
+    if (in_range && GetRt(instr2) == GetRs(instr3)) {
+      *(p+2) = JAL | target_field;
+      patched_jump = true;
+    }
+  } else if (IsJr(instr3)) {
+    // Try to convert JR to J, skipping returns (jr ra).
+    bool is_ret = static_cast<int>(GetRs(instr3)) == ra.code();
+    if (in_range && !is_ret && GetRt(instr2) == GetRs(instr3)) {
+      *(p+2) = J | target_field;
+      patched_jump = true;
+    }
+  } else if (IsJal(instr3)) {
+    if (in_range) {
+      // We are patching an already converted JAL.
+      *(p+2) = JAL | target_field;
+    } else {
+      // Patching a JAL whose target is now out of range; revert to JALR.
+      // JALR rs reg is the rt reg specified in the ORI instruction.
+      uint32_t rs_field = GetRt(instr2) << kRsShift;
+      uint32_t rd_field = ra.code() << kRdShift;  // Return-address (ra) reg.
+      *(p+2) = SPECIAL | rs_field | rd_field | JALR;
+    }
+    patched_jump = true;
+  } else if (IsJ(instr3)) {
+    if (in_range) {
+      // We are patching an already converted J (jump).
+      *(p+2) = J | target_field;
+    } else {
+      // Patching a J whose target is now out of range; revert to JR.
+      // JR 'rs' reg is the 'rt' reg specified in the ORI instruction (instr2).
+      uint32_t rs_field = GetRt(instr2) << kRsShift;
+      *(p+2) = SPECIAL | rs_field | JR;
+    }
+    patched_jump = true;
+  }
+
+  CPU::FlushICache(pc, (patched_jump ? 3 : 2) * sizeof(int32_t));
 }
 
+void Assembler::JumpLabelToJumpRegister(Address pc) {
+  // Address pc points to a lui/ori instruction pair.
+  // A jump emitted for a bound label may follow at pc + 2 * kInstrSize.
+  uint32_t* p = reinterpret_cast<uint32_t*>(pc);
+#ifdef DEBUG
+  Instr instr1 = instr_at(pc);
+#endif
+  Instr instr2 = instr_at(pc + 1 * kInstrSize);
+  Instr instr3 = instr_at(pc + 2 * kInstrSize);
+  bool patched = false;
+
+  if (IsJal(instr3)) {
+    ASSERT(GetOpcodeField(instr1) == LUI);
+    ASSERT(GetOpcodeField(instr2) == ORI);
+
+    uint32_t rs_field = GetRt(instr2) << kRsShift;
+    uint32_t rd_field = ra.code() << kRdShift;  // Return-address (ra) reg.
+    *(p+2) = SPECIAL | rs_field | rd_field | JALR;
+    patched = true;
+  } else if (IsJ(instr3)) {
+    ASSERT(GetOpcodeField(instr1) == LUI);
+    ASSERT(GetOpcodeField(instr2) == ORI);
+
+    uint32_t rs_field = GetRt(instr2) << kRsShift;
+    *(p+2) = SPECIAL | rs_field | JR;
+    patched = true;
+  }
+
+  if (patched) {
+    CPU::FlushICache(pc + 2 * kInstrSize, sizeof(Address));
+  }
+}
 
 } }  // namespace v8::internal