Merge "Use try lock to fix class resolution race"
diff --git a/compiler/Android.mk b/compiler/Android.mk
index e3f8a5c..0b7bd9c 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -157,7 +157,6 @@
 
 LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES := \
   compiled_method.h \
-  dex/compiler_enums.h \
   dex/dex_to_dex_compiler.h \
   driver/compiler_driver.h \
   driver/compiler_options.h \
diff --git a/compiler/common_compiler_test.h b/compiler/common_compiler_test.h
index 2d139eb..c942375 100644
--- a/compiler/common_compiler_test.h
+++ b/compiler/common_compiler_test.h
@@ -115,13 +115,6 @@
   std::list<std::vector<uint8_t>> header_code_and_maps_chunks_;
 };
 
-// TODO: When read barrier works with all tests, get rid of this.
-#define TEST_DISABLED_FOR_READ_BARRIER() \
-  if (kUseReadBarrier) { \
-    printf("WARNING: TEST DISABLED FOR READ BARRIER\n"); \
-    return; \
-  }
-
 // TODO: When read barrier works with all Optimizing back ends, get rid of this.
 #define TEST_DISABLED_FOR_READ_BARRIER_WITH_OPTIMIZING_FOR_UNSUPPORTED_INSTRUCTION_SETS() \
   if (kUseReadBarrier && GetCompilerKind() == Compiler::kOptimizing) {                    \
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h
deleted file mode 100644
index 8800e4b..0000000
--- a/compiler/dex/compiler_enums.h
+++ /dev/null
@@ -1,677 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DEX_COMPILER_ENUMS_H_
-#define ART_COMPILER_DEX_COMPILER_ENUMS_H_
-
-#include "dex_instruction.h"
-
-namespace art {
-
-enum RegisterClass {
-  kInvalidRegClass,
-  kCoreReg,
-  kFPReg,
-  kRefReg,
-  kAnyReg,
-};
-std::ostream& operator<<(std::ostream& os, const RegisterClass& rhs);
-
-enum BitsUsed {
-  kSize32Bits,
-  kSize64Bits,
-  kSize128Bits,
-  kSize256Bits,
-  kSize512Bits,
-  kSize1024Bits,
-};
-std::ostream& operator<<(std::ostream& os, const BitsUsed& rhs);
-
-enum SpecialTargetRegister {
-  kSelf,            // Thread pointer.
-  kSuspend,         // Used to reduce suspend checks for some targets.
-  kLr,
-  kPc,
-  kSp,
-  kArg0,
-  kArg1,
-  kArg2,
-  kArg3,
-  kArg4,
-  kArg5,
-  kArg6,
-  kArg7,
-  kFArg0,
-  kFArg1,
-  kFArg2,
-  kFArg3,
-  kFArg4,
-  kFArg5,
-  kFArg6,
-  kFArg7,
-  kFArg8,
-  kFArg9,
-  kFArg10,
-  kFArg11,
-  kFArg12,
-  kFArg13,
-  kFArg14,
-  kFArg15,
-  kRet0,
-  kRet1,
-  kInvokeTgt,
-  kHiddenArg,
-  kHiddenFpArg,
-  kCount
-};
-std::ostream& operator<<(std::ostream& os, const SpecialTargetRegister& code);
-
-enum RegLocationType {
-  kLocDalvikFrame = 0,  // Normal Dalvik register
-  kLocPhysReg,
-  kLocCompilerTemp,
-  kLocInvalid
-};
-std::ostream& operator<<(std::ostream& os, const RegLocationType& rhs);
-
-enum BBType {
-  kNullBlock,
-  kEntryBlock,
-  kDalvikByteCode,
-  kExitBlock,
-  kExceptionHandling,
-  kDead,
-};
-std::ostream& operator<<(std::ostream& os, const BBType& code);
-
-// Shared pseudo opcodes - must be < 0.
-enum LIRPseudoOpcode {
-  kPseudoPrologueBegin = -18,
-  kPseudoPrologueEnd = -17,
-  kPseudoEpilogueBegin = -16,
-  kPseudoEpilogueEnd = -15,
-  kPseudoExportedPC = -14,
-  kPseudoSafepointPC = -13,
-  kPseudoIntrinsicRetry = -12,
-  kPseudoSuspendTarget = -11,
-  kPseudoThrowTarget = -10,
-  kPseudoCaseLabel = -9,
-  kPseudoBarrier = -8,
-  kPseudoEntryBlock = -7,
-  kPseudoExitBlock = -6,
-  kPseudoTargetLabel = -5,
-  kPseudoDalvikByteCodeBoundary = -4,
-  kPseudoPseudoAlign4 = -3,
-  kPseudoEHBlockLabel = -2,
-  kPseudoNormalBlockLabel = -1,
-};
-std::ostream& operator<<(std::ostream& os, const LIRPseudoOpcode& rhs);
-
-enum ExtendedMIROpcode {
-  kMirOpFirst = kNumPackedOpcodes,
-  kMirOpPhi = kMirOpFirst,
-
-  // @brief Copy from one VR to another.
-  // @details
-  // vA: destination VR
-  // vB: source VR
-  kMirOpCopy,
-
-  // @brief Used to do float comparison with less-than bias.
-  // @details Unlike cmpl-float, this does not store result of comparison in VR.
-  // vA: left-hand side VR for comparison.
-  // vB: right-hand side VR for comparison.
-  kMirOpFusedCmplFloat,
-
-  // @brief Used to do float comparison with greater-than bias.
-  // @details Unlike cmpg-float, this does not store result of comparison in VR.
-  // vA: left-hand side VR for comparison.
-  // vB: right-hand side VR for comparison.
-  kMirOpFusedCmpgFloat,
-
-  // @brief Used to do double comparison with less-than bias.
-  // @details Unlike cmpl-double, this does not store result of comparison in VR.
-  // vA: left-hand side wide VR for comparison.
-  // vB: right-hand side wide VR for comparison.
-  kMirOpFusedCmplDouble,
-
-  // @brief Used to do double comparison with greater-than bias.
-  // @details Unlike cmpl-double, this does not store result of comparison in VR.
-  // vA: left-hand side wide VR for comparison.
-  // vB: right-hand side wide VR for comparison.
-  kMirOpFusedCmpgDouble,
-
-  // @brief Used to do comparison of 64-bit long integers.
-  // @details Unlike cmp-long, this does not store result of comparison in VR.
-  // vA: left-hand side wide VR for comparison.
-  // vB: right-hand side wide VR for comparison.
-  kMirOpFusedCmpLong,
-
-  // @brief This represents no-op.
-  kMirOpNop,
-
-  // @brief Do a null check on the object register.
-  // @details The backends may implement this implicitly or explicitly. This MIR is guaranteed
-  // to have the correct offset as an exception thrower.
-  // vA: object register
-  kMirOpNullCheck,
-
-  kMirOpRangeCheck,
-  kMirOpDivZeroCheck,
-  kMirOpCheck,
-  kMirOpSelect,
-
-  // Vector opcodes:
-  // TypeSize is an encoded field giving the element type and the vector size.
-  // It is encoded as OpSize << 16 | (number of bits in vector)
-  //
-  // Destination and source are integers that will be interpreted by the
-  // backend that supports Vector operations.  Backends are permitted to support only
-  // certain vector register sizes.
-  //
-  // At this point, only two operand instructions are supported.  Three operand instructions
-  // could be supported by using a bit in TypeSize and arg[0] where needed.
-
-  // @brief MIR to move constant data to a vector register
-  // vA: destination
-  // vB: number of bits in register
-  // args[0]~args[3]: up to 128 bits of data for initialization
-  kMirOpConstVector,
-
-  // @brief MIR to move a vectorized register to another
-  // vA: destination
-  // vB: source
-  // vC: TypeSize
-  kMirOpMoveVector,
-
-  // @brief Packed multiply of units in two vector registers: vB = vB .* vC using vA to know the type of the vector.
-  // vA: destination and source
-  // vB: source
-  // vC: TypeSize
-  kMirOpPackedMultiply,
-
-  // @brief Packed addition of units in two vector registers: vB = vB .+ vC using vA to know the type of the vector.
-  // vA: destination and source
-  // vB: source
-  // vC: TypeSize
-  kMirOpPackedAddition,
-
-  // @brief Packed subtraction of units in two vector registers: vB = vB .- vC using vA to know the type of the vector.
-  // vA: destination and source
-  // vB: source
-  // vC: TypeSize
-  kMirOpPackedSubtract,
-
-  // @brief Packed shift left of units in two vector registers: vB = vB .<< vC using vA to know the type of the vector.
-  // vA: destination and source
-  // vB: amount to shift
-  // vC: TypeSize
-  kMirOpPackedShiftLeft,
-
-  // @brief Packed signed shift right of units in two vector registers: vB = vB .>> vC using vA to know the type of the vector.
-  // vA: destination and source
-  // vB: amount to shift
-  // vC: TypeSize
-  kMirOpPackedSignedShiftRight,
-
-  // @brief Packed unsigned shift right of units in two vector registers: vB = vB .>>> vC using vA to know the type of the vector.
-  // vA: destination and source
-  // vB: amount to shift
-  // vC: TypeSize
-  kMirOpPackedUnsignedShiftRight,
-
-  // @brief Packed bitwise and of units in two vector registers: vB = vB .& vC using vA to know the type of the vector.
-  // vA: destination and source
-  // vB: source
-  // vC: TypeSize
-  kMirOpPackedAnd,
-
-  // @brief Packed bitwise or of units in two vector registers: vB = vB .| vC using vA to know the type of the vector.
-  // vA: destination and source
-  // vB: source
-  // vC: TypeSize
-  kMirOpPackedOr,
-
-  // @brief Packed bitwise xor of units in two vector registers: vB = vB .^ vC using vA to know the type of the vector.
-  // vA: destination and source
-  // vB: source
-  // vC: TypeSize
-  kMirOpPackedXor,
-
-  // @brief Reduce a 128-bit packed element into a single VR by taking lower bits
-  // @details Instruction does a horizontal addition of the packed elements and then adds it to VR
-  // vA: destination and source VR (not vector register)
-  // vB: source (vector register)
-  // vC: TypeSize
-  kMirOpPackedAddReduce,
-
-  // @brief Extract a packed element into a single VR.
-  // vA: destination VR (not vector register)
-  // vB: source (vector register)
-  // vC: TypeSize
-  // arg[0]: The index to use for extraction from vector register (which packed element)
-  kMirOpPackedReduce,
-
-  // @brief Create a vector value, with all TypeSize values equal to vC
-  // vA: destination vector register
-  // vB: source VR (not vector register)
-  // vC: TypeSize
-  kMirOpPackedSet,
-
-  // @brief Reserve a range of vector registers.
-  // vA: Start vector register to reserve.
-  // vB: Inclusive end vector register to reserve.
-  // @note: The backend may choose to map vector numbers used in vector opcodes.
-  //  Reserved registers are removed from the list of backend temporary pool.
-  kMirOpReserveVectorRegisters,
-
-  // @brief Free a range of reserved vector registers
-  // vA: Start vector register to unreserve.
-  // vB: Inclusive end vector register to unreserve.
-  // @note: All currently reserved vector registers are returned to the temporary pool.
-  kMirOpReturnVectorRegisters,
-
-  // @brief Create a memory barrier.
-  // vA: a constant defined by enum MemBarrierKind.
-  kMirOpMemBarrier,
-
-  // @brief Used to fill a vector register with array values.
-  // @details Just as with normal arrays, access on null object register must ensure NullPointerException
-  // and invalid index must ensure ArrayIndexOutOfBoundsException. Exception behavior must be the same
-  // as the aget it replaced and must happen at same index. Therefore, it is generally recommended that
-  // before using this MIR, it is proven that exception is guaranteed to not be thrown and marked with
-  // MIR_IGNORE_NULL_CHECK and MIR_IGNORE_RANGE_CHECK.
-  // vA: destination vector register
-  // vB: array register
-  // vC: index register
-  // arg[0]: TypeSize (most other vector opcodes have this in vC)
-  kMirOpPackedArrayGet,
-
-  // @brief Used to store a vector register into array.
-  // @details Just as with normal arrays, access on null object register must ensure NullPointerException
-  // and invalid index must ensure ArrayIndexOutOfBoundsException. Exception behavior must be the same
-  // as the aget it replaced and must happen at same index. Therefore, it is generally recommended that
-  // before using this MIR, it is proven that exception is guaranteed to not be thrown and marked with
-  // MIR_IGNORE_NULL_CHECK and MIR_IGNORE_RANGE_CHECK.
-  // vA: source vector register
-  // vB: array register
-  // vC: index register
-  // arg[0]: TypeSize (most other vector opcodes have this in vC)
-  kMirOpPackedArrayPut,
-
-  // @brief Multiply-add integer.
-  // vA: destination
-  // vB: multiplicand
-  // vC: multiplier
-  // arg[0]: addend
-  kMirOpMaddInt,
-
-  // @brief Multiply-subtract integer.
-  // vA: destination
-  // vB: multiplicand
-  // vC: multiplier
-  // arg[0]: minuend
-  kMirOpMsubInt,
-
-  // @brief Multiply-add long.
-  // vA: destination
-  // vB: multiplicand
-  // vC: multiplier
-  // arg[0]: addend
-  kMirOpMaddLong,
-
-  // @brief Multiply-subtract long.
-  // vA: destination
-  // vB: multiplicand
-  // vC: multiplier
-  // arg[0]: minuend
-  kMirOpMsubLong,
-
-  kMirOpLast,
-};
-
-enum MIROptimizationFlagPositions {
-  kMIRIgnoreNullCheck = 0,
-  kMIRIgnoreRangeCheck,
-  kMIRIgnoreCheckCast,
-  kMIRStoreNonNullValue,              // Storing non-null value, always mark GC card.
-  kMIRClassIsInitialized,
-  kMIRClassIsInDexCache,
-  kMirIgnoreDivZeroCheck,
-  kMIRInlined,                        // Invoke is inlined (ie dead).
-  kMIRInlinedPred,                    // Invoke is inlined via prediction.
-  kMIRCallee,                         // Instruction is inlined from callee.
-  kMIRIgnoreSuspendCheck,
-  kMIRDup,
-  kMIRMark,                           // Temporary node mark can be used by
-                                      // opt passes for their private needs.
-  kMIRStoreNonTemporal,
-  kMIRLastMIRFlag,
-};
-
-// For successor_block_list.
-enum BlockListType {
-  kNotUsed = 0,
-  kCatch,
-  kPackedSwitch,
-  kSparseSwitch,
-};
-std::ostream& operator<<(std::ostream& os, const BlockListType& rhs);
-
-enum AssemblerStatus {
-  kSuccess,
-  kRetryAll,
-};
-std::ostream& operator<<(std::ostream& os, const AssemblerStatus& rhs);
-
-enum OpSize {
-  kWord,            // Natural word size of target (32/64).
-  k32,
-  k64,
-  kReference,       // Object reference; compressed on 64-bit targets.
-  kSingle,
-  kDouble,
-  kUnsignedHalf,
-  kSignedHalf,
-  kUnsignedByte,
-  kSignedByte,
-};
-std::ostream& operator<<(std::ostream& os, const OpSize& kind);
-
-enum OpKind {
-  kOpMov,
-  kOpCmov,
-  kOpMvn,
-  kOpCmp,
-  kOpLsl,
-  kOpLsr,
-  kOpAsr,
-  kOpRor,
-  kOpNot,
-  kOpAnd,
-  kOpOr,
-  kOpXor,
-  kOpNeg,
-  kOpAdd,
-  kOpAdc,
-  kOpSub,
-  kOpSbc,
-  kOpRsub,
-  kOpMul,
-  kOpDiv,
-  kOpRem,
-  kOpBic,
-  kOpCmn,
-  kOpTst,
-  kOpRev,
-  kOpRevsh,
-  kOpBkpt,
-  kOpBlx,
-  kOpPush,
-  kOpPop,
-  kOp2Char,
-  kOp2Short,
-  kOp2Byte,
-  kOpCondBr,
-  kOpUncondBr,
-  kOpBx,
-  kOpInvalid,
-};
-std::ostream& operator<<(std::ostream& os, const OpKind& rhs);
-
-enum MoveType {
-  kMov8GP,      // Move 8-bit general purpose register.
-  kMov16GP,     // Move 16-bit general purpose register.
-  kMov32GP,     // Move 32-bit general purpose register.
-  kMov64GP,     // Move 64-bit general purpose register.
-  kMov32FP,     // Move 32-bit FP register.
-  kMov64FP,     // Move 64-bit FP register.
-  kMovLo64FP,   // Move low 32-bits of 64-bit FP register.
-  kMovHi64FP,   // Move high 32-bits of 64-bit FP register.
-  kMovU128FP,   // Move 128-bit FP register to/from possibly unaligned region.
-  kMov128FP = kMovU128FP,
-  kMovA128FP,   // Move 128-bit FP register to/from region surely aligned to 16-bytes.
-  kMovLo128FP,  // Move low 64-bits of 128-bit FP register.
-  kMovHi128FP,  // Move high 64-bits of 128-bit FP register.
-};
-std::ostream& operator<<(std::ostream& os, const MoveType& kind);
-
-enum ConditionCode {
-  kCondEq,  // equal
-  kCondNe,  // not equal
-  kCondCs,  // carry set
-  kCondCc,  // carry clear
-  kCondUlt,  // unsigned less than
-  kCondUge,  // unsigned greater than or same
-  kCondMi,  // minus
-  kCondPl,  // plus, positive or zero
-  kCondVs,  // overflow
-  kCondVc,  // no overflow
-  kCondHi,  // unsigned greater than
-  kCondLs,  // unsigned lower or same
-  kCondGe,  // signed greater than or equal
-  kCondLt,  // signed less than
-  kCondGt,  // signed greater than
-  kCondLe,  // signed less than or equal
-  kCondAl,  // always
-  kCondNv,  // never
-};
-std::ostream& operator<<(std::ostream& os, const ConditionCode& kind);
-
-// Target specific condition encodings
-enum ArmConditionCode {
-  kArmCondEq = 0x0,  // 0000
-  kArmCondNe = 0x1,  // 0001
-  kArmCondCs = 0x2,  // 0010
-  kArmCondCc = 0x3,  // 0011
-  kArmCondMi = 0x4,  // 0100
-  kArmCondPl = 0x5,  // 0101
-  kArmCondVs = 0x6,  // 0110
-  kArmCondVc = 0x7,  // 0111
-  kArmCondHi = 0x8,  // 1000
-  kArmCondLs = 0x9,  // 1001
-  kArmCondGe = 0xa,  // 1010
-  kArmCondLt = 0xb,  // 1011
-  kArmCondGt = 0xc,  // 1100
-  kArmCondLe = 0xd,  // 1101
-  kArmCondAl = 0xe,  // 1110
-  kArmCondNv = 0xf,  // 1111
-};
-std::ostream& operator<<(std::ostream& os, const ArmConditionCode& kind);
-
-enum X86ConditionCode {
-  kX86CondO   = 0x0,    // overflow
-  kX86CondNo  = 0x1,    // not overflow
-
-  kX86CondB   = 0x2,    // below
-  kX86CondNae = kX86CondB,  // not-above-equal
-  kX86CondC   = kX86CondB,  // carry
-
-  kX86CondNb  = 0x3,    // not-below
-  kX86CondAe  = kX86CondNb,  // above-equal
-  kX86CondNc  = kX86CondNb,  // not-carry
-
-  kX86CondZ   = 0x4,    // zero
-  kX86CondEq  = kX86CondZ,  // equal
-
-  kX86CondNz  = 0x5,    // not-zero
-  kX86CondNe  = kX86CondNz,  // not-equal
-
-  kX86CondBe  = 0x6,    // below-equal
-  kX86CondNa  = kX86CondBe,  // not-above
-
-  kX86CondNbe = 0x7,    // not-below-equal
-  kX86CondA   = kX86CondNbe,  // above
-
-  kX86CondS   = 0x8,    // sign
-  kX86CondNs  = 0x9,    // not-sign
-
-  kX86CondP   = 0xa,    // 8-bit parity even
-  kX86CondPE  = kX86CondP,
-
-  kX86CondNp  = 0xb,    // 8-bit parity odd
-  kX86CondPo  = kX86CondNp,
-
-  kX86CondL   = 0xc,    // less-than
-  kX86CondNge = kX86CondL,  // not-greater-equal
-
-  kX86CondNl  = 0xd,    // not-less-than
-  kX86CondGe  = kX86CondNl,  // not-greater-equal
-
-  kX86CondLe  = 0xe,    // less-than-equal
-  kX86CondNg  = kX86CondLe,  // not-greater
-
-  kX86CondNle = 0xf,    // not-less-than
-  kX86CondG   = kX86CondNle,  // greater
-};
-std::ostream& operator<<(std::ostream& os, const X86ConditionCode& kind);
-
-enum DividePattern {
-  DivideNone,
-  Divide3,
-  Divide5,
-  Divide7,
-};
-std::ostream& operator<<(std::ostream& os, const DividePattern& pattern);
-
-/**
- * @brief Memory barrier types (see "The JSR-133 Cookbook for Compiler Writers").
- * @details We define the combined barrier types that are actually required
- * by the Java Memory Model, rather than using exactly the terminology from
- * the JSR-133 cookbook.  These should, in many cases, be replaced by acquire/release
- * primitives.  Note that the JSR-133 cookbook generally does not deal with
- * store atomicity issues, and the recipes there are not always entirely sufficient.
- * The current recipe is as follows:
- * -# Use AnyStore ~= (LoadStore | StoreStore) ~= release barrier before volatile store.
- * -# Use AnyAny barrier after volatile store.  (StoreLoad is as expensive.)
- * -# Use LoadAny barrier ~= (LoadLoad | LoadStore) ~= acquire barrier after each volatile load.
- * -# Use StoreStore barrier after all stores but before return from any constructor whose
- *    class has final fields.
- * -# Use NTStoreStore to order non-temporal stores with respect to all later
- *    store-to-memory instructions.  Only generated together with non-temporal stores.
- */
-enum MemBarrierKind {
-  kAnyStore,
-  kLoadAny,
-  kStoreStore,
-  kAnyAny,
-  kNTStoreStore,
-  kLastBarrierKind = kNTStoreStore
-};
-std::ostream& operator<<(std::ostream& os, const MemBarrierKind& kind);
-
-enum OpFeatureFlags {
-  kIsBranch = 0,
-  kNoOperand,
-  kIsUnaryOp,
-  kIsBinaryOp,
-  kIsTertiaryOp,
-  kIsQuadOp,
-  kIsQuinOp,
-  kIsSextupleOp,
-  kIsIT,
-  kIsMoveOp,
-  kMemLoad,
-  kMemStore,
-  kMemVolatile,
-  kMemScaledx0,
-  kMemScaledx2,
-  kMemScaledx4,
-  kPCRelFixup,  // x86 FIXME: add NEEDS_FIXUP to instruction attributes.
-  kRegDef0,
-  kRegDef1,
-  kRegDef2,
-  kRegDefA,
-  kRegDefD,
-  kRegDefFPCSList0,
-  kRegDefFPCSList2,
-  kRegDefList0,
-  kRegDefList1,
-  kRegDefList2,
-  kRegDefLR,
-  kRegDefSP,
-  kRegUse0,
-  kRegUse1,
-  kRegUse2,
-  kRegUse3,
-  kRegUse4,
-  kRegUseA,
-  kRegUseC,
-  kRegUseD,
-  kRegUseB,
-  kRegUseFPCSList0,
-  kRegUseFPCSList2,
-  kRegUseList0,
-  kRegUseList1,
-  kRegUseLR,
-  kRegUsePC,
-  kRegUseSP,
-  kSetsCCodes,
-  kUsesCCodes,
-  kUseFpStack,
-  kUseHi,
-  kUseLo,
-  kDefHi,
-  kDefLo
-};
-std::ostream& operator<<(std::ostream& os, const OpFeatureFlags& rhs);
-
-enum SelectInstructionKind {
-  kSelectNone,
-  kSelectConst,
-  kSelectMove,
-  kSelectGoto
-};
-std::ostream& operator<<(std::ostream& os, const SelectInstructionKind& kind);
-
-// LIR fixup kinds for Arm and X86.
-enum FixupKind {
-  kFixupNone,
-  kFixupLabel,             // For labels we just adjust the offset.
-  kFixupLoad,              // Mostly for immediates.
-  kFixupVLoad,             // FP load which *may* be pc-relative.
-  kFixupCBxZ,              // Cbz, Cbnz.
-  kFixupTBxZ,              // Tbz, Tbnz.
-  kFixupCondBranch,        // Conditional branch
-  kFixupT1Branch,          // Thumb1 Unconditional branch
-  kFixupT2Branch,          // Thumb2 Unconditional branch
-  kFixupBlx1,              // Blx1 (start of Blx1/Blx2 pair).
-  kFixupBl1,               // Bl1 (start of Bl1/Bl2 pair).
-  kFixupAdr,               // Adr.
-  kFixupMovImmLST,         // kThumb2MovImm16LST.
-  kFixupMovImmHST,         // kThumb2MovImm16HST.
-  kFixupAlign4,            // Align to 4-byte boundary.
-  kFixupA53Erratum835769,  // Cortex A53 Erratum 835769.
-  kFixupSwitchTable,       // X86_64 packed switch table.
-};
-std::ostream& operator<<(std::ostream& os, const FixupKind& kind);
-
-enum VolatileKind {
-  kNotVolatile,      // Load/Store is not volatile
-  kVolatile          // Load/Store is volatile
-};
-std::ostream& operator<<(std::ostream& os, const VolatileKind& kind);
-
-enum WideKind {
-  kNotWide,      // Non-wide view
-  kWide,         // Wide view
-  kRef           // Ref width
-};
-std::ostream& operator<<(std::ostream& os, const WideKind& kind);
-
-}  // namespace art
-
-#endif  // ART_COMPILER_DEX_COMPILER_ENUMS_H_
diff --git a/compiler/dex/quick/dex_file_method_inliner.h b/compiler/dex/quick/dex_file_method_inliner.h
index fbe403f..50dc032 100644
--- a/compiler/dex/quick/dex_file_method_inliner.h
+++ b/compiler/dex/quick/dex_file_method_inliner.h
@@ -21,7 +21,6 @@
 #include "base/mutex.h"
 #include "base/macros.h"
 #include "safe_map.h"
-#include "dex/compiler_enums.h"
 #include "dex_file.h"
 #include "quick/inline_method_analyser.h"
 
@@ -31,6 +30,13 @@
 class MethodVerifier;
 }  // namespace verifier
 
+enum OpSize {
+  k32,
+  k64,
+  kSignedHalf,
+  kSignedByte,
+};
+
 /**
  * Handles inlining of methods from a particular DexFile.
  *
diff --git a/compiler/linker/mips/relative_patcher_mips.cc b/compiler/linker/mips/relative_patcher_mips.cc
index 7c0423b..c09950c 100644
--- a/compiler/linker/mips/relative_patcher_mips.cc
+++ b/compiler/linker/mips/relative_patcher_mips.cc
@@ -49,6 +49,7 @@
                                                    uint32_t target_offset) {
   uint32_t anchor_literal_offset = patch.PcInsnOffset();
   uint32_t literal_offset = patch.LiteralOffset();
+  bool dex_cache_array = (patch.GetType() == LinkerPatch::Type::kDexCacheArray);
 
   // Basic sanity checks.
   if (is_r6) {
@@ -68,12 +69,16 @@
     DCHECK_GE(code->size(), 16u);
     DCHECK_LE(literal_offset, code->size() - 12u);
     DCHECK_GE(literal_offset, 4u);
-    DCHECK_EQ(literal_offset + 4u, anchor_literal_offset);
-    // NAL
-    DCHECK_EQ((*code)[literal_offset - 4], 0x00);
-    DCHECK_EQ((*code)[literal_offset - 3], 0x00);
-    DCHECK_EQ((*code)[literal_offset - 2], 0x10);
-    DCHECK_EQ((*code)[literal_offset - 1], 0x04);
+    // The NAL instruction may not precede immediately as the PC+0 value may
+    // come from HMipsComputeBaseMethodAddress.
+    if (dex_cache_array) {
+      DCHECK_EQ(literal_offset + 4u, anchor_literal_offset);
+      // NAL
+      DCHECK_EQ((*code)[literal_offset - 4], 0x00);
+      DCHECK_EQ((*code)[literal_offset - 3], 0x00);
+      DCHECK_EQ((*code)[literal_offset - 2], 0x10);
+      DCHECK_EQ((*code)[literal_offset - 1], 0x04);
+    }
     // LUI reg, offset_high
     DCHECK_EQ((*code)[literal_offset + 0], 0x34);
     DCHECK_EQ((*code)[literal_offset + 1], 0x12);
@@ -83,16 +88,22 @@
     DCHECK_EQ((*code)[literal_offset + 4], 0x78);
     DCHECK_EQ((*code)[literal_offset + 5], 0x56);
     DCHECK_EQ(((*code)[literal_offset + 7] & 0xFC), 0x34);
-    // ADDU reg, reg, RA
+    // ADDU reg, reg, reg2
     DCHECK_EQ((*code)[literal_offset + 8], 0x21);
     DCHECK_EQ(((*code)[literal_offset + 9] & 0x07), 0x00);
-    DCHECK_EQ(((*code)[literal_offset + 10] & 0x1F), 0x1F);
+    if (dex_cache_array) {
+      // reg2 must be RA here; otherwise it may come from HMipsComputeBaseMethodAddress.
+      DCHECK_EQ(((*code)[literal_offset + 10] & 0x1F), 0x1F);
+    }
     DCHECK_EQ(((*code)[literal_offset + 11] & 0xFC), 0x00);
   }
 
   // Apply patch.
   uint32_t anchor_offset = patch_offset - literal_offset + anchor_literal_offset;
-  uint32_t diff = target_offset - anchor_offset + kDexCacheArrayLwOffset;
+  uint32_t diff = target_offset - anchor_offset;
+  if (dex_cache_array) {
+    diff += kDexCacheArrayLwOffset;
+  }
   if (is_r6) {
     diff += (diff & 0x8000) << 1;  // Account for sign extension in ADDIU.
   }
diff --git a/compiler/linker/mips/relative_patcher_mips32r6_test.cc b/compiler/linker/mips/relative_patcher_mips32r6_test.cc
index 0f1dcbc..a16aaca 100644
--- a/compiler/linker/mips/relative_patcher_mips32r6_test.cc
+++ b/compiler/linker/mips/relative_patcher_mips32r6_test.cc
@@ -29,40 +29,78 @@
   Mips32r6RelativePatcherTest() : RelativePatcherTest(kMips, "mips32r6") {}
 
  protected:
+  static const uint8_t UnpatchedPcRelativeRawCode[];
+  static const uint32_t LiteralOffset;
+  static const uint32_t AnchorOffset;
+  static const ArrayRef<const uint8_t> UnpatchedPcRelativeCode;
+
   uint32_t GetMethodOffset(uint32_t method_idx) {
     auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx));
     CHECK(result.first);
     return result.second;
   }
+
+  void CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, uint32_t target_offset);
+  void TestDexCacheReference(uint32_t dex_cache_arrays_begin, uint32_t element_offset);
+  void TestStringReference(uint32_t string_offset);
 };
 
-TEST_F(Mips32r6RelativePatcherTest, DexCacheReference) {
-  dex_cache_arrays_begin_ = 0x12345678;
-  constexpr size_t kElementOffset = 0x1234;
-  static const uint8_t raw_code[] = {
-      0x34, 0x12, 0x5E, 0xEE,  // auipc s2, high(diff); placeholder = 0x1234
-      0x78, 0x56, 0x52, 0x26,  // addiu s2, s2, low(diff); placeholder = 0x5678
-  };
-  constexpr uint32_t literal_offset = 0;  // At auipc (where patching starts).
-  constexpr uint32_t anchor_offset = literal_offset;  // At auipc (where PC+0 points).
-  ArrayRef<const uint8_t> code(raw_code);
-  LinkerPatch patches[] = {
-      LinkerPatch::DexCacheArrayPatch(literal_offset, nullptr, anchor_offset, kElementOffset),
-  };
-  AddCompiledMethod(MethodRef(1u), code, ArrayRef<const LinkerPatch>(patches));
+const uint8_t Mips32r6RelativePatcherTest::UnpatchedPcRelativeRawCode[] = {
+    0x34, 0x12, 0x5E, 0xEE,  // auipc s2, high(diff); placeholder = 0x1234
+    0x78, 0x56, 0x52, 0x26,  // addiu s2, s2, low(diff); placeholder = 0x5678
+};
+const uint32_t Mips32r6RelativePatcherTest::LiteralOffset = 0;  // At auipc (where patching starts).
+const uint32_t Mips32r6RelativePatcherTest::AnchorOffset = 0;  // At auipc (where PC+0 points).
+const ArrayRef<const uint8_t> Mips32r6RelativePatcherTest::UnpatchedPcRelativeCode(
+    UnpatchedPcRelativeRawCode);
+
+void Mips32r6RelativePatcherTest::CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches,
+                                                       uint32_t target_offset) {
+  AddCompiledMethod(MethodRef(1u), UnpatchedPcRelativeCode, ArrayRef<const LinkerPatch>(patches));
   Link();
 
   auto result = method_offset_map_.FindMethodOffset(MethodRef(1u));
   ASSERT_TRUE(result.first);
-  uint32_t diff = dex_cache_arrays_begin_ + kElementOffset - (result.second + anchor_offset) +
-      kDexCacheArrayLwOffset;
+
+  uint32_t diff = target_offset - (result.second + AnchorOffset);
+  if (patches[0].GetType() == LinkerPatch::Type::kDexCacheArray) {
+    diff += kDexCacheArrayLwOffset;
+  }
   diff += (diff & 0x8000) << 1;  // Account for sign extension in addiu.
-  static const uint8_t expected_code[] = {
+
+  const uint8_t expected_code[] = {
       static_cast<uint8_t>(diff >> 16), static_cast<uint8_t>(diff >> 24), 0x5E, 0xEE,
       static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8), 0x52, 0x26,
   };
   EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
 }
 
+void Mips32r6RelativePatcherTest::TestDexCacheReference(uint32_t dex_cache_arrays_begin,
+                                                        uint32_t element_offset) {
+  dex_cache_arrays_begin_ = dex_cache_arrays_begin;
+  LinkerPatch patches[] = {
+      LinkerPatch::DexCacheArrayPatch(LiteralOffset, nullptr, AnchorOffset, element_offset)
+  };
+  CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches),
+                       dex_cache_arrays_begin_ + element_offset);
+}
+
+void Mips32r6RelativePatcherTest::TestStringReference(uint32_t string_offset) {
+  constexpr uint32_t kStringIndex = 1u;
+  string_index_to_offset_map_.Put(kStringIndex, string_offset);
+  LinkerPatch patches[] = {
+      LinkerPatch::RelativeStringPatch(LiteralOffset, nullptr, AnchorOffset, kStringIndex)
+  };
+  CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches), string_offset);
+}
+
+TEST_F(Mips32r6RelativePatcherTest, DexCacheReference) {
+  TestDexCacheReference(/* dex_cache_arrays_begin */ 0x12345678, /* element_offset */ 0x1234);
+}
+
+TEST_F(Mips32r6RelativePatcherTest, StringReference) {
+  TestStringReference(/* string_offset */ 0x87651234);
+}
+
 }  // namespace linker
 }  // namespace art
diff --git a/compiler/linker/mips/relative_patcher_mips_test.cc b/compiler/linker/mips/relative_patcher_mips_test.cc
index 8391b53..335ce2e 100644
--- a/compiler/linker/mips/relative_patcher_mips_test.cc
+++ b/compiler/linker/mips/relative_patcher_mips_test.cc
@@ -29,36 +29,47 @@
   MipsRelativePatcherTest() : RelativePatcherTest(kMips, "mips32r2") {}
 
  protected:
+  static const uint8_t UnpatchedPcRelativeRawCode[];
+  static const uint32_t LiteralOffset;
+  static const uint32_t AnchorOffset;
+  static const ArrayRef<const uint8_t> UnpatchedPcRelativeCode;
+
   uint32_t GetMethodOffset(uint32_t method_idx) {
     auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx));
     CHECK(result.first);
     return result.second;
   }
+
+  void CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, uint32_t target_offset);
+  void TestDexCacheReference(uint32_t dex_cache_arrays_begin, uint32_t element_offset);
+  void TestStringReference(uint32_t string_offset);
 };
 
-TEST_F(MipsRelativePatcherTest, DexCacheReference) {
-  dex_cache_arrays_begin_ = 0x12345678;
-  constexpr size_t kElementOffset = 0x1234;
-  static const uint8_t raw_code[] = {
-      0x00, 0x00, 0x10, 0x04,  // nal
-      0x34, 0x12, 0x12, 0x3C,  // lui  s2, high(diff); placeholder = 0x1234
-      0x78, 0x56, 0x52, 0x36,  // ori  s2, s2, low(diff); placeholder = 0x5678
-      0x21, 0x90, 0x5F, 0x02,  // addu s2, s2, ra
-  };
-  constexpr uint32_t literal_offset = 4;  // At lui (where patching starts).
-  constexpr uint32_t anchor_offset = 8;  // At ori (where PC+0 points).
-  ArrayRef<const uint8_t> code(raw_code);
-  LinkerPatch patches[] = {
-      LinkerPatch::DexCacheArrayPatch(literal_offset, nullptr, anchor_offset, kElementOffset),
-  };
-  AddCompiledMethod(MethodRef(1u), code, ArrayRef<const LinkerPatch>(patches));
+const uint8_t MipsRelativePatcherTest::UnpatchedPcRelativeRawCode[] = {
+    0x00, 0x00, 0x10, 0x04,  // nal
+    0x34, 0x12, 0x12, 0x3C,  // lui  s2, high(diff); placeholder = 0x1234
+    0x78, 0x56, 0x52, 0x36,  // ori  s2, s2, low(diff); placeholder = 0x5678
+    0x21, 0x90, 0x5F, 0x02,  // addu s2, s2, ra
+};
+const uint32_t MipsRelativePatcherTest::LiteralOffset = 4;  // At lui (where patching starts).
+const uint32_t MipsRelativePatcherTest::AnchorOffset = 8;  // At ori (where PC+0 points).
+const ArrayRef<const uint8_t> MipsRelativePatcherTest::UnpatchedPcRelativeCode(
+    UnpatchedPcRelativeRawCode);
+
+void MipsRelativePatcherTest::CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches,
+                                                   uint32_t target_offset) {
+  AddCompiledMethod(MethodRef(1u), UnpatchedPcRelativeCode, ArrayRef<const LinkerPatch>(patches));
   Link();
 
   auto result = method_offset_map_.FindMethodOffset(MethodRef(1u));
   ASSERT_TRUE(result.first);
-  uint32_t diff = dex_cache_arrays_begin_ + kElementOffset - (result.second + anchor_offset) +
-      kDexCacheArrayLwOffset;
-  static const uint8_t expected_code[] = {
+
+  uint32_t diff = target_offset - (result.second + AnchorOffset);
+  if (patches[0].GetType() == LinkerPatch::Type::kDexCacheArray) {
+    diff += kDexCacheArrayLwOffset;
+  }
+
+  const uint8_t expected_code[] = {
       0x00, 0x00, 0x10, 0x04,
       static_cast<uint8_t>(diff >> 16), static_cast<uint8_t>(diff >> 24), 0x12, 0x3C,
       static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8), 0x52, 0x36,
@@ -67,5 +78,32 @@
   EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
 }
 
+void MipsRelativePatcherTest::TestDexCacheReference(uint32_t dex_cache_arrays_begin,
+                                                    uint32_t element_offset) {
+  dex_cache_arrays_begin_ = dex_cache_arrays_begin;
+  LinkerPatch patches[] = {
+      LinkerPatch::DexCacheArrayPatch(LiteralOffset, nullptr, AnchorOffset, element_offset)
+  };
+  CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches),
+                       dex_cache_arrays_begin_ + element_offset);
+}
+
+void MipsRelativePatcherTest::TestStringReference(uint32_t string_offset) {
+  constexpr uint32_t kStringIndex = 1u;
+  string_index_to_offset_map_.Put(kStringIndex, string_offset);
+  LinkerPatch patches[] = {
+      LinkerPatch::RelativeStringPatch(LiteralOffset, nullptr, AnchorOffset, kStringIndex)
+  };
+  CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches), string_offset);
+}
+
+TEST_F(MipsRelativePatcherTest, DexCacheReference) {
+  TestDexCacheReference(/* dex_cache_arrays_begin */ 0x12345678, /* element_offset */ 0x1234);
+}
+
+TEST_F(MipsRelativePatcherTest, StringReference) {  // Exercises a RelativeStringPatch.
+  TestStringReference(/* string_offset */ 0x87651234);
+}
+
 }  // namespace linker
 }  // namespace art
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index 18ebfeb..0762eec 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -444,7 +444,7 @@
   EXPECT_EQ(72U, sizeof(OatHeader));
   EXPECT_EQ(4U, sizeof(OatMethodOffsets));
   EXPECT_EQ(20U, sizeof(OatQuickMethodHeader));
-  EXPECT_EQ(164 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints));
+  EXPECT_EQ(162 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints));
 }
 
 TEST_F(OatTest, OatHeaderIsValid) {
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 2042ade..62dd1cc 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -359,7 +359,8 @@
   // Return the entry point offset for ReadBarrierMarkRegX, where X is `reg`.
   template <size_t pointer_size>
   static int32_t GetReadBarrierMarkEntryPointsOffset(size_t reg) {
-    DCHECK_LT(reg, 32u);
+    // The entry point list defines 30 ReadBarrierMarkRegX entry points.
+    DCHECK_LT(reg, 30u);
     // The ReadBarrierMarkRegX entry points are ordered by increasing
     // register number in Thread::tls_Ptr_.quick_entrypoints.
     return QUICK_ENTRYPOINT_OFFSET(pointer_size, pReadBarrierMarkReg00).Int32Value()
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 05cb8d1..a07a233 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -18,7 +18,6 @@
 #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM_H_
 
 #include "code_generator.h"
-#include "dex/compiler_enums.h"
 #include "driver/compiler_options.h"
 #include "nodes.h"
 #include "string_reference.h"
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 88e8cea..03f5a33 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -20,7 +20,6 @@
 #include "arch/arm64/quick_method_frame_info_arm64.h"
 #include "code_generator.h"
 #include "common_arm64.h"
-#include "dex/compiler_enums.h"
 #include "driver/compiler_options.h"
 #include "nodes.h"
 #include "parallel_move_resolver.h"
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 39248aa..334d30d 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -482,11 +482,22 @@
       move_resolver_(graph->GetArena(), this),
       assembler_(graph->GetArena(), &isa_features),
       isa_features_(isa_features),
+      uint32_literals_(std::less<uint32_t>(),
+                       graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       method_patches_(MethodReferenceComparator(),
                       graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       call_patches_(MethodReferenceComparator(),
                     graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
+      pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      boot_image_string_patches_(StringReferenceValueComparator(),
+                                 graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      boot_image_type_patches_(TypeReferenceValueComparator(),
+                               graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      boot_image_address_patches_(std::less<uint32_t>(),
+                                  graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      clobbered_ra_(false) {
   // Save RA (containing the return address) to mimic Quick.
   AddAllocatedRegister(Location::RegisterLocation(RA));
 }
@@ -688,6 +699,16 @@
   if ((fpu_spill_mask_ != 0) && (POPCOUNT(core_spill_mask_) % 2 != 0)) {
     core_spill_mask_ |= (1 << ZERO);
   }
+  // If RA is clobbered by PC-relative operations on R2 and it's the only spilled register
+  // (this can happen in leaf methods), artificially spill the ZERO register in order to
+  // force explicit saving and restoring of RA. RA isn't saved/restored when it's the only
+  // spilled register.
+  // TODO: Can this be improved? It causes creation of a stack frame (while RA might be
+  // saved in an unused temporary register) and saving of RA and the current method pointer
+  // in the frame.
+  if (clobbered_ra_ && core_spill_mask_ == (1u << RA) && fpu_spill_mask_ == 0) {
+    core_spill_mask_ |= (1 << ZERO);
+  }
 }
 
 static dwarf::Reg DWARFReg(Register reg) {
@@ -962,7 +983,12 @@
   size_t size =
       method_patches_.size() +
       call_patches_.size() +
-      pc_relative_dex_cache_patches_.size();
+      pc_relative_dex_cache_patches_.size() +
+      pc_relative_string_patches_.size() +
+      pc_relative_type_patches_.size() +
+      boot_image_string_patches_.size() +
+      boot_image_type_patches_.size() +
+      boot_image_address_patches_.size();
   linker_patches->reserve(size);
   for (const auto& entry : method_patches_) {
     const MethodReference& target_method = entry.first;
@@ -994,6 +1020,71 @@
                                                               pc_rel_offset,
                                                               base_element_offset));
   }
+  for (const PcRelativePatchInfo& info : pc_relative_string_patches_) {
+    const DexFile& dex_file = info.target_dex_file;
+    size_t string_index = info.offset_or_index;
+    DCHECK(info.high_label.IsBound());
+    uint32_t high_offset = __ GetLabelLocation(&info.high_label);
+    // On R2 we use HMipsComputeBaseMethodAddress and patch relative to
+    // the assembler's base label used for PC-relative literals.
+    uint32_t pc_rel_offset = info.pc_rel_label.IsBound()
+        ? __ GetLabelLocation(&info.pc_rel_label)
+        : __ GetPcRelBaseLabelLocation();
+    linker_patches->push_back(LinkerPatch::RelativeStringPatch(high_offset,
+                                                               &dex_file,
+                                                               pc_rel_offset,
+                                                               string_index));
+  }
+  for (const PcRelativePatchInfo& info : pc_relative_type_patches_) {
+    const DexFile& dex_file = info.target_dex_file;
+    size_t type_index = info.offset_or_index;
+    DCHECK(info.high_label.IsBound());
+    uint32_t high_offset = __ GetLabelLocation(&info.high_label);
+    // On R2 we use HMipsComputeBaseMethodAddress and patch relative to
+    // the assembler's base label used for PC-relative literals.
+    uint32_t pc_rel_offset = info.pc_rel_label.IsBound()
+        ? __ GetLabelLocation(&info.pc_rel_label)
+        : __ GetPcRelBaseLabelLocation();
+    linker_patches->push_back(LinkerPatch::RelativeTypePatch(high_offset,
+                                                             &dex_file,
+                                                             pc_rel_offset,
+                                                             type_index));
+  }
+  for (const auto& entry : boot_image_string_patches_) {
+    const StringReference& target_string = entry.first;
+    Literal* literal = entry.second;
+    DCHECK(literal->GetLabel()->IsBound());
+    uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel());
+    linker_patches->push_back(LinkerPatch::StringPatch(literal_offset,
+                                                       target_string.dex_file,
+                                                       target_string.string_index));
+  }
+  for (const auto& entry : boot_image_type_patches_) {
+    const TypeReference& target_type = entry.first;
+    Literal* literal = entry.second;
+    DCHECK(literal->GetLabel()->IsBound());
+    uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel());
+    linker_patches->push_back(LinkerPatch::TypePatch(literal_offset,
+                                                     target_type.dex_file,
+                                                     target_type.type_index));
+  }
+  for (const auto& entry : boot_image_address_patches_) {
+    DCHECK(GetCompilerOptions().GetIncludePatchInformation());
+    Literal* literal = entry.second;
+    DCHECK(literal->GetLabel()->IsBound());
+    uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel());
+    linker_patches->push_back(LinkerPatch::RecordPosition(literal_offset));
+  }
+}
+
+CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeStringPatch(
+    const DexFile& dex_file, uint32_t string_index) {
+  return NewPcRelativePatch(dex_file, string_index, &pc_relative_string_patches_);
+}
+
+CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeTypePatch(
+    const DexFile& dex_file, uint32_t type_index) {
+  return NewPcRelativePatch(dex_file, type_index, &pc_relative_type_patches_);
 }
 
 CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeDexCacheArrayPatch(
@@ -1007,6 +1098,12 @@
   return &patches->back();
 }
 
+Literal* CodeGeneratorMIPS::DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map) {
+  return map->GetOrCreate(
+      value,
+      [this, value]() { return __ NewLiteral<uint32_t>(value); });
+}
+
 Literal* CodeGeneratorMIPS::DeduplicateMethodLiteral(MethodReference target_method,
                                                      MethodToLiteralMap* map) {
   return map->GetOrCreate(
@@ -1022,6 +1119,26 @@
   return DeduplicateMethodLiteral(target_method, &call_patches_);
 }
 
+Literal* CodeGeneratorMIPS::DeduplicateBootImageStringLiteral(const DexFile& dex_file,
+                                                              uint32_t string_index) {
+  return boot_image_string_patches_.GetOrCreate(
+      StringReference(&dex_file, string_index),
+      [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); });
+}
+
+Literal* CodeGeneratorMIPS::DeduplicateBootImageTypeLiteral(const DexFile& dex_file,
+                                                            uint32_t type_index) {
+  return boot_image_type_patches_.GetOrCreate(
+      TypeReference(&dex_file, type_index),
+      [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); });
+}
+
+Literal* CodeGeneratorMIPS::DeduplicateBootImageAddressLiteral(uint32_t address) {
+  bool needs_patch = GetCompilerOptions().GetIncludePatchInformation();
+  Uint32ToLiteralMap* map = needs_patch ? &boot_image_address_patches_ : &uint32_literals_;
+  return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), map);
+}
+
 void CodeGeneratorMIPS::MarkGCCard(Register object, Register value) {
   MipsLabel done;
   Register card = AT;
@@ -1067,6 +1184,15 @@
     blocked_fpu_registers_[i] = true;
   }
 
+  if (GetGraph()->IsDebuggable()) {
+    // Stubs do not save callee-save floating point registers. If the graph
+    // is debuggable, we need to deal with these registers differently. For
+    // now, just block them.
+    for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
+      blocked_fpu_registers_[kFpuCalleeSaves[i]] = true;
+    }
+  }
+
   UpdateBlockedPairRegisters();
 }
 
@@ -3440,7 +3566,8 @@
     if (field_type == Primitive::kPrimLong) {
       locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimLong));
     } else {
-      locations->SetOut(Location::RequiresFpuRegister());
+      // Use Location::Any() to avoid running out of available FP registers.
+      locations->SetOut(Location::Any());
       // Need some temp core regs since FP results are returned in core registers
       Location reg = calling_convention.GetReturnLocation(Primitive::kPrimLong);
       locations->AddTemp(Location::RegisterLocation(reg.AsRegisterPairLow<Register>()));
@@ -3505,11 +3632,23 @@
                             IsDirectEntrypoint(kQuickA64Load));
     CheckEntrypointTypes<kQuickA64Load, int64_t, volatile const int64_t*>();
     if (type == Primitive::kPrimDouble) {
-      // Need to move to FP regs since FP results are returned in core registers.
-      __ Mtc1(locations->GetTemp(1).AsRegister<Register>(),
-              locations->Out().AsFpuRegister<FRegister>());
-      __ MoveToFpuHigh(locations->GetTemp(2).AsRegister<Register>(),
-                       locations->Out().AsFpuRegister<FRegister>());
+      // FP results are returned in core registers. Need to move them.
+      Location out = locations->Out();
+      if (out.IsFpuRegister()) {
+        __ Mtc1(locations->GetTemp(1).AsRegister<Register>(), out.AsFpuRegister<FRegister>());
+        __ MoveToFpuHigh(locations->GetTemp(2).AsRegister<Register>(),
+                         out.AsFpuRegister<FRegister>());
+      } else {
+        DCHECK(out.IsDoubleStackSlot());
+        __ StoreToOffset(kStoreWord,
+                         locations->GetTemp(1).AsRegister<Register>(),
+                         SP,
+                         out.GetStackIndex());
+        __ StoreToOffset(kStoreWord,
+                         locations->GetTemp(2).AsRegister<Register>(),
+                         SP,
+                         out.GetStackIndex() + 4);
+      }
     }
   } else {
     if (!Primitive::IsFloatingPointType(type)) {
@@ -3568,7 +3707,8 @@
       locations->SetInAt(1, Location::RegisterPairLocation(
           calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
     } else {
-      locations->SetInAt(1, Location::RequiresFpuRegister());
+      // Use Location::Any() to avoid running out of available FP registers.
+      locations->SetInAt(1, Location::Any());
       // Pass FP parameters in core registers.
       locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
       locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
@@ -3627,10 +3767,28 @@
     codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
     if (type == Primitive::kPrimDouble) {
       // Pass FP parameters in core registers.
-      __ Mfc1(locations->GetTemp(1).AsRegister<Register>(),
-              locations->InAt(1).AsFpuRegister<FRegister>());
-      __ MoveFromFpuHigh(locations->GetTemp(2).AsRegister<Register>(),
-                         locations->InAt(1).AsFpuRegister<FRegister>());
+      Location in = locations->InAt(1);
+      if (in.IsFpuRegister()) {
+        __ Mfc1(locations->GetTemp(1).AsRegister<Register>(), in.AsFpuRegister<FRegister>());
+        __ MoveFromFpuHigh(locations->GetTemp(2).AsRegister<Register>(),
+                           in.AsFpuRegister<FRegister>());
+      } else if (in.IsDoubleStackSlot()) {
+        __ LoadFromOffset(kLoadWord,
+                          locations->GetTemp(1).AsRegister<Register>(),
+                          SP,
+                          in.GetStackIndex());
+        __ LoadFromOffset(kLoadWord,
+                          locations->GetTemp(2).AsRegister<Register>(),
+                          SP,
+                          in.GetStackIndex() + 4);
+      } else {
+        DCHECK(in.IsConstant());
+        DCHECK(in.GetConstant()->IsDoubleConstant());
+        int64_t value = bit_cast<int64_t, double>(in.GetConstant()->AsDoubleConstant()->GetValue());
+        __ LoadConst64(locations->GetTemp(2).AsRegister<Register>(),
+                       locations->GetTemp(1).AsRegister<Register>(),
+                       value);
+      }
     }
     codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pA64Store),
                             instruction,
@@ -3696,6 +3854,23 @@
   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetDexPc());
 }
 
+void InstructionCodeGeneratorMIPS::GenerateGcRootFieldLoad(
+    HInstruction* instruction ATTRIBUTE_UNUSED,
+    Location root,
+    Register obj,
+    uint32_t offset) {
+  Register root_reg = root.AsRegister<Register>();
+  if (kEmitCompilerReadBarrier) {
+    UNIMPLEMENTED(FATAL) << "for read barrier";
+  } else {
+    // Plain GC root load with no read barrier.
+    // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+    __ LoadFromOffset(kLoadWord, root_reg, obj, offset);
+    // Note that GC roots are not affected by heap poisoning, thus we
+    // do not have to unpoison `root_reg` here.
+  }
+}
+
 void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) {
   LocationSummary::CallKind call_kind =
       instruction->IsExactCheck() ? LocationSummary::kNoCall : LocationSummary::kCallOnSlowPath;
@@ -3861,16 +4036,80 @@
 }
 
 HLoadString::LoadKind CodeGeneratorMIPS::GetSupportedLoadStringKind(
-    HLoadString::LoadKind desired_string_load_kind ATTRIBUTE_UNUSED) {
-  // TODO: Implement other kinds.
-  return HLoadString::LoadKind::kDexCacheViaMethod;
+    HLoadString::LoadKind desired_string_load_kind) {
+  if (kEmitCompilerReadBarrier) {
+    UNIMPLEMENTED(FATAL) << "for read barrier";
+  }
+  // We disable PC-relative load when there is an irreducible loop, as the optimization
+  // is incompatible with it.
+  bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops();
+  bool fallback_load = has_irreducible_loops;
+  switch (desired_string_load_kind) {
+    case HLoadString::LoadKind::kBootImageLinkTimeAddress:
+      DCHECK(!GetCompilerOptions().GetCompilePic());
+      break;
+    case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+      DCHECK(GetCompilerOptions().GetCompilePic());
+      break;
+    case HLoadString::LoadKind::kBootImageAddress:
+      break;
+    case HLoadString::LoadKind::kDexCacheAddress:
+      DCHECK(Runtime::Current()->UseJitCompilation());
+      fallback_load = false;
+      break;
+    case HLoadString::LoadKind::kDexCachePcRelative:
+      DCHECK(!Runtime::Current()->UseJitCompilation());
+      // TODO: Create as many MipsDexCacheArraysBase instructions as needed for methods
+      // with irreducible loops.
+      break;
+    case HLoadString::LoadKind::kDexCacheViaMethod:
+      fallback_load = false;
+      break;
+  }
+  if (fallback_load) {
+    desired_string_load_kind = HLoadString::LoadKind::kDexCacheViaMethod;
+  }
+  return desired_string_load_kind;
 }
 
 HLoadClass::LoadKind CodeGeneratorMIPS::GetSupportedLoadClassKind(
     HLoadClass::LoadKind desired_class_load_kind) {
-  DCHECK_NE(desired_class_load_kind, HLoadClass::LoadKind::kReferrersClass);
-  // TODO: Implement other kinds.
-  return HLoadClass::LoadKind::kDexCacheViaMethod;
+  if (kEmitCompilerReadBarrier) {
+    UNIMPLEMENTED(FATAL) << "for read barrier";
+  }
+  // We disable pc-relative load when there is an irreducible loop, as the optimization
+  // is incompatible with it.
+  bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops();
+  bool fallback_load = has_irreducible_loops;
+  switch (desired_class_load_kind) {
+    case HLoadClass::LoadKind::kReferrersClass:
+      fallback_load = false;
+      break;
+    case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+      DCHECK(!GetCompilerOptions().GetCompilePic());
+      break;
+    case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+      DCHECK(GetCompilerOptions().GetCompilePic());
+      break;
+    case HLoadClass::LoadKind::kBootImageAddress:
+      break;
+    case HLoadClass::LoadKind::kDexCacheAddress:
+      DCHECK(Runtime::Current()->UseJitCompilation());
+      fallback_load = false;
+      break;
+    case HLoadClass::LoadKind::kDexCachePcRelative:
+      DCHECK(!Runtime::Current()->UseJitCompilation());
+      // TODO: Create as many MipsDexCacheArraysBase instructions as needed for methods
+      // with irreducible loops.
+      break;
+    case HLoadClass::LoadKind::kDexCacheViaMethod:
+      fallback_load = false;
+      break;
+  }
+  if (fallback_load) {
+    desired_class_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod;
+  }
+  return desired_class_load_kind;
 }
 
 Register CodeGeneratorMIPS::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
@@ -4107,11 +4346,40 @@
 }
 
 void LocationsBuilderMIPS::VisitLoadClass(HLoadClass* cls) {
-  InvokeRuntimeCallingConvention calling_convention;
-  CodeGenerator::CreateLoadClassLocationSummary(
-      cls,
-      Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
-      Location::RegisterLocation(V0));
+  if (cls->NeedsAccessCheck()) {
+    InvokeRuntimeCallingConvention calling_convention;
+    CodeGenerator::CreateLoadClassLocationSummary(
+        cls,
+        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+        Location::RegisterLocation(V0),
+        /* code_generator_supports_read_barrier */ false);  // TODO: revisit this bool.
+    return;
+  }
+
+  LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || kEmitCompilerReadBarrier)
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
+  HLoadClass::LoadKind load_kind = cls->GetLoadKind();
+  switch (load_kind) {
+    // We need an extra register for PC-relative literals on R2.
+    case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+    case HLoadClass::LoadKind::kBootImageAddress:
+    case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+      if (codegen_->GetInstructionSetFeatures().IsR6()) {
+        break;
+      }
+      FALLTHROUGH_INTENDED;
+    // We need an extra register for PC-relative dex cache accesses.
+    case HLoadClass::LoadKind::kDexCachePcRelative:
+    case HLoadClass::LoadKind::kReferrersClass:
+    case HLoadClass::LoadKind::kDexCacheViaMethod:
+      locations->SetInAt(0, Location::RequiresRegister());
+      break;
+    default:
+      break;
+  }
+  locations->SetOut(Location::RequiresRegister());
 }
 
 void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) {
@@ -4127,34 +4395,126 @@
     return;
   }
 
-  Register out = locations->Out().AsRegister<Register>();
-  Register current_method = locations->InAt(0).AsRegister<Register>();
-  if (cls->IsReferrersClass()) {
-    DCHECK(!cls->CanCallRuntime());
-    DCHECK(!cls->MustGenerateClinitCheck());
-    __ LoadFromOffset(kLoadWord, out, current_method,
-                      ArtMethod::DeclaringClassOffset().Int32Value());
-  } else {
-    __ LoadFromOffset(kLoadWord, out, current_method,
-                      ArtMethod::DexCacheResolvedTypesOffset(kMipsPointerSize).Int32Value());
-    __ LoadFromOffset(kLoadWord, out, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()));
+  HLoadClass::LoadKind load_kind = cls->GetLoadKind();
+  Location out_loc = locations->Out();
+  Register out = out_loc.AsRegister<Register>();
+  Register base_or_current_method_reg;
+  bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
+  switch (load_kind) {
+    // We need an extra register for PC-relative literals on R2.
+    case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+    case HLoadClass::LoadKind::kBootImageAddress:
+    case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+      base_or_current_method_reg = isR6 ? ZERO : locations->InAt(0).AsRegister<Register>();
+      break;
+    // We need an extra register for PC-relative dex cache accesses.
+    case HLoadClass::LoadKind::kDexCachePcRelative:
+    case HLoadClass::LoadKind::kReferrersClass:
+    case HLoadClass::LoadKind::kDexCacheViaMethod:
+      base_or_current_method_reg = locations->InAt(0).AsRegister<Register>();
+      break;
+    default:
+      base_or_current_method_reg = ZERO;
+      break;
+  }
 
-    if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
-      DCHECK(cls->CanCallRuntime());
-      SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS(
-          cls,
-          cls,
-          cls->GetDexPc(),
-          cls->MustGenerateClinitCheck());
-      codegen_->AddSlowPath(slow_path);
-      if (!cls->IsInDexCache()) {
-        __ Beqz(out, slow_path->GetEntryLabel());
-      }
-      if (cls->MustGenerateClinitCheck()) {
-        GenerateClassInitializationCheck(slow_path, out);
+  bool generate_null_check = false;
+  switch (load_kind) {
+    case HLoadClass::LoadKind::kReferrersClass: {
+      DCHECK(!cls->CanCallRuntime());
+      DCHECK(!cls->MustGenerateClinitCheck());
+      // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+      GenerateGcRootFieldLoad(cls,
+                              out_loc,
+                              base_or_current_method_reg,
+                              ArtMethod::DeclaringClassOffset().Int32Value());
+      break;
+    }
+    case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+      DCHECK(!kEmitCompilerReadBarrier);
+      __ LoadLiteral(out,
+                     base_or_current_method_reg,
+                     codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(),
+                                                               cls->GetTypeIndex()));
+      break;
+    case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      CodeGeneratorMIPS::PcRelativePatchInfo* info =
+          codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
+      if (isR6) {
+        __ Bind(&info->high_label);
+        __ Bind(&info->pc_rel_label);
+        // Add a 32-bit offset to PC.
+        __ Auipc(out, /* placeholder */ 0x1234);
+        __ Addiu(out, out, /* placeholder */ 0x5678);
       } else {
-        __ Bind(slow_path->GetExitLabel());
+        __ Bind(&info->high_label);
+        __ Lui(out, /* placeholder */ 0x1234);
+        // We do not bind info->pc_rel_label here, we'll use the assembler's label
+        // for PC-relative literals and the base from HMipsComputeBaseMethodAddress.
+        __ Ori(out, out, /* placeholder */ 0x5678);
+        // Add a 32-bit offset to PC.
+        __ Addu(out, out, base_or_current_method_reg);
       }
+      break;
+    }
+    case HLoadClass::LoadKind::kBootImageAddress: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      DCHECK_NE(cls->GetAddress(), 0u);
+      uint32_t address = dchecked_integral_cast<uint32_t>(cls->GetAddress());
+      __ LoadLiteral(out,
+                     base_or_current_method_reg,
+                     codegen_->DeduplicateBootImageAddressLiteral(address));
+      break;
+    }
+    case HLoadClass::LoadKind::kDexCacheAddress: {
+      DCHECK_NE(cls->GetAddress(), 0u);
+      uint32_t address = dchecked_integral_cast<uint32_t>(cls->GetAddress());
+      static_assert(sizeof(GcRoot<mirror::Class>) == 4u, "Expected GC root to be 4 bytes.");
+      DCHECK_ALIGNED(cls->GetAddress(), 4u);
+      int16_t offset = Low16Bits(address);
+      uint32_t base_address = address - offset;  // This accounts for offset sign extension.
+      __ Lui(out, High16Bits(base_address));
+      // /* GcRoot<mirror::Class> */ out = *(base_address + offset)
+      GenerateGcRootFieldLoad(cls, out_loc, out, offset);
+      generate_null_check = !cls->IsInDexCache();
+      break;
+    }
+    case HLoadClass::LoadKind::kDexCachePcRelative: {
+      HMipsDexCacheArraysBase* base = cls->InputAt(0)->AsMipsDexCacheArraysBase();
+      int32_t offset =
+          cls->GetDexCacheElementOffset() - base->GetElementOffset() - kDexCacheArrayLwOffset;
+      // /* GcRoot<mirror::Class> */ out = *(dex_cache_arrays_base + offset)
+      GenerateGcRootFieldLoad(cls, out_loc, base_or_current_method_reg, offset);
+      generate_null_check = !cls->IsInDexCache();
+      break;
+    }
+    case HLoadClass::LoadKind::kDexCacheViaMethod: {
+      // /* GcRoot<mirror::Class>[] */ out =
+      //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
+      __ LoadFromOffset(kLoadWord,
+                        out,
+                        base_or_current_method_reg,
+                        ArtMethod::DexCacheResolvedTypesOffset(kMipsPointerSize).Int32Value());
+      // /* GcRoot<mirror::Class> */ out = out[type_index]
+      size_t offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex());
+      GenerateGcRootFieldLoad(cls, out_loc, out, offset);
+      generate_null_check = !cls->IsInDexCache();  // A null entry branches to the slow path.
+    }
+  }
+
+  if (generate_null_check || cls->MustGenerateClinitCheck()) {
+    DCHECK(cls->CanCallRuntime());
+    SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS(
+        cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
+    codegen_->AddSlowPath(slow_path);
+    if (generate_null_check) {
+      __ Beqz(out, slow_path->GetEntryLabel());
+    }
+    if (cls->MustGenerateClinitCheck()) {
+      GenerateClassInitializationCheck(slow_path, out);
+    } else {
+      __ Bind(slow_path->GetExitLabel());
     }
   }
 }
@@ -4183,21 +4543,132 @@
 }
 
 void LocationsBuilderMIPS::VisitLoadString(HLoadString* load) {
-  LocationSummary::CallKind call_kind = load->NeedsEnvironment()
+  LocationSummary::CallKind call_kind = (load->NeedsEnvironment() || kEmitCompilerReadBarrier)
       ? LocationSummary::kCallOnSlowPath
       : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
-  locations->SetInAt(0, Location::RequiresRegister());
+  HLoadString::LoadKind load_kind = load->GetLoadKind();
+  switch (load_kind) {
+    // PC-relative literals need an extra (base) register on R2 but not on R6.
+    case HLoadString::LoadKind::kBootImageLinkTimeAddress:
+    case HLoadString::LoadKind::kBootImageAddress:
+    case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+      if (codegen_->GetInstructionSetFeatures().IsR6()) {
+        break;
+      }
+      FALLTHROUGH_INTENDED;  // R2: share the input setup below.
+    // We need an extra register: the dex cache arrays base or the current method.
+    case HLoadString::LoadKind::kDexCachePcRelative:
+    case HLoadString::LoadKind::kDexCacheViaMethod:
+      locations->SetInAt(0, Location::RequiresRegister());
+      break;
+    default:  // No input is set up for the remaining load kinds.
+      break;
+  }
   locations->SetOut(Location::RequiresRegister());
 }
 
 void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) {
+  HLoadString::LoadKind load_kind = load->GetLoadKind();
   LocationSummary* locations = load->GetLocations();
-  Register out = locations->Out().AsRegister<Register>();
-  Register current_method = locations->InAt(0).AsRegister<Register>();
-  __ LoadFromOffset(kLoadWord, out, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
-  __ LoadFromOffset(kLoadWord, out, out, mirror::Class::DexCacheStringsOffset().Int32Value());
-  __ LoadFromOffset(kLoadWord, out, out, CodeGenerator::GetCacheOffset(load->GetStringIndex()));
+  Location out_loc = locations->Out();
+  Register out = out_loc.AsRegister<Register>();
+  Register base_or_current_method_reg;
+  bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
+  switch (load_kind) {
+    // We need an extra register for PC-relative literals on R2.
+    case HLoadString::LoadKind::kBootImageLinkTimeAddress:
+    case HLoadString::LoadKind::kBootImageAddress:
+    case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+      base_or_current_method_reg = isR6 ? ZERO : locations->InAt(0).AsRegister<Register>();
+      break;
+    // We need an extra register for PC-relative dex cache accesses.
+    case HLoadString::LoadKind::kDexCachePcRelative:
+    case HLoadString::LoadKind::kDexCacheViaMethod:
+      base_or_current_method_reg = locations->InAt(0).AsRegister<Register>();
+      break;
+    default:
+      base_or_current_method_reg = ZERO;
+      break;
+  }
+
+  switch (load_kind) {
+    case HLoadString::LoadKind::kBootImageLinkTimeAddress:
+      DCHECK(!kEmitCompilerReadBarrier);
+      __ LoadLiteral(out,
+                     base_or_current_method_reg,
+                     codegen_->DeduplicateBootImageStringLiteral(load->GetDexFile(),
+                                                                 load->GetStringIndex()));
+      return;  // No dex cache slow path.
+    case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      CodeGeneratorMIPS::PcRelativePatchInfo* info =
+          codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
+      if (isR6) {
+        __ Bind(&info->high_label);
+        __ Bind(&info->pc_rel_label);
+        // Add a 32-bit offset to PC.
+        __ Auipc(out, /* placeholder */ 0x1234);
+        __ Addiu(out, out, /* placeholder */ 0x5678);
+      } else {
+        __ Bind(&info->high_label);
+        __ Lui(out, /* placeholder */ 0x1234);
+        // We do not bind info->pc_rel_label here, we'll use the assembler's label
+        // for PC-relative literals and the base from HMipsComputeBaseMethodAddress.
+        __ Ori(out, out, /* placeholder */ 0x5678);
+        // Add a 32-bit offset to PC.
+        __ Addu(out, out, base_or_current_method_reg);
+      }
+      return;  // No dex cache slow path.
+    }
+    case HLoadString::LoadKind::kBootImageAddress: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      DCHECK_NE(load->GetAddress(), 0u);
+      uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress());
+      __ LoadLiteral(out,
+                     base_or_current_method_reg,
+                     codegen_->DeduplicateBootImageAddressLiteral(address));
+      return;  // No dex cache slow path.
+    }
+    case HLoadString::LoadKind::kDexCacheAddress: {
+      DCHECK_NE(load->GetAddress(), 0u);
+      uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress());
+      static_assert(sizeof(GcRoot<mirror::String>) == 4u, "Expected GC root to be 4 bytes.");
+      DCHECK_ALIGNED(load->GetAddress(), 4u);
+      int16_t offset = Low16Bits(address);
+      uint32_t base_address = address - offset;  // This accounts for offset sign extension.
+      __ Lui(out, High16Bits(base_address));
+      // /* GcRoot<mirror::String> */ out = *(base_address + offset)
+      GenerateGcRootFieldLoad(load, out_loc, out, offset);
+      break;
+    }
+    case HLoadString::LoadKind::kDexCachePcRelative: {
+      HMipsDexCacheArraysBase* base = load->InputAt(0)->AsMipsDexCacheArraysBase();
+      int32_t offset =
+          load->GetDexCacheElementOffset() - base->GetElementOffset() - kDexCacheArrayLwOffset;
+      // /* GcRoot<mirror::String> */ out = *(dex_cache_arrays_base + offset)
+      GenerateGcRootFieldLoad(load, out_loc, base_or_current_method_reg, offset);
+      break;
+    }
+    case HLoadString::LoadKind::kDexCacheViaMethod: {
+      // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+      GenerateGcRootFieldLoad(load,
+                              out_loc,
+                              base_or_current_method_reg,
+                              ArtMethod::DeclaringClassOffset().Int32Value());
+      // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
+      __ LoadFromOffset(kLoadWord, out, out, mirror::Class::DexCacheStringsOffset().Int32Value());
+      // /* GcRoot<mirror::String> */ out = out[string_index]
+      GenerateGcRootFieldLoad(load,
+                              out_loc,
+                              out,
+                              CodeGenerator::GetCacheOffset(load->GetStringIndex()));
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected load kind: " << load->GetLoadKind();
+      UNREACHABLE();
+  }
 
   if (!load->IsInDexCache()) {
     SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load);
@@ -5327,6 +5798,7 @@
   __ Nal();
   // Grab the return address off RA.
   __ Move(reg, RA);
+  // TODO: Can we share this code with that of VisitMipsDexCacheArraysBase()?
 
   // Remember this offset (the obtained PC value) for later use with constant area.
   __ BindPcRelBaseLabel();
@@ -5357,6 +5829,7 @@
     __ Ori(reg, reg, /* placeholder */ 0x5678);
     // Add a 32-bit offset to PC.
     __ Addu(reg, reg, RA);
+    // TODO: Can we share this code with that of VisitMipsComputeBaseMethodAddress()?
   }
 }
 
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index 08f74c0..63a0345 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -18,11 +18,12 @@
 #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_MIPS_H_
 
 #include "code_generator.h"
-#include "dex/compiler_enums.h"
 #include "driver/compiler_options.h"
 #include "nodes.h"
 #include "parallel_move_resolver.h"
+#include "string_reference.h"
 #include "utils/mips/assembler_mips.h"
+#include "utils/type_reference.h"
 
 namespace art {
 namespace mips {
@@ -226,6 +227,15 @@
   void HandleShift(HBinaryOperation* operation);
   void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, uint32_t dex_pc);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info, uint32_t dex_pc);
+  // Generate a GC root reference load:
+  //
+  //   root <- *(obj + offset)
+  //
+  // while honoring read barriers (if any).
+  void GenerateGcRootFieldLoad(HInstruction* instruction,
+                               Location root,
+                               Register obj,
+                               uint32_t offset);
   void GenerateIntCompare(IfCondition cond, LocationSummary* locations);
   void GenerateIntCompareAndBranch(IfCondition cond,
                                    LocationSummary* locations,
@@ -298,6 +308,9 @@
   size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id);
   size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id);
   size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id);
+  void ClobberRA() {  // Notes that RA will be clobbered; see clobbered_ra_ for details.
+    clobbered_ra_ = true;
+  }
 
   void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
   void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
@@ -383,7 +396,7 @@
     PcRelativePatchInfo(PcRelativePatchInfo&& other) = default;
 
     const DexFile& target_dex_file;
-    // Either the dex cache array element offset or the string index.
+    // Either the dex cache array element offset or the string/type index.
     uint32_t offset_or_index;
     // Label for the instruction loading the most significant half of the offset that's added to PC
     // to form the base address (the least significant half is loaded with the instruction that
@@ -393,14 +406,27 @@
     MipsLabel pc_rel_label;
   };
 
+  PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, uint32_t string_index);
+  PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, uint32_t type_index);
   PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
                                                        uint32_t element_offset);
+  Literal* DeduplicateBootImageStringLiteral(const DexFile& dex_file, uint32_t string_index);
+  Literal* DeduplicateBootImageTypeLiteral(const DexFile& dex_file, uint32_t type_index);
+  Literal* DeduplicateBootImageAddressLiteral(uint32_t address);
 
  private:
   Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp);
 
+  using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, Literal*>;
   using MethodToLiteralMap = ArenaSafeMap<MethodReference, Literal*, MethodReferenceComparator>;
+  using BootStringToLiteralMap = ArenaSafeMap<StringReference,
+                                              Literal*,
+                                              StringReferenceValueComparator>;
+  using BootTypeToLiteralMap = ArenaSafeMap<TypeReference,
+                                            Literal*,
+                                            TypeReferenceValueComparator>;
 
+  Literal* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map);
   Literal* DeduplicateMethodLiteral(MethodReference target_method, MethodToLiteralMap* map);
   Literal* DeduplicateMethodAddressLiteral(MethodReference target_method);
   Literal* DeduplicateMethodCodeLiteral(MethodReference target_method);
@@ -417,11 +443,27 @@
   MipsAssembler assembler_;
   const MipsInstructionSetFeatures& isa_features_;
 
+  // Deduplication map for 32-bit literals, used for non-patchable boot image addresses.
+  Uint32ToLiteralMap uint32_literals_;
   // Method patch info, map MethodReference to a literal for method address and method code.
   MethodToLiteralMap method_patches_;
   MethodToLiteralMap call_patches_;
   // PC-relative patch info for each HMipsDexCacheArraysBase.
   ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_;
+  // Deduplication map for boot string literals for kBootImageLinkTimeAddress.
+  BootStringToLiteralMap boot_image_string_patches_;
+  // PC-relative String patch info.
+  ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
+  // Deduplication map for boot type literals for kBootImageLinkTimeAddress.
+  BootTypeToLiteralMap boot_image_type_patches_;
+  // PC-relative type patch info.
+  ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
+  // Deduplication map for patchable boot image addresses.
+  Uint32ToLiteralMap boot_image_address_patches_;
+
+  // PC-relative loads on R2 clobber RA, which may need to be preserved explicitly in leaf methods.
+  // This is a flag set by pc_relative_fixups_mips and dex_cache_array_fixups_mips optimizations.
+  bool clobbered_ra_;
 
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorMIPS);
 };
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index 4b462cc..197f86b 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -18,7 +18,6 @@
 #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_MIPS64_H_
 
 #include "code_generator.h"
-#include "dex/compiler_enums.h"
 #include "driver/compiler_options.h"
 #include "nodes.h"
 #include "parallel_move_resolver.h"
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 1290172..39ea7d5 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -19,7 +19,6 @@
 
 #include "arch/x86/instruction_set_features_x86.h"
 #include "code_generator.h"
-#include "dex/compiler_enums.h"
 #include "driver/compiler_options.h"
 #include "nodes.h"
 #include "parallel_move_resolver.h"
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index cf92d68..fbb78bc 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -19,7 +19,6 @@
 
 #include "arch/x86_64/instruction_set_features_x86_64.h"
 #include "code_generator.h"
-#include "dex/compiler_enums.h"
 #include "driver/compiler_options.h"
 #include "nodes.h"
 #include "parallel_move_resolver.h"
diff --git a/compiler/optimizing/dex_cache_array_fixups_mips.cc b/compiler/optimizing/dex_cache_array_fixups_mips.cc
index 0f42d9c..19bab08 100644
--- a/compiler/optimizing/dex_cache_array_fixups_mips.cc
+++ b/compiler/optimizing/dex_cache_array_fixups_mips.cc
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#include "code_generator_mips.h"
 #include "dex_cache_array_fixups_mips.h"
 
 #include "base/arena_containers.h"
@@ -27,8 +28,9 @@
  */
 class DexCacheArrayFixupsVisitor : public HGraphVisitor {
  public:
-  explicit DexCacheArrayFixupsVisitor(HGraph* graph)
+  explicit DexCacheArrayFixupsVisitor(HGraph* graph, CodeGenerator* codegen)
       : HGraphVisitor(graph),
+        codegen_(down_cast<CodeGeneratorMIPS*>(codegen)),  // Must be the MIPS code generator.
         dex_cache_array_bases_(std::less<const DexFile*>(),
                                // Attribute memory use to code generator.
                                graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {}
@@ -41,9 +43,45 @@
       HMipsDexCacheArraysBase* base = entry.second;
       base->MoveBeforeFirstUserAndOutOfLoops();
     }
+    // Computing the dex cache base for PC-relative accesses will clobber RA with
+    // the NAL instruction on R2. Take a note of this before generating the method
+    // entry.
+    if (!dex_cache_array_bases_.empty() && !codegen_->GetInstructionSetFeatures().IsR6()) {
+      codegen_->ClobberRA();
+    }
   }
 
  private:
+  void VisitLoadClass(HLoadClass* load_class) OVERRIDE {
+    // Only PC-relative accesses to the dex cache types array need the dex
+    // cache arrays base as a special input; skip every other load kind.
+    if (load_class->GetLoadKind() != HLoadClass::LoadKind::kDexCachePcRelative) {
+      return;
+    }
+    // Get the base for the target dex file, creating it on first use.
+    const DexFile& target_dex_file = load_class->GetDexFile();
+    HMipsDexCacheArraysBase* arrays_base = GetOrCreateDexCacheArrayBase(target_dex_file);
+    // Let the base know about this type's element offset.
+    DexCacheArraysLayout arrays_layout(kMipsPointerSize, &target_dex_file);
+    arrays_base->UpdateElementOffset(arrays_layout.TypeOffset(load_class->GetTypeIndex()));
+    load_class->AddSpecialInput(arrays_base);
+  }
+
+  void VisitLoadString(HLoadString* load_string) OVERRIDE {
+    // Only PC-relative accesses to the dex cache strings array need the dex
+    // cache arrays base as a special input; skip every other load kind.
+    if (load_string->GetLoadKind() != HLoadString::LoadKind::kDexCachePcRelative) {
+      return;
+    }
+    // Get the base for the target dex file, creating it on first use.
+    const DexFile& target_dex_file = load_string->GetDexFile();
+    HMipsDexCacheArraysBase* arrays_base = GetOrCreateDexCacheArrayBase(target_dex_file);
+    // Let the base know about this string's element offset.
+    DexCacheArraysLayout arrays_layout(kMipsPointerSize, &target_dex_file);
+    arrays_base->UpdateElementOffset(arrays_layout.StringOffset(load_string->GetStringIndex()));
+    load_string->AddSpecialInput(arrays_base);
+  }
+
   void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE {
     // If this is an invoke with PC-relative access to the dex cache methods array,
     // we need to add the dex cache arrays base as the special input.
@@ -74,6 +112,8 @@
         });
   }
 
+  CodeGeneratorMIPS* codegen_;
+
   using DexCacheArraysBaseMap =
       ArenaSafeMap<const DexFile*, HMipsDexCacheArraysBase*, std::less<const DexFile*>>;
   DexCacheArraysBaseMap dex_cache_array_bases_;
@@ -85,7 +125,7 @@
     // that can be live-in at the irreducible loop header.
     return;
   }
-  DexCacheArrayFixupsVisitor visitor(graph_);
+  DexCacheArrayFixupsVisitor visitor(graph_, codegen_);
   visitor.VisitInsertionOrder();
   visitor.MoveBasesIfNeeded();
 }
diff --git a/compiler/optimizing/dex_cache_array_fixups_mips.h b/compiler/optimizing/dex_cache_array_fixups_mips.h
index c8def28..21056e1 100644
--- a/compiler/optimizing/dex_cache_array_fixups_mips.h
+++ b/compiler/optimizing/dex_cache_array_fixups_mips.h
@@ -21,14 +21,21 @@
 #include "optimization.h"
 
 namespace art {
+
+class CodeGenerator;
+
 namespace mips {
 
 class DexCacheArrayFixups : public HOptimization {
  public:
-  DexCacheArrayFixups(HGraph* graph, OptimizingCompilerStats* stats)
-      : HOptimization(graph, "dex_cache_array_fixups_mips", stats) {}
+  DexCacheArrayFixups(HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats)
+      : HOptimization(graph, "dex_cache_array_fixups_mips", stats),
+        codegen_(codegen) {}
 
   void Run() OVERRIDE;
+
+ private:
+  CodeGenerator* codegen_;  // Handed to DexCacheArrayFixupsVisitor.
 };
 
 }  // namespace mips
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index afac5f9..e5dab56 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -19,6 +19,7 @@
 #include "art_method-inl.h"
 #include "bytecode_utils.h"
 #include "class_linker.h"
+#include "dex_instruction-inl.h"
 #include "driver/compiler_options.h"
 #include "scoped_thread_state_change.h"
 
diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h
index 9cfc065..517cf76 100644
--- a/compiler/optimizing/instruction_builder.h
+++ b/compiler/optimizing/instruction_builder.h
@@ -30,6 +30,8 @@
 
 namespace art {
 
+class Instruction;
+
 class HInstructionBuilder : public ValueObject {
  public:
   HInstructionBuilder(HGraph* graph,
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index d557f42..2808e1b 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -2632,4 +2632,23 @@
   }
 }
 
+std::ostream& operator<<(std::ostream& os, const MemBarrierKind& kind) {
+  // Look up the printable name of the barrier kind first and stream it in a
+  // single place at the end. A value that is not a known barrier kind is
+  // reported as a fatal error.
+  const char* name = nullptr;
+  switch (kind) {
+    case MemBarrierKind::kAnyStore: name = "AnyStore"; break;
+    case MemBarrierKind::kLoadAny: name = "LoadAny"; break;
+    case MemBarrierKind::kStoreStore: name = "StoreStore"; break;
+    case MemBarrierKind::kAnyAny: name = "AnyAny"; break;
+    case MemBarrierKind::kNTStoreStore: name = "NTStoreStore"; break;
+
+    default:
+      LOG(FATAL) << "Unknown MemBarrierKind: " << static_cast<int>(kind);
+      UNREACHABLE();
+  }
+  return os << name;
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 23ac457..dfa8276 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -25,7 +25,6 @@
 #include "base/arena_containers.h"
 #include "base/arena_object.h"
 #include "base/stl_util.h"
-#include "dex/compiler_enums.h"
 #include "dex_file.h"
 #include "entrypoints/quick/quick_entrypoints_enum.h"
 #include "handle.h"
@@ -5626,9 +5625,12 @@
 
 // Note: defined outside class to see operator<<(., HLoadClass::LoadKind).
 inline void HLoadClass::AddSpecialInput(HInstruction* special_input) {
-  // The special input is used for PC-relative loads on some architectures.
+  // The special input is used for PC-relative loads on some architectures,
+  // including literal pool loads, which are PC-relative too.
   DCHECK(GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative ||
-         GetLoadKind() == LoadKind::kDexCachePcRelative) << GetLoadKind();
+         GetLoadKind() == LoadKind::kDexCachePcRelative ||
+         GetLoadKind() == LoadKind::kBootImageLinkTimeAddress ||
+         GetLoadKind() == LoadKind::kBootImageAddress) << GetLoadKind();
   DCHECK(special_input_.GetInstruction() == nullptr);
   special_input_ = HUserRecord<HInstruction*>(special_input);
   special_input->AddUseAt(this, 0);
@@ -5836,9 +5838,12 @@
 
 // Note: defined outside class to see operator<<(., HLoadString::LoadKind).
 inline void HLoadString::AddSpecialInput(HInstruction* special_input) {
-  // The special input is used for PC-relative loads on some architectures.
+  // The special input is used for PC-relative loads on some architectures,
+  // including literal pool loads, which are PC-relative too.
   DCHECK(GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative ||
-         GetLoadKind() == LoadKind::kDexCachePcRelative) << GetLoadKind();
+         GetLoadKind() == LoadKind::kDexCachePcRelative ||
+         GetLoadKind() == LoadKind::kBootImageLinkTimeAddress ||
+         GetLoadKind() == LoadKind::kBootImageAddress) << GetLoadKind();
   // HLoadString::GetInputRecords() returns an empty array at this point,
   // so use the GetInputRecords() from the base class to set the input record.
   DCHECK(special_input_.GetInstruction() == nullptr);
@@ -6305,6 +6310,32 @@
   DISALLOW_COPY_AND_ASSIGN(HCheckCast);
 };
 
+/**
+ * @brief Memory barrier types (see "The JSR-133 Cookbook for Compiler Writers").
+ * @details We define the combined barrier types that are actually required
+ * by the Java Memory Model, rather than using exactly the terminology from
+ * the JSR-133 cookbook.  These should, in many cases, be replaced by acquire/release
+ * primitives.  Note that the JSR-133 cookbook generally does not deal with
+ * store atomicity issues, and the recipes there are not always entirely sufficient.
+ * The current recipe is as follows:
+ * -# Use AnyStore ~= (LoadStore | StoreStore) ~= release barrier before volatile store.
+ * -# Use AnyAny barrier after volatile store.  (StoreLoad is as expensive.)
+ * -# Use LoadAny barrier ~= (LoadLoad | LoadStore) ~= acquire barrier after each volatile load.
+ * -# Use StoreStore barrier after all stores but before return from any constructor whose
+ *    class has final fields.
+ * -# Use NTStoreStore to order non-temporal stores with respect to all later
+ *    store-to-memory instructions.  Only generated together with non-temporal stores.
+ */
+enum MemBarrierKind {
+  kAnyStore,
+  kLoadAny,
+  kStoreStore,
+  kAnyAny,
+  kNTStoreStore,
+  kLastBarrierKind = kNTStoreStore  // Alias marking the last kind, not a distinct barrier.
+};
+std::ostream& operator<<(std::ostream& os, const MemBarrierKind& kind);
+
 class HMemoryBarrier FINAL : public HTemplateInstruction<0> {
  public:
   explicit HMemoryBarrier(MemBarrierKind barrier_kind, uint32_t dex_pc = kNoDexPc)
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index aedfcb4..d5b0d77 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -504,7 +504,7 @@
       mips::PcRelativeFixups* pc_relative_fixups =
           new (arena) mips::PcRelativeFixups(graph, codegen, stats);
       mips::DexCacheArrayFixups* dex_cache_array_fixups =
-          new (arena) mips::DexCacheArrayFixups(graph, stats);
+          new (arena) mips::DexCacheArrayFixups(graph, codegen, stats);
       HOptimization* mips_optimizations[] = {
           pc_relative_fixups,
           dex_cache_array_fixups
diff --git a/compiler/optimizing/pc_relative_fixups_mips.cc b/compiler/optimizing/pc_relative_fixups_mips.cc
index ba405cd..c6acc45 100644
--- a/compiler/optimizing/pc_relative_fixups_mips.cc
+++ b/compiler/optimizing/pc_relative_fixups_mips.cc
@@ -37,6 +37,10 @@
       // entry block) and relieve some pressure on the register allocator
       // while avoiding recalculation of the base in a loop.
       base_->MoveBeforeFirstUserAndOutOfLoops();
+      // Computing the base for PC-relative literals will clobber RA with
+      // the NAL instruction on R2. Take a note of this before generating
+      // the method entry.
+      codegen_->ClobberRA();
     }
   }
 
@@ -58,6 +62,36 @@
     DCHECK(base_ != nullptr);
   }
 
+  void VisitLoadClass(HLoadClass* load_class) OVERRIDE {
+    // The boot image load kinds below need a base register for PC-relative
+    // literals on R2; all other load kinds are left untouched.
+    const HLoadClass::LoadKind kind = load_class->GetLoadKind();
+    const bool needs_base =
+        kind == HLoadClass::LoadKind::kBootImageLinkTimeAddress ||
+        kind == HLoadClass::LoadKind::kBootImageAddress ||
+        kind == HLoadClass::LoadKind::kBootImageLinkTimePcRelative;
+    if (!needs_base) {
+      return;
+    }
+    InitializePCRelativeBasePointer();
+    load_class->AddSpecialInput(base_);
+  }
+
+  void VisitLoadString(HLoadString* load_string) OVERRIDE {
+    // The boot image load kinds below need a base register for PC-relative
+    // literals on R2; all other load kinds are left untouched.
+    const HLoadString::LoadKind kind = load_string->GetLoadKind();
+    const bool needs_base =
+        kind == HLoadString::LoadKind::kBootImageLinkTimeAddress ||
+        kind == HLoadString::LoadKind::kBootImageAddress ||
+        kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative;
+    if (!needs_base) {
+      return;
+    }
+    InitializePCRelativeBasePointer();
+    load_string->AddSpecialInput(base_);
+  }
+
   void HandleInvoke(HInvoke* invoke) {
     // If this is an invoke-static/-direct with PC-relative dex cache array
     // addressing, we need the PC-relative address base.
@@ -77,7 +111,7 @@
       // method pointer from the invoke.
       if (invoke_static_or_direct->HasCurrentMethodInput()) {
         DCHECK(!invoke_static_or_direct->HasPcRelativeDexCache());
-        CHECK(!has_extra_input);  // TODO: review this.
+        CHECK(!has_extra_input);
         return;
       }
 
@@ -116,7 +150,6 @@
   CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen_);
   if (mips_codegen->GetInstructionSetFeatures().IsR6()) {
     // Do nothing for R6 because it has PC-relative addressing.
-    // TODO: review. Move this check into RunArchOptimizations()?
     return;
   }
   if (graph_->HasIrreducibleLoops()) {
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index ebaf1c0..608b3bc 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -2024,6 +2024,10 @@
   Bind(&pc_rel_base_label_);
 }
 
+uint32_t MipsAssembler::GetPcRelBaseLabelLocation() const {
+  return GetLabelLocation(&pc_rel_base_label_);  // Label bound by BindPcRelBaseLabel().
+}
+
 void MipsAssembler::FinalizeLabeledBranch(MipsLabel* label) {
   uint32_t length = branches_.back().GetLength();
   if (!label->IsBound()) {
diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h
index 1f7781f..8367e68 100644
--- a/compiler/utils/mips/assembler_mips.h
+++ b/compiler/utils/mips/assembler_mips.h
@@ -646,6 +646,9 @@
   // The assembler then computes literal offsets relative to this label.
   void BindPcRelBaseLabel();
 
+  // Returns the location of the label bound with BindPcRelBaseLabel().
+  uint32_t GetPcRelBaseLabelLocation() const;
+
   // Note that PC-relative literal loads are handled as pseudo branches because they need very
   // similar relocation and may similarly expand in size to accomodate for larger offsets relative
   // to PC.
diff --git a/dex2oat/dex2oat_test.cc b/dex2oat/dex2oat_test.cc
index c076b5a..58dd047 100644
--- a/dex2oat/dex2oat_test.cc
+++ b/dex2oat/dex2oat_test.cc
@@ -396,6 +396,11 @@
 };
 
 TEST_F(Dex2oatSwapUseTest, CheckSwapUsage) {
+  // The `native_alloc_2_ >= native_alloc_1_` assertion below may not
+  // hold true on some x86 systems when read barriers are enabled;
+  // disable this test while we investigate (b/29259363).
+  TEST_DISABLED_FOR_READ_BARRIER_ON_X86();
+
   RunTest(false /* use_fd */,
           false /* expect_use */);
   GrabResult1();
diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc
index 966587d..0e2a672 100644
--- a/runtime/arch/arm/entrypoints_init_arm.cc
+++ b/runtime/arch/arm/entrypoints_init_arm.cc
@@ -152,8 +152,6 @@
   qpoints->pReadBarrierMarkReg27 = nullptr;
   qpoints->pReadBarrierMarkReg28 = nullptr;
   qpoints->pReadBarrierMarkReg29 = nullptr;
-  qpoints->pReadBarrierMarkReg30 = nullptr;
-  qpoints->pReadBarrierMarkReg31 = nullptr;
   qpoints->pReadBarrierSlow = artReadBarrierSlow;
   qpoints->pReadBarrierForRootSlow = artReadBarrierForRootSlow;
 }
diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc
index 2e5f5ad..cc5bf29 100644
--- a/runtime/arch/arm64/entrypoints_init_arm64.cc
+++ b/runtime/arch/arm64/entrypoints_init_arm64.cc
@@ -124,6 +124,15 @@
 
   // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
+  // ARM64 is the architecture with the largest number of core
+  // registers (32) that supports the read barrier configuration.
+  // Because registers 30 (LR) and 31 (SP/XZR) cannot be used to pass
+  // arguments, only define ReadBarrierMarkRegX entrypoints for the
+  // first 30 registers.  This limitation is not a problem on other
+  // supported architectures (ARM, x86 and x86-64) either, as they
+  // have fewer core registers (resp. 16, 8 and 16).  (We may have to
+  // revise that design choice if read barrier support is added for
+  // MIPS and/or MIPS64.)
   qpoints->pReadBarrierMarkReg00 = art_quick_read_barrier_mark_reg00;
   qpoints->pReadBarrierMarkReg01 = art_quick_read_barrier_mark_reg01;
   qpoints->pReadBarrierMarkReg02 = art_quick_read_barrier_mark_reg02;
@@ -154,8 +163,6 @@
   qpoints->pReadBarrierMarkReg27 = art_quick_read_barrier_mark_reg27;
   qpoints->pReadBarrierMarkReg28 = art_quick_read_barrier_mark_reg28;
   qpoints->pReadBarrierMarkReg29 = art_quick_read_barrier_mark_reg29;
-  qpoints->pReadBarrierMarkReg30 = nullptr;  // Cannot use register 30 (LR) to pass arguments.
-  qpoints->pReadBarrierMarkReg31 = nullptr;  // Cannot use register 31 (SP/XZR) to pass arguments.
   qpoints->pReadBarrierSlow = artReadBarrierSlow;
   qpoints->pReadBarrierForRootSlow = artReadBarrierForRootSlow;
 };
diff --git a/runtime/arch/mips/entrypoints_init_mips.cc b/runtime/arch/mips/entrypoints_init_mips.cc
index 22efd19..09f8849 100644
--- a/runtime/arch/mips/entrypoints_init_mips.cc
+++ b/runtime/arch/mips/entrypoints_init_mips.cc
@@ -376,12 +376,6 @@
   qpoints->pReadBarrierMarkReg29 = nullptr;
   static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg29),
                 "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg30 = nullptr;
-  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg30),
-                "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg31 = nullptr;
-  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg31),
-                "Non-direct C stub marked direct.");
   qpoints->pReadBarrierSlow = artReadBarrierSlow;
   static_assert(IsDirectEntrypoint(kQuickReadBarrierSlow), "Direct C stub not marked direct.");
   qpoints->pReadBarrierForRootSlow = artReadBarrierForRootSlow;
diff --git a/runtime/arch/mips64/entrypoints_init_mips64.cc b/runtime/arch/mips64/entrypoints_init_mips64.cc
index b02edb6..34b0638 100644
--- a/runtime/arch/mips64/entrypoints_init_mips64.cc
+++ b/runtime/arch/mips64/entrypoints_init_mips64.cc
@@ -129,8 +129,6 @@
   qpoints->pReadBarrierMarkReg27 = nullptr;
   qpoints->pReadBarrierMarkReg28 = nullptr;
   qpoints->pReadBarrierMarkReg29 = nullptr;
-  qpoints->pReadBarrierMarkReg30 = nullptr;
-  qpoints->pReadBarrierMarkReg31 = nullptr;
   qpoints->pReadBarrierSlow = artReadBarrierSlow;
   qpoints->pReadBarrierForRootSlow = artReadBarrierForRootSlow;
 };
diff --git a/runtime/arch/x86/entrypoints_init_x86.cc b/runtime/arch/x86/entrypoints_init_x86.cc
index 4e9756c..bdf11da 100644
--- a/runtime/arch/x86/entrypoints_init_x86.cc
+++ b/runtime/arch/x86/entrypoints_init_x86.cc
@@ -117,8 +117,6 @@
   qpoints->pReadBarrierMarkReg27 = nullptr;
   qpoints->pReadBarrierMarkReg28 = nullptr;
   qpoints->pReadBarrierMarkReg29 = nullptr;
-  qpoints->pReadBarrierMarkReg30 = nullptr;
-  qpoints->pReadBarrierMarkReg31 = nullptr;
   qpoints->pReadBarrierSlow = art_quick_read_barrier_slow;
   qpoints->pReadBarrierForRootSlow = art_quick_read_barrier_for_root_slow;
 };
diff --git a/runtime/arch/x86_64/entrypoints_init_x86_64.cc b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
index c2e3023..42b9699 100644
--- a/runtime/arch/x86_64/entrypoints_init_x86_64.cc
+++ b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
@@ -131,8 +131,6 @@
   qpoints->pReadBarrierMarkReg27 = nullptr;
   qpoints->pReadBarrierMarkReg28 = nullptr;
   qpoints->pReadBarrierMarkReg29 = nullptr;
-  qpoints->pReadBarrierMarkReg30 = nullptr;
-  qpoints->pReadBarrierMarkReg31 = nullptr;
   qpoints->pReadBarrierSlow = art_quick_read_barrier_slow;
   qpoints->pReadBarrierForRootSlow = art_quick_read_barrier_for_root_slow;
 #endif  // __APPLE__
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index e1f00eb..b7df90d 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -84,7 +84,7 @@
             art::Thread::SelfOffset<__SIZEOF_POINTER__>().Int32Value())
 
 // Offset of field Thread::tlsPtr_.thread_local_objects.
-#define THREAD_LOCAL_OBJECTS_OFFSET (THREAD_CARD_TABLE_OFFSET + 199 * __SIZEOF_POINTER__)
+#define THREAD_LOCAL_OBJECTS_OFFSET (THREAD_CARD_TABLE_OFFSET + 197 * __SIZEOF_POINTER__)
 ADD_TEST_EQ(THREAD_LOCAL_OBJECTS_OFFSET,
             art::Thread::ThreadLocalObjectsOffset<__SIZEOF_POINTER__>().Int32Value())
 // Offset of field Thread::tlsPtr_.thread_local_pos.
diff --git a/runtime/common_runtime_test.h b/runtime/common_runtime_test.h
index b68eb19..8f7d18b 100644
--- a/runtime/common_runtime_test.h
+++ b/runtime/common_runtime_test.h
@@ -205,6 +205,12 @@
     return; \
   }
 
+#define TEST_DISABLED_FOR_READ_BARRIER_ON_X86() \
+  if (kUseReadBarrier && kRuntimeISA == kX86) { \
+    printf("WARNING: TEST DISABLED FOR READ BARRIER ON X86\n"); \
+    return; \
+  }
+
 }  // namespace art
 
 namespace std {
diff --git a/runtime/entrypoints/quick/quick_entrypoints_list.h b/runtime/entrypoints/quick/quick_entrypoints_list.h
index e0ec68e..07f0394 100644
--- a/runtime/entrypoints/quick/quick_entrypoints_list.h
+++ b/runtime/entrypoints/quick/quick_entrypoints_list.h
@@ -194,8 +194,6 @@
   V(ReadBarrierMarkReg27, mirror::Object*, mirror::Object*) \
   V(ReadBarrierMarkReg28, mirror::Object*, mirror::Object*) \
   V(ReadBarrierMarkReg29, mirror::Object*, mirror::Object*) \
-  V(ReadBarrierMarkReg30, mirror::Object*, mirror::Object*) \
-  V(ReadBarrierMarkReg31, mirror::Object*, mirror::Object*) \
   V(ReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t) \
   V(ReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*)
 
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index ffe4109..e3203dc 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -383,11 +383,7 @@
                          sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg28, pReadBarrierMarkReg29,
                          sizeof(void*));
-    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg29, pReadBarrierMarkReg30,
-                         sizeof(void*));
-    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg30, pReadBarrierMarkReg31,
-                         sizeof(void*));
-    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg31, pReadBarrierSlow, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg29, pReadBarrierSlow, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierSlow, pReadBarrierForRootSlow,
                          sizeof(void*));
 
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index d413a50..90446b0 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -520,7 +520,7 @@
   explicit ImmuneSpaceScanObjVisitor(ConcurrentCopying* cc)
       : collector_(cc) {}
 
-  void operator()(mirror::Object* obj) const SHARED_REQUIRES(Locks::mutator_lock_) {
+  ALWAYS_INLINE void operator()(mirror::Object* obj) const SHARED_REQUIRES(Locks::mutator_lock_) {
     if (kUseBakerReadBarrier && kGrayDirtyImmuneObjects) {
       if (obj->GetReadBarrierPointer() == ReadBarrier::GrayPtr()) {
         collector_->ScanImmuneObject(obj);
@@ -534,6 +534,10 @@
     }
   }
 
+  static void Callback(mirror::Object* obj, void* arg) SHARED_REQUIRES(Locks::mutator_lock_) {
+    reinterpret_cast<ImmuneSpaceScanObjVisitor*>(arg)->operator()(obj);
+  }
+
  private:
   ConcurrentCopying* const collector_;
 };
@@ -558,10 +562,15 @@
     for (auto& space : immune_spaces_.GetSpaces()) {
       DCHECK(space->IsImageSpace() || space->IsZygoteSpace());
       accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
+      accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space);
       ImmuneSpaceScanObjVisitor visitor(this);
-      live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(space->Begin()),
-                                    reinterpret_cast<uintptr_t>(space->Limit()),
-                                    visitor);
+      if (table != nullptr) {
+        table->VisitObjects(ImmuneSpaceScanObjVisitor::Callback, &visitor);
+      } else {
+        live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(space->Begin()),
+                                      reinterpret_cast<uintptr_t>(space->Limit()),
+                                      visitor);
+      }
     }
   }
   if (kUseBakerReadBarrier) {
@@ -1714,12 +1723,19 @@
 
 // Scan ref fields of an object.
 inline void ConcurrentCopying::Scan(mirror::Object* to_ref) {
+  if (kIsDebugBuild) {
+    // Avoid all read barriers while visiting references to help performance.
+    Thread::Current()->ModifyDebugDisallowReadBarrier(1);
+  }
   DCHECK(!region_space_->IsInFromSpace(to_ref));
   DCHECK_EQ(Thread::Current(), thread_running_gc_);
   RefFieldsVisitor visitor(this);
   // Disable the read barrier for a performance reason.
   to_ref->VisitReferences</*kVisitNativeRoots*/true, kDefaultVerifyFlags, kWithoutReadBarrier>(
       visitor, visitor);
+  if (kIsDebugBuild) {
+    Thread::Current()->ModifyDebugDisallowReadBarrier(-1);
+  }
 }
 
 // Process a field.
@@ -1836,7 +1852,7 @@
   mirror::Class* int_array_class = mirror::IntArray::GetArrayClass();
   CHECK(int_array_class != nullptr);
   AssertToSpaceInvariant(nullptr, MemberOffset(0), int_array_class);
-  size_t component_size = int_array_class->GetComponentSize();
+  size_t component_size = int_array_class->GetComponentSize<kWithoutReadBarrier>();
   CHECK_EQ(component_size, sizeof(int32_t));
   size_t data_offset = mirror::Array::DataOffset(component_size).SizeValue();
   if (data_offset > byte_size) {
@@ -1849,13 +1865,14 @@
   } else {
     // Use an int array.
     dummy_obj->SetClass(int_array_class);
-    CHECK(dummy_obj->IsArrayInstance());
+    CHECK((dummy_obj->IsArrayInstance<kVerifyNone, kWithoutReadBarrier>()));
     int32_t length = (byte_size - data_offset) / component_size;
-    dummy_obj->AsArray()->SetLength(length);
-    CHECK_EQ(dummy_obj->AsArray()->GetLength(), length)
+    mirror::Array* dummy_arr = dummy_obj->AsArray<kVerifyNone, kWithoutReadBarrier>();
+    dummy_arr->SetLength(length);
+    CHECK_EQ(dummy_arr->GetLength(), length)
         << "byte_size=" << byte_size << " length=" << length
         << " component_size=" << component_size << " data_offset=" << data_offset;
-    CHECK_EQ(byte_size, dummy_obj->SizeOf())
+    CHECK_EQ(byte_size, (dummy_obj->SizeOf<kVerifyNone, kWithoutReadBarrier>()))
         << "byte_size=" << byte_size << " length=" << length
         << " component_size=" << component_size << " data_offset=" << data_offset;
   }
diff --git a/runtime/gc/reference_queue.cc b/runtime/gc/reference_queue.cc
index 6088a43..62625c4 100644
--- a/runtime/gc/reference_queue.cc
+++ b/runtime/gc/reference_queue.cc
@@ -44,7 +44,9 @@
     // 1 element cyclic queue, ie: Reference ref = ..; ref.pendingNext = ref;
     list_ = ref;
   } else {
-    mirror::Reference* head = list_->GetPendingNext();
+    // The list is owned by the GC; everything that has been inserted must already be at least
+    // gray.
+    mirror::Reference* head = list_->GetPendingNext<kWithoutReadBarrier>();
     DCHECK(head != nullptr);
     ref->SetPendingNext(head);
   }
@@ -54,14 +56,14 @@
 
 mirror::Reference* ReferenceQueue::DequeuePendingReference() {
   DCHECK(!IsEmpty());
-  mirror::Reference* ref = list_->GetPendingNext();
+  mirror::Reference* ref = list_->GetPendingNext<kWithoutReadBarrier>();
   DCHECK(ref != nullptr);
   // Note: the following code is thread-safe because it is only called from ProcessReferences which
   // is single threaded.
   if (list_ == ref) {
     list_ = nullptr;
   } else {
-    mirror::Reference* next = ref->GetPendingNext();
+    mirror::Reference* next = ref->GetPendingNext<kWithoutReadBarrier>();
     list_->SetPendingNext(next);
   }
   ref->SetPendingNext(nullptr);
diff --git a/runtime/interpreter/mterp/arm64/binopLit8.S b/runtime/interpreter/mterp/arm64/binopLit8.S
index 326c657..0b7c68a 100644
--- a/runtime/interpreter/mterp/arm64/binopLit8.S
+++ b/runtime/interpreter/mterp/arm64/binopLit8.S
@@ -13,7 +13,7 @@
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
     /* binop/lit8 vAA, vBB, #+CC */
-    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
diff --git a/runtime/interpreter/mterp/arm64/header.S b/runtime/interpreter/mterp/arm64/header.S
index 4257200..c791eb5 100644
--- a/runtime/interpreter/mterp/arm64/header.S
+++ b/runtime/interpreter/mterp/arm64/header.S
@@ -272,6 +272,14 @@
 .endm
 
 /*
+ * Get the 32-bit value from a Dalvik register and sign-extend to 64-bit.
+ * Used to avoid an extra instruction in int-to-long.
+ */
+.macro GET_VREG_S reg, vreg
+    ldrsw   \reg, [xFP, \vreg, uxtw #2]
+.endm
+
+/*
  * Convert a virtual register index into an address.
  */
 .macro VREG_INDEX_TO_ADDR reg, vreg
diff --git a/runtime/interpreter/mterp/arm64/op_const_16.S b/runtime/interpreter/mterp/arm64/op_const_16.S
index 27f5273..f0e8192 100644
--- a/runtime/interpreter/mterp/arm64/op_const_16.S
+++ b/runtime/interpreter/mterp/arm64/op_const_16.S
@@ -1,5 +1,5 @@
     /* const/16 vAA, #+BBBB */
-    FETCH_S w0, 1                       // w0<- ssssBBBB (sign-extended
+    FETCH_S w0, 1                       // w0<- ssssBBBB (sign-extended)
     lsr     w3, wINST, #8               // w3<- AA
     FETCH_ADVANCE_INST 2                // advance xPC, load wINST
     SET_VREG w0, w3                     // vAA<- w0
diff --git a/runtime/interpreter/mterp/arm64/op_const_4.S b/runtime/interpreter/mterp/arm64/op_const_4.S
index 04cd4f8..9a36115 100644
--- a/runtime/interpreter/mterp/arm64/op_const_4.S
+++ b/runtime/interpreter/mterp/arm64/op_const_4.S
@@ -1,8 +1,7 @@
     /* const/4 vA, #+B */
-    lsl     w1, wINST, #16              // w1<- Bxxx0000
+    sbfx    w1, wINST, #12, #4          // w1<- sssssssB
     ubfx    w0, wINST, #8, #4           // w0<- A
     FETCH_ADVANCE_INST 1                // advance xPC, load wINST
-    asr     w1, w1, #28                 // w1<- sssssssB (sign-extended)
     GET_INST_OPCODE ip                  // ip<- opcode from xINST
     SET_VREG w1, w0                     // fp[A]<- w1
     GOTO_OPCODE ip                      // execute next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_const_high16.S b/runtime/interpreter/mterp/arm64/op_const_high16.S
index dd51ce1..3a9edff 100644
--- a/runtime/interpreter/mterp/arm64/op_const_high16.S
+++ b/runtime/interpreter/mterp/arm64/op_const_high16.S
@@ -1,5 +1,5 @@
     /* const/high16 vAA, #+BBBB0000 */
-    FETCH   w0, 1                       // r0<- 0000BBBB (zero-extended
+    FETCH   w0, 1                       // r0<- 0000BBBB (zero-extended)
     lsr     w3, wINST, #8               // r3<- AA
     lsl     w0, w0, #16                 // r0<- BBBB0000
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
diff --git a/runtime/interpreter/mterp/arm64/op_if_eqz.S b/runtime/interpreter/mterp/arm64/op_if_eqz.S
index 1d3202e1..47c1dee 100644
--- a/runtime/interpreter/mterp/arm64/op_if_eqz.S
+++ b/runtime/interpreter/mterp/arm64/op_if_eqz.S
@@ -1 +1 @@
-%include "arm64/zcmp.S" { "condition":"eq" }
+%include "arm64/zcmp.S" { "compare":"0", "branch":"cbz     w2," }
diff --git a/runtime/interpreter/mterp/arm64/op_if_gez.S b/runtime/interpreter/mterp/arm64/op_if_gez.S
index 8e3abd3..087e094 100644
--- a/runtime/interpreter/mterp/arm64/op_if_gez.S
+++ b/runtime/interpreter/mterp/arm64/op_if_gez.S
@@ -1 +1 @@
-%include "arm64/zcmp.S" { "condition":"ge" }
+%include "arm64/zcmp.S" { "compare":"0", "branch":"tbz     w2, #31," }
diff --git a/runtime/interpreter/mterp/arm64/op_if_gtz.S b/runtime/interpreter/mterp/arm64/op_if_gtz.S
index a4f2f6b..476b265 100644
--- a/runtime/interpreter/mterp/arm64/op_if_gtz.S
+++ b/runtime/interpreter/mterp/arm64/op_if_gtz.S
@@ -1 +1 @@
-%include "arm64/zcmp.S" { "condition":"gt" }
+%include "arm64/zcmp.S" { "branch":"b.gt" }
diff --git a/runtime/interpreter/mterp/arm64/op_if_lez.S b/runtime/interpreter/mterp/arm64/op_if_lez.S
index c1425fdd..2717a60 100644
--- a/runtime/interpreter/mterp/arm64/op_if_lez.S
+++ b/runtime/interpreter/mterp/arm64/op_if_lez.S
@@ -1 +1 @@
-%include "arm64/zcmp.S" { "condition":"le" }
+%include "arm64/zcmp.S" { "branch":"b.le" }
diff --git a/runtime/interpreter/mterp/arm64/op_if_ltz.S b/runtime/interpreter/mterp/arm64/op_if_ltz.S
index 03cd3d6..86089c1 100644
--- a/runtime/interpreter/mterp/arm64/op_if_ltz.S
+++ b/runtime/interpreter/mterp/arm64/op_if_ltz.S
@@ -1 +1 @@
-%include "arm64/zcmp.S" { "condition":"lt" }
+%include "arm64/zcmp.S" { "compare":"0", "branch":"tbnz    w2, #31," }
diff --git a/runtime/interpreter/mterp/arm64/op_if_nez.S b/runtime/interpreter/mterp/arm64/op_if_nez.S
index 21e1bc2..efacc88 100644
--- a/runtime/interpreter/mterp/arm64/op_if_nez.S
+++ b/runtime/interpreter/mterp/arm64/op_if_nez.S
@@ -1 +1 @@
-%include "arm64/zcmp.S" { "condition":"ne" }
+%include "arm64/zcmp.S" { "compare":"0", "branch":"cbnz    w2," }
diff --git a/runtime/interpreter/mterp/arm64/op_iget_wide_quick.S b/runtime/interpreter/mterp/arm64/op_iget_wide_quick.S
index 30b30c2..e9388e4 100644
--- a/runtime/interpreter/mterp/arm64/op_iget_wide_quick.S
+++ b/runtime/interpreter/mterp/arm64/op_iget_wide_quick.S
@@ -4,8 +4,7 @@
     GET_VREG w3, w2                     // w3<- object we're operating on
     ubfx    w2, wINST, #8, #4           // w2<- A
     cbz     w3, common_errNullObject    // object was null
-    add     x4, x3, x4                  // create direct pointer
-    ldr     x0, [x4]
+    ldr     x0, [x3, x4]                // x0<- obj.field
     FETCH_ADVANCE_INST 2                // advance rPC, load wINST
     SET_VREG_WIDE x0, w2
     GET_INST_OPCODE ip                  // extract opcode from wINST
diff --git a/runtime/interpreter/mterp/arm64/op_int_to_long.S b/runtime/interpreter/mterp/arm64/op_int_to_long.S
index 35830f3..45e3112 100644
--- a/runtime/interpreter/mterp/arm64/op_int_to_long.S
+++ b/runtime/interpreter/mterp/arm64/op_int_to_long.S
@@ -1 +1,8 @@
-%include "arm64/funopWider.S" {"instr":"sxtw x0, w0", "srcreg":"w0", "tgtreg":"x0"}
+    /* int-to-long vA, vB */
+    lsr     w3, wINST, #12              // w3<- B
+    ubfx    w4, wINST, #8, #4           // w4<- A
+    GET_VREG_S x0, w3                   // x0<- sign_extend(fp[B])
+    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    SET_VREG_WIDE x0, w4                // fp[A]<- x0
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_iput_quick.S b/runtime/interpreter/mterp/arm64/op_iput_quick.S
index 2afc51b..e95da76 100644
--- a/runtime/interpreter/mterp/arm64/op_iput_quick.S
+++ b/runtime/interpreter/mterp/arm64/op_iput_quick.S
@@ -5,7 +5,6 @@
     FETCH w1, 1                         // w1<- field byte offset
     GET_VREG w3, w2                     // w3<- fp[B], the object pointer
     ubfx    w2, wINST, #8, #4           // w2<- A
-    cmp     w3, #0                      // check object for null
     cbz     w3, common_errNullObject    // object was null
     GET_VREG w0, w2                     // w0<- fp[A]
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
diff --git a/runtime/interpreter/mterp/arm64/op_iput_wide_quick.S b/runtime/interpreter/mterp/arm64/op_iput_wide_quick.S
index 566e2bf..6cec363 100644
--- a/runtime/interpreter/mterp/arm64/op_iput_wide_quick.S
+++ b/runtime/interpreter/mterp/arm64/op_iput_wide_quick.S
@@ -6,7 +6,6 @@
     cbz     w2, common_errNullObject    // object was null
     GET_VREG_WIDE x0, w0                // x0-< fp[A]
     FETCH_ADVANCE_INST 2                // advance rPC, load wINST
-    add     x1, x2, x3                  // create a direct pointer
-    str     x0, [x1]
+    str     x0, [x2, x3]                // obj.field<- x0
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_long_to_int.S b/runtime/interpreter/mterp/arm64/op_long_to_int.S
index 360a69b..73f58d8 100644
--- a/runtime/interpreter/mterp/arm64/op_long_to_int.S
+++ b/runtime/interpreter/mterp/arm64/op_long_to_int.S
@@ -1 +1,2 @@
-%include "arm64/funopNarrower.S" {"instr":"", "srcreg":"x0", "tgtreg":"w0"}
+/* we ignore the high word, making this equivalent to a 32-bit reg move */
+%include "arm64/op_move.S"
diff --git a/runtime/interpreter/mterp/arm64/op_neg_double.S b/runtime/interpreter/mterp/arm64/op_neg_double.S
index e9064c4..d77859d 100644
--- a/runtime/interpreter/mterp/arm64/op_neg_double.S
+++ b/runtime/interpreter/mterp/arm64/op_neg_double.S
@@ -1 +1 @@
-%include "arm64/unopWide.S" {"preinstr":"mov x1, #0x8000000000000000", "instr":"add     x0, x0, x1"}
+%include "arm64/unopWide.S" {"instr":"eor     x0, x0, #0x8000000000000000"}
diff --git a/runtime/interpreter/mterp/arm64/op_neg_float.S b/runtime/interpreter/mterp/arm64/op_neg_float.S
index 49d51af..6652aec 100644
--- a/runtime/interpreter/mterp/arm64/op_neg_float.S
+++ b/runtime/interpreter/mterp/arm64/op_neg_float.S
@@ -1 +1 @@
-%include "arm64/unop.S" {"preinstr":"mov w4, #0x80000000", "instr":"add     w0, w0, w4"}
+%include "arm64/unop.S" {"instr":"eor     w0, w0, #0x80000000"}
diff --git a/runtime/interpreter/mterp/arm64/unop.S b/runtime/interpreter/mterp/arm64/unop.S
index 474a961..e681968 100644
--- a/runtime/interpreter/mterp/arm64/unop.S
+++ b/runtime/interpreter/mterp/arm64/unop.S
@@ -1,4 +1,3 @@
-%default {"preinstr":""}
     /*
      * Generic 32-bit unary operation.  Provide an "instr" line that
      * specifies an instruction that performs "result = op w0".
@@ -11,7 +10,6 @@
     lsr     w3, wINST, #12              // w3<- B
     GET_VREG w0, w3                     // w0<- vB
     ubfx    w9, wINST, #8, #4           // w9<- A
-    $preinstr                           // optional op; may set condition codes
     FETCH_ADVANCE_INST 1                // advance rPC, load rINST
     $instr                              // w0<- op, w0-w3 changed
     GET_INST_OPCODE ip                  // extract opcode from rINST
diff --git a/runtime/interpreter/mterp/arm64/unopWide.S b/runtime/interpreter/mterp/arm64/unopWide.S
index 109302a..6ee4f92 100644
--- a/runtime/interpreter/mterp/arm64/unopWide.S
+++ b/runtime/interpreter/mterp/arm64/unopWide.S
@@ -1,4 +1,4 @@
-%default {"instr":"sub x0, xzr, x0", "preinstr":""}
+%default {"instr":"sub x0, xzr, x0"}
     /*
      * Generic 64-bit unary operation.  Provide an "instr" line that
      * specifies an instruction that performs "result = op x0".
@@ -10,7 +10,6 @@
     ubfx    w4, wINST, #8, #4           // w4<- A
     GET_VREG_WIDE x0, w3
     FETCH_ADVANCE_INST 1                // advance rPC, load wINST
-    $preinstr
     $instr
     GET_INST_OPCODE ip                  // extract opcode from wINST
     SET_VREG_WIDE x0, w4
diff --git a/runtime/interpreter/mterp/arm64/zcmp.S b/runtime/interpreter/mterp/arm64/zcmp.S
index b303e6a..510a3c1 100644
--- a/runtime/interpreter/mterp/arm64/zcmp.S
+++ b/runtime/interpreter/mterp/arm64/zcmp.S
@@ -1,3 +1,4 @@
+%default { "compare":"1" }
     /*
      * Generic one-operand compare-and-branch operation.  Provide a "condition"
      * fragment that specifies the comparison to perform.
@@ -8,8 +9,10 @@
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
     FETCH_S wINST, 1                    // w1<- branch offset, in code units
+    .if ${compare}
     cmp     w2, #0                      // compare (vA, 0)
-    b.${condition} MterpCommonTakenBranchNoFlags
+    .endif
+    ${branch} MterpCommonTakenBranchNoFlags
     cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
     b.eq    .L_check_not_taken_osr
     FETCH_ADVANCE_INST 2
diff --git a/runtime/interpreter/mterp/config_arm64 b/runtime/interpreter/mterp/config_arm64
index 57206d2..6427ead 100644
--- a/runtime/interpreter/mterp/config_arm64
+++ b/runtime/interpreter/mterp/config_arm64
@@ -20,9 +20,6 @@
 handler-style computed-goto
 handler-size 128
 
-# source for alternate entry stub
-asm-alt-stub arm64/alt_stub.S
-
 # file header and basic definitions
 import arm64/header.S
 
@@ -295,5 +292,12 @@
     # op op_unused_ff FALLBACK
 op-end
 
-# common subroutines for asm
+# common subroutines for asm; we emit the footer before alternate
+# entry stubs, so that TBZ/TBNZ from ops can reach targets in footer
 import arm64/footer.S
+
+# source for alternate entry stub
+asm-alt-stub arm64/alt_stub.S
+
+# emit alternate entry stubs
+alt-ops
diff --git a/runtime/interpreter/mterp/out/mterp_arm64.S b/runtime/interpreter/mterp/out/mterp_arm64.S
index e318782..de37e07 100644
--- a/runtime/interpreter/mterp/out/mterp_arm64.S
+++ b/runtime/interpreter/mterp/out/mterp_arm64.S
@@ -279,6 +279,14 @@
 .endm
 
 /*
+ * Get the 32-bit value from a Dalvik register and sign-extend to 64-bit.
+ * Used to avoid an extra instruction in int-to-long.
+ */
+.macro GET_VREG_S reg, vreg
+    ldrsw   \reg, [xFP, \vreg, uxtw #2]
+.endm
+
+/*
  * Convert a virtual register index into an address.
  */
 .macro VREG_INDEX_TO_ADDR reg, vreg
@@ -695,10 +703,9 @@
 .L_op_const_4: /* 0x12 */
 /* File: arm64/op_const_4.S */
     /* const/4 vA, #+B */
-    lsl     w1, wINST, #16              // w1<- Bxxx0000
+    sbfx    w1, wINST, #12, #4          // w1<- sssssssB
     ubfx    w0, wINST, #8, #4           // w0<- A
     FETCH_ADVANCE_INST 1                // advance xPC, load wINST
-    asr     w1, w1, #28                 // w1<- sssssssB (sign-extended)
     GET_INST_OPCODE ip                  // ip<- opcode from xINST
     SET_VREG w1, w0                     // fp[A]<- w1
     GOTO_OPCODE ip                      // execute next instruction
@@ -708,7 +715,7 @@
 .L_op_const_16: /* 0x13 */
 /* File: arm64/op_const_16.S */
     /* const/16 vAA, #+BBBB */
-    FETCH_S w0, 1                       // w0<- ssssBBBB (sign-extended
+    FETCH_S w0, 1                       // w0<- ssssBBBB (sign-extended)
     lsr     w3, wINST, #8               // w3<- AA
     FETCH_ADVANCE_INST 2                // advance xPC, load wINST
     SET_VREG w0, w3                     // vAA<- w0
@@ -734,7 +741,7 @@
 .L_op_const_high16: /* 0x15 */
 /* File: arm64/op_const_high16.S */
     /* const/high16 vAA, #+BBBB0000 */
-    FETCH   w0, 1                       // r0<- 0000BBBB (zero-extended
+    FETCH   w0, 1                       // r0<- 0000BBBB (zero-extended)
     lsr     w3, wINST, #8               // r3<- AA
     lsl     w0, w0, #16                 // r0<- BBBB0000
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
@@ -1465,8 +1472,10 @@
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
     FETCH_S wINST, 1                    // w1<- branch offset, in code units
+    .if 0
     cmp     w2, #0                      // compare (vA, 0)
-    b.eq MterpCommonTakenBranchNoFlags
+    .endif
+    cbz     w2, MterpCommonTakenBranchNoFlags
     cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
     b.eq    .L_check_not_taken_osr
     FETCH_ADVANCE_INST 2
@@ -1489,8 +1498,10 @@
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
     FETCH_S wINST, 1                    // w1<- branch offset, in code units
+    .if 0
     cmp     w2, #0                      // compare (vA, 0)
-    b.ne MterpCommonTakenBranchNoFlags
+    .endif
+    cbnz    w2, MterpCommonTakenBranchNoFlags
     cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
     b.eq    .L_check_not_taken_osr
     FETCH_ADVANCE_INST 2
@@ -1513,8 +1524,10 @@
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
     FETCH_S wINST, 1                    // w1<- branch offset, in code units
+    .if 0
     cmp     w2, #0                      // compare (vA, 0)
-    b.lt MterpCommonTakenBranchNoFlags
+    .endif
+    tbnz    w2, #31, MterpCommonTakenBranchNoFlags
     cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
     b.eq    .L_check_not_taken_osr
     FETCH_ADVANCE_INST 2
@@ -1537,8 +1550,10 @@
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
     FETCH_S wINST, 1                    // w1<- branch offset, in code units
+    .if 0
     cmp     w2, #0                      // compare (vA, 0)
-    b.ge MterpCommonTakenBranchNoFlags
+    .endif
+    tbz     w2, #31, MterpCommonTakenBranchNoFlags
     cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
     b.eq    .L_check_not_taken_osr
     FETCH_ADVANCE_INST 2
@@ -1561,7 +1576,9 @@
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
     FETCH_S wINST, 1                    // w1<- branch offset, in code units
+    .if 1
     cmp     w2, #0                      // compare (vA, 0)
+    .endif
     b.gt MterpCommonTakenBranchNoFlags
     cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
     b.eq    .L_check_not_taken_osr
@@ -1585,7 +1602,9 @@
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
     FETCH_S wINST, 1                    // w1<- branch offset, in code units
+    .if 1
     cmp     w2, #0                      // compare (vA, 0)
+    .endif
     b.le MterpCommonTakenBranchNoFlags
     cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
     b.eq    .L_check_not_taken_osr
@@ -3192,7 +3211,6 @@
     lsr     w3, wINST, #12              // w3<- B
     GET_VREG w0, w3                     // w0<- vB
     ubfx    w9, wINST, #8, #4           // w9<- A
-                               // optional op; may set condition codes
     FETCH_ADVANCE_INST 1                // advance rPC, load rINST
     sub     w0, wzr, w0                              // w0<- op, w0-w3 changed
     GET_INST_OPCODE ip                  // extract opcode from rINST
@@ -3218,7 +3236,6 @@
     lsr     w3, wINST, #12              // w3<- B
     GET_VREG w0, w3                     // w0<- vB
     ubfx    w9, wINST, #8, #4           // w9<- A
-                               // optional op; may set condition codes
     FETCH_ADVANCE_INST 1                // advance rPC, load rINST
     mvn     w0, w0                              // w0<- op, w0-w3 changed
     GET_INST_OPCODE ip                  // extract opcode from rINST
@@ -3243,7 +3260,6 @@
     ubfx    w4, wINST, #8, #4           // w4<- A
     GET_VREG_WIDE x0, w3
     FETCH_ADVANCE_INST 1                // advance rPC, load wINST
-    
     sub x0, xzr, x0
     GET_INST_OPCODE ip                  // extract opcode from wINST
     SET_VREG_WIDE x0, w4
@@ -3267,7 +3283,6 @@
     ubfx    w4, wINST, #8, #4           // w4<- A
     GET_VREG_WIDE x0, w3
     FETCH_ADVANCE_INST 1                // advance rPC, load wINST
-    
     mvn     x0, x0
     GET_INST_OPCODE ip                  // extract opcode from wINST
     SET_VREG_WIDE x0, w4
@@ -3292,9 +3307,8 @@
     lsr     w3, wINST, #12              // w3<- B
     GET_VREG w0, w3                     // w0<- vB
     ubfx    w9, wINST, #8, #4           // w9<- A
-    mov w4, #0x80000000                           // optional op; may set condition codes
     FETCH_ADVANCE_INST 1                // advance rPC, load rINST
-    add     w0, w0, w4                              // w0<- op, w0-w3 changed
+    eor     w0, w0, #0x80000000                              // w0<- op, w0-w3 changed
     GET_INST_OPCODE ip                  // extract opcode from rINST
     SET_VREG w0, w9                     // vAA<- w0
     GOTO_OPCODE ip                      // jump to next instruction
@@ -3317,8 +3331,7 @@
     ubfx    w4, wINST, #8, #4           // w4<- A
     GET_VREG_WIDE x0, w3
     FETCH_ADVANCE_INST 1                // advance rPC, load wINST
-    mov x1, #0x8000000000000000
-    add     x0, x0, x1
+    eor     x0, x0, #0x8000000000000000
     GET_INST_OPCODE ip                  // extract opcode from wINST
     SET_VREG_WIDE x0, w4
     GOTO_OPCODE ip                      // jump to next instruction
@@ -3329,24 +3342,15 @@
     .balign 128
 .L_op_int_to_long: /* 0x81 */
 /* File: arm64/op_int_to_long.S */
-/* File: arm64/funopWider.S */
-    /*
-     * Generic 32bit-to-64bit floating point unary operation.  Provide an
-     * "instr" line that specifies an instruction that performs "x0 = op w0".
-     *
-     * For: int-to-double, float-to-double, float-to-long
-     */
-    /* unop vA, vB */
+    /* int-to-long vA, vB */
     lsr     w3, wINST, #12              // w3<- B
     ubfx    w4, wINST, #8, #4           // w4<- A
-    GET_VREG w0, w3
+    GET_VREG_S x0, w3                   // x0<- sign_extend(fp[B])
     FETCH_ADVANCE_INST 1                // advance rPC, load wINST
-    sxtw x0, w0                              // d0<- op
     GET_INST_OPCODE ip                  // extract opcode from wINST
-    SET_VREG_WIDE x0, w4           // vA<- d0
+    SET_VREG_WIDE x0, w4                // fp[A]<- x0
     GOTO_OPCODE ip                      // jump to next instruction
 
-
 /* ------------------------------ */
     .balign 128
 .L_op_int_to_float: /* 0x82 */
@@ -3396,22 +3400,21 @@
     .balign 128
 .L_op_long_to_int: /* 0x84 */
 /* File: arm64/op_long_to_int.S */
-/* File: arm64/funopNarrower.S */
-    /*
-     * Generic 64bit-to-32bit floating point unary operation.  Provide an
-     * "instr" line that specifies an instruction that performs "w0 = op x0".
-     *
-     * For: int-to-double, float-to-double, float-to-long
-     */
-    /* unop vA, vB */
-    lsr     w3, wINST, #12              // w3<- B
-    ubfx    w4, wINST, #8, #4           // w4<- A
-    GET_VREG_WIDE x0, w3
+/* we ignore the high word, making this equivalent to a 32-bit reg move */
+/* File: arm64/op_move.S */
+    /* for move, move-object, long-to-int */
+    /* op vA, vB */
+    lsr     w1, wINST, #12              // x1<- B from 15:12
+    ubfx    w0, wINST, #8, #4           // x0<- A from 11:8
     FETCH_ADVANCE_INST 1                // advance rPC, load wINST
-                                  // d0<- op
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    SET_VREG w0, w4                // vA<- d0
-    GOTO_OPCODE ip                      // jump to next instruction
+    GET_VREG w2, w1                     // x2<- fp[B]
+    GET_INST_OPCODE ip                  // ip<- opcode from wINST
+    .if 0
+    SET_VREG_OBJECT w2, w0              // fp[A]<- x2
+    .else
+    SET_VREG w2, w0                     // fp[A]<- x2
+    .endif
+    GOTO_OPCODE ip                      // execute next instruction
 
 
 /* ------------------------------ */
@@ -3608,7 +3611,6 @@
     lsr     w3, wINST, #12              // w3<- B
     GET_VREG w0, w3                     // w0<- vB
     ubfx    w9, wINST, #8, #4           // w9<- A
-                               // optional op; may set condition codes
     FETCH_ADVANCE_INST 1                // advance rPC, load rINST
     sxtb    w0, w0                              // w0<- op, w0-w3 changed
     GET_INST_OPCODE ip                  // extract opcode from rINST
@@ -3634,7 +3636,6 @@
     lsr     w3, wINST, #12              // w3<- B
     GET_VREG w0, w3                     // w0<- vB
     ubfx    w9, wINST, #8, #4           // w9<- A
-                               // optional op; may set condition codes
     FETCH_ADVANCE_INST 1                // advance rPC, load rINST
     uxth    w0, w0                              // w0<- op, w0-w3 changed
     GET_INST_OPCODE ip                  // extract opcode from rINST
@@ -3660,7 +3661,6 @@
     lsr     w3, wINST, #12              // w3<- B
     GET_VREG w0, w3                     // w0<- vB
     ubfx    w9, wINST, #8, #4           // w9<- A
-                               // optional op; may set condition codes
     FETCH_ADVANCE_INST 1                // advance rPC, load rINST
     sxth    w0, w0                              // w0<- op, w0-w3 changed
     GET_INST_OPCODE ip                  // extract opcode from rINST
@@ -6052,7 +6052,7 @@
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
     /* binop/lit8 vAA, vBB, #+CC */
-    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
@@ -6088,7 +6088,7 @@
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
     /* binop/lit8 vAA, vBB, #+CC */
-    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
@@ -6125,7 +6125,7 @@
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
     /* binop/lit8 vAA, vBB, #+CC */
-    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
@@ -6161,7 +6161,7 @@
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
     /* binop/lit8 vAA, vBB, #+CC */
-    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
@@ -6197,7 +6197,7 @@
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
     /* binop/lit8 vAA, vBB, #+CC */
-    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
@@ -6233,7 +6233,7 @@
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
     /* binop/lit8 vAA, vBB, #+CC */
-    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
@@ -6269,7 +6269,7 @@
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
     /* binop/lit8 vAA, vBB, #+CC */
-    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
@@ -6305,7 +6305,7 @@
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
     /* binop/lit8 vAA, vBB, #+CC */
-    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
@@ -6341,7 +6341,7 @@
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
     /* binop/lit8 vAA, vBB, #+CC */
-    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
@@ -6377,7 +6377,7 @@
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
     /* binop/lit8 vAA, vBB, #+CC */
-    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
@@ -6413,7 +6413,7 @@
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
     /* binop/lit8 vAA, vBB, #+CC */
-    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC
+    FETCH_S w3, 1                       // w3<- ssssCCBB (sign-extended for CC)
     lsr     w9, wINST, #8               // w9<- AA
     and     w2, w3, #255                // w2<- BB
     GET_VREG w0, w2                     // w0<- vBB
@@ -6458,8 +6458,7 @@
     GET_VREG w3, w2                     // w3<- object we're operating on
     ubfx    w2, wINST, #8, #4           // w2<- A
     cbz     w3, common_errNullObject    // object was null
-    add     x4, x3, x4                  // create direct pointer
-    ldr     x0, [x4]
+    ldr     x0, [x3, x4]                // x0<- obj.field
     FETCH_ADVANCE_INST 2                // advance rPC, load wINST
     SET_VREG_WIDE x0, w2
     GET_INST_OPCODE ip                  // extract opcode from wINST
@@ -6495,7 +6494,6 @@
     FETCH w1, 1                         // w1<- field byte offset
     GET_VREG w3, w2                     // w3<- fp[B], the object pointer
     ubfx    w2, wINST, #8, #4           // w2<- A
-    cmp     w3, #0                      // check object for null
     cbz     w3, common_errNullObject    // object was null
     GET_VREG w0, w2                     // w0<- fp[A]
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
@@ -6515,8 +6513,7 @@
     cbz     w2, common_errNullObject    // object was null
     GET_VREG_WIDE x0, w0                // x0-< fp[A]
     FETCH_ADVANCE_INST 2                // advance rPC, load wINST
-    add     x1, x2, x3                  // create a direct pointer
-    str     x0, [x1]
+    str     x0, [x2, x3]                // obj.field<- x0
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 
@@ -6597,7 +6594,6 @@
     FETCH w1, 1                         // w1<- field byte offset
     GET_VREG w3, w2                     // w3<- fp[B], the object pointer
     ubfx    w2, wINST, #8, #4           // w2<- A
-    cmp     w3, #0                      // check object for null
     cbz     w3, common_errNullObject    // object was null
     GET_VREG w0, w2                     // w0<- fp[A]
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
@@ -6617,7 +6613,6 @@
     FETCH w1, 1                         // w1<- field byte offset
     GET_VREG w3, w2                     // w3<- fp[B], the object pointer
     ubfx    w2, wINST, #8, #4           // w2<- A
-    cmp     w3, #0                      // check object for null
     cbz     w3, common_errNullObject    // object was null
     GET_VREG w0, w2                     // w0<- fp[A]
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
@@ -6637,7 +6632,6 @@
     FETCH w1, 1                         // w1<- field byte offset
     GET_VREG w3, w2                     // w3<- fp[B], the object pointer
     ubfx    w2, wINST, #8, #4           // w2<- A
-    cmp     w3, #0                      // check object for null
     cbz     w3, common_errNullObject    // object was null
     GET_VREG w0, w2                     // w0<- fp[A]
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
@@ -6657,7 +6651,6 @@
     FETCH w1, 1                         // w1<- field byte offset
     GET_VREG w3, w2                     // w3<- fp[B], the object pointer
     ubfx    w2, wINST, #8, #4           // w2<- A
-    cmp     w3, #0                      // check object for null
     cbz     w3, common_errNullObject    // object was null
     GET_VREG w0, w2                     // w0<- fp[A]
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
@@ -6885,6 +6878,321 @@
     .global artMterpAsmSisterEnd
 artMterpAsmSisterEnd:
 
+/* File: arm64/footer.S */
+/*
+ * ===========================================================================
+ *  Common subroutines and data
+ * ===========================================================================
+ */
+
+
+/*
+ * We've detected a condition that will result in an exception, but the exception
+ * has not yet been thrown.  Just bail out to the reference interpreter to deal with it.
+ * TUNING: for consistency, we may want to just go ahead and handle these here.
+ */
+common_errDivideByZero:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  x0, xSELF                      // x0<- self (first logger argument)
+    add  x1, xFP, #OFF_FP_SHADOWFRAME   // x1<- xFP + OFF_FP_SHADOWFRAME (second logger argument)
+    bl MterpLogDivideByZeroException
+#endif
+    b MterpCommonFallback               // returns 0: retry with reference interpreter
+
+common_errArrayIndex:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  x0, xSELF                      // x0<- self
+    add  x1, xFP, #OFF_FP_SHADOWFRAME   // x1<- xFP + OFF_FP_SHADOWFRAME
+    bl MterpLogArrayIndexException
+#endif
+    b MterpCommonFallback               // returns 0: retry with reference interpreter
+
+common_errNegativeArraySize:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  x0, xSELF                      // x0<- self
+    add  x1, xFP, #OFF_FP_SHADOWFRAME   // x1<- xFP + OFF_FP_SHADOWFRAME
+    bl MterpLogNegativeArraySizeException
+#endif
+    b MterpCommonFallback               // returns 0: retry with reference interpreter
+
+common_errNoSuchMethod:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  x0, xSELF                      // x0<- self
+    add  x1, xFP, #OFF_FP_SHADOWFRAME   // x1<- xFP + OFF_FP_SHADOWFRAME
+    bl MterpLogNoSuchMethodException
+#endif
+    b MterpCommonFallback               // returns 0: retry with reference interpreter
+
+common_errNullObject:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  x0, xSELF                      // x0<- self
+    add  x1, xFP, #OFF_FP_SHADOWFRAME   // x1<- xFP + OFF_FP_SHADOWFRAME
+    bl MterpLogNullObjectException
+#endif
+    b MterpCommonFallback               // returns 0: retry with reference interpreter
+
+common_exceptionThrown:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  x0, xSELF                      // x0<- self
+    add  x1, xFP, #OFF_FP_SHADOWFRAME   // x1<- xFP + OFF_FP_SHADOWFRAME
+    bl MterpLogExceptionThrownException
+#endif
+    b MterpCommonFallback               // returns 0: retry with reference interpreter
+
+MterpSuspendFallback:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  x0, xSELF                      // x0<- self
+    add  x1, xFP, #OFF_FP_SHADOWFRAME   // x1<- xFP + OFF_FP_SHADOWFRAME
+    ldr  x2, [xSELF, #THREAD_FLAGS_OFFSET]  // x2<- word at self + THREAD_FLAGS_OFFSET (third logger argument)
+    bl MterpLogSuspendFallback
+#endif
+    b MterpCommonFallback               // returns 0: retry with reference interpreter
+
+/*
+ * If we're here, something is out of the ordinary.  If there is a pending
+ * exception, handle it.  Otherwise, roll back and retry with the reference
+ * interpreter.
+ */
+MterpPossibleException:
+    ldr     x0, [xSELF, #THREAD_EXCEPTION_OFFSET]   // x0<- value at self + THREAD_EXCEPTION_OFFSET (zero: nothing pending)
+    cbz     x0, MterpFallback                       // If not, fall back to reference interpreter.
+    /* intentional fallthrough - handle pending exception. */
+/*
+ * On return from a runtime helper routine, we've found a pending exception.
+ * Can we handle it here - or need to bail out to caller?
+ *
+ */
+MterpException:
+    mov     x0, xSELF                               // x0<- self
+    add     x1, xFP, #OFF_FP_SHADOWFRAME            // x1<- xFP + OFF_FP_SHADOWFRAME
+    bl      MterpHandleException                    // (self, shadow_frame)
+    cbz     w0, MterpExceptionReturn                // no local catch, back to caller.
+    ldr     x0, [xFP, #OFF_FP_CODE_ITEM]            // x0<- value at xFP + OFF_FP_CODE_ITEM
+    ldr     w1, [xFP, #OFF_FP_DEX_PC]               // w1<- dex pc, counted in 16-bit units (scaled by 2 below)
+    ldr     xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]  // reload handler table base from self
+    add     xPC, x0, #CODEITEM_INSNS_OFFSET         // xPC<- start of the instruction array
+    add     xPC, xPC, x1, lsl #1                    // generate new dex_pc_ptr
+    /* Do we need to switch interpreters? */
+    bl      MterpShouldSwitchInterpreters
+    cbnz    w0, MterpFallback                       // non-zero result: leave mterp via the fallback path
+    /* resume execution at catch block */
+    EXPORT_PC
+    FETCH_INST
+    GET_INST_OPCODE ip
+    GOTO_OPCODE ip
+    /* NOTE: no fallthrough */
+/*
+ * Common handling for branches with support for Jit profiling.
+ * On entry:
+ *    wINST          <= signed offset
+ *    wPROFILE       <= signed hotness countdown (expanded to 32 bits)
+ *    condition bits <= set to establish sign of offset (use "NoFlags" entry if not)
+ *
+ * We have quite a few different cases for branch profiling, OSR detection and
+ * suspend check support here.
+ *
+ * Taken backward branches:
+ *    If profiling active, do hotness countdown and report if we hit zero.
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *    Is there a pending suspend request?  If so, suspend.
+ *
+ * Taken forward branches and not-taken backward branches:
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *
+ * Our most common case is expected to be a taken backward branch with active jit profiling,
+ * but no full OSR check and no pending suspend request.
+ * Next most common case is not-taken branch with no full OSR check.
+ *
+ */
+MterpCommonTakenBranchNoFlags:
+    cmp     wINST, #0                   // establish sign of the branch offset
+    b.gt    .L_forward_branch           // don't add forward branches to hotness
+    tbnz    wPROFILE, #31, .L_no_count_backwards  // go if negative
+    subs    wPROFILE, wPROFILE, #1      // countdown
+    b.eq    .L_add_batch                // counted down to zero - report
+.L_resume_backward_branch:
+    ldr     lr, [xSELF, #THREAD_FLAGS_OFFSET]  // lr<- thread flags (lr used as scratch)
+    add     w2, wINST, wINST            // w2<- byte offset
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    REFRESH_IBASE
+    ands    lr, lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)  // keep only the two request bits
+    b.ne    .L_suspend_request_pending
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+.L_suspend_request_pending:
+    EXPORT_PC
+    mov     x0, xSELF
+    bl      MterpSuspendCheck           // (self)
+    cbnz    x0, MterpFallback           // NOTE(review): tests all 64 bits of x0; MterpException tests w0 after its helper calls - confirm helper return width
+    REFRESH_IBASE                       // might have changed during suspend
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+.L_no_count_backwards:
+    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
+    b.ne    .L_resume_backward_branch
+    mov     x0, xSELF                   // x0<- self
+    add     x1, xFP, #OFF_FP_SHADOWFRAME  // x1<- xFP + OFF_FP_SHADOWFRAME
+    mov     x2, xINST                   // x2<- branch offset
+    EXPORT_PC
+    bl      MterpMaybeDoOnStackReplacement  // (self, shadow_frame, offset)
+    cbnz    x0, MterpOnStackReplacement // NOTE(review): 64-bit test of the helper result - confirm helper return width
+    b       .L_resume_backward_branch
+
+.L_forward_branch:
+    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
+    b.eq    .L_check_osr_forward
+.L_resume_forward_branch:
+    add     w2, wINST, wINST            // w2<- byte offset
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+.L_check_osr_forward:
+    mov     x0, xSELF                   // x0<- self
+    add     x1, xFP, #OFF_FP_SHADOWFRAME  // x1<- xFP + OFF_FP_SHADOWFRAME
+    mov     x2, xINST                   // x2<- branch offset
+    EXPORT_PC
+    bl      MterpMaybeDoOnStackReplacement  // (self, shadow_frame, offset)
+    cbnz    x0, MterpOnStackReplacement // NOTE(review): 64-bit test of the helper result - confirm helper return width
+    b       .L_resume_forward_branch
+
+.L_add_batch:
+    add     x1, xFP, #OFF_FP_SHADOWFRAME  // x1<- xFP + OFF_FP_SHADOWFRAME
+    strh    wPROFILE, [x1, #SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET]  // write the (now zero) countdown back as a halfword
+    ldr     x0, [xFP, #OFF_FP_METHOD]   // x0<- value at xFP + OFF_FP_METHOD
+    mov     x2, xSELF                   // x2<- self
+    bl      MterpAddHotnessBatch        // (method, shadow_frame, self)
+    mov     wPROFILE, w0                // restore new hotness countdown to wPROFILE
+    b       .L_no_count_backwards       // re-test the refreshed countdown: OSR check or plain resume
+
+/*
+ * Entered from the conditional branch handlers when OSR check request active on
+ * not-taken path.  All Dalvik not-taken conditional branch offsets are 2.
+ */
+.L_check_not_taken_osr:
+    mov     x0, xSELF                   // x0<- self
+    add     x1, xFP, #OFF_FP_SHADOWFRAME  // x1<- xFP + OFF_FP_SHADOWFRAME
+    mov     x2, #2                      // x2<- fixed not-taken offset of 2
+    EXPORT_PC
+    bl      MterpMaybeDoOnStackReplacement  // (self, shadow_frame, offset)
+    cbnz    x0, MterpOnStackReplacement // NOTE(review): 64-bit test of the helper result - confirm helper return width
+    FETCH_ADVANCE_INST 2                // step past the not-taken branch, load wINST
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+
+
+/*
+ * Check for suspend check request.  Assumes wINST already loaded, xPC advanced and
+ * still needs to get the opcode and branch to it, and flags are in w7.
+ */
+MterpCheckSuspendAndContinue:
+    ldr     xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]  // refresh xIBASE
+    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)  // keep only the two request bits of w7
+    b.ne    check1                      // a request bit is set: take the slow path
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
+check1:
+    EXPORT_PC
+    mov     x0, xSELF
+    bl      MterpSuspendCheck           // (self)
+    cbnz    x0, MterpFallback           // Something in the environment changed, switch interpreters
+    GET_INST_OPCODE ip                  // extract opcode from wINST; NOTE(review): xIBASE is not refreshed here, unlike .L_suspend_request_pending
+    GOTO_OPCODE ip                      // jump to next instruction
+
+/*
+ * On-stack replacement has happened, and now we've returned from the compiled method.
+ */
+MterpOnStackReplacement:
+#if MTERP_LOGGING
+    mov  x0, xSELF                      // x0<- self
+    add  x1, xFP, #OFF_FP_SHADOWFRAME   // x1<- xFP + OFF_FP_SHADOWFRAME
+    sxtw x2, wINST                      // x2<- wINST sign-extended to 64 bits
+    bl MterpLogOSR
+#endif
+    mov  x0, #1                         // Signal normal return
+    b    MterpDone                      // shared epilogue
+
+/*
+ * Bail out to reference interpreter.
+ */
+MterpFallback:
+    EXPORT_PC
+#if MTERP_LOGGING
+    mov  x0, xSELF                      // x0<- self
+    add  x1, xFP, #OFF_FP_SHADOWFRAME   // x1<- xFP + OFF_FP_SHADOWFRAME
+    bl MterpLogFallback
+#endif
+MterpCommonFallback:                    // entry used by the common_err* stubs, which export the PC themselves
+    mov     x0, #0                                  // signal retry with reference interpreter.
+    b       MterpDone                               // shared epilogue
+
+/*
+ * We pushed some registers on the stack in ExecuteMterpImpl, then saved
+ * SP and LR.  Here we restore SP, restore the registers, and then restore
+ * LR to PC.
+ *
+ * On entry:
+ *  uint32_t* xFP  (should still be live, pointer to base of vregs)
+ */
+MterpExceptionReturn:
+    mov     x0, #1                                  // signal return to caller.
+    b MterpDone
+MterpReturn:
+    ldr     x2, [xFP, #OFF_FP_RESULT_REGISTER]      // x2<- pointer stored at xFP + OFF_FP_RESULT_REGISTER
+    ldr     lr, [xSELF, #THREAD_FLAGS_OFFSET]       // lr<- thread flags (lr used as scratch)
+    str     x0, [x2]                                // store the 64-bit value in x0 through that pointer
+    mov     x0, xSELF                               // x0<- self, argument for MterpSuspendCheck
+    ands    lr, lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    b.eq    check2                                  // no request bit set: skip the suspend check
+    bl      MterpSuspendCheck                       // (self)
+check2:
+    mov     x0, #1                                  // signal return to caller.
+MterpDone:
+/*
+ * At this point, we expect wPROFILE to be non-zero.  If negative, hotness is disabled or we're
+ * checking for OSR.  If greater than zero, we might have unreported hotness to register
+ * (the difference between the ending wPROFILE and the cached hotness counter).  wPROFILE
+ * should only reach zero immediately after a hotness decrement, and is then reset to either
+ * a negative special state or the new non-zero countdown value.
+ */
+    cmp     wPROFILE, #0
+    bgt     MterpProfileActive                      // if > 0, we may have some counts to report.
+    ldp     fp, lr, [sp, #64]                       // reload fp and lr from sp+64
+    ldp     xPC, xFP, [sp, #48]                     // reload xPC and xFP from sp+48
+    ldp     xSELF, xINST, [sp, #32]                 // reload xSELF and xINST from sp+32
+    ldp     xIBASE, xREFS, [sp, #16]                // reload xIBASE and xREFS from sp+16
+    ldp     xPROFILE, x27, [sp], #80                // reload xPROFILE and x27, then pop the 80-byte frame
+    ret
+
+MterpProfileActive:
+    mov     xINST, x0                               // stash return value
+    /* Report cached hotness counts */
+    ldr     x0, [xFP, #OFF_FP_METHOD]               // x0<- value at xFP + OFF_FP_METHOD
+    add     x1, xFP, #OFF_FP_SHADOWFRAME            // x1<- xFP + OFF_FP_SHADOWFRAME
+    mov     x2, xSELF                               // x2<- self
+    strh    wPROFILE, [x1, #SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET]  // write current countdown back as a halfword
+    bl      MterpAddHotnessBatch                    // (method, shadow_frame, self)
+    mov     x0, xINST                               // restore return value
+    ldp     fp, lr, [sp, #64]                       // reload fp and lr from sp+64
+    ldp     xPC, xFP, [sp, #48]                     // reload xPC and xFP from sp+48
+    ldp     xSELF, xINST, [sp, #32]                 // reload xSELF and xINST from sp+32
+    ldp     xIBASE, xREFS, [sp, #16]                // reload xIBASE and xREFS from sp+16
+    ldp     xPROFILE, x27, [sp], #80                // reload xPROFILE and x27, then pop the 80-byte frame
+    ret
+
+    .cfi_endproc
+    .size   ExecuteMterpImpl, .-ExecuteMterpImpl
+
+
 
     .global artMterpAsmAltInstructionStart
     .type   artMterpAsmAltInstructionStart, %function
@@ -11247,318 +11555,3 @@
     .size   artMterpAsmAltInstructionStart, .-artMterpAsmAltInstructionStart
     .global artMterpAsmAltInstructionEnd
 artMterpAsmAltInstructionEnd:
-/* File: arm64/footer.S */
-/*
- * ===========================================================================
- *  Common subroutines and data
- * ===========================================================================
- */
-
-
-/*
- * We've detected a condition that will result in an exception, but the exception
- * has not yet been thrown.  Just bail out to the reference interpreter to deal with it.
- * TUNING: for consistency, we may want to just go ahead and handle these here.
- */
-common_errDivideByZero:
-    EXPORT_PC
-#if MTERP_LOGGING
-    mov  x0, xSELF
-    add  x1, xFP, #OFF_FP_SHADOWFRAME
-    bl MterpLogDivideByZeroException
-#endif
-    b MterpCommonFallback
-
-common_errArrayIndex:
-    EXPORT_PC
-#if MTERP_LOGGING
-    mov  x0, xSELF
-    add  x1, xFP, #OFF_FP_SHADOWFRAME
-    bl MterpLogArrayIndexException
-#endif
-    b MterpCommonFallback
-
-common_errNegativeArraySize:
-    EXPORT_PC
-#if MTERP_LOGGING
-    mov  x0, xSELF
-    add  x1, xFP, #OFF_FP_SHADOWFRAME
-    bl MterpLogNegativeArraySizeException
-#endif
-    b MterpCommonFallback
-
-common_errNoSuchMethod:
-    EXPORT_PC
-#if MTERP_LOGGING
-    mov  x0, xSELF
-    add  x1, xFP, #OFF_FP_SHADOWFRAME
-    bl MterpLogNoSuchMethodException
-#endif
-    b MterpCommonFallback
-
-common_errNullObject:
-    EXPORT_PC
-#if MTERP_LOGGING
-    mov  x0, xSELF
-    add  x1, xFP, #OFF_FP_SHADOWFRAME
-    bl MterpLogNullObjectException
-#endif
-    b MterpCommonFallback
-
-common_exceptionThrown:
-    EXPORT_PC
-#if MTERP_LOGGING
-    mov  x0, xSELF
-    add  x1, xFP, #OFF_FP_SHADOWFRAME
-    bl MterpLogExceptionThrownException
-#endif
-    b MterpCommonFallback
-
-MterpSuspendFallback:
-    EXPORT_PC
-#if MTERP_LOGGING
-    mov  x0, xSELF
-    add  x1, xFP, #OFF_FP_SHADOWFRAME
-    ldr  x2, [xSELF, #THREAD_FLAGS_OFFSET]
-    bl MterpLogSuspendFallback
-#endif
-    b MterpCommonFallback
-
-/*
- * If we're here, something is out of the ordinary.  If there is a pending
- * exception, handle it.  Otherwise, roll back and retry with the reference
- * interpreter.
- */
-MterpPossibleException:
-    ldr     x0, [xSELF, #THREAD_EXCEPTION_OFFSET]
-    cbz     x0, MterpFallback                       // If not, fall back to reference interpreter.
-    /* intentional fallthrough - handle pending exception. */
-/*
- * On return from a runtime helper routine, we've found a pending exception.
- * Can we handle it here - or need to bail out to caller?
- *
- */
-MterpException:
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    bl      MterpHandleException                    // (self, shadow_frame)
-    cbz     w0, MterpExceptionReturn                // no local catch, back to caller.
-    ldr     x0, [xFP, #OFF_FP_CODE_ITEM]
-    ldr     w1, [xFP, #OFF_FP_DEX_PC]
-    ldr     xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]
-    add     xPC, x0, #CODEITEM_INSNS_OFFSET
-    add     xPC, xPC, x1, lsl #1                    // generate new dex_pc_ptr
-    /* Do we need to switch interpreters? */
-    bl      MterpShouldSwitchInterpreters
-    cbnz    w0, MterpFallback
-    /* resume execution at catch block */
-    EXPORT_PC
-    FETCH_INST
-    GET_INST_OPCODE ip
-    GOTO_OPCODE ip
-    /* NOTE: no fallthrough */
-/*
- * Common handling for branches with support for Jit profiling.
- * On entry:
- *    wINST          <= signed offset
- *    wPROFILE       <= signed hotness countdown (expanded to 32 bits)
- *    condition bits <= set to establish sign of offset (use "NoFlags" entry if not)
- *
- * We have quite a few different cases for branch profiling, OSR detection and
- * suspend check support here.
- *
- * Taken backward branches:
- *    If profiling active, do hotness countdown and report if we hit zero.
- *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
- *    Is there a pending suspend request?  If so, suspend.
- *
- * Taken forward branches and not-taken backward branches:
- *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
- *
- * Our most common case is expected to be a taken backward branch with active jit profiling,
- * but no full OSR check and no pending suspend request.
- * Next most common case is not-taken branch with no full OSR check.
- *
- */
-MterpCommonTakenBranchNoFlags:
-    cmp     wINST, #0
-    b.gt    .L_forward_branch           // don't add forward branches to hotness
-    tbnz    wPROFILE, #31, .L_no_count_backwards  // go if negative
-    subs    wPROFILE, wPROFILE, #1      // countdown
-    b.eq    .L_add_batch                // counted down to zero - report
-.L_resume_backward_branch:
-    ldr     lr, [xSELF, #THREAD_FLAGS_OFFSET]
-    add     w2, wINST, wINST            // w2<- byte offset
-    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    REFRESH_IBASE
-    ands    lr, lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
-    b.ne    .L_suspend_request_pending
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
-
-.L_suspend_request_pending:
-    EXPORT_PC
-    mov     x0, xSELF
-    bl      MterpSuspendCheck           // (self)
-    cbnz    x0, MterpFallback
-    REFRESH_IBASE                       // might have changed during suspend
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
-
-.L_no_count_backwards:
-    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
-    b.ne    .L_resume_backward_branch
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    mov     x2, xINST
-    EXPORT_PC
-    bl      MterpMaybeDoOnStackReplacement  // (self, shadow_frame, offset)
-    cbnz    x0, MterpOnStackReplacement
-    b       .L_resume_backward_branch
-
-.L_forward_branch:
-    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
-    b.eq    .L_check_osr_forward
-.L_resume_forward_branch:
-    add     w2, wINST, wINST            // w2<- byte offset
-    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
-
-.L_check_osr_forward:
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    mov     x2, xINST
-    EXPORT_PC
-    bl      MterpMaybeDoOnStackReplacement  // (self, shadow_frame, offset)
-    cbnz    x0, MterpOnStackReplacement
-    b       .L_resume_forward_branch
-
-.L_add_batch:
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    strh    wPROFILE, [x1, #SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET]
-    ldr     x0, [xFP, #OFF_FP_METHOD]
-    mov     x2, xSELF
-    bl      MterpAddHotnessBatch        // (method, shadow_frame, self)
-    mov     wPROFILE, w0                // restore new hotness countdown to wPROFILE
-    b       .L_no_count_backwards
-
-/*
- * Entered from the conditional branch handlers when OSR check request active on
- * not-taken path.  All Dalvik not-taken conditional branch offsets are 2.
- */
-.L_check_not_taken_osr:
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    mov     x2, #2
-    EXPORT_PC
-    bl      MterpMaybeDoOnStackReplacement  // (self, shadow_frame, offset)
-    cbnz    x0, MterpOnStackReplacement
-    FETCH_ADVANCE_INST 2
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
-
-
-/*
- * Check for suspend check request.  Assumes wINST already loaded, xPC advanced and
- * still needs to get the opcode and branch to it, and flags are in lr.
- */
-MterpCheckSuspendAndContinue:
-    ldr     xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]  // refresh xIBASE
-    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
-    b.ne    check1
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
-check1:
-    EXPORT_PC
-    mov     x0, xSELF
-    bl      MterpSuspendCheck           // (self)
-    cbnz    x0, MterpFallback           // Something in the environment changed, switch interpreters
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
-
-/*
- * On-stack replacement has happened, and now we've returned from the compiled method.
- */
-MterpOnStackReplacement:
-#if MTERP_LOGGING
-    mov  x0, xSELF
-    add  x1, xFP, #OFF_FP_SHADOWFRAME
-    sxtw x2, wINST
-    bl MterpLogOSR
-#endif
-    mov  x0, #1                         // Signal normal return
-    b    MterpDone
-
-/*
- * Bail out to reference interpreter.
- */
-MterpFallback:
-    EXPORT_PC
-#if MTERP_LOGGING
-    mov  x0, xSELF
-    add  x1, xFP, #OFF_FP_SHADOWFRAME
-    bl MterpLogFallback
-#endif
-MterpCommonFallback:
-    mov     x0, #0                                  // signal retry with reference interpreter.
-    b       MterpDone
-
-/*
- * We pushed some registers on the stack in ExecuteMterpImpl, then saved
- * SP and LR.  Here we restore SP, restore the registers, and then restore
- * LR to PC.
- *
- * On entry:
- *  uint32_t* xFP  (should still be live, pointer to base of vregs)
- */
-MterpExceptionReturn:
-    mov     x0, #1                                  // signal return to caller.
-    b MterpDone
-MterpReturn:
-    ldr     x2, [xFP, #OFF_FP_RESULT_REGISTER]
-    ldr     lr, [xSELF, #THREAD_FLAGS_OFFSET]
-    str     x0, [x2]
-    mov     x0, xSELF
-    ands    lr, lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
-    b.eq    check2
-    bl      MterpSuspendCheck                       // (self)
-check2:
-    mov     x0, #1                                  // signal return to caller.
-MterpDone:
-/*
- * At this point, we expect wPROFILE to be non-zero.  If negative, hotness is disabled or we're
- * checking for OSR.  If greater than zero, we might have unreported hotness to register
- * (the difference between the ending wPROFILE and the cached hotness counter).  wPROFILE
- * should only reach zero immediately after a hotness decrement, and is then reset to either
- * a negative special state or the new non-zero countdown value.
- */
-    cmp     wPROFILE, #0
-    bgt     MterpProfileActive                      // if > 0, we may have some counts to report.
-    ldp     fp, lr, [sp, #64]
-    ldp     xPC, xFP, [sp, #48]
-    ldp     xSELF, xINST, [sp, #32]
-    ldp     xIBASE, xREFS, [sp, #16]
-    ldp     xPROFILE, x27, [sp], #80
-    ret
-
-MterpProfileActive:
-    mov     xINST, x0                               // stash return value
-    /* Report cached hotness counts */
-    ldr     x0, [xFP, #OFF_FP_METHOD]
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    mov     x2, xSELF
-    strh    wPROFILE, [x1, #SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET]
-    bl      MterpAddHotnessBatch                    // (method, shadow_frame, self)
-    mov     x0, xINST                               // restore return value
-    ldp     fp, lr, [sp, #64]
-    ldp     xPC, xFP, [sp, #48]
-    ldp     xSELF, xINST, [sp, #32]
-    ldp     xIBASE, xREFS, [sp, #16]
-    ldp     xPROFILE, x27, [sp], #80
-    ret
-
-    .cfi_endproc
-    .size   ExecuteMterpImpl, .-ExecuteMterpImpl
-
-
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index 32ed337..3ba9e1a 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -471,7 +471,7 @@
 
   template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   size_t GetComponentSize() SHARED_REQUIRES(Locks::mutator_lock_) {
-    return 1U << GetComponentSizeShift();
+    return 1U << GetComponentSizeShift<kReadBarrierOption>();
   }
 
   template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
diff --git a/runtime/mirror/reference.h b/runtime/mirror/reference.h
index 3baa12e..e8ad5fa 100644
--- a/runtime/mirror/reference.h
+++ b/runtime/mirror/reference.h
@@ -76,8 +76,9 @@
     SetFieldObjectVolatile<kTransactionActive>(ReferentOffset(), nullptr);
   }
 
+  template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   Reference* GetPendingNext() SHARED_REQUIRES(Locks::mutator_lock_) {
-    return GetFieldObject<Reference>(PendingNextOffset());
+    return GetFieldObject<Reference, kDefaultVerifyFlags, kReadBarrierOption>(PendingNextOffset());
   }
 
   void SetPendingNext(Reference* pending_next)
@@ -102,7 +103,7 @@
   // removed from the list after having determined the reference is not ready
   // to be enqueued on a java ReferenceQueue.
   bool IsUnprocessed() SHARED_REQUIRES(Locks::mutator_lock_) {
-    return GetPendingNext() == nullptr;
+    return GetPendingNext<kWithoutReadBarrier>() == nullptr;
   }
 
   template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
diff --git a/runtime/oat.h b/runtime/oat.h
index 9b8f545..2c5c3e6 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,7 +32,7 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  static constexpr uint8_t kOatVersion[] = { '0', '8', '4', '\0' };
+  static constexpr uint8_t kOatVersion[] = { '0', '8', '5', '\0' };
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 50f76da..9cce171 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -2604,8 +2604,6 @@
   QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg27)
   QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg28)
   QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg29)
-  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg30)
-  QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg31)
   QUICK_ENTRY_POINT_INFO(pReadBarrierSlow)
   QUICK_ENTRY_POINT_INFO(pReadBarrierForRootSlow)
 #undef QUICK_ENTRY_POINT_INFO
diff --git a/test/496-checker-inlining-and-class-loader/src/Main.java b/test/496-checker-inlining-and-class-loader/src/Main.java
index 8de6318..78e8a40 100644
--- a/test/496-checker-inlining-and-class-loader/src/Main.java
+++ b/test/496-checker-inlining-and-class-loader/src/Main.java
@@ -107,7 +107,8 @@
                 /* Load and initialize FirstSeenByMyClassLoader */
   /// CHECK:      LoadClass gen_clinit_check:true
                 /* Load and initialize System */
-  /// CHECK-NEXT: LoadClass gen_clinit_check:true
+  // There may be MipsComputeBaseMethodAddress here.
+  /// CHECK:      LoadClass gen_clinit_check:true
   /// CHECK-NEXT: StaticFieldGet
   // There may be HArmDexCacheArraysBase or HX86ComputeBaseMethodAddress here.
   /// CHECK:      LoadString
diff --git a/test/552-checker-sharpening/src/Main.java b/test/552-checker-sharpening/src/Main.java
index 09a77ed..2232ff4 100644
--- a/test/552-checker-sharpening/src/Main.java
+++ b/test/552-checker-sharpening/src/Main.java
@@ -51,6 +51,10 @@
   /// CHECK-START-ARM64: int Main.testSimple(int) sharpening (after)
   /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
 
+  /// CHECK-START-MIPS: int Main.testSimple(int) sharpening (after)
+  /// CHECK-NOT:            MipsDexCacheArraysBase
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
   /// CHECK-START-X86: int Main.testSimple(int) sharpening (after)
   /// CHECK-NOT:            X86ComputeBaseMethodAddress
   /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
@@ -62,6 +66,10 @@
   /// CHECK:                ArmDexCacheArraysBase
   /// CHECK-NOT:            ArmDexCacheArraysBase
 
+  /// CHECK-START-MIPS: int Main.testSimple(int) dex_cache_array_fixups_mips (after)
+  /// CHECK:                MipsDexCacheArraysBase
+  /// CHECK-NOT:            MipsDexCacheArraysBase
+
   /// CHECK-START-X86: int Main.testSimple(int) pc_relative_fixups_x86 (after)
   /// CHECK:                X86ComputeBaseMethodAddress
   /// CHECK-NOT:            X86ComputeBaseMethodAddress
@@ -83,6 +91,11 @@
   /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
   /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
 
+  /// CHECK-START-MIPS: int Main.testDiamond(boolean, int) sharpening (after)
+  /// CHECK-NOT:            MipsDexCacheArraysBase
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
   /// CHECK-START-X86: int Main.testDiamond(boolean, int) sharpening (after)
   /// CHECK-NOT:            X86ComputeBaseMethodAddress
   /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
@@ -100,6 +113,14 @@
   /// CHECK:                ArmDexCacheArraysBase
   /// CHECK-NEXT:           If
 
+  /// CHECK-START-MIPS: int Main.testDiamond(boolean, int) dex_cache_array_fixups_mips (after)
+  /// CHECK:                MipsDexCacheArraysBase
+  /// CHECK-NOT:            MipsDexCacheArraysBase
+
+  /// CHECK-START-MIPS: int Main.testDiamond(boolean, int) dex_cache_array_fixups_mips (after)
+  /// CHECK:                MipsDexCacheArraysBase
+  /// CHECK-NEXT:           If
+
   /// CHECK-START-X86: int Main.testDiamond(boolean, int) pc_relative_fixups_x86 (after)
   /// CHECK:                X86ComputeBaseMethodAddress
   /// CHECK-NOT:            X86ComputeBaseMethodAddress
@@ -110,7 +131,7 @@
 
   public static int testDiamond(boolean negate, int x) {
     // These calls should use PC-relative dex cache array loads to retrieve the target method.
-    // PC-relative bases used by X86 and ARM should be pulled before the If.
+    // PC-relative bases used by ARM, MIPS and X86 should be pulled before the If.
     if (negate) {
       return $noinline$foo(-x);
     } else {
@@ -154,8 +175,26 @@
   /// CHECK:                begin_block
   /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
 
+  /// CHECK-START-MIPS: int Main.testLoop(int[], int) dex_cache_array_fixups_mips (before)
+  /// CHECK-NOT:            MipsDexCacheArraysBase
+
+  /// CHECK-START-MIPS: int Main.testLoop(int[], int) dex_cache_array_fixups_mips (after)
+  /// CHECK:                MipsDexCacheArraysBase
+  /// CHECK-NOT:            MipsDexCacheArraysBase
+
+  /// CHECK-START-MIPS: int Main.testLoop(int[], int) dex_cache_array_fixups_mips (after)
+  /// CHECK:                InvokeStaticOrDirect
+  /// CHECK-NOT:            InvokeStaticOrDirect
+
+  /// CHECK-START-MIPS: int Main.testLoop(int[], int) dex_cache_array_fixups_mips (after)
+  /// CHECK:                ArrayLength
+  /// CHECK-NEXT:           MipsDexCacheArraysBase
+  /// CHECK-NEXT:           Goto
+  /// CHECK:                begin_block
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
   public static int testLoop(int[] array, int x) {
-    // PC-relative bases used by X86 and ARM should be pulled before the loop.
+    // PC-relative bases used by ARM, MIPS and X86 should be pulled before the loop.
     for (int i : array) {
       x += $noinline$foo(i);
     }
@@ -182,8 +221,18 @@
   /// CHECK-NEXT:           ArmDexCacheArraysBase
   /// CHECK-NEXT:           Goto
 
+  /// CHECK-START-MIPS: int Main.testLoopWithDiamond(int[], boolean, int) dex_cache_array_fixups_mips (before)
+  /// CHECK-NOT:            MipsDexCacheArraysBase
+
+  /// CHECK-START-MIPS: int Main.testLoopWithDiamond(int[], boolean, int) dex_cache_array_fixups_mips (after)
+  /// CHECK:                If
+  /// CHECK:                begin_block
+  /// CHECK:                ArrayLength
+  /// CHECK-NEXT:           MipsDexCacheArraysBase
+  /// CHECK-NEXT:           Goto
+
   public static int testLoopWithDiamond(int[] array, boolean negate, int x) {
-    // PC-relative bases used by X86 and ARM should be pulled before the loop
+    // PC-relative bases used by ARM, MIPS and X86 should be pulled before the loop
     // but not outside the if.
     if (array != null) {
       for (int i : array) {
@@ -220,6 +269,11 @@
   // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
   /// CHECK:                LoadString load_kind:{{BootImageAddress|DexCachePcRelative|DexCacheViaMethod}}
 
+  /// CHECK-START-MIPS: java.lang.String Main.$noinline$getBootImageString() sharpening (after)
+  // Note: load kind depends on PIC/non-PIC
+  // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
+  /// CHECK:                LoadString load_kind:{{BootImageAddress|DexCachePcRelative|DexCacheViaMethod}}
+
   public static String $noinline$getBootImageString() {
     // Prevent inlining to avoid the string comparison being optimized away.
     if (doThrow) { throw new Error(); }
@@ -250,6 +304,13 @@
   /// CHECK-START-ARM64: java.lang.String Main.$noinline$getNonBootImageString() sharpening (after)
   /// CHECK:                LoadString load_kind:DexCachePcRelative
 
+  /// CHECK-START-MIPS: java.lang.String Main.$noinline$getNonBootImageString() sharpening (after)
+  /// CHECK:                LoadString load_kind:DexCachePcRelative
+
+  /// CHECK-START-MIPS: java.lang.String Main.$noinline$getNonBootImageString() dex_cache_array_fixups_mips (after)
+  /// CHECK-DAG:            MipsDexCacheArraysBase
+  /// CHECK-DAG:            LoadString load_kind:DexCachePcRelative
+
   public static String $noinline$getNonBootImageString() {
     // Prevent inlining to avoid the string comparison being optimized away.
     if (doThrow) { throw new Error(); }
@@ -280,6 +341,11 @@
   // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
   /// CHECK:                LoadClass load_kind:{{BootImageAddress|DexCachePcRelative|DexCacheViaMethod}} class_name:java.lang.String
 
+  /// CHECK-START-MIPS: java.lang.Class Main.$noinline$getStringClass() sharpening (after)
+  // Note: load kind depends on PIC/non-PIC
+  // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
+  /// CHECK:                LoadClass load_kind:{{BootImageAddress|DexCachePcRelative|DexCacheViaMethod}} class_name:java.lang.String
+
   public static Class<?> $noinline$getStringClass() {
     // Prevent inlining to avoid the string comparison being optimized away.
     if (doThrow) { throw new Error(); }
@@ -310,6 +376,13 @@
   /// CHECK-START-ARM64: java.lang.Class Main.$noinline$getOtherClass() sharpening (after)
   /// CHECK:                LoadClass load_kind:DexCachePcRelative class_name:Other
 
+  /// CHECK-START-MIPS: java.lang.Class Main.$noinline$getOtherClass() sharpening (after)
+  /// CHECK:                LoadClass load_kind:DexCachePcRelative class_name:Other
+
+  /// CHECK-START-MIPS: java.lang.Class Main.$noinline$getOtherClass() dex_cache_array_fixups_mips (after)
+  /// CHECK-DAG:            MipsDexCacheArraysBase
+  /// CHECK-DAG:            LoadClass load_kind:DexCachePcRelative class_name:Other
+
   public static Class<?> $noinline$getOtherClass() {
     // Prevent inlining to avoid the string comparison being optimized away.
     if (doThrow) { throw new Error(); }
diff --git a/tools/libcore_failures.txt b/tools/libcore_failures.txt
index 996f2f8..bf8d12b 100644
--- a/tools/libcore_failures.txt
+++ b/tools/libcore_failures.txt
@@ -260,5 +260,12 @@
   bug: 30107038,
   modes: [device],
   names: ["org.apache.harmony.tests.java.lang.ProcessTest#test_destroyForcibly"]
+},
+{
+  description: "Flaky failure, possibly caused by a kernel bug accessing /proc/",
+  result: EXEC_FAILED,
+  bug: 27464570,
+  modes: [device],
+  names: ["libcore.java.lang.ProcessBuilderTest#testRedirectInherit"]
 }
 ]