Numerous fixes to enable PromoteRegs, though it's still broken.

- Fixed the kThrowArrayBounds launchpad to load the array length
  directly into the necessary arg reg without clobbering the array
  pointer, since that value may be live afterwards.

- genArrayPut now uses a temporary reg for byte stores if the source reg
  is >= 4, since x86 can't byte-access those registers.

- Fixed the order that core regs are spilled and unspilled.

- Correctly emit instructions when base == rBP and disp == 0.

- Added checks to the compiler to ensure that byte opcodes aren't used
  on registers that can't be byte accessed.

- Fixed generation of a number of ops which use byte opcodes, including
  floating point comparison, int-to-byte, and and-int/lit16.

- Added rBP, rSI, and rDI to spill registers for the x86 jni compiler.

- Various fixes and additions to the x86 disassembler.

Change-Id: I365fe7dec5cc64d181248fd58e90789f100b45e7
diff --git a/src/compiler/codegen/GenCommon.cc b/src/compiler/codegen/GenCommon.cc
index b4b0f6a..baa4b48 100644
--- a/src/compiler/codegen/GenCommon.cc
+++ b/src/compiler/codegen/GenCommon.cc
@@ -898,22 +898,33 @@
         funcOffset = ENTRYPOINT_OFFSET(pThrowNullPointerFromCode);
         break;
       case kThrowArrayBounds:
-#if defined (TARGET_X86)
-        // x86 leaves the array pointer in v2, so load the array length that the handler expects
-        opRegMem(cUnit, kOpMov, v2, v2, Array::LengthOffset().Int32Value());
-#endif
         // Move v1 (array index) to rARG0 and v2 (array length) to rARG1
         if (v2 != rARG0) {
           opRegCopy(cUnit, rARG0, v1);
+#if defined (TARGET_X86)
+          // x86 leaves the array pointer in v2, so load the array length that the handler expects
+          opRegMem(cUnit, kOpMov, rARG1, v2, Array::LengthOffset().Int32Value());
+#else
           opRegCopy(cUnit, rARG1, v2);
+#endif
         } else {
           if (v1 == rARG1) {
             // Swap v1 and v2, using rARG2 as a temp
             opRegCopy(cUnit, rARG2, v1);
+#if defined (TARGET_X86)
+            // x86 leaves the array pointer in v2, so load the array length that the handler expects
+            opRegMem(cUnit, kOpMov, rARG1, v2, Array::LengthOffset().Int32Value());
+#else
             opRegCopy(cUnit, rARG1, v2);
+#endif
             opRegCopy(cUnit, rARG0, rARG2);
           } else {
+#if defined (TARGET_X86)
+            // x86 leaves the array pointer in v2, so load the array length that the handler expects
+            opRegMem(cUnit, kOpMov, rARG1, v2, Array::LengthOffset().Int32Value());
+#else
             opRegCopy(cUnit, rARG1, v2);
+#endif
             opRegCopy(cUnit, rARG0, v1);
           }
         }
@@ -1598,9 +1609,18 @@
   } else {
     rlSrc = loadValue(cUnit, rlSrc, regClass);
   }
-  storeBaseIndexedDisp(cUnit, rlArray.lowReg, rlIndex.lowReg, scale,
-                       dataOffset, rlSrc.lowReg, rlSrc.highReg, size,
-                       INVALID_SREG);
+  // If the src reg can't be byte accessed, move it to a temp first.
+  if ((size == kSignedByte || size == kUnsignedByte) && rlSrc.lowReg >= 4) {
+    int temp = oatAllocTemp(cUnit);
+    opRegCopy(cUnit, temp, rlSrc.lowReg);
+    storeBaseIndexedDisp(cUnit, rlArray.lowReg, rlIndex.lowReg, scale,
+                         dataOffset, temp, INVALID_REG, size,
+                         INVALID_SREG);
+  } else {
+    storeBaseIndexedDisp(cUnit, rlArray.lowReg, rlIndex.lowReg, scale,
+                         dataOffset, rlSrc.lowReg, rlSrc.highReg, size,
+                         INVALID_SREG);
+  }
 #else
   bool needsRangeCheck = (!(optFlags & MIR_IGNORE_RANGE_CHECK));
   int regLen = INVALID_REG;