Fixes to x86 compilation.

- div-long and rem-long were calling the wrong helper methods.
- genCmpFP was grabbing new src and destination registers instead of
  using the ones it was passed. Also, it wasn't writing its result back.
- gave mul-long its own assembly helper method to help it marshal its
  sources properly.
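- fixed assembly of shifts for longs.
- fixed the x86 long compare, which loaded its first source into both
  register pairs instead of loading the second source.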
- updated IntMath's testFloatCompare to exit on the first error.

Change-Id: Iacecf07d3c7135d865728c18551c7989e7e0276b
diff --git a/src/compiler/codegen/GenCommon.cc b/src/compiler/codegen/GenCommon.cc
index 9e21aea..14eaf1d 100644
--- a/src/compiler/codegen/GenCommon.cc
+++ b/src/compiler/codegen/GenCommon.cc
@@ -2193,13 +2193,13 @@
       callOut = true;
       checkZero = true;
       retReg = rRET0;
-      funcOffset = ENTRYPOINT_OFFSET(pLdivmod);
+      funcOffset = ENTRYPOINT_OFFSET(pLdiv);
       break;
     case Instruction::REM_LONG:
     case Instruction::REM_LONG_2ADDR:
       callOut = true;
       checkZero = true;
-      funcOffset = ENTRYPOINT_OFFSET(pLdiv);
+      funcOffset = ENTRYPOINT_OFFSET(pLdivmod);
 #if defined(TARGET_ARM)
       /* NOTE - result is in rARG2/rARG3 instead of rRET0/rRET1 */
       retReg = rARG2;
diff --git a/src/compiler/codegen/x86/FP/X86FP.cc b/src/compiler/codegen/x86/FP/X86FP.cc
index f45a099..f5a030b 100644
--- a/src/compiler/codegen/x86/FP/X86FP.cc
+++ b/src/compiler/codegen/x86/FP/X86FP.cc
@@ -190,21 +190,16 @@
   int srcReg1;
   int srcReg2;
   if (single) {
-    rlSrc1 = oatGetSrc(cUnit, mir, 0);
     rlSrc1 = loadValue(cUnit, rlSrc1, kFPReg);
     srcReg1 = rlSrc1.lowReg;
-    rlSrc2 = oatGetSrc(cUnit, mir, 0);
-    rlSrc2 = loadValue(cUnit, rlSrc1, kFPReg);
-    srcReg2 = rlSrc1.lowReg;
+    rlSrc2 = loadValue(cUnit, rlSrc2, kFPReg);
+    srcReg2 = rlSrc2.lowReg;
   } else {
-    rlSrc1 = oatGetSrcWide(cUnit, mir, 0, 1);
     rlSrc1 = loadValueWide(cUnit, rlSrc1, kFPReg);
     srcReg1 = S2D(rlSrc1.lowReg, rlSrc1.highReg);
-    rlSrc2 = oatGetSrcWide(cUnit, mir, 0, 1);
     rlSrc2 = loadValueWide(cUnit, rlSrc2, kFPReg);
     srcReg2 = S2D(rlSrc2.lowReg, rlSrc2.highReg);
   }
-  rlDest = oatGetDest(cUnit, mir, 0);
   RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
   loadConstantNoClobber(cUnit, rlResult.lowReg, unorderedGt ? 1 : 0);
   if (single) {
@@ -221,6 +216,7 @@
   if (unorderedGt) {
     branch->target = newLIR0(cUnit, kPseudoTargetLabel);
   }
+  storeValue(cUnit, rlDest, rlResult);
   return false;
 }
 
diff --git a/src/compiler/codegen/x86/X86/Gen.cc b/src/compiler/codegen/x86/X86/Gen.cc
index 62ff3ad..46c98ad 100644
--- a/src/compiler/codegen/x86/X86/Gen.cc
+++ b/src/compiler/codegen/x86/X86/Gen.cc
@@ -255,12 +255,12 @@
   oatFlushAllRegs(cUnit);
   oatLockCallTemps(cUnit);  // Prepare for explicit register usage
   loadValueDirectWideFixed(cUnit, rlSrc1, r0, r1);
-  loadValueDirectWideFixed(cUnit, rlSrc1, r2, r3);
-  // Compute (r1:r0) = (r1:r0) - (r2:r3)
+  loadValueDirectWideFixed(cUnit, rlSrc2, r2, r3);
+  // Compute (r1:r0) = (r1:r0) - (r3:r2)
   opRegReg(cUnit, kOpSub, r0, r2);  // r0 = r0 - r2
   opRegReg(cUnit, kOpSbc, r1, r3);  // r1 = r1 - r3 - CF
   opRegReg(cUnit, kOpOr, r0, r1);   // r0 = high | low - sets ZF
-  newLIR2(cUnit, kX86Set8R, r0, kX86CondNz);  // r0 = (r1:r0) != (r2:r3) ? 1 : 0
+  newLIR2(cUnit, kX86Set8R, r0, kX86CondNz);  // r0 = (r1:r0) != (r3:r2) ? 1 : 0
   newLIR2(cUnit, kX86Movzx8RR, r0, r0);
   opRegImm(cUnit, kOpAsr, r1, 31);  // r1 = high >> 31
   opRegReg(cUnit, kOpOr, r0, r1);   // r0 holds result
diff --git a/src/oat/runtime/support_math.cc b/src/oat/runtime/support_math.cc
index cbffc2b..0bfe59d 100644
--- a/src/oat/runtime/support_math.cc
+++ b/src/oat/runtime/support_math.cc
@@ -62,7 +62,7 @@
   return -1;
 }
 
-int64_t Lmul(int64_t a, int64_t b) {
+extern "C" int64_t artLmulFromCode(int64_t a, int64_t b) {
   return a * b;
 }
 
diff --git a/src/oat/runtime/x86/oat_support_entrypoints_x86.cc b/src/oat/runtime/x86/oat_support_entrypoints_x86.cc
index 8a68858..95b479b 100644
--- a/src/oat/runtime/x86/oat_support_entrypoints_x86.cc
+++ b/src/oat/runtime/x86/oat_support_entrypoints_x86.cc
@@ -71,10 +71,10 @@
 extern int32_t CmplDouble(double a, double b);
 extern int32_t CmpgFloat(float a, float b);
 extern int32_t CmplFloat(float a, float b);
-extern int64_t Lmul(int64_t a, int64_t b);
 extern "C" int32_t art_idivmod_from_code(int32_t, int32_t);
 extern "C" int64_t art_ldiv_from_code(int64_t, int64_t);
 extern "C" int64_t art_ldivmod_from_code(int64_t, int64_t);
+extern "C" int64_t art_lmul_from_code(int64_t a, int64_t b);
 extern "C" uint64_t art_lshl_from_code(uint64_t, uint32_t);
 extern "C" uint64_t art_lshr_from_code(uint64_t, uint32_t);
 extern "C" uint64_t art_lushr_from_code(uint64_t, uint32_t);
@@ -186,7 +186,7 @@
   points->pF2l = F2L;
   points->pLdiv = art_ldiv_from_code;
   points->pLdivmod = art_ldivmod_from_code;
-  points->pLmul = Lmul;
+  points->pLmul = art_lmul_from_code;
   points->pShlLong = art_lshl_from_code;
   points->pShrLong = art_lshr_from_code;
   points->pUshrLong = art_lushr_from_code;
diff --git a/src/oat/runtime/x86/runtime_support_x86.S b/src/oat/runtime/x86/runtime_support_x86.S
index 109321e..e58efd3 100644
--- a/src/oat/runtime/x86/runtime_support_x86.S
+++ b/src/oat/runtime/x86/runtime_support_x86.S
@@ -428,8 +428,18 @@
     addl LITERAL(28), %esp        // pop arguments
     ret
 
+DEFINE_FUNCTION art_lmul_from_code
+    subl LITERAL(12), %esp        // alignment padding
+    pushl %ebx                    // pass arg4
+    pushl %edx                    // pass arg3
+    pushl %ecx                    // pass arg2
+    pushl %eax                    // pass arg1
+    call SYMBOL(artLmulFromCode)  // (jlong a, jlong b) - four 32-bit words, no Thread*/SP
+    addl LITERAL(28), %esp        // pop arguments (4 * 4 bytes) and padding (12 bytes)
+    ret
+
 DEFINE_FUNCTION art_lshl_from_code
-    // eax:ecx << edx
+    // ecx:eax << edx
     xchg %edx, %ecx
     shld %cl,%eax,%edx
     shl  %cl,%eax
@@ -441,26 +451,26 @@
     ret
 
 DEFINE_FUNCTION art_lshr_from_code
-    // eax:ecx >> edx
+    // ecx:eax >> edx (arithmetic); the 64-bit result is left in edx:eax
     xchg %edx, %ecx
-    shrd %cl,%eax,%edx
-    sar  %cl,%eax
+    shrd %cl,%edx,%eax            // low word: shift right, filling from the high word
+    sar  %cl,%edx                 // high word: arithmetic shift (hardware uses count mod 32)
     test LITERAL(32),%cl
     jz  1f
-    mov %edx, %eax
-    sar LITERAL(31), %edx
+    mov %edx, %eax                // count >= 32: low word = already-shifted high word
+    sar LITERAL(31), %edx         // count >= 32: high word = sign fill
 1:
     ret
 
 DEFINE_FUNCTION art_lushr_from_code
-    // eax:ecx >>> edx
+    // ecx:eax >>> edx (logical); the 64-bit result is left in edx:eax
     xchg %edx, %ecx
-    shrd %cl,%eax,%edx
-    shr  %cl,%eax
-    test   $0x20,%cl
+    shrd %cl,%edx,%eax            // low word: shift right, filling from the high word
+    shr  %cl,%edx                 // high word: logical shift (hardware uses count mod 32)
+    test LITERAL(32),%cl
     jz  1f
-    mov %edx, %eax
-    xor %edx, %edx
+    mov %edx, %eax                // count >= 32: low word = already-shifted high word
+    xor %edx, %edx                // count >= 32: high word = zero fill
 1:
     ret
 
diff --git a/test/IntMath/IntMath.java b/test/IntMath/IntMath.java
index c1cb976..65894a3 100644
--- a/test/IntMath/IntMath.java
+++ b/test/IntMath/IntMath.java
@@ -639,34 +639,32 @@
      */
     static int testFloatCompare(float minus, float plus, float plus2,
                                 float nan) {
-
-        int res = 3333;
         if (minus > plus)
-            res = 1;
+            return 1;
         if (plus < minus)
-            res = 2;
+            return 2;
         if (plus == minus)
-            res = 3;
+            return 3;
         if (plus != plus2)
-            res = 4;
+            return 4;
 
         if (plus <= nan)
-            res = 5;
+            return 5;
         if (plus >= nan)
-            res = 6;
+            return 6;
         if (minus <= nan)
-            res = 7;
+            return 7;
         if (minus >= nan)
-            res = 8;
+            return 8;
         if (nan >= plus)
-            res = 9;
+            return 9;
         if (nan <= plus)
-            res = 10;
+            return 10;
 
         if (nan == nan)
-            res = 1212;
+            return 11;
 
-        return res;
+        return 3333;
     }
 
     static int testDoubleCompare(double minus, double plus, double plus2,