AArch64: Enable MOVE_*, some CONST_*, CMP_*.

This change also fixes GenArithImmOpLong, GenShiftOpLong, OpRegImm,
OpRegRegImm, OpRegRegImm64, EncodeLogicalImmediate and the fmov encoding.

Change-Id: I8cae4f921d5150a6b8e4803ca4dee553928d1a58
diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc
index 8218cf1..d453797 100644
--- a/compiler/dex/frontend.cc
+++ b/compiler/dex/frontend.cc
@@ -143,6 +143,12 @@
     Instruction::MOVE,
     Instruction::MOVE_FROM16,
     Instruction::MOVE_16,
+    Instruction::MOVE_WIDE,
+    Instruction::MOVE_WIDE_FROM16,
+    Instruction::MOVE_WIDE_16,
+    Instruction::MOVE_OBJECT,
+    Instruction::MOVE_OBJECT_FROM16,
+    Instruction::MOVE_OBJECT_16,
     Instruction::MOVE_EXCEPTION,
     Instruction::RETURN_VOID,
     Instruction::RETURN,
@@ -151,6 +157,11 @@
     Instruction::CONST_4,
     Instruction::CONST_16,
     Instruction::CONST,
+    Instruction::CONST_HIGH16,
+    Instruction::CONST_WIDE_16,
+    Instruction::CONST_WIDE_32,
+    Instruction::CONST_WIDE,
+    Instruction::CONST_WIDE_HIGH16,
     Instruction::CONST_STRING,
     Instruction::MONITOR_ENTER,
     Instruction::MONITOR_EXIT,
@@ -160,6 +171,11 @@
     Instruction::GOTO_32,
     Instruction::PACKED_SWITCH,
     Instruction::SPARSE_SWITCH,
+    Instruction::CMPL_FLOAT,
+    Instruction::CMPG_FLOAT,
+    Instruction::CMPL_DOUBLE,
+    Instruction::CMPG_DOUBLE,
+    Instruction::CMP_LONG,
     Instruction::IF_EQ,
     Instruction::IF_NE,
     Instruction::IF_LT,
@@ -239,12 +255,6 @@
     Instruction::SPUT_BYTE,
     Instruction::SPUT_CHAR,
     Instruction::SPUT_SHORT,
-    Instruction::MOVE_WIDE,
-    Instruction::MOVE_WIDE_FROM16,
-    Instruction::MOVE_WIDE_16,
-    Instruction::MOVE_OBJECT,
-    Instruction::MOVE_OBJECT_FROM16,
-    Instruction::MOVE_OBJECT_16,
     Instruction::CMPL_FLOAT,
     Instruction::CMPG_FLOAT,
     Instruction::IGET,
@@ -259,7 +269,55 @@
     Instruction::IPUT_BYTE,
     Instruction::IPUT_CHAR,
     Instruction::IPUT_SHORT,
-
+    Instruction::NEG_LONG,
+    Instruction::NOT_LONG,
+    Instruction::NEG_DOUBLE,
+    Instruction::INT_TO_LONG,
+    Instruction::INT_TO_FLOAT,
+    Instruction::INT_TO_DOUBLE,
+    Instruction::LONG_TO_INT,
+    Instruction::LONG_TO_FLOAT,
+    Instruction::LONG_TO_DOUBLE,
+    Instruction::FLOAT_TO_INT,
+    Instruction::FLOAT_TO_LONG,
+    Instruction::FLOAT_TO_DOUBLE,
+    Instruction::DOUBLE_TO_INT,
+    Instruction::DOUBLE_TO_LONG,
+    Instruction::DOUBLE_TO_FLOAT,
+    Instruction::ADD_LONG,
+    Instruction::SUB_LONG,
+    Instruction::MUL_LONG,
+    Instruction::DIV_LONG,
+    Instruction::REM_LONG,
+    Instruction::AND_LONG,
+    Instruction::OR_LONG,
+    Instruction::XOR_LONG,
+    Instruction::SHL_LONG,
+    Instruction::SHR_LONG,
+    Instruction::USHR_LONG,
+    Instruction::REM_FLOAT,
+    Instruction::ADD_DOUBLE,
+    Instruction::SUB_DOUBLE,
+    Instruction::MUL_DOUBLE,
+    Instruction::DIV_DOUBLE,
+    Instruction::REM_DOUBLE,
+    Instruction::ADD_LONG_2ADDR,
+    Instruction::SUB_LONG_2ADDR,
+    Instruction::MUL_LONG_2ADDR,
+    Instruction::DIV_LONG_2ADDR,
+    Instruction::REM_LONG_2ADDR,
+    Instruction::AND_LONG_2ADDR,
+    Instruction::OR_LONG_2ADDR,
+    Instruction::XOR_LONG_2ADDR,
+    Instruction::SHL_LONG_2ADDR,
+    Instruction::SHR_LONG_2ADDR,
+    Instruction::USHR_LONG_2ADDR,
+    Instruction::REM_FLOAT_2ADDR,
+    Instruction::ADD_DOUBLE_2ADDR,
+    Instruction::SUB_DOUBLE_2ADDR,
+    Instruction::MUL_DOUBLE_2ADDR,
+    Instruction::DIV_DOUBLE_2ADDR,
+    Instruction::REM_DOUBLE_2ADDR,
     // TODO(Arm64): Enable compiler pass
     // ----- ExtendedMIROpcode -----
     kMirOpPhi,
@@ -281,11 +339,6 @@
     // Instruction::MOVE_RESULT,
     // Instruction::MOVE_RESULT_WIDE,
     // Instruction::MOVE_RESULT_OBJECT,
-    // Instruction::CONST_HIGH16,
-    // Instruction::CONST_WIDE_16,
-    // Instruction::CONST_WIDE_32,
-    // Instruction::CONST_WIDE,
-    // Instruction::CONST_WIDE_HIGH16,
     // Instruction::CONST_STRING_JUMBO,
     // Instruction::CONST_CLASS,
     // Instruction::CHECK_CAST,
@@ -296,9 +349,6 @@
     // Instruction::FILLED_NEW_ARRAY,
     // Instruction::FILLED_NEW_ARRAY_RANGE,
     // Instruction::FILL_ARRAY_DATA,
-    Instruction::CMPL_DOUBLE,
-    Instruction::CMPG_DOUBLE,
-    Instruction::CMP_LONG,
     // Instruction::UNUSED_3E,
     // Instruction::UNUSED_3F,
     // Instruction::UNUSED_40,
@@ -336,55 +386,6 @@
     // Instruction::INVOKE_INTERFACE_RANGE,
     // Instruction::UNUSED_79,
     // Instruction::UNUSED_7A,
-    Instruction::NEG_LONG,
-    Instruction::NOT_LONG,
-    Instruction::NEG_DOUBLE,
-    Instruction::INT_TO_LONG,
-    Instruction::INT_TO_FLOAT,
-    Instruction::INT_TO_DOUBLE,
-    Instruction::LONG_TO_INT,
-    Instruction::LONG_TO_FLOAT,
-    Instruction::LONG_TO_DOUBLE,
-    Instruction::FLOAT_TO_INT,
-    Instruction::FLOAT_TO_LONG,
-    Instruction::FLOAT_TO_DOUBLE,
-    Instruction::DOUBLE_TO_INT,
-    Instruction::DOUBLE_TO_LONG,
-    Instruction::DOUBLE_TO_FLOAT,
-    Instruction::ADD_LONG,
-    Instruction::SUB_LONG,
-    Instruction::MUL_LONG,
-    Instruction::DIV_LONG,
-    Instruction::REM_LONG,
-    Instruction::AND_LONG,
-    Instruction::OR_LONG,
-    Instruction::XOR_LONG,
-    Instruction::SHL_LONG,
-    Instruction::SHR_LONG,
-    Instruction::USHR_LONG,
-    // Instruction::REM_FLOAT,
-    Instruction::ADD_DOUBLE,
-    Instruction::SUB_DOUBLE,
-    Instruction::MUL_DOUBLE,
-    Instruction::DIV_DOUBLE,
-    // Instruction::REM_DOUBLE,
-    Instruction::ADD_LONG_2ADDR,
-    Instruction::SUB_LONG_2ADDR,
-    Instruction::MUL_LONG_2ADDR,
-    Instruction::DIV_LONG_2ADDR,
-    Instruction::REM_LONG_2ADDR,
-    Instruction::AND_LONG_2ADDR,
-    Instruction::OR_LONG_2ADDR,
-    Instruction::XOR_LONG_2ADDR,
-    Instruction::SHL_LONG_2ADDR,
-    Instruction::SHR_LONG_2ADDR,
-    Instruction::USHR_LONG_2ADDR,
-    // Instruction::REM_FLOAT_2ADDR,
-    Instruction::ADD_DOUBLE_2ADDR,
-    Instruction::SUB_DOUBLE_2ADDR,
-    Instruction::MUL_DOUBLE_2ADDR,
-    Instruction::DIV_DOUBLE_2ADDR,
-    // Instruction::REM_DOUBLE_2ADDR,
     // Instruction::IGET_QUICK,
     // Instruction::IGET_WIDE_QUICK,
     // Instruction::IGET_OBJECT_QUICK,
diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc
index 4a0c055..fcaaba5 100644
--- a/compiler/dex/quick/arm64/assemble_arm64.cc
+++ b/compiler/dex/quick/arm64/assemble_arm64.cc
@@ -268,7 +268,7 @@
                  kFmtRegS, 4, 0, kFmtRegW, 9, 5, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
                  "fmov", "!0s, !1w", kFixupNone),
-    ENCODING_MAP(kA64Fmov2Sx, NO_VARIANTS(0x9e6f0000),
+    ENCODING_MAP(kA64Fmov2Sx, NO_VARIANTS(0x9e670000),
                  kFmtRegD, 4, 0, kFmtRegX, 9, 5, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
                  "fmov", "!0S, !1x", kFixupNone),
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
index c92832e..8ecc393 100644
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -185,6 +185,7 @@
     LIR* OpMovRegMem(RegStorage r_dest, RegStorage r_base, int offset, MoveType move_type);
     LIR* OpMovMemReg(RegStorage r_base, int offset, RegStorage r_src, MoveType move_type);
     LIR* OpCondRegReg(OpKind op, ConditionCode cc, RegStorage r_dest, RegStorage r_src);
+    LIR* OpRegRegImm64(OpKind op, RegStorage r_dest, RegStorage r_src1, int64_t value);
     LIR* OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, int value);
     LIR* OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2);
     LIR* OpTestSuspend(LIR* target);
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc
index 1ad0435..a18cc82 100644
--- a/compiler/dex/quick/arm64/int_arm64.cc
+++ b/compiler/dex/quick/arm64/int_arm64.cc
@@ -77,10 +77,10 @@
   default:
     LOG(FATAL) << "Unexpected case: " << opcode;
   }
-  rl_shift = LoadValueWide(rl_shift, kCoreReg);
+  rl_shift = LoadValue(rl_shift, kCoreReg);
   rl_src1 = LoadValueWide(rl_src1, kCoreReg);
   RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
-  OpRegRegReg(op, rl_result.reg, rl_src1.reg, rl_shift.reg);
+  OpRegRegReg(op, rl_result.reg, rl_src1.reg, As64BitReg(rl_shift.reg));
   StoreValueWide(rl_dest, rl_result);
 }
 
@@ -1021,7 +1021,7 @@
 
   rl_src1 = LoadValueWide(rl_src1, kCoreReg);
   RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
-  OpRegRegImm(op, rl_result.reg, rl_src1.reg, val);
+  OpRegRegImm64(op, rl_result.reg, rl_src1.reg, val);
   StoreValueWide(rl_dest, rl_result);
 }
 
diff --git a/compiler/dex/quick/arm64/utility_arm64.cc b/compiler/dex/quick/arm64/utility_arm64.cc
index 954360d..bb8b7e3 100644
--- a/compiler/dex/quick/arm64/utility_arm64.cc
+++ b/compiler/dex/quick/arm64/utility_arm64.cc
@@ -146,7 +146,7 @@
 
 static int CountSetBits(bool is_wide, uint64_t value) {
   return ((is_wide) ?
-          __builtin_popcountl(value) : __builtin_popcount((uint32_t)value));
+          __builtin_popcountll(value) : __builtin_popcount((uint32_t)value));
 }
 
 /**
@@ -552,8 +552,11 @@
   return OpRegRegRegShift(op, r_dest, r_src1, r_src2, ENCODE_NO_SHIFT);
 }
 
-// Should be taking an int64_t value ?
 LIR* Arm64Mir2Lir::OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, int value) {
+  return OpRegRegImm64(op, r_dest, r_src1, static_cast<int64_t>(value));
+}
+
+LIR* Arm64Mir2Lir::OpRegRegImm64(OpKind op, RegStorage r_dest, RegStorage r_src1, int64_t value) {
   LIR* res;
   bool neg = (value < 0);
   int64_t abs_value = (neg) ? -value : value;
@@ -637,11 +640,17 @@
     return NewLIR3(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), log_imm);
   } else {
-    RegStorage r_scratch = AllocTemp();
-    LoadConstant(r_scratch, value);
+    RegStorage r_scratch;
+    if (IS_WIDE(wide)) {
+      r_scratch = AllocTempWide();
+      LoadConstantWide(r_scratch, value);
+    } else {
+      r_scratch = AllocTemp();
+      LoadConstant(r_scratch, value);
+    }
     if (EncodingMap[alt_opcode].flags & IS_QUAD_OP)
-      res = NewLIR4(alt_opcode, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg(), 0);
+      res = NewLIR4(alt_opcode | wide, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg(), 0);
     else
-      res = NewLIR3(alt_opcode, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg());
+      res = NewLIR3(alt_opcode | wide, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg());
     FreeTemp(r_scratch);
     return res;
   }
@@ -666,9 +675,36 @@
     // abs_value is a shifted 12-bit immediate.
     shift = true;
     abs_value >>= 12;
+  } else if (LIKELY(abs_value < 0x1000000 && (op == kOpAdd || op == kOpSub))) {
+    // Note: It is better to use two ADD/SUB instead of loading a number to a temp register.
+    // This works for both normal registers and SP.
+    // For a frame size == 0x2468, it will be encoded as:
+    //   sub sp, #0x2000
+    //   sub sp, #0x468
+    if (neg) {
+      op = (op == kOpAdd) ? kOpSub : kOpAdd;
+    }
+    OpRegImm64(op, r_dest_src1, abs_value & (~INT64_C(0xfff)));
+    return OpRegImm64(op, r_dest_src1, abs_value & 0xfff);
+  } else if (LIKELY(A64_REG_IS_SP(r_dest_src1.GetReg()) && (op == kOpAdd || op == kOpSub))) {
+    // Note: "sub sp, sp, Xm" is not correct on arm64.
+    // We need special instructions for SP.
+    // Also operation on 32-bit SP should be avoided.
+    DCHECK(IS_WIDE(wide));
+    RegStorage r_tmp = AllocTempWide();
+    OpRegRegImm(kOpAdd, r_tmp, r_dest_src1, 0);
+    OpRegImm64(op, r_tmp, value);
+    return OpRegRegImm(kOpAdd, r_dest_src1, r_tmp, 0);
   } else {
-    RegStorage r_tmp = AllocTemp();
-    LIR* res = LoadConstant(r_tmp, value);
+    RegStorage r_tmp;
+    LIR* res;
+    if (IS_WIDE(wide)) {
+      r_tmp = AllocTempWide();
+      res = LoadConstantWide(r_tmp, value);
+    } else {
+      r_tmp = AllocTemp();
+      res = LoadConstant(r_tmp, value);
+    }
     OpRegReg(op, r_dest_src1, r_tmp);
     FreeTemp(r_tmp);
     return res;