Merge "X86: EmitArrayImm shouldn't truncate to 16 bits"
diff --git a/compiler/Android.mk b/compiler/Android.mk
index a993251..1b70d59 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -27,6 +27,12 @@
dex/quick/arm/int_arm.cc \
dex/quick/arm/target_arm.cc \
dex/quick/arm/utility_arm.cc \
+ dex/quick/arm64/assemble_arm64.cc \
+ dex/quick/arm64/call_arm64.cc \
+ dex/quick/arm64/fp_arm64.cc \
+ dex/quick/arm64/int_arm64.cc \
+ dex/quick/arm64/target_arm64.cc \
+ dex/quick/arm64/utility_arm64.cc \
dex/quick/codegen_util.cc \
dex/quick/dex_file_method_inliner.cc \
dex/quick/dex_file_to_method_inliner_map.cc \
diff --git a/compiler/compilers.cc b/compiler/compilers.cc
index 188ce6f..79a85db 100644
--- a/compiler/compilers.cc
+++ b/compiler/compilers.cc
@@ -101,6 +101,9 @@
case kThumb2:
mir_to_lir = ArmCodeGenerator(cu, cu->mir_graph.get(), &cu->arena);
break;
+ case kArm64:
+ mir_to_lir = Arm64CodeGenerator(cu, cu->mir_graph.get(), &cu->arena);
+ break;
case kMips:
mir_to_lir = MipsCodeGenerator(cu, cu->mir_graph.get(), &cu->arena);
break;
diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc
index 5e13722..89c642d 100644
--- a/compiler/dex/frontend.cc
+++ b/compiler/dex/frontend.cc
@@ -131,6 +131,370 @@
}
}
+// TODO: Remove this when we are able to compile everything.
+static const int arm64_support_list[] = {
+ Instruction::NOP,
+ // Instruction::MOVE,
+ // Instruction::MOVE_FROM16,
+ // Instruction::MOVE_16,
+ // Instruction::MOVE_WIDE,
+ // Instruction::MOVE_WIDE_FROM16,
+ // Instruction::MOVE_WIDE_16,
+ // Instruction::MOVE_OBJECT,
+ // Instruction::MOVE_OBJECT_FROM16,
+ // Instruction::MOVE_OBJECT_16,
+ // Instruction::MOVE_RESULT,
+ // Instruction::MOVE_RESULT_WIDE,
+ // Instruction::MOVE_RESULT_OBJECT,
+ // Instruction::MOVE_EXCEPTION,
+ // Instruction::RETURN_VOID,
+ // Instruction::RETURN,
+ // Instruction::RETURN_WIDE,
+ // Instruction::RETURN_OBJECT,
+ // Instruction::CONST_4,
+ // Instruction::CONST_16,
+ // Instruction::CONST,
+ // Instruction::CONST_HIGH16,
+ // Instruction::CONST_WIDE_16,
+ // Instruction::CONST_WIDE_32,
+ // Instruction::CONST_WIDE,
+ // Instruction::CONST_WIDE_HIGH16,
+ // Instruction::CONST_STRING,
+ // Instruction::CONST_STRING_JUMBO,
+ // Instruction::CONST_CLASS,
+ // Instruction::MONITOR_ENTER,
+ // Instruction::MONITOR_EXIT,
+ // Instruction::CHECK_CAST,
+ // Instruction::INSTANCE_OF,
+ // Instruction::ARRAY_LENGTH,
+ // Instruction::NEW_INSTANCE,
+ // Instruction::NEW_ARRAY,
+ // Instruction::FILLED_NEW_ARRAY,
+ // Instruction::FILLED_NEW_ARRAY_RANGE,
+ // Instruction::FILL_ARRAY_DATA,
+ // Instruction::THROW,
+ // Instruction::GOTO,
+ // Instruction::GOTO_16,
+ // Instruction::GOTO_32,
+ // Instruction::PACKED_SWITCH,
+ // Instruction::SPARSE_SWITCH,
+ // Instruction::CMPL_FLOAT,
+ // Instruction::CMPG_FLOAT,
+ // Instruction::CMPL_DOUBLE,
+ // Instruction::CMPG_DOUBLE,
+ // Instruction::CMP_LONG,
+ // Instruction::IF_EQ,
+ // Instruction::IF_NE,
+ // Instruction::IF_LT,
+ // Instruction::IF_GE,
+ // Instruction::IF_GT,
+ // Instruction::IF_LE,
+ // Instruction::IF_EQZ,
+ // Instruction::IF_NEZ,
+ // Instruction::IF_LTZ,
+ // Instruction::IF_GEZ,
+ // Instruction::IF_GTZ,
+ // Instruction::IF_LEZ,
+ // Instruction::UNUSED_3E,
+ // Instruction::UNUSED_3F,
+ // Instruction::UNUSED_40,
+ // Instruction::UNUSED_41,
+ // Instruction::UNUSED_42,
+ // Instruction::UNUSED_43,
+ // Instruction::AGET,
+ // Instruction::AGET_WIDE,
+ // Instruction::AGET_OBJECT,
+ // Instruction::AGET_BOOLEAN,
+ // Instruction::AGET_BYTE,
+ // Instruction::AGET_CHAR,
+ // Instruction::AGET_SHORT,
+ // Instruction::APUT,
+ // Instruction::APUT_WIDE,
+ // Instruction::APUT_OBJECT,
+ // Instruction::APUT_BOOLEAN,
+ // Instruction::APUT_BYTE,
+ // Instruction::APUT_CHAR,
+ // Instruction::APUT_SHORT,
+ // Instruction::IGET,
+ // Instruction::IGET_WIDE,
+ // Instruction::IGET_OBJECT,
+ // Instruction::IGET_BOOLEAN,
+ // Instruction::IGET_BYTE,
+ // Instruction::IGET_CHAR,
+ // Instruction::IGET_SHORT,
+ // Instruction::IPUT,
+ // Instruction::IPUT_WIDE,
+ // Instruction::IPUT_OBJECT,
+ // Instruction::IPUT_BOOLEAN,
+ // Instruction::IPUT_BYTE,
+ // Instruction::IPUT_CHAR,
+ // Instruction::IPUT_SHORT,
+ // Instruction::SGET,
+ // Instruction::SGET_WIDE,
+ // Instruction::SGET_OBJECT,
+ // Instruction::SGET_BOOLEAN,
+ // Instruction::SGET_BYTE,
+ // Instruction::SGET_CHAR,
+ // Instruction::SGET_SHORT,
+ // Instruction::SPUT,
+ // Instruction::SPUT_WIDE,
+ // Instruction::SPUT_OBJECT,
+ // Instruction::SPUT_BOOLEAN,
+ // Instruction::SPUT_BYTE,
+ // Instruction::SPUT_CHAR,
+ // Instruction::SPUT_SHORT,
+ Instruction::INVOKE_VIRTUAL,
+ Instruction::INVOKE_SUPER,
+ Instruction::INVOKE_DIRECT,
+ Instruction::INVOKE_STATIC,
+ Instruction::INVOKE_INTERFACE,
+ // Instruction::RETURN_VOID_BARRIER,
+ // Instruction::INVOKE_VIRTUAL_RANGE,
+ // Instruction::INVOKE_SUPER_RANGE,
+ // Instruction::INVOKE_DIRECT_RANGE,
+ // Instruction::INVOKE_STATIC_RANGE,
+ // Instruction::INVOKE_INTERFACE_RANGE,
+ // Instruction::UNUSED_79,
+ // Instruction::UNUSED_7A,
+ // Instruction::NEG_INT,
+ // Instruction::NOT_INT,
+ // Instruction::NEG_LONG,
+ // Instruction::NOT_LONG,
+ // Instruction::NEG_FLOAT,
+ // Instruction::NEG_DOUBLE,
+ // Instruction::INT_TO_LONG,
+ // Instruction::INT_TO_FLOAT,
+ // Instruction::INT_TO_DOUBLE,
+ // Instruction::LONG_TO_INT,
+ // Instruction::LONG_TO_FLOAT,
+ // Instruction::LONG_TO_DOUBLE,
+ // Instruction::FLOAT_TO_INT,
+ // Instruction::FLOAT_TO_LONG,
+ // Instruction::FLOAT_TO_DOUBLE,
+ // Instruction::DOUBLE_TO_INT,
+ // Instruction::DOUBLE_TO_LONG,
+ // Instruction::DOUBLE_TO_FLOAT,
+ // Instruction::INT_TO_BYTE,
+ // Instruction::INT_TO_CHAR,
+ // Instruction::INT_TO_SHORT,
+ // Instruction::ADD_INT,
+ // Instruction::SUB_INT,
+ // Instruction::MUL_INT,
+ // Instruction::DIV_INT,
+ // Instruction::REM_INT,
+ // Instruction::AND_INT,
+ // Instruction::OR_INT,
+ // Instruction::XOR_INT,
+ // Instruction::SHL_INT,
+ // Instruction::SHR_INT,
+ // Instruction::USHR_INT,
+ // Instruction::ADD_LONG,
+ // Instruction::SUB_LONG,
+ // Instruction::MUL_LONG,
+ // Instruction::DIV_LONG,
+ // Instruction::REM_LONG,
+ // Instruction::AND_LONG,
+ // Instruction::OR_LONG,
+ // Instruction::XOR_LONG,
+ // Instruction::SHL_LONG,
+ // Instruction::SHR_LONG,
+ // Instruction::USHR_LONG,
+ // Instruction::ADD_FLOAT,
+ // Instruction::SUB_FLOAT,
+ // Instruction::MUL_FLOAT,
+ // Instruction::DIV_FLOAT,
+ // Instruction::REM_FLOAT,
+ // Instruction::ADD_DOUBLE,
+ // Instruction::SUB_DOUBLE,
+ // Instruction::MUL_DOUBLE,
+ // Instruction::DIV_DOUBLE,
+ // Instruction::REM_DOUBLE,
+ // Instruction::ADD_INT_2ADDR,
+ // Instruction::SUB_INT_2ADDR,
+ // Instruction::MUL_INT_2ADDR,
+ // Instruction::DIV_INT_2ADDR,
+ // Instruction::REM_INT_2ADDR,
+ // Instruction::AND_INT_2ADDR,
+ // Instruction::OR_INT_2ADDR,
+ // Instruction::XOR_INT_2ADDR,
+ // Instruction::SHL_INT_2ADDR,
+ // Instruction::SHR_INT_2ADDR,
+ // Instruction::USHR_INT_2ADDR,
+ // Instruction::ADD_LONG_2ADDR,
+ // Instruction::SUB_LONG_2ADDR,
+ // Instruction::MUL_LONG_2ADDR,
+ // Instruction::DIV_LONG_2ADDR,
+ // Instruction::REM_LONG_2ADDR,
+ // Instruction::AND_LONG_2ADDR,
+ // Instruction::OR_LONG_2ADDR,
+ // Instruction::XOR_LONG_2ADDR,
+ // Instruction::SHL_LONG_2ADDR,
+ // Instruction::SHR_LONG_2ADDR,
+ // Instruction::USHR_LONG_2ADDR,
+ // Instruction::ADD_FLOAT_2ADDR,
+ // Instruction::SUB_FLOAT_2ADDR,
+ // Instruction::MUL_FLOAT_2ADDR,
+ // Instruction::DIV_FLOAT_2ADDR,
+ // Instruction::REM_FLOAT_2ADDR,
+ // Instruction::ADD_DOUBLE_2ADDR,
+ // Instruction::SUB_DOUBLE_2ADDR,
+ // Instruction::MUL_DOUBLE_2ADDR,
+ // Instruction::DIV_DOUBLE_2ADDR,
+ // Instruction::REM_DOUBLE_2ADDR,
+ // Instruction::ADD_INT_LIT16,
+ // Instruction::RSUB_INT,
+ // Instruction::MUL_INT_LIT16,
+ // Instruction::DIV_INT_LIT16,
+ // Instruction::REM_INT_LIT16,
+ // Instruction::AND_INT_LIT16,
+ // Instruction::OR_INT_LIT16,
+ // Instruction::XOR_INT_LIT16,
+ // Instruction::ADD_INT_LIT8,
+ // Instruction::RSUB_INT_LIT8,
+ // Instruction::MUL_INT_LIT8,
+ // Instruction::DIV_INT_LIT8,
+ // Instruction::REM_INT_LIT8,
+ // Instruction::AND_INT_LIT8,
+ // Instruction::OR_INT_LIT8,
+ // Instruction::XOR_INT_LIT8,
+ // Instruction::SHL_INT_LIT8,
+ // Instruction::SHR_INT_LIT8,
+ // Instruction::USHR_INT_LIT8,
+ // Instruction::IGET_QUICK,
+ // Instruction::IGET_WIDE_QUICK,
+ // Instruction::IGET_OBJECT_QUICK,
+ // Instruction::IPUT_QUICK,
+ // Instruction::IPUT_WIDE_QUICK,
+ // Instruction::IPUT_OBJECT_QUICK,
+ // Instruction::INVOKE_VIRTUAL_QUICK,
+ // Instruction::INVOKE_VIRTUAL_RANGE_QUICK,
+ // Instruction::UNUSED_EB,
+ // Instruction::UNUSED_EC,
+ // Instruction::UNUSED_ED,
+ // Instruction::UNUSED_EE,
+ // Instruction::UNUSED_EF,
+ // Instruction::UNUSED_F0,
+ // Instruction::UNUSED_F1,
+ // Instruction::UNUSED_F2,
+ // Instruction::UNUSED_F3,
+ // Instruction::UNUSED_F4,
+ // Instruction::UNUSED_F5,
+ // Instruction::UNUSED_F6,
+ // Instruction::UNUSED_F7,
+ // Instruction::UNUSED_F8,
+ // Instruction::UNUSED_F9,
+ // Instruction::UNUSED_FA,
+ // Instruction::UNUSED_FB,
+ // Instruction::UNUSED_FC,
+ // Instruction::UNUSED_FD,
+ // Instruction::UNUSED_FE,
+ // Instruction::UNUSED_FF,
+
+ // ----- ExtendedMIROpcode -----
+ // kMirOpPhi,
+ // kMirOpCopy,
+ // kMirOpFusedCmplFloat,
+ // kMirOpFusedCmpgFloat,
+ // kMirOpFusedCmplDouble,
+ // kMirOpFusedCmpgDouble,
+ // kMirOpFusedCmpLong,
+ // kMirOpNop,
+ // kMirOpNullCheck,
+ // kMirOpRangeCheck,
+ // kMirOpDivZeroCheck,
+ // kMirOpCheck,
+ // kMirOpCheckPart2,
+ // kMirOpSelect,
+ // kMirOpLast,
+};
+
+// TODO: Remove this when we are able to compile everything.
+static bool CanCompileShorty(const char* shorty) {
+ uint32_t shorty_size = strlen(shorty);
+ CHECK_GE(shorty_size, 1u);
+  // Set a limit on the maximum number of parameters.
+  // Note: there is an implied "method*" parameter, and probably "this" as well.
+  // 1 is for the return type. Currently, we accept at most 2 parameters.
+ if (shorty_size > (1 + 2)) {
+ return false;
+ }
+ // Z : boolean
+ // B : byte
+ // S : short
+ // C : char
+ // I : int
+  // J : long
+ // F : float
+ // D : double
+ // L : reference(object, array)
+ // V : void
+  // The current calling convention only supports 32-bit softfp, which has
+  // problems with long, float and double.
+ constexpr char supported_types[] = "ZBSCILV";
+ for (uint32_t i = 0; i < shorty_size; i++) {
+ if (strchr(supported_types, shorty[i]) == nullptr) {
+ return false;
+ }
+ }
+ return true;
+}
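For reference, a shorty lists the return type first, then the parameter types, with every reference type collapsed to 'L'. A few worked examples of the filter above (hypothetical signatures, shown only to illustrate the two rejection paths):

// CanCompileShorty("III");   // int add(int, int)       -> true: 3 chars, all in "ZBSCILV".
// CanCompileShorty("L");     // Object get()            -> true: references are 'L'.
// CanCompileShorty("JII");   // long sum(int, int)      -> false: 'J' (long) is unsupported.
// CanCompileShorty("VIII");  // void set(int, int, int) -> false: more than 2 parameters.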
+
+// TODO: Remove this when we are able to compile everything.
+// Skip methods that we do not currently support.
+static bool CanCompileMethod(uint32_t method_idx, const DexFile& dex_file,
+ CompilationUnit& cu) {
+  // There are some limitations with the current ARM64 backend.
+ if (cu.instruction_set == kArm64) {
+ // Check if we can compile the prototype.
+ const char* shorty = dex_file.GetMethodShorty(dex_file.GetMethodId(method_idx));
+ if (!CanCompileShorty(shorty)) {
+      VLOG(compiler) << "Unsupported shorty: " << shorty;
+ return false;
+ }
+
+ for (int idx = 0; idx < cu.mir_graph->GetNumBlocks(); idx++) {
+      BasicBlock* bb = cu.mir_graph->GetBasicBlock(idx);
+      if (bb == nullptr) continue;
+      if (bb->block_type == kDead) continue;
+ for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
+ int opcode = mir->dalvikInsn.opcode;
+ // Check if we support the byte code.
+ if (std::find(arm64_support_list, arm64_support_list + arraysize(arm64_support_list),
+ opcode) == arm64_support_list + arraysize(arm64_support_list)) {
+ if (opcode < kMirOpFirst) {
+            VLOG(compiler) << "Unsupported dalvik byte code: "
+                           << mir->dalvikInsn.opcode;
+          } else {
+            VLOG(compiler) << "Unsupported extended MIR opcode: "
+                           << MIRGraph::extended_mir_op_names_[opcode - kMirOpFirst];
+ }
+ return false;
+ }
+ // Check if it invokes a prototype that we cannot support.
+ if (Instruction::INVOKE_VIRTUAL == opcode ||
+ Instruction::INVOKE_SUPER == opcode ||
+ Instruction::INVOKE_DIRECT == opcode ||
+ Instruction::INVOKE_STATIC == opcode ||
+ Instruction::INVOKE_INTERFACE == opcode) {
+ uint32_t invoke_method_idx = mir->dalvikInsn.vB;
+ const char* invoke_method_shorty = dex_file.GetMethodShorty(
+ dex_file.GetMethodId(invoke_method_idx));
+ if (!CanCompileShorty(invoke_method_shorty)) {
+            VLOG(compiler) << "Unsupported invoke of '"
+                           << PrettyMethod(invoke_method_idx, dex_file)
+                           << "' with shorty: " << invoke_method_shorty;
+ return false;
+ }
+ }
+ }
+ }
+
+ LOG(INFO) << "Using experimental instruction set A64 for "
+ << PrettyMethod(method_idx, dex_file);
+ }
+ return true;
+}
+
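The support list is deliberately declared as an array of int rather than Instruction::Code, because it mixes two value spaces: Dalvik opcodes below kMirOpFirst and ExtendedMIROpcode values at or above it. A minimal standalone sketch of the membership test used in CanCompileMethod above (IsSupported is an invented name, not part of this change):

#include <algorithm>

static bool IsSupported(int opcode) {
  const int* begin = arm64_support_list;
  const int* end = arm64_support_list + arraysize(arm64_support_list);
  return std::find(begin, end, opcode) != end;  // Linear scan; the list is small.
}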
static CompiledMethod* CompileMethod(CompilerDriver& driver,
Compiler* compiler,
const DexFile::CodeItem* code_item,
@@ -162,6 +526,7 @@
cu.compiler = compiler;
// TODO: x86_64 & arm64 are not yet fully implemented.
CHECK((cu.instruction_set == kThumb2) ||
+ (cu.instruction_set == kArm64) ||
(cu.instruction_set == kX86) ||
(cu.instruction_set == kX86_64) ||
(cu.instruction_set == kMips));
@@ -214,6 +579,11 @@
(1 << kPromoteCompilerTemps));
}
+ if (cu.instruction_set == kArm64) {
+    // TODO(Arm64): enable optimizations once the backend is mature enough.
+    cu.disable_opt = ~static_cast<uint32_t>(0);
+ }
+
cu.StartTimingSplit("BuildMIRGraph");
cu.mir_graph.reset(new MIRGraph(&cu, &cu.arena));
@@ -241,6 +611,12 @@
cu.mir_graph->InlineMethod(code_item, access_flags, invoke_type, class_def_idx, method_idx,
class_loader, dex_file);
+ // TODO(Arm64): Remove this when we are able to compile everything.
+ if (!CanCompileMethod(method_idx, dex_file, cu)) {
+    VLOG(compiler) << "Cannot compile method: " << PrettyMethod(method_idx, dex_file);
+ return nullptr;
+ }
+
cu.NewTimingSplit("MIROpt:CheckFilters");
if (cu.mir_graph->SkipCompilation()) {
return NULL;
diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc
index 5cc994f..413b4e0 100644
--- a/compiler/dex/mir_optimization.cc
+++ b/compiler/dex/mir_optimization.cc
@@ -735,18 +735,20 @@
if (pred_bb->block_type == kDalvikByteCode) {
// Check to see if predecessor had an explicit null-check.
MIR* last_insn = pred_bb->last_mir_insn;
- Instruction::Code last_opcode = last_insn->dalvikInsn.opcode;
- if (last_opcode == Instruction::IF_EQZ) {
- if (pred_bb->fall_through == bb->id) {
- // The fall-through of a block following a IF_EQZ, set the vA of the IF_EQZ to show that
- // it can't be null.
- ssa_regs_to_check->ClearBit(last_insn->ssa_rep->uses[0]);
- }
- } else if (last_opcode == Instruction::IF_NEZ) {
- if (pred_bb->taken == bb->id) {
- // The taken block following a IF_NEZ, set the vA of the IF_NEZ to show that it can't be
- // null.
- ssa_regs_to_check->ClearBit(last_insn->ssa_rep->uses[0]);
+ if (last_insn != nullptr) {
+ Instruction::Code last_opcode = last_insn->dalvikInsn.opcode;
+ if (last_opcode == Instruction::IF_EQZ) {
+ if (pred_bb->fall_through == bb->id) {
+          // The fall-through of a block ending in IF_EQZ: clear the bit for the vA of the
+          // IF_EQZ to show that it can't be null.
+ ssa_regs_to_check->ClearBit(last_insn->ssa_rep->uses[0]);
+ }
+ } else if (last_opcode == Instruction::IF_NEZ) {
+ if (pred_bb->taken == bb->id) {
+          // The taken branch of a block ending in IF_NEZ: clear the bit for the vA of the
+          // IF_NEZ to show that it can't be null.
+ ssa_regs_to_check->ClearBit(last_insn->ssa_rep->uses[0]);
+ }
}
}
}
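Two things happen in this hunk: the body gains a last_insn != nullptr guard (a kDalvikByteCode predecessor may contain no MIR at all), and the existing edge logic is kept. For intuition about that edge logic, a hypothetical Dalvik snippet, given as comments:

//     if-eqz v0, :bail      // taken        => v0 == 0, may be null
//     iget v1, v0, Foo->f   // fall-through => v0 != 0, null check on v0 is redundant
//   :bail
//     return-void
// IF_EQZ proves non-null on the fall-through edge; symmetrically, IF_NEZ proves it on the taken edge.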
@@ -895,7 +897,7 @@
temp_scoped_alloc_.get(), temp_bit_vector_size_, false, kBitMapNullCheck);
nce_changed = ssa_regs_to_check->GetHighestBitSet() != -1;
bb->data_flow_info->ending_check_v->Copy(ssa_regs_to_check);
- } else if (!ssa_regs_to_check->Equal(bb->data_flow_info->ending_check_v)) {
+ } else if (!ssa_regs_to_check->SameBitsSet(bb->data_flow_info->ending_check_v)) {
nce_changed = true;
bb->data_flow_info->ending_check_v->Copy(ssa_regs_to_check);
}
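The switch from Equal() to SameBitsSet() is a semantic fix: Equal() on ART's BitVector also compares storage properties, so two vectors with the same bits set but different allocated lengths would spuriously report a change and keep the analysis iterating. A rough sketch of the distinction, using a simplified bit vector (not ART's actual implementation):

#include <algorithm>
#include <cstdint>
#include <cstring>

struct TinyBitVector {
  uint32_t* words;
  size_t num_words;
};

// Strict equality: same storage size and same contents.
static bool Equal(const TinyBitVector& a, const TinyBitVector& b) {
  return a.num_words == b.num_words &&
         std::memcmp(a.words, b.words, a.num_words * sizeof(uint32_t)) == 0;
}

// Same set of bits: trailing zero words in the longer vector are ignored.
static bool SameBitsSet(const TinyBitVector& a, const TinyBitVector& b) {
  size_t common = std::min(a.num_words, b.num_words);
  if (std::memcmp(a.words, b.words, common * sizeof(uint32_t)) != 0) return false;
  const TinyBitVector& longer = (a.num_words > b.num_words) ? a : b;
  for (size_t i = common; i < longer.num_words; ++i) {
    if (longer.words[i] != 0) return false;  // A bit is set beyond the shorter vector.
  }
  return true;
}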
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index 8b4576c..aab6b46 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -32,19 +32,20 @@
bool EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) OVERRIDE;
LIR* CheckSuspendUsingLoad() OVERRIDE;
RegStorage LoadHelper(ThreadOffset<4> offset);
- LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest, OpSize size,
- int s_reg);
+ LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
+ OpSize size) OVERRIDE;
LIR* LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest, int scale,
- OpSize size);
+ OpSize size) OVERRIDE;
LIR* LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement,
- RegStorage r_dest, OpSize size, int s_reg);
+ RegStorage r_dest, OpSize size) OVERRIDE;
LIR* LoadConstantNoClobber(RegStorage r_dest, int value);
LIR* LoadConstantWide(RegStorage r_dest, int64_t value);
- LIR* StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src, OpSize size);
+ LIR* StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src,
+ OpSize size) OVERRIDE;
LIR* StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src, int scale,
- OpSize size);
+ OpSize size) OVERRIDE;
LIR* StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement,
- RegStorage r_src, OpSize size, int s_reg);
+ RegStorage r_src, OpSize size) OVERRIDE;
void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg);
// Required for target - register utilities.
@@ -171,8 +172,7 @@
void OpRegCopyWide(RegStorage dest, RegStorage src);
void OpTlsCmp(ThreadOffset<4> offset, int val);
- LIR* LoadBaseDispBody(RegStorage r_base, int displacement, RegStorage r_dest, OpSize size,
- int s_reg);
+ LIR* LoadBaseDispBody(RegStorage r_base, int displacement, RegStorage r_dest, OpSize size);
LIR* StoreBaseDispBody(RegStorage r_base, int displacement, RegStorage r_src, OpSize size);
LIR* OpRegRegRegShift(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2,
int shift);
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index 8dd31d1..0948ce3 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -692,7 +692,7 @@
} else {
DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
// Unaligned load with LDR and LDRSH is allowed on ARMv7 with SCTLR.A set to 0.
- LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, INVALID_SREG);
+ LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size);
StoreValue(rl_dest, rl_result);
}
return true;
@@ -1170,7 +1170,7 @@
}
FreeTemp(reg_len);
}
- LoadBaseDisp(reg_ptr, data_offset, rl_result.reg, size, INVALID_SREG);
+ LoadBaseDisp(reg_ptr, data_offset, rl_result.reg, size);
MarkPossibleNullPointerException(opt_flags);
if (!constant_index) {
FreeTemp(reg_ptr);
diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc
index b7b9093..1745c18 100644
--- a/compiler/dex/quick/arm/utility_arm.cc
+++ b/compiler/dex/quick/arm/utility_arm.cc
@@ -825,7 +825,7 @@
* performing null check, incoming MIR can be null.
*/
LIR* ArmMir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorage r_dest,
- OpSize size, int s_reg) {
+ OpSize size) {
LIR* load = NULL;
ArmOpcode opcode = kThumbBkpt;
bool short_form = false;
@@ -833,30 +833,32 @@
bool all_low = r_dest.Is32Bit() && r_base.Low8() && r_dest.Low8();
int encoded_disp = displacement;
bool already_generated = false;
- bool null_pointer_safepoint = false;
switch (size) {
case kDouble:
// Intentional fall-through.
- case k64:
+ case k64: {
+ DCHECK_EQ(displacement & 3, 0);
+ encoded_disp = (displacement & 1020) >> 2; // Within range of kThumb2Vldrd/kThumb2LdrdI8.
+ RegStorage r_ptr = r_base;
+ if ((displacement & ~1020) != 0) {
+        // For a core register load, use r_dest.GetLow() as the temporary pointer.
+ r_ptr = r_dest.IsFloat() ? AllocTemp() : r_dest.GetLow();
+        // Add displacement & ~1020 to the base; a single ADD reaches offsets up to +-256KiB.
+ OpRegRegImm(kOpAdd, r_ptr, r_base, displacement & ~1020);
+ }
if (r_dest.IsFloat()) {
DCHECK(!r_dest.IsPair());
- opcode = kThumb2Vldrd;
- if (displacement <= 1020) {
- short_form = true;
- encoded_disp >>= 2;
- }
+ load = NewLIR3(kThumb2Vldrd, r_dest.GetReg(), r_ptr.GetReg(), encoded_disp);
} else {
- if (displacement <= 1020) {
- load = NewLIR4(kThumb2LdrdI8, r_dest.GetLowReg(), r_dest.GetHighReg(), r_base.GetReg(),
- displacement >> 2);
- } else {
- load = LoadBaseDispBody(r_base, displacement, r_dest.GetLow(), k32, s_reg);
- null_pointer_safepoint = true;
- LoadBaseDispBody(r_base, displacement + 4, r_dest.GetHigh(), k32, INVALID_SREG);
- }
- already_generated = true;
+        load = NewLIR4(kThumb2LdrdI8, r_dest.GetLowReg(), r_dest.GetHighReg(), r_ptr.GetReg(),
+                       encoded_disp);
}
+      if ((displacement & ~1020) != 0 && r_dest.IsFloat()) {
+        // Only the float path allocated a temp; the core path reused r_dest.GetLow().
+        FreeTemp(r_ptr);
+      }
+ already_generated = true;
break;
+ }
case kSingle:
// Intentional fall-through.
case k32:
@@ -935,7 +937,7 @@
if (r_dest.IsFloat()) {
// No index ops - must use a long sequence. Turn the offset into a direct pointer.
OpRegReg(kOpAdd, reg_offset, r_base);
- load = LoadBaseDispBody(reg_offset, 0, r_dest, size, s_reg);
+ load = LoadBaseDispBody(reg_offset, 0, r_dest, size);
} else {
load = LoadBaseIndexed(r_base, reg_offset, r_dest, 0, size);
}
@@ -946,22 +948,16 @@
// TODO: in future may need to differentiate Dalvik accesses w/ spills
if (r_base == rs_rARM_SP) {
AnnotateDalvikRegAccess(load, displacement >> 2, true /* is_load */, r_dest.Is64Bit());
- } else {
- // We might need to generate a safepoint if we have two store instructions (wide or double).
- if (!Runtime::Current()->ExplicitNullChecks() && null_pointer_safepoint) {
- MarkSafepointPC(load);
- }
}
return load;
}
-LIR* ArmMir2Lir::LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest, OpSize size,
- int s_reg) {
+LIR* ArmMir2Lir::LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest, OpSize size) {
// TODO: base this on target.
if (size == kWord) {
size = k32;
}
- return LoadBaseDispBody(r_base, displacement, r_dest, size, s_reg);
+ return LoadBaseDispBody(r_base, displacement, r_dest, size);
}
@@ -974,29 +970,31 @@
bool all_low = r_src.Is32Bit() && r_base.Low8() && r_src.Low8();
int encoded_disp = displacement;
bool already_generated = false;
- bool null_pointer_safepoint = false;
switch (size) {
- case k64:
case kDouble:
- if (!r_src.IsFloat()) {
- if (displacement <= 1020) {
- store = NewLIR4(kThumb2StrdI8, r_src.GetLowReg(), r_src.GetHighReg(), r_base.GetReg(),
- displacement >> 2);
- } else {
- store = StoreBaseDispBody(r_base, displacement, r_src.GetLow(), k32);
- null_pointer_safepoint = true;
- StoreBaseDispBody(r_base, displacement + 4, r_src.GetHigh(), k32);
- }
- already_generated = true;
- } else {
- DCHECK(!r_src.IsPair());
- opcode = kThumb2Vstrd;
- if (displacement <= 1020) {
- short_form = true;
- encoded_disp >>= 2;
- }
+      // Intentional fall-through.
+ case k64: {
+ DCHECK_EQ(displacement & 3, 0);
+ encoded_disp = (displacement & 1020) >> 2; // Within range of kThumb2Vstrd/kThumb2StrdI8.
+ RegStorage r_ptr = r_base;
+ if ((displacement & ~1020) != 0) {
+ r_ptr = AllocTemp();
+        // Add displacement & ~1020 to the base; a single ADD reaches offsets up to +-256KiB.
+ OpRegRegImm(kOpAdd, r_ptr, r_base, displacement & ~1020);
}
+ if (r_src.IsFloat()) {
+ DCHECK(!r_src.IsPair());
+ store = NewLIR3(kThumb2Vstrd, r_src.GetReg(), r_ptr.GetReg(), encoded_disp);
+ } else {
+ store = NewLIR4(kThumb2StrdI8, r_src.GetLowReg(), r_src.GetHighReg(), r_ptr.GetReg(),
+ encoded_disp);
+ }
+ if ((displacement & ~1020) != 0) {
+ FreeTemp(r_ptr);
+ }
+ already_generated = true;
break;
+ }
case kSingle:
// Intentional fall-through.
case k32:
@@ -1070,11 +1068,6 @@
// TODO: In future, may need to differentiate Dalvik & spill accesses
if (r_base == rs_rARM_SP) {
AnnotateDalvikRegAccess(store, displacement >> 2, false /* is_load */, r_src.Is64Bit());
- } else {
- // We might need to generate a safepoint if we have two store instructions (wide or double).
- if (!Runtime::Current()->ExplicitNullChecks() && null_pointer_safepoint) {
- MarkSafepointPC(store);
- }
}
return store;
}
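Both rewritten k64 paths split the displacement the same way: the low bits (displacement & 1020) fit the word-scaled 8-bit offset field of kThumb2Vldrd/kThumb2Vstrd and kThumb2LdrdI8/kThumb2StrdI8 (hence the >> 2), and any remainder is folded into the base with one ADD. A worked example with hypothetical displacements:

// displacement = 1028 (word-aligned, so the DCHECK passes):
//   displacement & ~1020 = 1024  -> OpRegRegImm(kOpAdd, r_ptr, r_base, 1024)
//   displacement &  1020 =    4  -> encoded_disp = 4 >> 2 = 1
//   final access: [r_ptr, #4] == [r_base + 1024 + 4] == [r_base + 1028]
// displacement = 512:
//   displacement & ~1020 = 0     -> no ADD emitted, r_ptr stays r_base
//   displacement &  1020 = 512   -> encoded_disp = 512 >> 2 = 128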
@@ -1119,7 +1112,7 @@
}
LIR* ArmMir2Lir::StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale,
- int displacement, RegStorage r_src, OpSize size, int s_reg) {
+ int displacement, RegStorage r_src, OpSize size) {
LOG(FATAL) << "Unexpected use of StoreBaseIndexedDisp for Arm";
return NULL;
}
@@ -1130,7 +1123,7 @@
}
LIR* ArmMir2Lir::LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale,
- int displacement, RegStorage r_dest, OpSize size, int s_reg) {
+ int displacement, RegStorage r_dest, OpSize size) {
LOG(FATAL) << "Unexpected use of LoadBaseIndexedDisp for Arm";
return NULL;
}
diff --git a/compiler/dex/quick/arm64/arm64_lir.h b/compiler/dex/quick/arm64/arm64_lir.h
index c6d6295..452c8d7 100644
--- a/compiler/dex/quick/arm64/arm64_lir.h
+++ b/compiler/dex/quick/arm64/arm64_lir.h
@@ -22,6 +22,8 @@
namespace art {
/*
+ * TODO(Arm64): the comments below are outdated.
+ *
* Runtime register usage conventions.
*
* r0-r3: Argument registers in both Dalvik and C/C++ conventions.
@@ -29,12 +31,12 @@
* pointer in r0 as a hidden arg0. Otherwise used as codegen scratch
* registers.
* r0-r1: As in C/C++ r0 is 32-bit return register and r0/r1 is 64-bit
- * r4 : (rARM_SUSPEND) is reserved (suspend check/debugger assist)
+ * r4 : (rA64_SUSPEND) is reserved (suspend check/debugger assist)
* r5 : Callee save (promotion target)
* r6 : Callee save (promotion target)
* r7 : Callee save (promotion target)
* r8 : Callee save (promotion target)
- * r9 : (rARM_SELF) is reserved (pointer to thread-local storage)
+ * r9 : (rA64_SELF) is reserved (pointer to thread-local storage)
* r10 : Callee save (promotion target)
* r11 : Callee save (promotion target)
* r12 : Scratch, may be trashed by linkage stubs
@@ -93,452 +95,284 @@
* +========================+
*/
+#if 1
+#define A64_PTR_SIZE 4
+#define A64_GET_INT_OFFS(offs) ((offs).Int32Value())
+#else
+// Not yet ready for this.
+#define A64_PTR_SIZE 8
+#define A64_GET_INT_OFFS(offs) ((offs).Int32Value())
+#endif
+
+#define A64_QUICK_ENTRYPOINT_OFFSET(name) QUICK_ENTRYPOINT_OFFSET(A64_PTR_SIZE, name)
+#define A64_QUICK_ENTRYPOINT_INT_OFFS(name) A64_GET_INT_OFFS(A64_QUICK_ENTRYPOINT_OFFSET(name))
+#define A64_THREAD_THIN_LOCK_ID_OFFSET A64_GET_INT_OFFS(Thread::ThinLockIdOffset<A64_PTR_SIZE>())
+#define A64_THREAD_EXCEPTION_INT_OFFS A64_GET_INT_OFFS(Thread::ExceptionOffset<A64_PTR_SIZE>())
+#define A64_THREAD_CARD_TABLE_INT_OFFS A64_GET_INT_OFFS(Thread::CardTableOffset<A64_PTR_SIZE>())
+#define A64_THREAD_STACK_END_INT_OFFS A64_GET_INT_OFFS(Thread::StackEndOffset<A64_PTR_SIZE>())
+#define A64_THREAD_SUSPEND_TRIGGER_OFFSET \
+ A64_GET_INT_OFFS(Thread::ThreadSuspendTriggerOffset<A64_PTR_SIZE>())
+typedef ThreadOffset<A64_PTR_SIZE> A64ThreadOffset;
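These wrappers exist so the pointer size can later be flipped from 4 to 8 in a single place; until then the backend keeps using 32-bit Thread offsets. A mechanical expansion example (pThrowNullPointer is just an illustrative entrypoint name):

// A64_THREAD_EXCEPTION_INT_OFFS
//   -> A64_GET_INT_OFFS(Thread::ExceptionOffset<4>())
//   -> Thread::ExceptionOffset<4>().Int32Value()
// A64_QUICK_ENTRYPOINT_OFFSET(pThrowNullPointer)
//   -> QUICK_ENTRYPOINT_OFFSET(4, pThrowNullPointer)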
+
+// Offset to distinguish FP regs.
+#define ARM_FP_REG_OFFSET 32
// First FP callee save.
#define ARM_FP_CALLEE_SAVE_BASE 16
+// Mask to strip off fp flags.
+#define ARM_FP_REG_MASK (ARM_FP_REG_OFFSET - 1)
+
+// Temporary macros, used to mark code which wants to distinguish between zr/sp.
+#define A64_REG_IS_SP(reg_num) ((reg_num) == rwsp || (reg_num) == rsp)
+#define A64_REG_IS_ZR(reg_num) ((reg_num) == rwzr || (reg_num) == rxzr)
+
enum ArmResourceEncodingPos {
kArmGPReg0 = 0,
- kArmRegSP = 13,
- kArmRegLR = 14,
- kArmRegPC = 15,
- kArmFPReg0 = 16,
- kArmFPReg16 = 32,
- kArmRegEnd = 48,
+ kArmRegLR = 30,
+ kArmRegSP = 31,
+ kArmFPReg0 = 32,
+ kArmRegEnd = 64,
};
-#define ENCODE_ARM_REG_LIST(N) (static_cast<uint64_t>(N))
#define ENCODE_ARM_REG_SP (1ULL << kArmRegSP)
#define ENCODE_ARM_REG_LR (1ULL << kArmRegLR)
-#define ENCODE_ARM_REG_PC (1ULL << kArmRegPC)
-#define ENCODE_ARM_REG_FPCS_LIST(N) (static_cast<uint64_t>(N) << kArmFPReg16)
-enum ArmNativeRegisterPool {
- r0 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 0,
- r1 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 1,
- r2 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 2,
- r3 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 3,
- rARM_SUSPEND = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 4,
- r5 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 5,
- r6 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 6,
- r7 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 7,
- r8 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 8,
- rARM_SELF = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 9,
- r10 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 10,
- r11 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 11,
- r12 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 12,
- r13sp = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 13,
- rARM_SP = r13sp,
- r14lr = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 14,
- rARM_LR = r14lr,
- r15pc = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 15,
- rARM_PC = r15pc,
+#define IS_SIGNED_IMM(size, value) \
+ ((value) >= -(1 << ((size) - 1)) && (value) < (1 << ((size) - 1)))
+#define IS_SIGNED_IMM7(value) IS_SIGNED_IMM(7, value)
+#define IS_SIGNED_IMM9(value) IS_SIGNED_IMM(9, value)
+#define IS_SIGNED_IMM12(value) IS_SIGNED_IMM(12, value)
+#define IS_SIGNED_IMM19(value) IS_SIGNED_IMM(19, value)
+#define IS_SIGNED_IMM21(value) IS_SIGNED_IMM(21, value)
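IS_SIGNED_IMM(size, value) tests whether value fits a size-bit two's-complement field. The resulting ranges, annotated with the A64 fields they typically guard (the pairings are standard A64 facts, not taken from this hunk):

// IS_SIGNED_IMM7 : [-64, 63]            e.g. scaled LDP/STP offsets
// IS_SIGNED_IMM9 : [-256, 255]          e.g. LDUR/STUR and pre/post-index offsets
// IS_SIGNED_IMM12: [-2048, 2047]
// IS_SIGNED_IMM19: [-262144, 262143]    e.g. b.cond / load-literal fields
// IS_SIGNED_IMM21: [-1048576, 1048575]  e.g. ADR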
- fr0 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 0,
- fr1 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 1,
- fr2 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 2,
- fr3 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 3,
- fr4 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 4,
- fr5 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 5,
- fr6 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 6,
- fr7 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 7,
- fr8 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 8,
- fr9 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 9,
- fr10 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 10,
- fr11 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 11,
- fr12 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 12,
- fr13 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 13,
- fr14 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 14,
- fr15 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 15,
- fr16 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 16,
- fr17 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 17,
- fr18 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 18,
- fr19 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 19,
- fr20 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 20,
- fr21 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 21,
- fr22 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 22,
- fr23 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 23,
- fr24 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 24,
- fr25 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 25,
- fr26 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 26,
- fr27 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 27,
- fr28 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 28,
- fr29 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 29,
- fr30 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 30,
- fr31 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 31,
+// Quick macro used to define the registers.
+#define A64_REGISTER_CODE_LIST(R) \
+ R(0) R(1) R(2) R(3) R(4) R(5) R(6) R(7) \
+ R(8) R(9) R(10) R(11) R(12) R(13) R(14) R(15) \
+ R(16) R(17) R(18) R(19) R(20) R(21) R(22) R(23) \
+ R(24) R(25) R(26) R(27) R(28) R(29) R(30) R(31)
- dr0 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 0,
- dr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 1,
- dr2 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 2,
- dr3 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 3,
- dr4 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 4,
- dr5 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 5,
- dr6 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 6,
- dr7 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 7,
- dr8 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 8,
- dr9 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 9,
- dr10 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 10,
- dr11 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 11,
- dr12 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 12,
- dr13 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 13,
- dr14 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 14,
- dr15 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 15,
-#if 0
- // Enable when def/use and runtime able to handle these.
- dr16 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 16,
- dr17 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 17,
- dr18 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 18,
- dr19 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 19,
- dr20 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 20,
- dr21 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 21,
- dr22 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 22,
- dr23 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 23,
- dr24 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 24,
- dr25 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 25,
- dr26 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 26,
- dr27 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 27,
- dr28 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 28,
- dr29 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 29,
- dr30 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 30,
- dr31 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 31,
-#endif
+// Registers (integer) values.
+// TODO(Arm64): for now we define rx##nr identically to rw##nr. We should instead define rx##nr
+// as a k64BitSolo. We should do this once the register allocator is ready.
+enum A64NativeRegisterPool {
+# define A64_DEFINE_REGISTERS(nr) \
+ rw##nr = RegStorage::k32BitSolo | RegStorage::kCoreRegister | nr, \
+ rx##nr = RegStorage::k32BitSolo | RegStorage::kCoreRegister | nr, \
+ rf##nr = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | nr, \
+ rd##nr = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | nr,
+ A64_REGISTER_CODE_LIST(A64_DEFINE_REGISTERS)
+#undef A64_DEFINE_REGISTERS
+
+ // TODO(Arm64): can we change the lines below such that rwzr != rwsp && rxzr != rsp?
+ // This would be desirable to allow detecting usage-errors in the assembler.
+ rwzr = rw31,
+ rxzr = rx31,
+ rwsp = rw31,
+ rsp = rx31,
+ rA64_SUSPEND = rx4,
+ rA64_SELF = rx18,
+ rA64_SP = rx31,
+ rA64_LR = rx30
};
-constexpr RegStorage rs_r0(RegStorage::kValid | r0);
-constexpr RegStorage rs_r1(RegStorage::kValid | r1);
-constexpr RegStorage rs_r2(RegStorage::kValid | r2);
-constexpr RegStorage rs_r3(RegStorage::kValid | r3);
-constexpr RegStorage rs_rARM_SUSPEND(RegStorage::kValid | rARM_SUSPEND);
-constexpr RegStorage rs_r5(RegStorage::kValid | r5);
-constexpr RegStorage rs_r6(RegStorage::kValid | r6);
-constexpr RegStorage rs_r7(RegStorage::kValid | r7);
-constexpr RegStorage rs_r8(RegStorage::kValid | r8);
-constexpr RegStorage rs_rARM_SELF(RegStorage::kValid | rARM_SELF);
-constexpr RegStorage rs_r10(RegStorage::kValid | r10);
-constexpr RegStorage rs_r11(RegStorage::kValid | r11);
-constexpr RegStorage rs_r12(RegStorage::kValid | r12);
-constexpr RegStorage rs_r13sp(RegStorage::kValid | r13sp);
-constexpr RegStorage rs_rARM_SP(RegStorage::kValid | rARM_SP);
-constexpr RegStorage rs_r14lr(RegStorage::kValid | r14lr);
-constexpr RegStorage rs_rARM_LR(RegStorage::kValid | rARM_LR);
-constexpr RegStorage rs_r15pc(RegStorage::kValid | r15pc);
-constexpr RegStorage rs_rARM_PC(RegStorage::kValid | rARM_PC);
-constexpr RegStorage rs_invalid(RegStorage::kInvalid);
+#define A64_DEFINE_REGSTORAGES(nr) \
+ constexpr RegStorage rs_w##nr(RegStorage::kValid | rw##nr); \
+ constexpr RegStorage rs_x##nr(RegStorage::kValid | rx##nr); \
+ constexpr RegStorage rs_f##nr(RegStorage::kValid | rf##nr); \
+ constexpr RegStorage rs_d##nr(RegStorage::kValid | rd##nr);
+A64_REGISTER_CODE_LIST(A64_DEFINE_REGSTORAGES)
+#undef A64_DEFINE_REGSTORAGES
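A64_REGISTER_CODE_LIST is an X-macro: each use instantiates its argument once per register number, 0 through 31. For example, the two expansions above produce, for register 0:

// From A64_DEFINE_REGISTERS(0):
//   rw0 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 0,
//   rx0 = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 0,  // Same as rw0 for now, per the TODO.
//   rf0 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 0,
//   rd0 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 0,
// From A64_DEFINE_REGSTORAGES(0):
//   constexpr RegStorage rs_w0(RegStorage::kValid | rw0);  // ... and likewise rs_x0, rs_f0, rs_d0.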
-constexpr RegStorage rs_fr0(RegStorage::kValid | fr0);
-constexpr RegStorage rs_fr1(RegStorage::kValid | fr1);
-constexpr RegStorage rs_fr2(RegStorage::kValid | fr2);
-constexpr RegStorage rs_fr3(RegStorage::kValid | fr3);
-constexpr RegStorage rs_fr4(RegStorage::kValid | fr4);
-constexpr RegStorage rs_fr5(RegStorage::kValid | fr5);
-constexpr RegStorage rs_fr6(RegStorage::kValid | fr6);
-constexpr RegStorage rs_fr7(RegStorage::kValid | fr7);
-constexpr RegStorage rs_fr8(RegStorage::kValid | fr8);
-constexpr RegStorage rs_fr9(RegStorage::kValid | fr9);
-constexpr RegStorage rs_fr10(RegStorage::kValid | fr10);
-constexpr RegStorage rs_fr11(RegStorage::kValid | fr11);
-constexpr RegStorage rs_fr12(RegStorage::kValid | fr12);
-constexpr RegStorage rs_fr13(RegStorage::kValid | fr13);
-constexpr RegStorage rs_fr14(RegStorage::kValid | fr14);
-constexpr RegStorage rs_fr15(RegStorage::kValid | fr15);
-constexpr RegStorage rs_fr16(RegStorage::kValid | fr16);
-constexpr RegStorage rs_fr17(RegStorage::kValid | fr17);
-constexpr RegStorage rs_fr18(RegStorage::kValid | fr18);
-constexpr RegStorage rs_fr19(RegStorage::kValid | fr19);
-constexpr RegStorage rs_fr20(RegStorage::kValid | fr20);
-constexpr RegStorage rs_fr21(RegStorage::kValid | fr21);
-constexpr RegStorage rs_fr22(RegStorage::kValid | fr22);
-constexpr RegStorage rs_fr23(RegStorage::kValid | fr23);
-constexpr RegStorage rs_fr24(RegStorage::kValid | fr24);
-constexpr RegStorage rs_fr25(RegStorage::kValid | fr25);
-constexpr RegStorage rs_fr26(RegStorage::kValid | fr26);
-constexpr RegStorage rs_fr27(RegStorage::kValid | fr27);
-constexpr RegStorage rs_fr28(RegStorage::kValid | fr28);
-constexpr RegStorage rs_fr29(RegStorage::kValid | fr29);
-constexpr RegStorage rs_fr30(RegStorage::kValid | fr30);
-constexpr RegStorage rs_fr31(RegStorage::kValid | fr31);
+constexpr RegStorage rs_wzr(RegStorage::kValid | rwzr);
+constexpr RegStorage rs_xzr(RegStorage::kValid | rxzr);
+constexpr RegStorage rs_rA64_SUSPEND(RegStorage::kValid | rA64_SUSPEND);
+constexpr RegStorage rs_rA64_SELF(RegStorage::kValid | rA64_SELF);
+constexpr RegStorage rs_rA64_SP(RegStorage::kValid | rA64_SP);
+constexpr RegStorage rs_rA64_LR(RegStorage::kValid | rA64_LR);
-constexpr RegStorage rs_dr0(RegStorage::kValid | dr0);
-constexpr RegStorage rs_dr1(RegStorage::kValid | dr1);
-constexpr RegStorage rs_dr2(RegStorage::kValid | dr2);
-constexpr RegStorage rs_dr3(RegStorage::kValid | dr3);
-constexpr RegStorage rs_dr4(RegStorage::kValid | dr4);
-constexpr RegStorage rs_dr5(RegStorage::kValid | dr5);
-constexpr RegStorage rs_dr6(RegStorage::kValid | dr6);
-constexpr RegStorage rs_dr7(RegStorage::kValid | dr7);
-constexpr RegStorage rs_dr8(RegStorage::kValid | dr8);
-constexpr RegStorage rs_dr9(RegStorage::kValid | dr9);
-constexpr RegStorage rs_dr10(RegStorage::kValid | dr10);
-constexpr RegStorage rs_dr11(RegStorage::kValid | dr11);
-constexpr RegStorage rs_dr12(RegStorage::kValid | dr12);
-constexpr RegStorage rs_dr13(RegStorage::kValid | dr13);
-constexpr RegStorage rs_dr14(RegStorage::kValid | dr14);
-constexpr RegStorage rs_dr15(RegStorage::kValid | dr15);
-#if 0
-constexpr RegStorage rs_dr16(RegStorage::kValid | dr16);
-constexpr RegStorage rs_dr17(RegStorage::kValid | dr17);
-constexpr RegStorage rs_dr18(RegStorage::kValid | dr18);
-constexpr RegStorage rs_dr19(RegStorage::kValid | dr19);
-constexpr RegStorage rs_dr20(RegStorage::kValid | dr20);
-constexpr RegStorage rs_dr21(RegStorage::kValid | dr21);
-constexpr RegStorage rs_dr22(RegStorage::kValid | dr22);
-constexpr RegStorage rs_dr23(RegStorage::kValid | dr23);
-constexpr RegStorage rs_dr24(RegStorage::kValid | dr24);
-constexpr RegStorage rs_dr25(RegStorage::kValid | dr25);
-constexpr RegStorage rs_dr26(RegStorage::kValid | dr26);
-constexpr RegStorage rs_dr27(RegStorage::kValid | dr27);
-constexpr RegStorage rs_dr28(RegStorage::kValid | dr28);
-constexpr RegStorage rs_dr29(RegStorage::kValid | dr29);
-constexpr RegStorage rs_dr30(RegStorage::kValid | dr30);
-constexpr RegStorage rs_dr31(RegStorage::kValid | dr31);
-#endif
+// RegisterLocation templates for return values (following the hard-float calling convention).
+const RegLocation arm_loc_c_return =
+ {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, rs_w0, INVALID_SREG, INVALID_SREG};
+const RegLocation arm_loc_c_return_wide =
+ {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, rs_x0, INVALID_SREG, INVALID_SREG};
+const RegLocation arm_loc_c_return_float =
+ {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, rs_f0, INVALID_SREG, INVALID_SREG};
+const RegLocation arm_loc_c_return_double =
+ {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, rs_d0, INVALID_SREG, INVALID_SREG};
-// RegisterLocation templates return values (r0, or r0/r1).
-const RegLocation arm_loc_c_return
- {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1,
- RegStorage(RegStorage::k32BitSolo, r0), INVALID_SREG, INVALID_SREG};
-const RegLocation arm_loc_c_return_wide
- {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1,
- RegStorage(RegStorage::k64BitPair, r0, r1), INVALID_SREG, INVALID_SREG};
-const RegLocation arm_loc_c_return_float
- {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1,
- RegStorage(RegStorage::k32BitSolo, r0), INVALID_SREG, INVALID_SREG};
-const RegLocation arm_loc_c_return_double
- {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1,
- RegStorage(RegStorage::k64BitPair, r0, r1), INVALID_SREG, INVALID_SREG};
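For context, under the standard AArch64 procedure-call convention (hard-float), integer and reference results return in w0/x0 while floating-point results return in s0/d0, which is why the four new templates differ only in the register they name; the old ARM templates returned everything in r0 or the r0/r1 pair.

// AAPCS64 result registers (standard AArch64 PCS, stated for reference):
//   int/reference -> w0    long   -> x0
//   float         -> s0    double -> d0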
+/**
+ * @brief Shift-type to be applied to a register via EncodeShift().
+ */
+enum A64ShiftEncodings {
+ kA64Lsl = 0x0,
+ kA64Lsr = 0x1,
+ kA64Asr = 0x2,
+ kA64Ror = 0x3
+};
-enum ArmShiftEncodings {
- kArmLsl = 0x0,
- kArmLsr = 0x1,
- kArmAsr = 0x2,
- kArmRor = 0x3
+/**
+ * @brief Extend-type to be applied to a register via EncodeExtend().
+ */
+enum A64RegExtEncodings {
+ kA64Uxtb = 0x0,
+ kA64Uxth = 0x1,
+ kA64Uxtw = 0x2,
+ kA64Uxtx = 0x3,
+ kA64Sxtb = 0x4,
+ kA64Sxth = 0x5,
+ kA64Sxtw = 0x6,
+ kA64Sxtx = 0x7
+};
+
+#define ENCODE_NO_SHIFT (EncodeShift(kA64Lsl, 0))
+
+/*
+ * The following enum defines the list of A64 instructions supported by the
+ * assembler. Their corresponding EncodingMap positions will be defined in
+ * assemble_arm64.cc.
+ */
+enum ArmOpcode {
+ kA64First = 0,
+ kA64Adc3rrr = kA64First, // adc [00011010000] rm[20-16] [000000] rn[9-5] rd[4-0].
+ kA64Add4RRdT, // add [s001000100] imm_12[21-10] rn[9-5] rd[4-0].
+ kA64Add4rrro, // add [00001011000] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] rd[4-0].
+ kA64Adr2xd, // adr [0] immlo[30-29] [10000] immhi[23-5] rd[4-0].
+ kA64And3Rrl, // and [00010010] N[22] imm_r[21-16] imm_s[15-10] rn[9-5] rd[4-0].
+ kA64And4rrro, // and [00001010] shift[23-22] [N=0] rm[20-16] imm_6[15-10] rn[9-5] rd[4-0].
+ kA64Asr3rrd, // asr [0001001100] immr[21-16] imms[15-10] rn[9-5] rd[4-0].
+ kA64Asr3rrr, // asr alias of "sbfm arg0, arg1, arg2, {#31/#63}".
+ kA64B2ct, // b.cond [01010100] imm_19[23-5] [0] cond[3-0].
+ kA64Blr1x, // blr [1101011000111111000000] rn[9-5] [00000].
+ kA64Br1x, // br [1101011000011111000000] rn[9-5] [00000].
+ kA64Brk1d, // brk [11010100001] imm_16[20-5] [00000].
+ kA64B1t, // b [00010100] offset_26[25-0].
+ kA64Cbnz2rt, // cbnz[00110101] imm_19[23-5] rt[4-0].
+ kA64Cbz2rt, // cbz [00110100] imm_19[23-5] rt[4-0].
+ kA64Cmn3Rro, // cmn [s0101011001] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] [11111].
+ kA64Cmn3RdT, // cmn [00110001] shift[23-22] imm_12[21-10] rn[9-5] [11111].
+ kA64Cmp3Rro, // cmp [s1101011001] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] [11111].
+ kA64Cmp3RdT, // cmp [01110001] shift[23-22] imm_12[21-10] rn[9-5] [11111].
+ kA64Csel4rrrc, // csel[s0011010100] rm[20-16] cond[15-12] [00] rn[9-5] rd[4-0].
+ kA64Csinc4rrrc, // csinc [s0011010100] rm[20-16] cond[15-12] [01] rn[9-5] rd[4-0].
+ kA64Csneg4rrrc, // csneg [s1011010100] rm[20-16] cond[15-12] [01] rn[9-5] rd[4-0].
+ kA64Dmb1B, // dmb [11010101000000110011] CRm[11-8] [10111111].
+ kA64Eor3Rrl, // eor [s10100100] N[22] imm_r[21-16] imm_s[15-10] rn[9-5] rd[4-0].
+ kA64Eor4rrro, // eor [s1001010] shift[23-22] [0] rm[20-16] imm_6[15-10] rn[9-5] rd[4-0].
+ kA64Extr4rrrd, // extr[s00100111N0] rm[20-16] imm_s[15-10] rn[9-5] rd[4-0].
+ kA64Fabs2ff, // fabs[000111100s100000110000] rn[9-5] rd[4-0].
+ kA64Fadd3fff, // fadd[000111100s1] rm[20-16] [001010] rn[9-5] rd[4-0].
+ kA64Fcmp1f, // fcmp[000111100s100000001000] rn[9-5] [01000].
+ kA64Fcmp2ff, // fcmp[000111100s1] rm[20-16] [001000] rn[9-5] [00000].
+ kA64Fcvtzs2wf, // fcvtzs [000111100s111000000000] rn[9-5] rd[4-0].
+ kA64Fcvtzs2xf, // fcvtzs [100111100s111000000000] rn[9-5] rd[4-0].
+ kA64Fcvt2Ss, // fcvt [0001111000100010110000] rn[9-5] rd[4-0].
+ kA64Fcvt2sS, // fcvt [0001111001100010010000] rn[9-5] rd[4-0].
+ kA64Fdiv3fff, // fdiv[000111100s1] rm[20-16] [000110] rn[9-5] rd[4-0].
+ kA64Fmov2ff, // fmov[000111100s100000010000] rn[9-5] rd[4-0].
+ kA64Fmov2fI, // fmov[000111100s1] imm_8[20-13] [10000000] rd[4-0].
+ kA64Fmov2sw, // fmov[0001111000100111000000] rn[9-5] rd[4-0].
+ kA64Fmov2Sx, // fmov[1001111001100111000000] rn[9-5] rd[4-0].
+ kA64Fmov2ws, // fmov[0001111001101110000000] rn[9-5] rd[4-0].
+ kA64Fmov2xS, // fmov[1001111001101111000000] rn[9-5] rd[4-0].
+ kA64Fmul3fff, // fmul[000111100s1] rm[20-16] [000010] rn[9-5] rd[4-0].
+ kA64Fneg2ff, // fneg[000111100s100001010000] rn[9-5] rd[4-0].
+ kA64Frintz2ff, // frintz [000111100s100101110000] rn[9-5] rd[4-0].
+ kA64Fsqrt2ff, // fsqrt[000111100s100001110000] rn[9-5] rd[4-0].
+ kA64Fsub3fff, // fsub[000111100s1] rm[20-16] [001110] rn[9-5] rd[4-0].
+ kA64Ldrb3wXd, // ldrb[0011100101] imm_12[21-10] rn[9-5] rt[4-0].
+ kA64Ldrb3wXx, // ldrb[00111000011] rm[20-16] [011] S[12] [10] rn[9-5] rt[4-0].
+ kA64Ldrsb3rXd, // ldrsb[001110011s] imm_12[21-10] rn[9-5] rt[4-0].
+  kA64Ldrsb3rXx, // ldrsb[001110001s1] rm[20-16] [011] S[12] [10] rn[9-5] rt[4-0].
+ kA64Ldrh3wXF, // ldrh[0111100101] imm_12[21-10] rn[9-5] rt[4-0].
+ kA64Ldrh4wXxd, // ldrh[01111000011] rm[20-16] [011] S[12] [10] rn[9-5] rt[4-0].
+ kA64Ldrsh3rXF, // ldrsh[011110011s] imm_12[21-10] rn[9-5] rt[4-0].
+  kA64Ldrsh4rXxd, // ldrsh[011110001s1] rm[20-16] [011] S[12] [10] rn[9-5] rt[4-0].
+ kA64Ldr2fp, // ldr [0s011100] imm_19[23-5] rt[4-0].
+ kA64Ldr2rp, // ldr [0s011000] imm_19[23-5] rt[4-0].
+ kA64Ldr3fXD, // ldr [1s11110100] imm_12[21-10] rn[9-5] rt[4-0].
+  kA64Ldr3rXD, // ldr [1s11100101] imm_12[21-10] rn[9-5] rt[4-0].
+ kA64Ldr4fXxG, // ldr [1s111100011] rm[20-16] [011] S[12] [10] rn[9-5] rt[4-0].
+ kA64Ldr4rXxG, // ldr [1s111000011] rm[20-16] [011] S[12] [10] rn[9-5] rt[4-0].
+ kA64LdrPost3rXd, // ldr [1s111000010] imm_9[20-12] [01] rn[9-5] rt[4-0].
+ kA64Ldp4rrXD, // ldp [s010100101] imm_7[21-15] rt2[14-10] rn[9-5] rt[4-0].
+ kA64LdpPost4rrXD, // ldp [s010100011] imm_7[21-15] rt2[14-10] rn[9-5] rt[4-0].
+ kA64Ldur3fXd, // ldur[1s111100010] imm_9[20-12] [00] rn[9-5] rt[4-0].
+ kA64Ldur3rXd, // ldur[1s111000010] imm_9[20-12] [00] rn[9-5] rt[4-0].
+ kA64Ldxr2rX, // ldxr[1s00100001011111011111] rn[9-5] rt[4-0].
+ kA64Lsl3rrr, // lsl [s0011010110] rm[20-16] [001000] rn[9-5] rd[4-0].
+ kA64Lsr3rrd, // lsr alias of "ubfm arg0, arg1, arg2, #{31/63}".
+ kA64Lsr3rrr, // lsr [s0011010110] rm[20-16] [001001] rn[9-5] rd[4-0].
+  kA64Movk3rdM, // movk [011100101] hw[22-21] imm_16[20-5] rd[4-0].
+  kA64Movn3rdM, // movn [000100101] hw[22-21] imm_16[20-5] rd[4-0].
+  kA64Movz3rdM, // movz [010100101] hw[22-21] imm_16[20-5] rd[4-0].
+  kA64Mov2rr, // mov [00101010000] rm[20-16] [000000] [11111] rd[4-0].
+  kA64Mvn2rr, // mvn [00101010001] rm[20-16] [000000] [11111] rd[4-0].
+ kA64Mul3rrr, // mul [00011011000] rm[20-16] [011111] rn[9-5] rd[4-0].
+ kA64Neg3rro, // neg alias of "sub arg0, rzr, arg1, arg2".
+ kA64Orr3Rrl, // orr [s01100100] N[22] imm_r[21-16] imm_s[15-10] rn[9-5] rd[4-0].
+ kA64Orr4rrro, // orr [s0101010] shift[23-22] [0] rm[20-16] imm_6[15-10] rn[9-5] rd[4-0].
+ kA64Ret, // ret [11010110010111110000001111000000].
+ kA64Rev2rr, // rev [s10110101100000000001x] rn[9-5] rd[4-0].
+ kA64Rev162rr, // rev16[s101101011000000000001] rn[9-5] rd[4-0].
+ kA64Ror3rrr, // ror [s0011010110] rm[20-16] [001011] rn[9-5] rd[4-0].
+ kA64Sbc3rrr, // sbc [s0011010000] rm[20-16] [000000] rn[9-5] rd[4-0].
+ kA64Sbfm4rrdd, // sbfm[0001001100] imm_r[21-16] imm_s[15-10] rn[9-5] rd[4-0].
+ kA64Scvtf2fw, // scvtf [000111100s100010000000] rn[9-5] rd[4-0].
+ kA64Scvtf2fx, // scvtf [100111100s100010000000] rn[9-5] rd[4-0].
+ kA64Sdiv3rrr, // sdiv[s0011010110] rm[20-16] [000011] rn[9-5] rd[4-0].
+ kA64Smaddl4xwwx, // smaddl [10011011001] rm[20-16] [0] ra[14-10] rn[9-5] rd[4-0].
+ kA64Stp4rrXD, // stp [s010100101] imm_7[21-15] rt2[14-10] rn[9-5] rt[4-0].
+ kA64StpPost4rrXD, // stp [s010100010] imm_7[21-15] rt2[14-10] rn[9-5] rt[4-0].
+ kA64StpPre4rrXD, // stp [s010100110] imm_7[21-15] rt2[14-10] rn[9-5] rt[4-0].
+ kA64Str3fXD, // str [1s11110100] imm_12[21-10] rn[9-5] rt[4-0].
+ kA64Str4fXxG, // str [1s111100001] rm[20-16] [011] S[12] [10] rn[9-5] rt[4-0].
+ kA64Str3rXD, // str [1s11100100] imm_12[21-10] rn[9-5] rt[4-0].
+ kA64Str4rXxG, // str [1s111000001] rm[20-16] option[15-13] S[12-12] [10] rn[9-5] rt[4-0].
+ kA64Strb3wXd, // strb[0011100100] imm_12[21-10] rn[9-5] rt[4-0].
+ kA64Strb3wXx, // strb[00111000001] rm[20-16] [011] S[12] [10] rn[9-5] rt[4-0].
+ kA64Strh3wXF, // strh[0111100100] imm_12[21-10] rn[9-5] rt[4-0].
+ kA64Strh4wXxd, // strh[01111000001] rm[20-16] [011] S[12] [10] rn[9-5] rt[4-0].
+ kA64StrPost3rXd, // str [1s111000000] imm_9[20-12] [01] rn[9-5] rt[4-0].
+ kA64Stur3fXd, // stur[1s111100000] imm_9[20-12] [00] rn[9-5] rt[4-0].
+ kA64Stur3rXd, // stur[1s111000000] imm_9[20-12] [00] rn[9-5] rt[4-0].
+ kA64Stxr3wrX, // stxr[11001000000] rs[20-16] [011111] rn[9-5] rt[4-0].
+ kA64Sub4RRdT, // sub [s101000100] imm_12[21-10] rn[9-5] rd[4-0].
+ kA64Sub4rrro, // sub [s1001011001] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] rd[4-0].
+ kA64Subs3rRd, // subs[s111000100] imm_12[21-10] rn[9-5] rd[4-0].
+ kA64Tst3rro, // tst alias of "ands rzr, arg1, arg2, arg3".
+ kA64Ubfm4rrdd, // ubfm[s10100110] N[22] imm_r[21-16] imm_s[15-10] rn[9-5] rd[4-0].
+ kA64Last,
+ kA64NotWide = 0, // Flag used to select the first instruction variant.
+ kA64Wide = 0x1000 // Flag used to select the second instruction variant.
};
/*
- * The following enum defines the list of supported Thumb instructions by the
- * assembler. Their corresponding EncodingMap positions will be defined in
- * Assemble.cc.
+ * The A64 instruction set provides two variants for many instructions. For example, "mov wN, wM"
+ * and "mov xN, xM" or - for floating point instructions - "mov sN, sM" and "mov dN, dM".
+ * It definitely makes sense to exploit these symmetries of the instruction set. We do this via the
+ * WIDE, UNWIDE macros. For opcodes that allow it, the wide variant can be obtained by applying the
+ * WIDE macro to the non-wide opcode. E.g. WIDE(kA64Sub4RRdT).
*/
-enum ArmOpcode {
- kArmFirst = 0,
- kArm16BitData = kArmFirst, // DATA [0] rd[15..0].
- kThumbAdcRR, // adc [0100000101] rm[5..3] rd[2..0].
- kThumbAddRRI3, // add(1) [0001110] imm_3[8..6] rn[5..3] rd[2..0].
- kThumbAddRI8, // add(2) [00110] rd[10..8] imm_8[7..0].
- kThumbAddRRR, // add(3) [0001100] rm[8..6] rn[5..3] rd[2..0].
- kThumbAddRRLH, // add(4) [01000100] H12[01] rm[5..3] rd[2..0].
- kThumbAddRRHL, // add(4) [01001000] H12[10] rm[5..3] rd[2..0].
- kThumbAddRRHH, // add(4) [01001100] H12[11] rm[5..3] rd[2..0].
- kThumbAddPcRel, // add(5) [10100] rd[10..8] imm_8[7..0].
- kThumbAddSpRel, // add(6) [10101] rd[10..8] imm_8[7..0].
- kThumbAddSpI7, // add(7) [101100000] imm_7[6..0].
- kThumbAndRR, // and [0100000000] rm[5..3] rd[2..0].
- kThumbAsrRRI5, // asr(1) [00010] imm_5[10..6] rm[5..3] rd[2..0].
- kThumbAsrRR, // asr(2) [0100000100] rs[5..3] rd[2..0].
- kThumbBCond, // b(1) [1101] cond[11..8] offset_8[7..0].
- kThumbBUncond, // b(2) [11100] offset_11[10..0].
- kThumbBicRR, // bic [0100001110] rm[5..3] rd[2..0].
- kThumbBkpt, // bkpt [10111110] imm_8[7..0].
- kThumbBlx1, // blx(1) [111] H[10] offset_11[10..0].
- kThumbBlx2, // blx(1) [111] H[01] offset_11[10..0].
- kThumbBl1, // blx(1) [111] H[10] offset_11[10..0].
- kThumbBl2, // blx(1) [111] H[11] offset_11[10..0].
- kThumbBlxR, // blx(2) [010001111] rm[6..3] [000].
- kThumbBx, // bx [010001110] H2[6..6] rm[5..3] SBZ[000].
- kThumbCmnRR, // cmn [0100001011] rm[5..3] rd[2..0].
- kThumbCmpRI8, // cmp(1) [00101] rn[10..8] imm_8[7..0].
- kThumbCmpRR, // cmp(2) [0100001010] rm[5..3] rd[2..0].
- kThumbCmpLH, // cmp(3) [01000101] H12[01] rm[5..3] rd[2..0].
- kThumbCmpHL, // cmp(3) [01000110] H12[10] rm[5..3] rd[2..0].
- kThumbCmpHH, // cmp(3) [01000111] H12[11] rm[5..3] rd[2..0].
- kThumbEorRR, // eor [0100000001] rm[5..3] rd[2..0].
- kThumbLdmia, // ldmia [11001] rn[10..8] reglist [7..0].
- kThumbLdrRRI5, // ldr(1) [01101] imm_5[10..6] rn[5..3] rd[2..0].
- kThumbLdrRRR, // ldr(2) [0101100] rm[8..6] rn[5..3] rd[2..0].
- kThumbLdrPcRel, // ldr(3) [01001] rd[10..8] imm_8[7..0].
- kThumbLdrSpRel, // ldr(4) [10011] rd[10..8] imm_8[7..0].
- kThumbLdrbRRI5, // ldrb(1) [01111] imm_5[10..6] rn[5..3] rd[2..0].
- kThumbLdrbRRR, // ldrb(2) [0101110] rm[8..6] rn[5..3] rd[2..0].
- kThumbLdrhRRI5, // ldrh(1) [10001] imm_5[10..6] rn[5..3] rd[2..0].
- kThumbLdrhRRR, // ldrh(2) [0101101] rm[8..6] rn[5..3] rd[2..0].
- kThumbLdrsbRRR, // ldrsb [0101011] rm[8..6] rn[5..3] rd[2..0].
- kThumbLdrshRRR, // ldrsh [0101111] rm[8..6] rn[5..3] rd[2..0].
- kThumbLslRRI5, // lsl(1) [00000] imm_5[10..6] rm[5..3] rd[2..0].
- kThumbLslRR, // lsl(2) [0100000010] rs[5..3] rd[2..0].
- kThumbLsrRRI5, // lsr(1) [00001] imm_5[10..6] rm[5..3] rd[2..0].
- kThumbLsrRR, // lsr(2) [0100000011] rs[5..3] rd[2..0].
- kThumbMovImm, // mov(1) [00100] rd[10..8] imm_8[7..0].
- kThumbMovRR, // mov(2) [0001110000] rn[5..3] rd[2..0].
- kThumbMovRR_H2H, // mov(3) [01000111] H12[11] rm[5..3] rd[2..0].
- kThumbMovRR_H2L, // mov(3) [01000110] H12[01] rm[5..3] rd[2..0].
- kThumbMovRR_L2H, // mov(3) [01000101] H12[10] rm[5..3] rd[2..0].
- kThumbMul, // mul [0100001101] rm[5..3] rd[2..0].
- kThumbMvn, // mvn [0100001111] rm[5..3] rd[2..0].
- kThumbNeg, // neg [0100001001] rm[5..3] rd[2..0].
- kThumbOrr, // orr [0100001100] rm[5..3] rd[2..0].
- kThumbPop, // pop [1011110] r[8..8] rl[7..0].
- kThumbPush, // push [1011010] r[8..8] rl[7..0].
- kThumbRev, // rev [1011101000] rm[5..3] rd[2..0]
- kThumbRevsh, // revsh [1011101011] rm[5..3] rd[2..0]
- kThumbRorRR, // ror [0100000111] rs[5..3] rd[2..0].
- kThumbSbc, // sbc [0100000110] rm[5..3] rd[2..0].
- kThumbStmia, // stmia [11000] rn[10..8] reglist [7.. 0].
- kThumbStrRRI5, // str(1) [01100] imm_5[10..6] rn[5..3] rd[2..0].
- kThumbStrRRR, // str(2) [0101000] rm[8..6] rn[5..3] rd[2..0].
- kThumbStrSpRel, // str(3) [10010] rd[10..8] imm_8[7..0].
- kThumbStrbRRI5, // strb(1) [01110] imm_5[10..6] rn[5..3] rd[2..0].
- kThumbStrbRRR, // strb(2) [0101010] rm[8..6] rn[5..3] rd[2..0].
- kThumbStrhRRI5, // strh(1) [10000] imm_5[10..6] rn[5..3] rd[2..0].
- kThumbStrhRRR, // strh(2) [0101001] rm[8..6] rn[5..3] rd[2..0].
- kThumbSubRRI3, // sub(1) [0001111] imm_3[8..6] rn[5..3] rd[2..0]*/
- kThumbSubRI8, // sub(2) [00111] rd[10..8] imm_8[7..0].
- kThumbSubRRR, // sub(3) [0001101] rm[8..6] rn[5..3] rd[2..0].
- kThumbSubSpI7, // sub(4) [101100001] imm_7[6..0].
- kThumbSwi, // swi [11011111] imm_8[7..0].
- kThumbTst, // tst [0100001000] rm[5..3] rn[2..0].
- kThumb2Vldrs, // vldr low sx [111011011001] rn[19..16] rd[15-12] [1010] imm_8[7..0].
- kThumb2Vldrd, // vldr low dx [111011011001] rn[19..16] rd[15-12] [1011] imm_8[7..0].
- kThumb2Vmuls, // vmul vd, vn, vm [111011100010] rn[19..16] rd[15-12] [10100000] rm[3..0].
- kThumb2Vmuld, // vmul vd, vn, vm [111011100010] rn[19..16] rd[15-12] [10110000] rm[3..0].
- kThumb2Vstrs, // vstr low sx [111011011000] rn[19..16] rd[15-12] [1010] imm_8[7..0].
- kThumb2Vstrd, // vstr low dx [111011011000] rn[19..16] rd[15-12] [1011] imm_8[7..0].
- kThumb2Vsubs, // vsub vd, vn, vm [111011100011] rn[19..16] rd[15-12] [10100040] rm[3..0].
- kThumb2Vsubd, // vsub vd, vn, vm [111011100011] rn[19..16] rd[15-12] [10110040] rm[3..0].
- kThumb2Vadds, // vadd vd, vn, vm [111011100011] rn[19..16] rd[15-12] [10100000] rm[3..0].
- kThumb2Vaddd, // vadd vd, vn, vm [111011100011] rn[19..16] rd[15-12] [10110000] rm[3..0].
- kThumb2Vdivs, // vdiv vd, vn, vm [111011101000] rn[19..16] rd[15-12] [10100000] rm[3..0].
- kThumb2Vdivd, // vdiv vd, vn, vm [111011101000] rn[19..16] rd[15-12] [10110000] rm[3..0].
- kThumb2VmlaF64, // vmla.F64 vd, vn, vm [111011100000] vn[19..16] vd[15..12] [10110000] vm[3..0].
- kThumb2VcvtIF, // vcvt.F32.S32 vd, vm [1110111010111000] vd[15..12] [10101100] vm[3..0].
- kThumb2VcvtFI, // vcvt.S32.F32 vd, vm [1110111010111101] vd[15..12] [10101100] vm[3..0].
- kThumb2VcvtDI, // vcvt.S32.F32 vd, vm [1110111010111101] vd[15..12] [10111100] vm[3..0].
- kThumb2VcvtFd, // vcvt.F64.F32 vd, vm [1110111010110111] vd[15..12] [10101100] vm[3..0].
- kThumb2VcvtDF, // vcvt.F32.F64 vd, vm [1110111010110111] vd[15..12] [10111100] vm[3..0].
- kThumb2VcvtF64S32, // vcvt.F64.S32 vd, vm [1110111010111000] vd[15..12] [10111100] vm[3..0].
- kThumb2VcvtF64U32, // vcvt.F64.U32 vd, vm [1110111010111000] vd[15..12] [10110100] vm[3..0].
- kThumb2Vsqrts, // vsqrt.f32 vd, vm [1110111010110001] vd[15..12] [10101100] vm[3..0].
- kThumb2Vsqrtd, // vsqrt.f64 vd, vm [1110111010110001] vd[15..12] [10111100] vm[3..0].
- kThumb2MovI8M, // mov(T2) rd, #<const> [11110] i [00001001111] imm3 rd[11..8] imm8.
- kThumb2MovImm16, // mov(T3) rd, #<const> [11110] i [0010100] imm4 [0] imm3 rd[11..8] imm8.
- kThumb2StrRRI12, // str(Imm,T3) rd,[rn,#imm12] [111110001100] rn[19..16] rt[15..12] imm12[11..0].
- kThumb2LdrRRI12, // ldr(Imm,T3) rd,[rn,#imm12] [111110001101] rn[19..16] rt[15..12] imm12[11..0].
- kThumb2StrRRI8Predec, // str(Imm,T4) rd,[rn,#-imm8] [111110000100] rn[19..16] rt[15..12] [1100] imm[7..0].
- kThumb2LdrRRI8Predec, // ldr(Imm,T4) rd,[rn,#-imm8] [111110000101] rn[19..16] rt[15..12] [1100] imm[7..0].
- kThumb2Cbnz, // cbnz rd,<label> [101110] i [1] imm5[7..3] rn[2..0].
- kThumb2Cbz, // cbz rd,<label> [101100] i [1] imm5[7..3] rn[2..0].
- kThumb2AddRRI12, // add rd, rn, #imm12 [11110] i [100000] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
- kThumb2MovRR, // mov rd, rm [11101010010011110000] rd[11..8] [0000] rm[3..0].
- kThumb2Vmovs, // vmov.f32 vd, vm [111011101] D [110000] vd[15..12] [101001] M [0] vm[3..0].
- kThumb2Vmovd, // vmov.f64 vd, vm [111011101] D [110000] vd[15..12] [101101] M [0] vm[3..0].
- kThumb2Ldmia, // ldmia [111010001001] rn[19..16] mask[15..0].
- kThumb2Stmia, // stmia [111010001000] rn[19..16] mask[15..0].
- kThumb2AddRRR, // add [111010110000] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
- kThumb2SubRRR, // sub [111010111010] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
- kThumb2SbcRRR, // sbc [111010110110] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
- kThumb2CmpRR, // cmp [111010111011] rn[19..16] [0000] [1111] [0000] rm[3..0].
- kThumb2SubRRI12, // sub rd, rn, #imm12 [11110] i [101010] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
- kThumb2MvnI8M, // mvn(T2) rd, #<const> [11110] i [00011011110] imm3 rd[11..8] imm8.
- kThumb2Sel, // sel rd, rn, rm [111110101010] rn[19-16] rd[11-8] rm[3-0].
- kThumb2Ubfx, // ubfx rd,rn,#lsb,#width [111100111100] rn[19..16] [0] imm3[14-12] rd[11-8] w[4-0].
- kThumb2Sbfx, // sbfx rd,rn,#lsb,#width [111100110100] rn[19..16] [0] imm3[14-12] rd[11-8] w[4-0].
- kThumb2LdrRRR, // ldr rt,[rn,rm,LSL #imm] [111110000101] rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0].
- kThumb2LdrhRRR, // ldrh rt,[rn,rm,LSL #imm] [111110000011] rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0].
- kThumb2LdrshRRR, // ldrsh rt,[rn,rm,LSL #imm] [111110010011] rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0].
- kThumb2LdrbRRR, // ldrb rt,[rn,rm,LSL #imm] [111110000001] rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0].
- kThumb2LdrsbRRR, // ldrsb rt,[rn,rm,LSL #imm] [111110010001] rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0].
- kThumb2StrRRR, // str rt,[rn,rm,LSL #imm] [111110000100] rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0].
- kThumb2StrhRRR, // strh rt,[rn,rm,LSL #imm] [111110000010] rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0].
- kThumb2StrbRRR, // strb rt,[rn,rm,LSL #imm] [111110000000] rn[19-16] rt[15-12] [000000] imm[5-4] rm[3-0].
- kThumb2LdrhRRI12, // ldrh rt,[rn,#imm12] [111110001011] rt[15..12] rn[19..16] imm12[11..0].
- kThumb2LdrshRRI12, // ldrsh rt,[rn,#imm12] [111110011011] rt[15..12] rn[19..16] imm12[11..0].
- kThumb2LdrbRRI12, // ldrb rt,[rn,#imm12] [111110001001] rt[15..12] rn[19..16] imm12[11..0].
- kThumb2LdrsbRRI12, // ldrsb rt,[rn,#imm12] [111110011001] rt[15..12] rn[19..16] imm12[11..0].
- kThumb2StrhRRI12, // strh rt,[rn,#imm12] [111110001010] rt[15..12] rn[19..16] imm12[11..0].
- kThumb2StrbRRI12, // strb rt,[rn,#imm12] [111110001000] rt[15..12] rn[19..16] imm12[11..0].
- kThumb2Pop, // pop [1110100010111101] list[15-0].
- kThumb2Push, // push [1110100100101101] list[15-0].
- kThumb2CmpRI8M, // cmp rn, #<const> [11110] i [011011] rn[19-16] [0] imm3 [1111] imm8[7..0].
- kThumb2CmnRI8M, // cmn rn, #<const> [11110] i [010001] rn[19-16] [0] imm3 [1111] imm8[7..0].
- kThumb2AdcRRR, // adc [111010110101] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
- kThumb2AndRRR, // and [111010100000] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
- kThumb2BicRRR, // bic [111010100010] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
- kThumb2CmnRR, // cmn [111010110001] rn[19..16] [0000] [1111] [0000] rm[3..0].
- kThumb2EorRRR, // eor [111010101000] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
- kThumb2MulRRR, // mul [111110110000] rn[19..16] [1111] rd[11..8] [0000] rm[3..0].
- kThumb2SdivRRR, // sdiv [111110111001] rn[19..16] [1111] rd[11..8] [1111] rm[3..0].
- kThumb2UdivRRR, // udiv [111110111011] rn[19..16] [1111] rd[11..8] [1111] rm[3..0].
- kThumb2MnvRR, // mvn [11101010011011110] rd[11-8] [0000] rm[3..0].
- kThumb2RsubRRI8M, // rsb rd, rn, #<const> [11110] i [011101] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
- kThumb2NegRR, // actually rsub rd, rn, #0.
- kThumb2OrrRRR, // orr [111010100100] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
- kThumb2TstRR, // tst [111010100001] rn[19..16] [0000] [1111] [0000] rm[3..0].
- kThumb2LslRRR, // lsl [111110100000] rn[19..16] [1111] rd[11..8] [0000] rm[3..0].
- kThumb2LsrRRR, // lsr [111110100010] rn[19..16] [1111] rd[11..8] [0000] rm[3..0].
- kThumb2AsrRRR, // asr [111110100100] rn[19..16] [1111] rd[11..8] [0000] rm[3..0].
- kThumb2RorRRR, // ror [111110100110] rn[19..16] [1111] rd[11..8] [0000] rm[3..0].
- kThumb2LslRRI5, // lsl [11101010010011110] imm[14..12] rd[11..8] [00] rm[3..0].
- kThumb2LsrRRI5, // lsr [11101010010011110] imm[14..12] rd[11..8] [01] rm[3..0].
- kThumb2AsrRRI5, // asr [11101010010011110] imm[14..12] rd[11..8] [10] rm[3..0].
- kThumb2RorRRI5, // ror [11101010010011110] imm[14..12] rd[11..8] [11] rm[3..0].
- kThumb2BicRRI8M, // bic rd, rn, #<const> [11110] i [000010] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
- kThumb2AndRRI8M, // and rd, rn, #<const> [11110] i [000000] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
- kThumb2OrrRRI8M, // orr rd, rn, #<const> [11110] i [000100] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
- kThumb2EorRRI8M, // eor rd, rn, #<const> [11110] i [001000] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
- kThumb2AddRRI8M, // add rd, rn, #<const> [11110] i [010001] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
- kThumb2AdcRRI8M, // adc rd, rn, #<const> [11110] i [010101] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
- kThumb2SubRRI8M, // sub rd, rn, #<const> [11110] i [011011] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
- kThumb2SbcRRI8M, // sbc rd, rn, #<const> [11110] i [010111] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
- kThumb2RevRR, // rev [111110101001] rm[19..16] [1111] rd[11..8] 1000 rm[3..0].
- kThumb2RevshRR, // revsh [111110101001] rm[19..16] [1111] rd[11..8] 1011 rm[3..0].
- kThumb2It, // it [10111111] firstcond[7-4] mask[3-0].
- kThumb2Fmstat, // fmstat [11101110111100011111101000010000].
- kThumb2Vcmpd, // vcmp [111011101] D [11011] rd[15-12] [1011] E [1] M [0] rm[3-0].
- kThumb2Vcmps, // vcmp [111011101] D [11010] rd[15-12] [1011] E [1] M [0] rm[3-0].
- kThumb2LdrPcRel12, // ldr rd,[pc,#imm12] [1111100011011111] rt[15-12] imm12[11-0].
- kThumb2BCond, // b<c> [1110] S cond[25-22] imm6[21-16] [10] J1 [0] J2 imm11[10..0].
- kThumb2Fmrs, // vmov [111011100000] vn[19-16] rt[15-12] [1010] N [0010000].
- kThumb2Fmsr, // vmov [111011100001] vn[19-16] rt[15-12] [1010] N [0010000].
- kThumb2Fmrrd, // vmov [111011000100] rt2[19-16] rt[15-12] [101100] M [1] vm[3-0].
- kThumb2Fmdrr, // vmov [111011000101] rt2[19-16] rt[15-12] [101100] M [1] vm[3-0].
- kThumb2Vabsd, // vabs.f64 [111011101] D [110000] rd[15-12] [1011110] M [0] vm[3-0].
- kThumb2Vabss, // vabs.f32 [111011101] D [110000] rd[15-12] [1010110] M [0] vm[3-0].
- kThumb2Vnegd, // vneg.f64 [111011101] D [110000] rd[15-12] [1011110] M [0] vm[3-0].
- kThumb2Vnegs, // vneg.f32 [111011101] D [110000] rd[15-12] [1010110] M [0] vm[3-0].
- kThumb2Vmovs_IMM8, // vmov.f32 [111011101] D [11] imm4h[19-16] vd[15-12] [10100000] imm4l[3-0].
- kThumb2Vmovd_IMM8, // vmov.f64 [111011101] D [11] imm4h[19-16] vd[15-12] [10110000] imm4l[3-0].
- kThumb2Mla, // mla [111110110000] rn[19-16] ra[15-12] rd[7-4] [0000] rm[3-0].
- kThumb2Umull, // umull [111110111010] rn[19-16], rdlo[15-12] rdhi[11-8] [0000] rm[3-0].
- kThumb2Ldrex, // ldrex [111010000101] rn[19-16] rt[15-12] [1111] imm8[7-0].
- kThumb2Ldrexd, // ldrexd [111010001101] rn[19-16] rt[15-12] rt2[11-8] [11111111].
- kThumb2Strex, // strex [111010000100] rn[19-16] rt[15-12] rd[11-8] imm8[7-0].
- kThumb2Strexd, // strexd [111010001100] rn[19-16] rt[15-12] rt2[11-8] [0111] Rd[3-0].
- kThumb2Clrex, // clrex [11110011101111111000111100101111].
- kThumb2Bfi, // bfi [111100110110] rn[19-16] [0] imm3[14-12] rd[11-8] imm2[7-6] [0] msb[4-0].
- kThumb2Bfc, // bfc [11110011011011110] [0] imm3[14-12] rd[11-8] imm2[7-6] [0] msb[4-0].
- kThumb2Dmb, // dmb [1111001110111111100011110101] option[3-0].
- kThumb2LdrPcReln12, // ldr rd,[pc,-#imm12] [1111100011011111] rt[15-12] imm12[11-0].
- kThumb2Stm, // stm <list> [111010010000] rn[19-16] 000 rl[12-0].
- kThumbUndefined, // undefined [11011110xxxxxxxx].
- kThumb2VPopCS, // vpop <list of callee-save fp singles (s16+)>.
- kThumb2VPushCS, // vpush <list of callee-save fp singles (s16+)>.
- kThumb2Vldms, // vldms rd, <list>.
- kThumb2Vstms, // vstms rd, <list>.
- kThumb2BUncond, // b <label>.
- kThumb2MovImm16H, // similar to kThumb2MovImm16, but target high hw.
- kThumb2AddPCR, // Thumb2 2-operand add with hard-coded PC target.
- kThumb2Adr, // Special purpose encoding of ADR for switch tables.
- kThumb2MovImm16LST, // Special purpose version for switch table use.
- kThumb2MovImm16HST, // Special purpose version for switch table use.
- kThumb2LdmiaWB, // ldmia [111010011001] rn[19..16] mask[15..0].
- kThumb2OrrRRRs, // orrs [111010100101] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
- kThumb2Push1, // t3 encoding of push.
- kThumb2Pop1, // t3 encoding of pop.
- kThumb2RsubRRR, // rsb [111010111101] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
- kThumb2Smull, // smull [111110111000] rn[19-16], rdlo[15-12] rdhi[11-8] [0000] rm[3-0].
- kThumb2LdrdPcRel8, // ldrd rt, rt2, pc +-/1024.
- kThumb2LdrdI8, // ldrd rt, rt2, [rn +-/1024].
- kThumb2StrdI8, // strd rt, rt2, [rn +-/1024].
- kArmLast,
-};
+
+// Return the wide and no-wide variants of the given opcode.
+#define WIDE(op) ((ArmOpcode)((op) | kA64Wide))
+#define UNWIDE(op) ((ArmOpcode)((op) & ~kA64Wide))
+
+// Whether the given opcode is wide.
+#define IS_WIDE(op) (((op) & kA64Wide) != 0)
+
+/*
+ * Floating point variants. These are just aliases of the macros above, which we use for floating
+ * point instructions purely for readability.
+ * TODO(Arm64): should we remove these and use the original macros?
+ */
+#define FWIDE WIDE
+#define FUNWIDE UNWIDE
+#define IS_FWIDE IS_WIDE
+
+#define OP_KIND_UNWIDE(opcode) (opcode)
+#define OP_KIND_IS_WIDE(opcode) (false)
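+
+// Illustrative usage (a sketch, not part of the original change): the wide bit can be toggled
+// and tested on any opcode, e.g. kA64Add4rrro declared below.
+//   ArmOpcode op64 = WIDE(kA64Add4rrro);    // 64-bit ("x" register) variant.
+//   DCHECK(IS_WIDE(op64));
+//   DCHECK_EQ(UNWIDE(op64), kA64Add4rrro);  // Back to the 32-bit variant.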
enum ArmOpDmbOptions {
kSY = 0xf,
@@ -551,40 +385,63 @@
// Instruction assembly field_loc kind.
enum ArmEncodingKind {
- kFmtUnused, // Unused field and marks end of formats.
+ // All the formats below are encoded in the same way (as a kFmtBitBlt).
+ // These are grouped together for fast handling (e.g. "if (LIKELY(fmt <= kFmtBitBlt)) ...").
+ kFmtRegW = 0, // Word register (w) or wzr.
+ kFmtRegX, // Extended word register (x) or xzr.
+ kFmtRegR, // Register with same width as the instruction or zr.
+ kFmtRegWOrSp, // Word register (w) or wsp.
+ kFmtRegXOrSp, // Extended word register (x) or sp.
+ kFmtRegROrSp, // Register with same width as the instruction or sp.
+ kFmtRegS, // Single FP reg.
+ kFmtRegD, // Double FP reg.
+ kFmtRegF, // Single/double FP reg depending on the instruction width.
kFmtBitBlt, // Bit string using end/start.
- kFmtDfp, // Double FP reg.
- kFmtSfp, // Single FP reg.
- kFmtModImm, // Shifted 8-bit immed using [26,14..12,7..0].
- kFmtImm16, // Zero-extended immed using [26,19..16,14..12,7..0].
- kFmtImm6, // Encoded branch target using [9,7..3]0.
- kFmtImm12, // Zero-extended immediate using [26,14..12,7..0].
- kFmtShift, // Shift descriptor, [14..12,7..4].
- kFmtLsb, // least significant bit using [14..12][7..6].
- kFmtBWidth, // bit-field width, encoded as width-1.
- kFmtShift5, // Shift count, [14..12,7..6].
- kFmtBrOffset, // Sign-extended [26,11,13,21-16,10-0]:0.
- kFmtFPImm, // Encoded floating point immediate.
- kFmtOff24, // 24-bit Thumb2 unconditional branch encoding.
+
+ // Less likely formats.
+ kFmtUnused, // Unused field and marks end of formats.
+ kFmtImm21, // Sign-extended immediate using [23..5,30..29].
+ kFmtShift, // Register shift, 9-bit at [23..21, 15..10].
+ kFmtExtend, // Register extend, 9-bit at [23..21, 15..10].
kFmtSkip, // Unused field, but continue to next.
};
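+
+// Sketch of the fast path this grouping enables (assumed decoder shape, not code from this
+// patch): every register format compares <= kFmtBitBlt, so the common case needs one check:
+//   if (LIKELY(kind <= kFmtBitBlt)) {
+//     // Insert the operand as a plain bit slice described by end/start.
+//   } else {
+//     // Handle kFmtImm21, kFmtShift, kFmtExtend, kFmtSkip individually.
+//   }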
-// Struct used to define the snippet positions for each Thumb opcode.
+// Struct used to define the snippet positions for each A64 opcode.
struct ArmEncodingMap {
- uint32_t skeleton;
+ uint32_t wskeleton;
+ uint32_t xskeleton;
struct {
ArmEncodingKind kind;
- int end; // end for kFmtBitBlt, 1-bit slice end for FP regs.
- int start; // start for kFmtBitBlt, 4-bit slice end for FP regs.
+ int end; // end for kFmtBitBlt, 1-bit slice end for FP regs.
+ int start; // start for kFmtBitBlt, 4-bit slice end for FP regs.
} field_loc[4];
- ArmOpcode opcode;
+ ArmOpcode opcode; // can be WIDE()-ned to indicate it has a wide variant.
uint64_t flags;
const char* name;
const char* fmt;
- int size; // Note: size is in bytes.
+ int size; // Note: size is in bytes.
FixupKind fixup;
};
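+
+// Assumed lookup sketch (not from this patch): the assembler would select the skeleton that
+// matches the width recorded in the LIR opcode:
+//   const ArmEncodingMap* entry = &Arm64Mir2Lir::EncodingMap[UNWIDE(opcode)];
+//   uint32_t bits = IS_WIDE(opcode) ? entry->xskeleton : entry->wskeleton;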
+#if 0
+// TODO(Arm64): try the following alternative, which fits exactly in one cache line (64 bytes).
+struct ArmEncodingMap {
+ uint32_t wskeleton;
+ uint32_t xskeleton;
+ uint64_t flags;
+ const char* name;
+ const char* fmt;
+ struct {
+ uint8_t kind;
+ int8_t end; // end for kFmtBitBlt, 1-bit slice end for FP regs.
+ int8_t start; // start for kFmtBitBlt, 4-bit slice end for FP regs.
+ } field_loc[4];
+ uint32_t fixup;
+ uint32_t opcode; // can be WIDE()-ned to indicate it has a wide variant.
+ uint32_t padding[3];
+};
+#endif
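+
+// If the alternative layout were enabled, its one-cache-line claim could be checked at compile
+// time (sketch, assuming a 64-bit host where pointers are 8 bytes):
+//   static_assert(sizeof(ArmEncodingMap) == 64, "expected to fit one cache line");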
+
} // namespace art
#endif // ART_COMPILER_DEX_QUICK_ARM64_ARM64_LIR_H_
diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc
index e79ebad..8accd0a 100644
--- a/compiler/dex/quick/arm64/assemble_arm64.cc
+++ b/compiler/dex/quick/arm64/assemble_arm64.cc
@@ -20,26 +20,47 @@
namespace art {
+// The macros below are exclusively used in the encoding map.
+
+// Most generic way of providing two variants for one instruction.
+#define CUSTOM_VARIANTS(variant1, variant2) variant1, variant2
+
+// Used for instructions which do not have a wide variant.
+#define NO_VARIANTS(variant) \
+ CUSTOM_VARIANTS(variant, 0)
+
+// Used for instructions which have a wide variant with the sf bit set to 1.
+#define SF_VARIANTS(sf0_skeleton) \
+ CUSTOM_VARIANTS(sf0_skeleton, (sf0_skeleton | 0x80000000))
+
+// Used for instructions which have a wide variant with the size bits set to either x0 or x1.
+#define SIZE_VARIANTS(sizex0_skeleton) \
+ CUSTOM_VARIANTS(sizex0_skeleton, (sizex0_skeleton | 0x40000000))
+
+// Used for instructions which have a wide variant with the sf and n bits set to 1.
+#define SF_N_VARIANTS(sf0_n0_skeleton) \
+ CUSTOM_VARIANTS(sf0_n0_skeleton, (sf0_n0_skeleton | 0x80400000))
+
+// Used for FP instructions which have single- and double-precision variants, with the type bits
+// set to either 00 or 01.
+#define FLOAT_VARIANTS(type00_skeleton) \
+ CUSTOM_VARIANTS(type00_skeleton, (type00_skeleton | 0x00400000))
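+
+// Expansion example (illustrative): SF_VARIANTS(0x11000000), as used by the "add" entry below,
+// yields the pair
+//   0x11000000 /* 32-bit add, sf=0 */, 0x91000000 /* 64-bit add, sf=1 */
+// which populate wskeleton and xskeleton respectively.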
+
/*
* opcode: ArmOpcode enum
- * skeleton: pre-designated bit-pattern for this opcode
- * k0: key to applying ds/de
- * ds: dest start bit position
- * de: dest end bit position
- * k1: key to applying s1s/s1e
- * s1s: src1 start bit position
- * s1e: src1 end bit position
- * k2: key to applying s2s/s2e
- * s2s: src2 start bit position
- * s2e: src2 end bit position
- * operands: number of operands (for sanity check purposes)
+ * variants: instruction skeletons supplied via CUSTOM_VARIANTS or derived macros.
+ * a{n}k: key to applying argument {n} \
+ * a{n}s: argument {n} start bit position | n = 0, 1, 2, 3
+ * a{n}e: argument {n} end bit position /
+ * flags: instruction attributes (used in optimization)
* name: mnemonic name
* fmt: for pretty-printing
+ * fixup: used for second-pass fixes (e.g. address fixups in branch instructions).
*/
-#define ENCODING_MAP(opcode, skeleton, k0, ds, de, k1, s1s, s1e, k2, s2s, s2e, \
- k3, k3s, k3e, flags, name, fmt, size, fixup) \
- {skeleton, {{k0, ds, de}, {k1, s1s, s1e}, {k2, s2s, s2e}, \
- {k3, k3s, k3e}}, opcode, flags, name, fmt, size, fixup}
+#define ENCODING_MAP(opcode, variants, a0k, a0s, a0e, a1k, a1s, a1e, a2k, a2s, a2e, \
+ a3k, a3s, a3e, flags, name, fmt, fixup) \
+ {variants, {{a0k, a0s, a0e}, {a1k, a1s, a1e}, {a2k, a2s, a2e}, \
+ {a3k, a3s, a3e}}, opcode, flags, name, fmt, 4, fixup}
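+
+// Expansion sketch (field values taken from the "adc" entry below): the call
+//   ENCODING_MAP(WIDE(kA64Adc3rrr), SF_VARIANTS(0x1a000000), ...)
+// produces an initializer roughly of the form
+//   {0x1a000000, 0x9a000000, {{kFmtRegR, 4, 0}, {kFmtRegR, 9, 5}, {kFmtRegR, 20, 16},
+//    {kFmtUnused, -1, -1}}, WIDE(kA64Adc3rrr), IS_TERTIARY_OP | REG_DEF0_USE1,
+//    "adc", "!0r, !1r, !2r", 4, kFixupNone}
+// Note that the size field is hard-coded to 4: every A64 instruction is 4 bytes.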
/* Instruction dump string format keys: !pf, where "!" is the start
* of the key, "p" is which numeric operand to use and "f" is the
@@ -52,989 +73,475 @@
* 3 -> operands[3] (extra)
*
* [f]ormats:
- * h -> 4-digit hex
* d -> decimal
+ * D -> decimal*4 or decimal*8 depending on the instruction width
* E -> decimal*4
* F -> decimal*2
- * c -> branch condition (beq, bne, etc.)
+ * G -> ", lsl #2" or ", lsl #3" depending on the instruction width
+ * c -> branch condition (eq, ne, etc.)
* t -> pc-relative target
- * u -> 1st half of bl[x] target
- * v -> 2nd half of bl[x] target
- * R -> register list
+ * p -> pc-relative address
* s -> single precision floating point register
* S -> double precision floating point register
- * m -> Thumb2 modified immediate
- * n -> complemented Thumb2 modified immediate
- * M -> Thumb2 16-bit zero-extended immediate
- * b -> 4-digit binary
+ * f -> single or double precision register (depending on instruction width)
+ * I -> 8-bit immediate floating point number
+ * l -> logical immediate
+ * M -> 16-bit shift expression ("" or ", lsl #16" or ", lsl #32"...)
 * B -> dmb option string (sy, st, ish, ishst, nsh, nshst)
* H -> operand shift
- * C -> core register name
- * P -> fp cs register list (base of s16)
- * Q -> fp cs register list (base of s0)
+ * T -> register shift (either ", lsl #0" or ", lsl #12")
+ * e -> register extend (e.g. uxtb #1)
+ * o -> register shift (e.g. lsl #1) for Word registers
+ * w -> word (32-bit) register wn, or wzr
+ * W -> word (32-bit) register wn, or wsp
+ * x -> extended (64-bit) register xn, or xzr
+ * X -> extended (64-bit) register xn, or sp
+ * r -> register with same width as instruction, r31 -> wzr, xzr
+ * R -> register with same width as instruction, r31 -> wsp, sp
*
* [!] escape. To insert "!", use "!!"
*/
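+
+// Pretty-printing sketch (assumed operand values, not from this patch): for the "add" entry
+// below, fmt "!0R, !1R, #!2d!3T" with operands (x0, sp, 42) and a shift operand selecting the
+// 12-bit shift would render roughly as
+//   add x0, sp, #42, lsl #12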
-/* NOTE: must be kept in sync with enum ArmOpcode from LIR.h */
-const ArmEncodingMap Arm64Mir2Lir::EncodingMap[kArmLast] = {
- ENCODING_MAP(kArm16BitData, 0x0000,
- kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_UNARY_OP, "data", "0x!0h(!0d)", 2, kFixupNone),
- ENCODING_MAP(kThumbAdcRR, 0x4140,
- kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1,
- IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES | USES_CCODES,
- "adcs", "!0C, !1C", 2, kFixupNone),
- ENCODING_MAP(kThumbAddRRI3, 0x1c00,
- kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
- kFmtUnused, -1, -1,
- IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
- "adds", "!0C, !1C, #!2d", 2, kFixupNone),
- ENCODING_MAP(kThumbAddRI8, 0x3000,
- kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1,
- IS_BINARY_OP | REG_DEF0_USE0 | SETS_CCODES,
- "adds", "!0C, !0C, #!1d", 2, kFixupNone),
- ENCODING_MAP(kThumbAddRRR, 0x1800,
- kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
- kFmtUnused, -1, -1,
- IS_TERTIARY_OP | REG_DEF0_USE12 | SETS_CCODES,
- "adds", "!0C, !1C, !2C", 2, kFixupNone),
- ENCODING_MAP(kThumbAddRRLH, 0x4440,
- kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE01,
- "add", "!0C, !1C", 2, kFixupNone),
- ENCODING_MAP(kThumbAddRRHL, 0x4480,
- kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE01,
- "add", "!0C, !1C", 2, kFixupNone),
- ENCODING_MAP(kThumbAddRRHH, 0x44c0,
- kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE01,
- "add", "!0C, !1C", 2, kFixupNone),
- ENCODING_MAP(kThumbAddPcRel, 0xa000,
- kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | IS_BRANCH | NEEDS_FIXUP,
- "add", "!0C, pc, #!1E", 2, kFixupLoad),
- ENCODING_MAP(kThumbAddSpRel, 0xa800,
- kFmtBitBlt, 10, 8, kFmtSkip, -1, -1, kFmtBitBlt, 7, 0,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF_SP | REG_USE_SP,
- "add", "!0C, sp, #!2E", 2, kFixupNone),
- ENCODING_MAP(kThumbAddSpI7, 0xb000,
- kFmtBitBlt, 6, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_UNARY_OP | REG_DEF_SP | REG_USE_SP,
- "add", "sp, #!0d*4", 2, kFixupNone),
- ENCODING_MAP(kThumbAndRR, 0x4000,
- kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1,
- IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
- "ands", "!0C, !1C", 2, kFixupNone),
- ENCODING_MAP(kThumbAsrRRI5, 0x1000,
- kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
- kFmtUnused, -1, -1,
- IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
- "asrs", "!0C, !1C, #!2d", 2, kFixupNone),
- ENCODING_MAP(kThumbAsrRR, 0x4100,
- kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1,
- IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
- "asrs", "!0C, !1C", 2, kFixupNone),
- ENCODING_MAP(kThumbBCond, 0xd000,
- kFmtBitBlt, 7, 0, kFmtBitBlt, 11, 8, kFmtUnused, -1, -1,
+/* NOTE: must be kept in sync with enum ArmOpcode from arm64_lir.h */
+const ArmEncodingMap Arm64Mir2Lir::EncodingMap[kA64Last] = {
+ ENCODING_MAP(WIDE(kA64Adc3rrr), SF_VARIANTS(0x1a000000),
+ kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
+ "adc", "!0r, !1r, !2r", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Add4RRdT), SF_VARIANTS(0x11000000),
+ kFmtRegROrSp, 4, 0, kFmtRegROrSp, 9, 5, kFmtBitBlt, 21, 10,
+ kFmtBitBlt, 23, 22, IS_QUAD_OP | REG_DEF0_USE1,
+ "add", "!0R, !1R, #!2d!3T", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Add4rrro), SF_VARIANTS(0x0b000000),
+ kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+ kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE1,
+ "add", "!0r, !1r, !2r!3o", kFixupNone),
+ // Note: adr is binary, but declared as tertiary. The third argument is used during fixup and
+ // carries the information needed to identify the adr label.
+ ENCODING_MAP(kA64Adr2xd, NO_VARIANTS(0x10000000),
+ kFmtRegX, 4, 0, kFmtImm21, -1, -1, kFmtUnused, -1, -1,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | NEEDS_FIXUP,
+ "adr", "!0x, #!1d", kFixupAdr),
+ ENCODING_MAP(WIDE(kA64And3Rrl), SF_VARIANTS(0x12000000),
+ kFmtRegROrSp, 4, 0, kFmtRegR, 9, 5, kFmtBitBlt, 22, 10,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
+ "and", "!0R, !1r, #!2l", kFixupNone),
+ ENCODING_MAP(WIDE(kA64And4rrro), SF_VARIANTS(0x0a000000),
+ kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+ kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
+ "and", "!0r, !1r, !2r!3o", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Asr3rrd), CUSTOM_VARIANTS(0x13007c00, 0x9340fc00),
+ kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtBitBlt, 21, 16,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
+ "asr", "!0r, !1r, #!2d", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Asr3rrr), SF_VARIANTS(0x1ac02800),
+ kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+ "asr", "!0r, !1r, !2r", kFixupNone),
+ ENCODING_MAP(kA64B2ct, NO_VARIANTS(0x54000000),
+ kFmtBitBlt, 3, 0, kFmtBitBlt, 23, 5, kFmtUnused, -1, -1,
kFmtUnused, -1, -1, IS_BINARY_OP | IS_BRANCH | USES_CCODES |
- NEEDS_FIXUP, "b!1c", "!0t", 2, kFixupCondBranch),
- ENCODING_MAP(kThumbBUncond, 0xe000,
- kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | NEEDS_FIXUP,
- "b", "!0t", 2, kFixupT1Branch),
- ENCODING_MAP(kThumbBicRR, 0x4380,
- kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1,
- IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
- "bics", "!0C, !1C", 2, kFixupNone),
- ENCODING_MAP(kThumbBkpt, 0xbe00,
- kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH,
- "bkpt", "!0d", 2, kFixupNone),
- ENCODING_MAP(kThumbBlx1, 0xf000,
- kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_BINARY_OP | IS_BRANCH | REG_DEF_LR |
- NEEDS_FIXUP, "blx_1", "!0u", 2, kFixupBlx1),
- ENCODING_MAP(kThumbBlx2, 0xe800,
- kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_BINARY_OP | IS_BRANCH | REG_DEF_LR |
- NEEDS_FIXUP, "blx_2", "!0v", 2, kFixupLabel),
- ENCODING_MAP(kThumbBl1, 0xf000,
- kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_DEF_LR | NEEDS_FIXUP,
- "bl_1", "!0u", 2, kFixupBl1),
- ENCODING_MAP(kThumbBl2, 0xf800,
- kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_DEF_LR | NEEDS_FIXUP,
- "bl_2", "!0v", 2, kFixupLabel),
- ENCODING_MAP(kThumbBlxR, 0x4780,
- kFmtBitBlt, 6, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+ NEEDS_FIXUP, "b.!0c", "!1t", kFixupCondBranch),
+ ENCODING_MAP(kA64Blr1x, NO_VARIANTS(0xd63f0000),
+ kFmtRegX, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
kFmtUnused, -1, -1,
IS_UNARY_OP | REG_USE0 | IS_BRANCH | REG_DEF_LR,
- "blx", "!0C", 2, kFixupNone),
- ENCODING_MAP(kThumbBx, 0x4700,
- kFmtBitBlt, 6, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+ "blr", "!0x", kFixupNone),
+ ENCODING_MAP(kA64Br1x, NO_VARIANTS(0xd61f0000),
+ kFmtRegX, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+ kFmtUnused, -1, -1, IS_UNARY_OP | REG_USE0 | IS_BRANCH,
+ "br", "!0x", kFixupNone),
+ ENCODING_MAP(kA64Brk1d, NO_VARIANTS(0xd4200000),
+ kFmtBitBlt, 20, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH,
- "bx", "!0C", 2, kFixupNone),
- ENCODING_MAP(kThumbCmnRR, 0x42c0,
- kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES,
- "cmn", "!0C, !1C", 2, kFixupNone),
- ENCODING_MAP(kThumbCmpRI8, 0x2800,
- kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE0 | SETS_CCODES,
- "cmp", "!0C, #!1d", 2, kFixupNone),
- ENCODING_MAP(kThumbCmpRR, 0x4280,
- kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES,
- "cmp", "!0C, !1C", 2, kFixupNone),
- ENCODING_MAP(kThumbCmpLH, 0x4540,
- kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES,
- "cmp", "!0C, !1C", 2, kFixupNone),
- ENCODING_MAP(kThumbCmpHL, 0x4580,
- kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES,
- "cmp", "!0C, !1C", 2, kFixupNone),
- ENCODING_MAP(kThumbCmpHH, 0x45c0,
- kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES,
- "cmp", "!0C, !1C", 2, kFixupNone),
- ENCODING_MAP(kThumbEorRR, 0x4040,
- kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
+ "brk", "!0d", kFixupNone),
+ ENCODING_MAP(kA64B1t, NO_VARIANTS(0x14000000),
+ kFmtBitBlt, 25, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+ kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | NEEDS_FIXUP,
+ "b", "!0t", kFixupT1Branch),
+ ENCODING_MAP(WIDE(kA64Cbnz2rt), SF_VARIANTS(0x35000000),
+ kFmtRegR, 4, 0, kFmtBitBlt, 23, 5, kFmtUnused, -1, -1,
kFmtUnused, -1, -1,
- IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
- "eors", "!0C, !1C", 2, kFixupNone),
- ENCODING_MAP(kThumbLdmia, 0xc800,
- kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
+ IS_BINARY_OP | REG_USE0 | IS_BRANCH | NEEDS_FIXUP,
+ "cbnz", "!0r, !1t", kFixupCBxZ),
+ ENCODING_MAP(WIDE(kA64Cbz2rt), SF_VARIANTS(0x34000000),
+ kFmtRegR, 4, 0, kFmtBitBlt, 23, 5, kFmtUnused, -1, -1,
kFmtUnused, -1, -1,
- IS_BINARY_OP | REG_DEF0_USE0 | REG_DEF_LIST1 | IS_LOAD,
- "ldmia", "!0C!!, <!1R>", 2, kFixupNone),
- ENCODING_MAP(kThumbLdrRRI5, 0x6800,
- kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
- "ldr", "!0C, [!1C, #!2E]", 2, kFixupNone),
- ENCODING_MAP(kThumbLdrRRR, 0x5800,
- kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD,
- "ldr", "!0C, [!1C, !2C]", 2, kFixupNone),
- ENCODING_MAP(kThumbLdrPcRel, 0x4800,
- kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | REG_USE_PC
- | IS_LOAD | NEEDS_FIXUP, "ldr", "!0C, [pc, #!1E]", 2, kFixupLoad),
- ENCODING_MAP(kThumbLdrSpRel, 0x9800,
- kFmtBitBlt, 10, 8, kFmtSkip, -1, -1, kFmtBitBlt, 7, 0,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | REG_USE_SP
- | IS_LOAD, "ldr", "!0C, [sp, #!2E]", 2, kFixupNone),
- ENCODING_MAP(kThumbLdrbRRI5, 0x7800,
- kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
- "ldrb", "!0C, [!1C, #2d]", 2, kFixupNone),
- ENCODING_MAP(kThumbLdrbRRR, 0x5c00,
- kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD,
- "ldrb", "!0C, [!1C, !2C]", 2, kFixupNone),
- ENCODING_MAP(kThumbLdrhRRI5, 0x8800,
- kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
- "ldrh", "!0C, [!1C, #!2F]", 2, kFixupNone),
- ENCODING_MAP(kThumbLdrhRRR, 0x5a00,
- kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD,
- "ldrh", "!0C, [!1C, !2C]", 2, kFixupNone),
- ENCODING_MAP(kThumbLdrsbRRR, 0x5600,
- kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD,
- "ldrsb", "!0C, [!1C, !2C]", 2, kFixupNone),
- ENCODING_MAP(kThumbLdrshRRR, 0x5e00,
- kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD,
- "ldrsh", "!0C, [!1C, !2C]", 2, kFixupNone),
- ENCODING_MAP(kThumbLslRRI5, 0x0000,
- kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
- kFmtUnused, -1, -1,
- IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
- "lsls", "!0C, !1C, #!2d", 2, kFixupNone),
- ENCODING_MAP(kThumbLslRR, 0x4080,
- kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1,
- IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
- "lsls", "!0C, !1C", 2, kFixupNone),
- ENCODING_MAP(kThumbLsrRRI5, 0x0800,
- kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
- kFmtUnused, -1, -1,
- IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
- "lsrs", "!0C, !1C, #!2d", 2, kFixupNone),
- ENCODING_MAP(kThumbLsrRR, 0x40c0,
- kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1,
- IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
- "lsrs", "!0C, !1C", 2, kFixupNone),
- ENCODING_MAP(kThumbMovImm, 0x2000,
- kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1,
- IS_BINARY_OP | REG_DEF0 | SETS_CCODES,
- "movs", "!0C, #!1d", 2, kFixupNone),
- ENCODING_MAP(kThumbMovRR, 0x1c00,
- kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1,
- IS_BINARY_OP | REG_DEF0_USE1 | SETS_CCODES,
- "movs", "!0C, !1C", 2, kFixupNone),
- ENCODING_MAP(kThumbMovRR_H2H, 0x46c0,
- kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
- "mov", "!0C, !1C", 2, kFixupNone),
- ENCODING_MAP(kThumbMovRR_H2L, 0x4640,
- kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
- "mov", "!0C, !1C", 2, kFixupNone),
- ENCODING_MAP(kThumbMovRR_L2H, 0x4680,
- kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
- "mov", "!0C, !1C", 2, kFixupNone),
- ENCODING_MAP(kThumbMul, 0x4340,
- kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1,
- IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
- "muls", "!0C, !1C", 2, kFixupNone),
- ENCODING_MAP(kThumbMvn, 0x43c0,
- kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1,
- IS_BINARY_OP | REG_DEF0_USE1 | SETS_CCODES,
- "mvns", "!0C, !1C", 2, kFixupNone),
- ENCODING_MAP(kThumbNeg, 0x4240,
- kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1,
- IS_BINARY_OP | REG_DEF0_USE1 | SETS_CCODES,
- "negs", "!0C, !1C", 2, kFixupNone),
- ENCODING_MAP(kThumbOrr, 0x4300,
- kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1,
- IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
- "orrs", "!0C, !1C", 2, kFixupNone),
- ENCODING_MAP(kThumbPop, 0xbc00,
- kFmtBitBlt, 8, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1,
- IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_DEF_LIST0
- | IS_LOAD, "pop", "<!0R>", 2, kFixupNone),
- ENCODING_MAP(kThumbPush, 0xb400,
- kFmtBitBlt, 8, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1,
- IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE_LIST0
- | IS_STORE, "push", "<!0R>", 2, kFixupNone),
- ENCODING_MAP(kThumbRev, 0xba00,
- kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1,
- IS_BINARY_OP | REG_DEF0_USE1,
- "rev", "!0C, !1C", 2, kFixupNone),
- ENCODING_MAP(kThumbRevsh, 0xbac0,
- kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1,
- IS_BINARY_OP | REG_DEF0_USE1,
- "rev", "!0C, !1C", 2, kFixupNone),
- ENCODING_MAP(kThumbRorRR, 0x41c0,
- kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1,
- IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
- "rors", "!0C, !1C", 2, kFixupNone),
- ENCODING_MAP(kThumbSbc, 0x4180,
- kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1,
- IS_BINARY_OP | REG_DEF0_USE01 | USES_CCODES | SETS_CCODES,
- "sbcs", "!0C, !1C", 2, kFixupNone),
- ENCODING_MAP(kThumbStmia, 0xc000,
- kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1,
- IS_BINARY_OP | REG_DEF0 | REG_USE0 | REG_USE_LIST1 | IS_STORE,
- "stmia", "!0C!!, <!1R>", 2, kFixupNone),
- ENCODING_MAP(kThumbStrRRI5, 0x6000,
- kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
- "str", "!0C, [!1C, #!2E]", 2, kFixupNone),
- ENCODING_MAP(kThumbStrRRR, 0x5000,
- kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE012 | IS_STORE,
- "str", "!0C, [!1C, !2C]", 2, kFixupNone),
- ENCODING_MAP(kThumbStrSpRel, 0x9000,
- kFmtBitBlt, 10, 8, kFmtSkip, -1, -1, kFmtBitBlt, 7, 0,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE0 | REG_USE_SP
- | IS_STORE, "str", "!0C, [sp, #!2E]", 2, kFixupNone),
- ENCODING_MAP(kThumbStrbRRI5, 0x7000,
- kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
- "strb", "!0C, [!1C, #!2d]", 2, kFixupNone),
- ENCODING_MAP(kThumbStrbRRR, 0x5400,
- kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE012 | IS_STORE,
- "strb", "!0C, [!1C, !2C]", 2, kFixupNone),
- ENCODING_MAP(kThumbStrhRRI5, 0x8000,
- kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
- "strh", "!0C, [!1C, #!2F]", 2, kFixupNone),
- ENCODING_MAP(kThumbStrhRRR, 0x5200,
- kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE012 | IS_STORE,
- "strh", "!0C, [!1C, !2C]", 2, kFixupNone),
- ENCODING_MAP(kThumbSubRRI3, 0x1e00,
- kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
- kFmtUnused, -1, -1,
- IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
- "subs", "!0C, !1C, #!2d", 2, kFixupNone),
- ENCODING_MAP(kThumbSubRI8, 0x3800,
- kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1,
- IS_BINARY_OP | REG_DEF0_USE0 | SETS_CCODES,
- "subs", "!0C, #!1d", 2, kFixupNone),
- ENCODING_MAP(kThumbSubRRR, 0x1a00,
- kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
- kFmtUnused, -1, -1,
- IS_TERTIARY_OP | REG_DEF0_USE12 | SETS_CCODES,
- "subs", "!0C, !1C, !2C", 2, kFixupNone),
- ENCODING_MAP(kThumbSubSpI7, 0xb080,
- kFmtBitBlt, 6, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1,
- IS_UNARY_OP | REG_DEF_SP | REG_USE_SP,
- "sub", "sp, #!0d*4", 2, kFixupNone),
- ENCODING_MAP(kThumbSwi, 0xdf00,
- kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH,
- "swi", "!0d", 2, kFixupNone),
- ENCODING_MAP(kThumbTst, 0x4200,
- kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_UNARY_OP | REG_USE01 | SETS_CCODES,
- "tst", "!0C, !1C", 2, kFixupNone),
- /*
- * Note: The encoding map entries for vldrd and vldrs include REG_DEF_LR, even though
- * these instructions don't define lr. The reason is that these instructions
- * are used for loading values from the literal pool, and the displacement may be found
- * to be insufficient at assembly time. In that case, we need to materialize a new base
- * register - and will use lr as the temp register. This works because lr is used as
- * a temp register in very limited situations, and never in conjunction with a floating
- * point constant load. However, it is possible that during instruction scheduling,
- * another use of lr could be moved across a vldrd/vldrs. By setting REG_DEF_LR, we
- * prevent that from happening. Note that we set REG_DEF_LR on all vldrd/vldrs - even those
- * not used in a pc-relative case. It is really only needed on the pc-relative loads, but
- * the case we're handling is rare enough that it seemed not worth the trouble to distinguish.
- */
- ENCODING_MAP(kThumb2Vldrs, 0xed900a00,
- kFmtSfp, 22, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD |
- REG_DEF_LR | NEEDS_FIXUP, "vldr", "!0s, [!1C, #!2E]", 4, kFixupVLoad),
- ENCODING_MAP(kThumb2Vldrd, 0xed900b00,
- kFmtDfp, 22, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD |
- REG_DEF_LR | NEEDS_FIXUP, "vldr", "!0S, [!1C, #!2E]", 4, kFixupVLoad),
- ENCODING_MAP(kThumb2Vmuls, 0xee200a00,
- kFmtSfp, 22, 12, kFmtSfp, 7, 16, kFmtSfp, 5, 0,
- kFmtUnused, -1, -1,
- IS_TERTIARY_OP | REG_DEF0_USE12,
- "vmuls", "!0s, !1s, !2s", 4, kFixupNone),
- ENCODING_MAP(kThumb2Vmuld, 0xee200b00,
- kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
- "vmuld", "!0S, !1S, !2S", 4, kFixupNone),
- ENCODING_MAP(kThumb2Vstrs, 0xed800a00,
- kFmtSfp, 22, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
- "vstr", "!0s, [!1C, #!2E]", 4, kFixupNone),
- ENCODING_MAP(kThumb2Vstrd, 0xed800b00,
- kFmtDfp, 22, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
- "vstr", "!0S, [!1C, #!2E]", 4, kFixupNone),
- ENCODING_MAP(kThumb2Vsubs, 0xee300a40,
- kFmtSfp, 22, 12, kFmtSfp, 7, 16, kFmtSfp, 5, 0,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
- "vsub", "!0s, !1s, !2s", 4, kFixupNone),
- ENCODING_MAP(kThumb2Vsubd, 0xee300b40,
- kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
- "vsub", "!0S, !1S, !2S", 4, kFixupNone),
- ENCODING_MAP(kThumb2Vadds, 0xee300a00,
- kFmtSfp, 22, 12, kFmtSfp, 7, 16, kFmtSfp, 5, 0,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
- "vadd", "!0s, !1s, !2s", 4, kFixupNone),
- ENCODING_MAP(kThumb2Vaddd, 0xee300b00,
- kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
- "vadd", "!0S, !1S, !2S", 4, kFixupNone),
- ENCODING_MAP(kThumb2Vdivs, 0xee800a00,
- kFmtSfp, 22, 12, kFmtSfp, 7, 16, kFmtSfp, 5, 0,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
- "vdivs", "!0s, !1s, !2s", 4, kFixupNone),
- ENCODING_MAP(kThumb2Vdivd, 0xee800b00,
- kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
- "vdivd", "!0S, !1S, !2S", 4, kFixupNone),
- ENCODING_MAP(kThumb2VmlaF64, 0xee000b00,
- kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | REG_USE012,
- "vmla", "!0S, !1S, !2S", 4, kFixupNone),
- ENCODING_MAP(kThumb2VcvtIF, 0xeeb80ac0,
- kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
- "vcvt.f32.s32", "!0s, !1s", 4, kFixupNone),
- ENCODING_MAP(kThumb2VcvtFI, 0xeebd0ac0,
- kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
- "vcvt.s32.f32 ", "!0s, !1s", 4, kFixupNone),
- ENCODING_MAP(kThumb2VcvtDI, 0xeebd0bc0,
- kFmtSfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
- "vcvt.s32.f64 ", "!0s, !1S", 4, kFixupNone),
- ENCODING_MAP(kThumb2VcvtFd, 0xeeb70ac0,
- kFmtDfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
- "vcvt.f64.f32 ", "!0S, !1s", 4, kFixupNone),
- ENCODING_MAP(kThumb2VcvtDF, 0xeeb70bc0,
- kFmtSfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
- "vcvt.f32.f64 ", "!0s, !1S", 4, kFixupNone),
- ENCODING_MAP(kThumb2VcvtF64S32, 0xeeb80bc0,
- kFmtDfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
- "vcvt.f64.s32 ", "!0S, !1s", 4, kFixupNone),
- ENCODING_MAP(kThumb2VcvtF64U32, 0xeeb80b40,
- kFmtDfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
- "vcvt.f64.u32 ", "!0S, !1s", 4, kFixupNone),
- ENCODING_MAP(kThumb2Vsqrts, 0xeeb10ac0,
- kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
- "vsqrt.f32 ", "!0s, !1s", 4, kFixupNone),
- ENCODING_MAP(kThumb2Vsqrtd, 0xeeb10bc0,
- kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
- "vsqrt.f64 ", "!0S, !1S", 4, kFixupNone),
- ENCODING_MAP(kThumb2MovI8M, 0xf04f0000, /* no setflags encoding */
- kFmtBitBlt, 11, 8, kFmtModImm, -1, -1, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0,
- "mov", "!0C, #!1m", 4, kFixupNone),
- ENCODING_MAP(kThumb2MovImm16, 0xf2400000,
- kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0,
- "mov", "!0C, #!1M", 4, kFixupNone),
- ENCODING_MAP(kThumb2StrRRI12, 0xf8c00000,
- kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
- "str", "!0C, [!1C, #!2d]", 4, kFixupNone),
- ENCODING_MAP(kThumb2LdrRRI12, 0xf8d00000,
- kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
- "ldr", "!0C, [!1C, #!2d]", 4, kFixupNone),
- ENCODING_MAP(kThumb2StrRRI8Predec, 0xf8400c00,
- kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 8, 0,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
- "str", "!0C, [!1C, #-!2d]", 4, kFixupNone),
- ENCODING_MAP(kThumb2LdrRRI8Predec, 0xf8500c00,
- kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 8, 0,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
- "ldr", "!0C, [!1C, #-!2d]", 4, kFixupNone),
- ENCODING_MAP(kThumb2Cbnz, 0xb900, /* Note: does not affect flags */
- kFmtBitBlt, 2, 0, kFmtImm6, -1, -1, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE0 | IS_BRANCH |
- NEEDS_FIXUP, "cbnz", "!0C,!1t", 2, kFixupCBxZ),
- ENCODING_MAP(kThumb2Cbz, 0xb100, /* Note: does not affect flags */
- kFmtBitBlt, 2, 0, kFmtImm6, -1, -1, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE0 | IS_BRANCH |
- NEEDS_FIXUP, "cbz", "!0C,!1t", 2, kFixupCBxZ),
- ENCODING_MAP(kThumb2AddRRI12, 0xf2000000,
- kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtImm12, -1, -1,
- kFmtUnused, -1, -1,
- IS_TERTIARY_OP | REG_DEF0_USE1,/* Note: doesn't affect flags */
- "add", "!0C,!1C,#!2d", 4, kFixupNone),
- ENCODING_MAP(kThumb2MovRR, 0xea4f0000, /* no setflags encoding */
- kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
- "mov", "!0C, !1C", 4, kFixupNone),
- ENCODING_MAP(kThumb2Vmovs, 0xeeb00a40,
- kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
- "vmov.f32 ", " !0s, !1s", 4, kFixupNone),
- ENCODING_MAP(kThumb2Vmovd, 0xeeb00b40,
- kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
- "vmov.f64 ", " !0S, !1S", 4, kFixupNone),
- ENCODING_MAP(kThumb2Ldmia, 0xe8900000,
- kFmtBitBlt, 19, 16, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1,
- IS_BINARY_OP | REG_DEF0_USE0 | REG_DEF_LIST1 | IS_LOAD,
- "ldmia", "!0C!!, <!1R>", 4, kFixupNone),
- ENCODING_MAP(kThumb2Stmia, 0xe8800000,
- kFmtBitBlt, 19, 16, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1,
- IS_BINARY_OP | REG_DEF0_USE0 | REG_USE_LIST1 | IS_STORE,
- "stmia", "!0C!!, <!1R>", 4, kFixupNone),
- ENCODING_MAP(kThumb2AddRRR, 0xeb100000, /* setflags encoding */
- kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
- kFmtShift, -1, -1,
- IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES,
- "adds", "!0C, !1C, !2C!3H", 4, kFixupNone),
- ENCODING_MAP(kThumb2SubRRR, 0xebb00000, /* setflags encoding */
- kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
- kFmtShift, -1, -1,
- IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES,
- "subs", "!0C, !1C, !2C!3H", 4, kFixupNone),
- ENCODING_MAP(kThumb2SbcRRR, 0xeb700000, /* setflags encoding */
- kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
- kFmtShift, -1, -1,
- IS_QUAD_OP | REG_DEF0_USE12 | USES_CCODES | SETS_CCODES,
- "sbcs", "!0C, !1C, !2C!3H", 4, kFixupNone),
- ENCODING_MAP(kThumb2CmpRR, 0xebb00f00,
- kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1,
- kFmtUnused, -1, -1,
- IS_TERTIARY_OP | REG_USE01 | SETS_CCODES,
- "cmp", "!0C, !1C", 4, kFixupNone),
- ENCODING_MAP(kThumb2SubRRI12, 0xf2a00000,
- kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtImm12, -1, -1,
- kFmtUnused, -1, -1,
- IS_TERTIARY_OP | REG_DEF0_USE1,/* Note: doesn't affect flags */
- "sub", "!0C,!1C,#!2d", 4, kFixupNone),
- ENCODING_MAP(kThumb2MvnI8M, 0xf06f0000, /* no setflags encoding */
- kFmtBitBlt, 11, 8, kFmtModImm, -1, -1, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0,
- "mvn", "!0C, #!1n", 4, kFixupNone),
- ENCODING_MAP(kThumb2Sel, 0xfaa0f080,
- kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
- kFmtUnused, -1, -1,
- IS_TERTIARY_OP | REG_DEF0_USE12 | USES_CCODES,
- "sel", "!0C, !1C, !2C", 4, kFixupNone),
- ENCODING_MAP(kThumb2Ubfx, 0xf3c00000,
- kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtLsb, -1, -1,
- kFmtBWidth, 4, 0, IS_QUAD_OP | REG_DEF0_USE1,
- "ubfx", "!0C, !1C, #!2d, #!3d", 4, kFixupNone),
- ENCODING_MAP(kThumb2Sbfx, 0xf3400000,
- kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtLsb, -1, -1,
- kFmtBWidth, 4, 0, IS_QUAD_OP | REG_DEF0_USE1,
- "sbfx", "!0C, !1C, #!2d, #!3d", 4, kFixupNone),
- ENCODING_MAP(kThumb2LdrRRR, 0xf8500000,
- kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
- kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
- "ldr", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone),
- ENCODING_MAP(kThumb2LdrhRRR, 0xf8300000,
- kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
- kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
- "ldrh", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone),
- ENCODING_MAP(kThumb2LdrshRRR, 0xf9300000,
- kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
- kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
- "ldrsh", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone),
- ENCODING_MAP(kThumb2LdrbRRR, 0xf8100000,
- kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
- kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
- "ldrb", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone),
- ENCODING_MAP(kThumb2LdrsbRRR, 0xf9100000,
- kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
- kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
- "ldrsb", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone),
- ENCODING_MAP(kThumb2StrRRR, 0xf8400000,
- kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
- kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_USE012 | IS_STORE,
- "str", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone),
- ENCODING_MAP(kThumb2StrhRRR, 0xf8200000,
- kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
- kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_USE012 | IS_STORE,
- "strh", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone),
- ENCODING_MAP(kThumb2StrbRRR, 0xf8000000,
- kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
- kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_USE012 | IS_STORE,
- "strb", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone),
- ENCODING_MAP(kThumb2LdrhRRI12, 0xf8b00000,
- kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
- "ldrh", "!0C, [!1C, #!2d]", 4, kFixupNone),
- ENCODING_MAP(kThumb2LdrshRRI12, 0xf9b00000,
- kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
- "ldrsh", "!0C, [!1C, #!2d]", 4, kFixupNone),
- ENCODING_MAP(kThumb2LdrbRRI12, 0xf8900000,
- kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
- "ldrb", "!0C, [!1C, #!2d]", 4, kFixupNone),
- ENCODING_MAP(kThumb2LdrsbRRI12, 0xf9900000,
- kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
- "ldrsb", "!0C, [!1C, #!2d]", 4, kFixupNone),
- ENCODING_MAP(kThumb2StrhRRI12, 0xf8a00000,
- kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
- "strh", "!0C, [!1C, #!2d]", 4, kFixupNone),
- ENCODING_MAP(kThumb2StrbRRI12, 0xf8800000,
- kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
- "strb", "!0C, [!1C, #!2d]", 4, kFixupNone),
- ENCODING_MAP(kThumb2Pop, 0xe8bd0000,
- kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1,
- IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_DEF_LIST0
- | IS_LOAD | NEEDS_FIXUP, "pop", "<!0R>", 4, kFixupPushPop),
- ENCODING_MAP(kThumb2Push, 0xe92d0000,
- kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1,
- IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE_LIST0
- | IS_STORE | NEEDS_FIXUP, "push", "<!0R>", 4, kFixupPushPop),
- ENCODING_MAP(kThumb2CmpRI8M, 0xf1b00f00,
- kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1,
- IS_BINARY_OP | REG_USE0 | SETS_CCODES,
- "cmp", "!0C, #!1m", 4, kFixupNone),
- ENCODING_MAP(kThumb2CmnRI8M, 0xf1100f00,
- kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1,
- IS_BINARY_OP | REG_USE0 | SETS_CCODES,
- "cmn", "!0C, #!1m", 4, kFixupNone),
- ENCODING_MAP(kThumb2AdcRRR, 0xeb500000, /* setflags encoding */
- kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
- kFmtShift, -1, -1,
- IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES,
- "adcs", "!0C, !1C, !2C!3H", 4, kFixupNone),
- ENCODING_MAP(kThumb2AndRRR, 0xea000000,
- kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
- kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
- "and", "!0C, !1C, !2C!3H", 4, kFixupNone),
- ENCODING_MAP(kThumb2BicRRR, 0xea200000,
- kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
- kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
- "bic", "!0C, !1C, !2C!3H", 4, kFixupNone),
- ENCODING_MAP(kThumb2CmnRR, 0xeb000000,
- kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1,
- kFmtUnused, -1, -1,
- IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
- "cmn", "!0C, !1C, shift !2d", 4, kFixupNone),
- ENCODING_MAP(kThumb2EorRRR, 0xea800000,
- kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
- kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
- "eor", "!0C, !1C, !2C!3H", 4, kFixupNone),
- ENCODING_MAP(kThumb2MulRRR, 0xfb00f000,
- kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
- "mul", "!0C, !1C, !2C", 4, kFixupNone),
- ENCODING_MAP(kThumb2SdivRRR, 0xfb90f0f0,
- kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
- "sdiv", "!0C, !1C, !2C", 4, kFixupNone),
- ENCODING_MAP(kThumb2UdivRRR, 0xfbb0f0f0,
- kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
- "udiv", "!0C, !1C, !2C", 4, kFixupNone),
- ENCODING_MAP(kThumb2MnvRR, 0xea6f0000,
- kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift, -1, -1,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
- "mvn", "!0C, !1C, shift !2d", 4, kFixupNone),
- ENCODING_MAP(kThumb2RsubRRI8M, 0xf1d00000,
- kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
- kFmtUnused, -1, -1,
- IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
- "rsbs", "!0C,!1C,#!2m", 4, kFixupNone),
- ENCODING_MAP(kThumb2NegRR, 0xf1d00000, /* instance of rsub */
- kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1,
- IS_BINARY_OP | REG_DEF0_USE1 | SETS_CCODES,
- "neg", "!0C,!1C", 4, kFixupNone),
- ENCODING_MAP(kThumb2OrrRRR, 0xea400000,
- kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
- kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
- "orr", "!0C, !1C, !2C!3H", 4, kFixupNone),
- ENCODING_MAP(kThumb2TstRR, 0xea100f00,
- kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1,
- kFmtUnused, -1, -1,
- IS_TERTIARY_OP | REG_USE01 | SETS_CCODES,
- "tst", "!0C, !1C, shift !2d", 4, kFixupNone),
- ENCODING_MAP(kThumb2LslRRR, 0xfa00f000,
- kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
- "lsl", "!0C, !1C, !2C", 4, kFixupNone),
- ENCODING_MAP(kThumb2LsrRRR, 0xfa20f000,
- kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
- "lsr", "!0C, !1C, !2C", 4, kFixupNone),
- ENCODING_MAP(kThumb2AsrRRR, 0xfa40f000,
- kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
- "asr", "!0C, !1C, !2C", 4, kFixupNone),
- ENCODING_MAP(kThumb2RorRRR, 0xfa60f000,
- kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
- "ror", "!0C, !1C, !2C", 4, kFixupNone),
- ENCODING_MAP(kThumb2LslRRI5, 0xea4f0000,
- kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift5, -1, -1,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
- "lsl", "!0C, !1C, #!2d", 4, kFixupNone),
- ENCODING_MAP(kThumb2LsrRRI5, 0xea4f0010,
- kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift5, -1, -1,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
- "lsr", "!0C, !1C, #!2d", 4, kFixupNone),
- ENCODING_MAP(kThumb2AsrRRI5, 0xea4f0020,
- kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift5, -1, -1,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
- "asr", "!0C, !1C, #!2d", 4, kFixupNone),
- ENCODING_MAP(kThumb2RorRRI5, 0xea4f0030,
- kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift5, -1, -1,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
- "ror", "!0C, !1C, #!2d", 4, kFixupNone),
- ENCODING_MAP(kThumb2BicRRI8M, 0xf0200000,
- kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
- "bic", "!0C, !1C, #!2m", 4, kFixupNone),
- ENCODING_MAP(kThumb2AndRRI8M, 0xf0000000,
- kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
- "and", "!0C, !1C, #!2m", 4, kFixupNone),
- ENCODING_MAP(kThumb2OrrRRI8M, 0xf0400000,
- kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
- "orr", "!0C, !1C, #!2m", 4, kFixupNone),
- ENCODING_MAP(kThumb2EorRRI8M, 0xf0800000,
- kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
- "eor", "!0C, !1C, #!2m", 4, kFixupNone),
- ENCODING_MAP(kThumb2AddRRI8M, 0xf1100000,
- kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
- kFmtUnused, -1, -1,
- IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
- "adds", "!0C, !1C, #!2m", 4, kFixupNone),
- ENCODING_MAP(kThumb2AdcRRI8M, 0xf1500000,
- kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
- kFmtUnused, -1, -1,
- IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES | USES_CCODES,
- "adcs", "!0C, !1C, #!2m", 4, kFixupNone),
- ENCODING_MAP(kThumb2SubRRI8M, 0xf1b00000,
- kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
- kFmtUnused, -1, -1,
- IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
- "subs", "!0C, !1C, #!2m", 4, kFixupNone),
- ENCODING_MAP(kThumb2SbcRRI8M, 0xf1700000,
- kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
- kFmtUnused, -1, -1,
- IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES | USES_CCODES,
- "sbcs", "!0C, !1C, #!2m", 4, kFixupNone),
- ENCODING_MAP(kThumb2RevRR, 0xfa90f080,
- kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
- kFmtUnused, -1, -1,
- IS_TERTIARY_OP | REG_DEF0_USE12, // Binary, but rm is stored twice.
- "rev", "!0C, !1C", 4, kFixupNone),
- ENCODING_MAP(kThumb2RevshRR, 0xfa90f0b0,
- kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
- kFmtUnused, -1, -1,
- IS_TERTIARY_OP | REG_DEF0_USE12, // Binary, but rm is stored twice.
- "revsh", "!0C, !1C", 4, kFixupNone),
- ENCODING_MAP(kThumb2It, 0xbf00,
- kFmtBitBlt, 7, 4, kFmtBitBlt, 3, 0, kFmtModImm, -1, -1,
- kFmtUnused, -1, -1, IS_BINARY_OP | IS_IT | USES_CCODES,
- "it:!1b", "!0c", 2, kFixupNone),
- ENCODING_MAP(kThumb2Fmstat, 0xeef1fa10,
- kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, NO_OPERAND | SETS_CCODES,
- "fmstat", "", 4, kFixupNone),
- ENCODING_MAP(kThumb2Vcmpd, 0xeeb40b40,
- kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01,
- "vcmp.f64", "!0S, !1S", 4, kFixupNone),
- ENCODING_MAP(kThumb2Vcmps, 0xeeb40a40,
- kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01,
- "vcmp.f32", "!0s, !1s", 4, kFixupNone),
- ENCODING_MAP(kThumb2LdrPcRel12, 0xf8df0000,
- kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 0, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1,
- IS_TERTIARY_OP | REG_DEF0 | REG_USE_PC | IS_LOAD | NEEDS_FIXUP,
- "ldr", "!0C, [r15pc, #!1d]", 4, kFixupLoad),
- ENCODING_MAP(kThumb2BCond, 0xf0008000,
- kFmtBrOffset, -1, -1, kFmtBitBlt, 25, 22, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1,
- IS_BINARY_OP | IS_BRANCH | USES_CCODES | NEEDS_FIXUP,
- "b!1c", "!0t", 4, kFixupCondBranch),
- ENCODING_MAP(kThumb2Fmrs, 0xee100a10,
- kFmtBitBlt, 15, 12, kFmtSfp, 7, 16, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
- "fmrs", "!0C, !1s", 4, kFixupNone),
- ENCODING_MAP(kThumb2Fmsr, 0xee000a10,
- kFmtSfp, 7, 16, kFmtBitBlt, 15, 12, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
- "fmsr", "!0s, !1C", 4, kFixupNone),
- ENCODING_MAP(kThumb2Fmrrd, 0xec500b10,
- kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtDfp, 5, 0,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF01_USE2,
- "fmrrd", "!0C, !1C, !2S", 4, kFixupNone),
- ENCODING_MAP(kThumb2Fmdrr, 0xec400b10,
- kFmtDfp, 5, 0, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
- "fmdrr", "!0S, !1C, !2C", 4, kFixupNone),
- ENCODING_MAP(kThumb2Vabsd, 0xeeb00bc0,
- kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
- "vabs.f64", "!0S, !1S", 4, kFixupNone),
- ENCODING_MAP(kThumb2Vabss, 0xeeb00ac0,
- kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
- "vabs.f32", "!0s, !1s", 4, kFixupNone),
- ENCODING_MAP(kThumb2Vnegd, 0xeeb10b40,
- kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
- "vneg.f64", "!0S, !1S", 4, kFixupNone),
- ENCODING_MAP(kThumb2Vnegs, 0xeeb10a40,
- kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
- "vneg.f32", "!0s, !1s", 4, kFixupNone),
- ENCODING_MAP(kThumb2Vmovs_IMM8, 0xeeb00a00,
- kFmtSfp, 22, 12, kFmtFPImm, 16, 0, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0,
- "vmov.f32", "!0s, #0x!1h", 4, kFixupNone),
- ENCODING_MAP(kThumb2Vmovd_IMM8, 0xeeb00b00,
- kFmtDfp, 22, 12, kFmtFPImm, 16, 0, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0,
- "vmov.f64", "!0S, #0x!1h", 4, kFixupNone),
- ENCODING_MAP(kThumb2Mla, 0xfb000000,
- kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
- kFmtBitBlt, 15, 12, IS_QUAD_OP | REG_DEF0_USE123,
- "mla", "!0C, !1C, !2C, !3C", 4, kFixupNone),
- ENCODING_MAP(kThumb2Umull, 0xfba00000,
- kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16,
- kFmtBitBlt, 3, 0,
- IS_QUAD_OP | REG_DEF0 | REG_DEF1 | REG_USE2 | REG_USE3,
- "umull", "!0C, !1C, !2C, !3C", 4, kFixupNone),
- ENCODING_MAP(kThumb2Ldrex, 0xe8500f00,
- kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
- "ldrex", "!0C, [!1C, #!2E]", 4, kFixupNone),
- ENCODING_MAP(kThumb2Ldrexd, 0xe8d0007f,
- kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF01_USE2 | IS_LOAD,
- "ldrexd", "!0C, !1C, [!2C]", 4, kFixupNone),
- ENCODING_MAP(kThumb2Strex, 0xe8400000,
- kFmtBitBlt, 11, 8, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16,
- kFmtBitBlt, 7, 0, IS_QUAD_OP | REG_DEF0_USE12 | IS_STORE,
- "strex", "!0C, !1C, [!2C, #!2E]", 4, kFixupNone),
- ENCODING_MAP(kThumb2Strexd, 0xe8c00070,
- kFmtBitBlt, 3, 0, kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8,
- kFmtBitBlt, 19, 16, IS_QUAD_OP | REG_DEF0_USE123 | IS_STORE,
- "strexd", "!0C, !1C, !2C, [!3C]", 4, kFixupNone),
- ENCODING_MAP(kThumb2Clrex, 0xf3bf8f2f,
- kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, NO_OPERAND,
- "clrex", "", 4, kFixupNone),
- ENCODING_MAP(kThumb2Bfi, 0xf3600000,
- kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtShift5, -1, -1,
- kFmtBitBlt, 4, 0, IS_QUAD_OP | REG_DEF0_USE1,
- "bfi", "!0C,!1C,#!2d,#!3d", 4, kFixupNone),
- ENCODING_MAP(kThumb2Bfc, 0xf36f0000,
- kFmtBitBlt, 11, 8, kFmtShift5, -1, -1, kFmtBitBlt, 4, 0,
- kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0,
- "bfc", "!0C,#!1d,#!2d", 4, kFixupNone),
- ENCODING_MAP(kThumb2Dmb, 0xf3bf8f50,
- kFmtBitBlt, 3, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+ IS_BINARY_OP | REG_USE0 | IS_BRANCH | NEEDS_FIXUP,
+ "cbz", "!0r, !1t", kFixupCBxZ),
+ ENCODING_MAP(WIDE(kA64Cmn3Rro), SF_VARIANTS(0x2b20001f),
+ kFmtRegROrSp, 9, 5, kFmtRegR, 20, 16, kFmtShift, -1, -1,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | SETS_CCODES,
+ "cmn", "!0R, !1r!2o", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Cmn3RdT), SF_VARIANTS(0x3100001f),
+ kFmtRegROrSp, 9, 5, kFmtBitBlt, 21, 10, kFmtBitBlt, 23, 22,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE0 | SETS_CCODES,
+ "cmn", "!0R, #!1d!2T", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Cmp3Rro), SF_VARIANTS(0x6b20001f),
+ kFmtRegROrSp, 9, 5, kFmtRegR, 20, 16, kFmtShift, -1, -1,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | SETS_CCODES,
+ "cmp", "!0R, !1r!2o", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Cmp3RdT), SF_VARIANTS(0x7100001f),
+ kFmtRegROrSp, 9, 5, kFmtBitBlt, 21, 10, kFmtBitBlt, 23, 22,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE0 | SETS_CCODES,
+ "cmp", "!0R, #!1d!2T", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Csel4rrrc), SF_VARIANTS(0x1a800000),
+ kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+ kFmtBitBlt, 15, 12, IS_QUAD_OP | REG_DEF0_USE12 | USES_CCODES,
+ "csel", "!0r, !1r, !2r, !3c", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Csinc4rrrc), SF_VARIANTS(0x1a800400),
+ kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+ kFmtBitBlt, 15, 12, IS_QUAD_OP | REG_DEF0_USE12 | USES_CCODES,
+ "csinc", "!0r, !1r, !2r, !3c", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Csneg4rrrc), SF_VARIANTS(0x5a800400),
+ kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+ kFmtBitBlt, 15, 12, IS_QUAD_OP | REG_DEF0_USE12 | USES_CCODES,
+ "csneg", "!0r, !1r, !2r, !3c", kFixupNone),
+ ENCODING_MAP(kA64Dmb1B, NO_VARIANTS(0xd50330bf),
+ kFmtBitBlt, 11, 8, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
kFmtUnused, -1, -1, IS_UNARY_OP,
- "dmb", "#!0B", 4, kFixupNone),
- ENCODING_MAP(kThumb2LdrPcReln12, 0xf85f0000,
- kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 0, kFmtUnused, -1, -1,
+ "dmb", "#!0B", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Eor3Rrl), SF_VARIANTS(0x52000000),
+ kFmtRegROrSp, 4, 0, kFmtRegR, 9, 5, kFmtBitBlt, 22, 10,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
+ "eor", "!0R, !1r, #!2l", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Eor4rrro), SF_VARIANTS(0x4a000000),
+ kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+ kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
+ "eor", "!0r, !1r, !2r!3o", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Extr4rrrd), SF_N_VARIANTS(0x13800000),
+ kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+ kFmtBitBlt, 15, 10, IS_QUAD_OP | REG_DEF0_USE12,
+ "extr", "!0r, !1r, !2r, #!3d", kFixupNone),
+ ENCODING_MAP(FWIDE(kA64Fabs2ff), FLOAT_VARIANTS(0x1e20c000),
+ kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1,
+ kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+ "fabs", "!0f, !1f", kFixupNone),
+ ENCODING_MAP(FWIDE(kA64Fadd3fff), FLOAT_VARIANTS(0x1e202800),
+ kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtRegF, 20, 16,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+ "fadd", "!0f, !1f, !2f", kFixupNone),
+ ENCODING_MAP(FWIDE(kA64Fcmp1f), FLOAT_VARIANTS(0x1e202008),
+ kFmtRegF, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+ kFmtUnused, -1, -1, IS_UNARY_OP | REG_USE0 | SETS_CCODES,
+ "fcmp", "!0f, #0", kFixupNone),
+ ENCODING_MAP(FWIDE(kA64Fcmp2ff), FLOAT_VARIANTS(0x1e202000),
+ kFmtRegF, 9, 5, kFmtRegF, 20, 16, kFmtUnused, -1, -1,
+ kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES,
+ "fcmp", "!0f, !1f", kFixupNone),
+ ENCODING_MAP(FWIDE(kA64Fcvtzs2wf), FLOAT_VARIANTS(0x1e380000),
+ kFmtRegW, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1,
+ kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+ "fcvtzs", "!0w, !1f", kFixupNone),
+ ENCODING_MAP(FWIDE(kA64Fcvtzs2xf), FLOAT_VARIANTS(0x9e380000),
+ kFmtRegX, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1,
+ kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+ "fcvtzs", "!0x, !1f", kFixupNone),
+ ENCODING_MAP(kA64Fcvt2Ss, NO_VARIANTS(0x1e22c000),
+ kFmtRegD, 4, 0, kFmtRegS, 9, 5, kFmtUnused, -1, -1,
+ kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+ "fcvt", "!0S, !1s", kFixupNone),
+ ENCODING_MAP(kA64Fcvt2sS, NO_VARIANTS(0x1e624000),
+ kFmtRegS, 4, 0, kFmtRegD, 9, 5, kFmtUnused, -1, -1,
+ kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+ "fcvt", "!0s, !1S", kFixupNone),
+ ENCODING_MAP(FWIDE(kA64Fdiv3fff), FLOAT_VARIANTS(0x1e201800),
+ kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtRegF, 20, 16,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+ "fdiv", "!0f, !1f, !2f", kFixupNone),
+ ENCODING_MAP(FWIDE(kA64Fmov2ff), FLOAT_VARIANTS(0x1e204000),
+ kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1,
+ kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+ "fmov", "!0f, !1f", kFixupNone),
+ ENCODING_MAP(FWIDE(kA64Fmov2fI), FLOAT_VARIANTS(0x1e201000),
+ kFmtRegF, 4, 0, kFmtBitBlt, 20, 13, kFmtUnused, -1, -1,
+ kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0,
+ "fmov", "!0f, #!1I", kFixupNone),
+ ENCODING_MAP(kA64Fmov2sw, NO_VARIANTS(0x1e270000),
+ kFmtRegS, 4, 0, kFmtRegW, 9, 5, kFmtUnused, -1, -1,
+ kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+ "fmov", "!0s, !1w", kFixupNone),
+ ENCODING_MAP(kA64Fmov2Sx, NO_VARIANTS(0x9e670000),
+ kFmtRegD, 4, 0, kFmtRegX, 9, 5, kFmtUnused, -1, -1,
+ kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+ "fmov", "!0S, !1x", kFixupNone),
+ ENCODING_MAP(kA64Fmov2ws, NO_VARIANTS(0x1e260000),
+ kFmtRegW, 4, 0, kFmtRegS, 9, 5, kFmtUnused, -1, -1,
+ kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+ "fmov", "!0w, !1s", kFixupNone),
+ ENCODING_MAP(kA64Fmov2xS, NO_VARIANTS(0x9e660000),
+ kFmtRegX, 4, 0, kFmtRegD, 9, 5, kFmtUnused, -1, -1,
+ kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+ "fmov", "!0x, !1S", kFixupNone),
+ ENCODING_MAP(FWIDE(kA64Fmul3fff), FLOAT_VARIANTS(0x1e200800),
+ kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtRegF, 20, 16,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+ "fmul", "!0f, !1f, !2f", kFixupNone),
+ ENCODING_MAP(FWIDE(kA64Fneg2ff), FLOAT_VARIANTS(0x1e214000),
+ kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1,
+ kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+ "fneg", "!0f, !1f", kFixupNone),
+ ENCODING_MAP(FWIDE(kA64Frintz2ff), FLOAT_VARIANTS(0x1e25c000),
+ kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1,
+ kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+ "frintz", "!0f, !1f", kFixupNone),
+ ENCODING_MAP(FWIDE(kA64Fsqrt2ff), FLOAT_VARIANTS(0x1e21c000),
+ kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1,
+ kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+ "fsqrt", "!0f, !1f", kFixupNone),
+ ENCODING_MAP(FWIDE(kA64Fsub3fff), FLOAT_VARIANTS(0x1e203800),
+ kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtRegF, 20, 16,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+ "fsub", "!0f, !1f, !2f", kFixupNone),
+ ENCODING_MAP(kA64Ldrb3wXd, NO_VARIANTS(0x39400000),
+ kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+ "ldrb", "!0w, [!1X, #!2d]", kFixupNone),
+ ENCODING_MAP(kA64Ldrb3wXx, NO_VARIANTS(0x38606800),
+ kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD,
+ "ldrb", "!0w, [!1X, !2x]", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Ldrsb3rXd), CUSTOM_VARIANTS(0x39c00000, 0x39800000),
+ kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+ "ldrsb", "!0r, [!1X, #!2d]", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Ldrsb3rXx), CUSTOM_VARIANTS(0x38e06800, 0x38a06800),
+ kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD,
+ "ldrsb", "!0r, [!1X, !2x]", kFixupNone),
+ ENCODING_MAP(kA64Ldrh3wXF, NO_VARIANTS(0x79400000),
+ kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+ "ldrh", "!0w, [!1X, #!2F]", kFixupNone),
+ ENCODING_MAP(kA64Ldrh4wXxd, NO_VARIANTS(0x78606800),
+ kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
+ kFmtBitBlt, 12, 12, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
+ "ldrh", "!0w, [!1X, !2x, lsl #!3d]", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Ldrsh3rXF), CUSTOM_VARIANTS(0x79c00000, 0x79800000),
+ kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+ "ldrsh", "!0r, [!1X, #!2F]", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Ldrsh4rXxd), CUSTOM_VARIANTS(0x78e06800, 0x78a06800),
+ kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
+ kFmtBitBlt, 12, 12, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
+ "ldrsh", "!0r, [!1X, !2x, lsl #!3d]", kFixupNone),
+ ENCODING_MAP(FWIDE(kA64Ldr2fp), SIZE_VARIANTS(0x1c000000),
+ kFmtRegF, 4, 0, kFmtBitBlt, 23, 5, kFmtUnused, -1, -1,
kFmtUnused, -1, -1,
- IS_BINARY_OP | REG_DEF0 | REG_USE_PC | IS_LOAD,
- "ldr", "!0C, [r15pc, -#!1d]", 4, kFixupNone),
- ENCODING_MAP(kThumb2Stm, 0xe9000000,
- kFmtBitBlt, 19, 16, kFmtBitBlt, 12, 0, kFmtUnused, -1, -1,
+ IS_BINARY_OP | REG_DEF0 | REG_USE_PC | IS_LOAD | NEEDS_FIXUP,
+ "ldr", "!0f, !1p", kFixupLoad),
+ ENCODING_MAP(WIDE(kA64Ldr2rp), SIZE_VARIANTS(0x18000000),
+ kFmtRegR, 4, 0, kFmtBitBlt, 23, 5, kFmtUnused, -1, -1,
kFmtUnused, -1, -1,
- IS_BINARY_OP | REG_USE0 | REG_USE_LIST1 | IS_STORE,
- "stm", "!0C, <!1R>", 4, kFixupNone),
- ENCODING_MAP(kThumbUndefined, 0xde00,
+ IS_BINARY_OP | REG_DEF0 | REG_USE_PC | IS_LOAD | NEEDS_FIXUP,
+ "ldr", "!0r, !1p", kFixupLoad),
+ ENCODING_MAP(FWIDE(kA64Ldr3fXD), SIZE_VARIANTS(0xbd400000),
+ kFmtRegF, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+ "ldr", "!0f, [!1X, #!2D]", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Ldr3rXD), SIZE_VARIANTS(0xb9400000),
+ kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+ "ldr", "!0r, [!1X, #!2D]", kFixupNone),
+ ENCODING_MAP(FWIDE(kA64Ldr4fXxG), SIZE_VARIANTS(0xbc606800),
+ kFmtRegF, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
+ kFmtBitBlt, 12, 12, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
+ "ldr", "!0f, [!1X, !2x!3G]", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Ldr4rXxG), SIZE_VARIANTS(0xb8606800),
+ kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
+ kFmtBitBlt, 12, 12, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
+ "ldr", "!0r, [!1X, !2x!3G]", kFixupNone),
+ ENCODING_MAP(WIDE(kA64LdrPost3rXd), SIZE_VARIANTS(0xb8400400),
+ kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 20, 12,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF01 | REG_USE1 | IS_LOAD,
+ "ldr", "!0r, [!1X], #!2d", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Ldp4rrXD), SF_VARIANTS(0x29400000),
+ kFmtRegR, 4, 0, kFmtRegR, 14, 10, kFmtRegXOrSp, 9, 5,
+ kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_USE2 | REG_DEF012 | IS_LOAD,
+ "ldp", "!0r, !1r, [!2X, #!3D]", kFixupNone),
+ ENCODING_MAP(WIDE(kA64LdpPost4rrXD), CUSTOM_VARIANTS(0x28c00000, 0xa8c00000),
+ kFmtRegR, 4, 0, kFmtRegR, 14, 10, kFmtRegXOrSp, 9, 5,
+ kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_USE2 | REG_DEF012 | IS_LOAD,
+ "ldp", "!0r, !1r, [!2X], #!3D", kFixupNone),
+ ENCODING_MAP(FWIDE(kA64Ldur3fXd), CUSTOM_VARIANTS(0xbc400000, 0xfc400000),
+ kFmtRegF, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 20, 12,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+ "ldur", "!0f, [!1X, #!2d]", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Ldur3rXd), SIZE_VARIANTS(0xb8400000),
+ kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 20, 12,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+ "ldur", "!0r, [!1X, #!2d]", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Ldxr2rX), SIZE_VARIANTS(0x885f7c00),
+ kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtUnused, -1, -1,
+ kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1 | IS_LOAD,
+ "ldxr", "!0r, [!1X]", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Lsl3rrr), SF_VARIANTS(0x1ac02000),
+ kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+ "lsl", "!0r, !1r, !2r", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Lsr3rrd), CUSTOM_VARIANTS(0x53007c00, 0xd340fc00),
+ kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtBitBlt, 21, 16,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
+ "lsr", "!0r, !1r, #!2d", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Lsr3rrr), SF_VARIANTS(0x1ac02400),
+ kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+ "lsr", "!0r, !1r, !2r", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Movk3rdM), SF_VARIANTS(0x72800000),
+ kFmtRegR, 4, 0, kFmtBitBlt, 20, 5, kFmtBitBlt, 22, 21,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE0,
+ "movk", "!0r, #!1d!2M", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Movn3rdM), SF_VARIANTS(0x12800000),
+ kFmtRegR, 4, 0, kFmtBitBlt, 20, 5, kFmtBitBlt, 22, 21,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0,
+ "movn", "!0r, #!1d!2M", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Movz3rdM), SF_VARIANTS(0x52800000),
+ kFmtRegR, 4, 0, kFmtBitBlt, 20, 5, kFmtBitBlt, 22, 21,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0,
+ "movz", "!0r, #!1d!2M", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Mov2rr), SF_VARIANTS(0x2a0003e0),
+ kFmtRegR, 4, 0, kFmtRegR, 20, 16, kFmtUnused, -1, -1,
+ kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+ "mov", "!0r, !1r", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Mvn2rr), SF_VARIANTS(0x2a2003e0),
+ kFmtRegR, 4, 0, kFmtRegR, 20, 16, kFmtUnused, -1, -1,
+ kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+ "mvn", "!0r, !1r", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Mul3rrr), SF_VARIANTS(0x1b007c00),
+ kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+ "mul", "!0r, !1r, !2r", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Neg3rro), SF_VARIANTS(0x4b0003e0),
+ kFmtRegR, 4, 0, kFmtRegR, 20, 16, kFmtShift, -1, -1,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
+ "neg", "!0r, !1r!2o", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Orr3Rrl), SF_VARIANTS(0x32000000),
+ kFmtRegROrSp, 4, 0, kFmtRegR, 9, 5, kFmtBitBlt, 22, 10,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
+ "orr", "!0R, !1r, #!2l", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Orr4rrro), SF_VARIANTS(0x2a000000),
+ kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+ kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
+ "orr", "!0r, !1r, !2r!3o", kFixupNone),
+ ENCODING_MAP(kA64Ret, NO_VARIANTS(0xd65f03c0),
kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, NO_OPERAND,
- "undefined", "", 2, kFixupNone),
- // NOTE: vpop, vpush hard-encoded for s16+ reg list
- ENCODING_MAP(kThumb2VPopCS, 0xecbd8a00,
- kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1,
- IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_DEF_FPCS_LIST0
- | IS_LOAD, "vpop", "<!0P>", 4, kFixupNone),
- ENCODING_MAP(kThumb2VPushCS, 0xed2d8a00,
- kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1,
- IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE_FPCS_LIST0
- | IS_STORE, "vpush", "<!0P>", 4, kFixupNone),
- ENCODING_MAP(kThumb2Vldms, 0xec900a00,
- kFmtBitBlt, 19, 16, kFmtSfp, 22, 12, kFmtBitBlt, 7, 0,
- kFmtUnused, -1, -1,
- IS_TERTIARY_OP | REG_USE0 | REG_DEF_FPCS_LIST2
- | IS_LOAD, "vldms", "!0C, <!2Q>", 4, kFixupNone),
- ENCODING_MAP(kThumb2Vstms, 0xec800a00,
- kFmtBitBlt, 19, 16, kFmtSfp, 22, 12, kFmtBitBlt, 7, 0,
- kFmtUnused, -1, -1,
- IS_TERTIARY_OP | REG_USE0 | REG_USE_FPCS_LIST2
- | IS_STORE, "vstms", "!0C, <!2Q>", 4, kFixupNone),
- ENCODING_MAP(kThumb2BUncond, 0xf0009000,
- kFmtOff24, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
kFmtUnused, -1, -1, NO_OPERAND | IS_BRANCH,
- "b", "!0t", 4, kFixupT2Branch),
- ENCODING_MAP(kThumb2MovImm16H, 0xf2c00000,
- kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | REG_USE0,
- "movt", "!0C, #!1M", 4, kFixupNone),
- ENCODING_MAP(kThumb2AddPCR, 0x4487,
- kFmtBitBlt, 6, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1,
- IS_UNARY_OP | REG_USE0 | IS_BRANCH | NEEDS_FIXUP,
- "add", "rPC, !0C", 2, kFixupLabel),
- ENCODING_MAP(kThumb2Adr, 0xf20f0000,
- kFmtBitBlt, 11, 8, kFmtImm12, -1, -1, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1,
- /* Note: doesn't affect flags */
- IS_TERTIARY_OP | REG_DEF0 | NEEDS_FIXUP,
- "adr", "!0C,#!1d", 4, kFixupAdr),
- ENCODING_MAP(kThumb2MovImm16LST, 0xf2400000,
- kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | NEEDS_FIXUP,
- "mov", "!0C, #!1M", 4, kFixupMovImmLST),
- ENCODING_MAP(kThumb2MovImm16HST, 0xf2c00000,
- kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | REG_USE0 | NEEDS_FIXUP,
- "movt", "!0C, #!1M", 4, kFixupMovImmHST),
- ENCODING_MAP(kThumb2LdmiaWB, 0xe8b00000,
- kFmtBitBlt, 19, 16, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1,
- IS_BINARY_OP | REG_DEF0_USE0 | REG_DEF_LIST1 | IS_LOAD,
- "ldmia", "!0C!!, <!1R>", 4, kFixupNone),
- ENCODING_MAP(kThumb2OrrRRRs, 0xea500000,
- kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
- kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES,
- "orrs", "!0C, !1C, !2C!3H", 4, kFixupNone),
- ENCODING_MAP(kThumb2Push1, 0xf84d0d04,
- kFmtBitBlt, 15, 12, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1,
- IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE0
- | IS_STORE, "push1", "!0C", 4, kFixupNone),
- ENCODING_MAP(kThumb2Pop1, 0xf85d0b04,
- kFmtBitBlt, 15, 12, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1,
- IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_DEF0
- | IS_LOAD, "pop1", "!0C", 4, kFixupNone),
- ENCODING_MAP(kThumb2RsubRRR, 0xebd00000, /* setflags encoding */
- kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
- kFmtShift, -1, -1,
- IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES,
- "rsbs", "!0C, !1C, !2C!3H", 4, kFixupNone),
- ENCODING_MAP(kThumb2Smull, 0xfb800000,
- kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16,
- kFmtBitBlt, 3, 0,
- IS_QUAD_OP | REG_DEF0 | REG_DEF1 | REG_USE2 | REG_USE3,
- "smull", "!0C, !1C, !2C, !3C", 4, kFixupNone),
- ENCODING_MAP(kThumb2LdrdPcRel8, 0xe9df0000,
- kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 7, 0,
- kFmtUnused, -1, -1,
- IS_TERTIARY_OP | REG_DEF0 | REG_DEF1 | REG_USE_PC | IS_LOAD | NEEDS_FIXUP,
- "ldrd", "!0C, !1C, [pc, #!2E]", 4, kFixupLoad),
- ENCODING_MAP(kThumb2LdrdI8, 0xe9d00000,
- kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16,
- kFmtBitBlt, 7, 0,
- IS_QUAD_OP | REG_DEF0 | REG_DEF1 | REG_USE2 | IS_LOAD,
- "ldrd", "!0C, !1C, [!2C, #!3E]", 4, kFixupNone),
- ENCODING_MAP(kThumb2StrdI8, 0xe9c00000,
- kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16,
- kFmtBitBlt, 7, 0,
- IS_QUAD_OP | REG_USE0 | REG_USE1 | REG_USE2 | IS_STORE,
- "strd", "!0C, !1C, [!2C, #!3E]", 4, kFixupNone),
+ "ret", "", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Rev2rr), CUSTOM_VARIANTS(0x5ac00800, 0xdac00c00),
+ kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtUnused, -1, -1,
+ kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+ "rev", "!0r, !1r", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Rev162rr), SF_VARIANTS(0x5ac00400),
+ kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtUnused, -1, -1,
+ kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+ "rev16", "!0r, !1r", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Ror3rrr), SF_VARIANTS(0x1ac02c00),
+ kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+ "ror", "!0r, !1r, !2r", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Sbc3rrr), SF_VARIANTS(0x5a000000),
+ kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+ "sbc", "!0r, !1r, !2r", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Sbfm4rrdd), SF_N_VARIANTS(0x13000000),
+ kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtBitBlt, 21, 16,
+ kFmtBitBlt, 15, 10, IS_QUAD_OP | REG_DEF0_USE1,
+ "sbfm", "!0r, !1r, #!2d, #!3d", kFixupNone),
+ ENCODING_MAP(FWIDE(kA64Scvtf2fw), FLOAT_VARIANTS(0x1e220000),
+ kFmtRegF, 4, 0, kFmtRegW, 9, 5, kFmtUnused, -1, -1,
+ kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+ "scvtf", "!0f, !1w", kFixupNone),
+ ENCODING_MAP(FWIDE(kA64Scvtf2fx), FLOAT_VARIANTS(0x9e220000),
+ kFmtRegF, 4, 0, kFmtRegX, 9, 5, kFmtUnused, -1, -1,
+ kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+ "scvtf", "!0f, !1x", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Sdiv3rrr), SF_VARIANTS(0x1ac00c00),
+ kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+ "sdiv", "!0r, !1r, !2r", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Smaddl4xwwx), NO_VARIANTS(0x9b200000),
+ kFmtRegX, 4, 0, kFmtRegW, 9, 5, kFmtRegW, 20, 16,
+ kFmtRegX, 14, 10, IS_QUAD_OP | REG_DEF0_USE123,
+ "smaddl", "!0x, !1w, !2w, !3x", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Stp4rrXD), SF_VARIANTS(0x29000000),
+ kFmtRegR, 4, 0, kFmtRegR, 14, 10, kFmtRegXOrSp, 9, 5,
+ kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_DEF2 | REG_USE012 | IS_STORE,
+ "stp", "!0r, !1r, [!2X, #!3D]", kFixupNone),
+ ENCODING_MAP(WIDE(kA64StpPost4rrXD), CUSTOM_VARIANTS(0x28800000, 0xa8800000),
+ kFmtRegR, 4, 0, kFmtRegR, 14, 10, kFmtRegXOrSp, 9, 5,
+ kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_DEF2 | REG_USE012 | IS_STORE,
+ "stp", "!0r, !1r, [!2X], #!3D", kFixupNone),
+ ENCODING_MAP(WIDE(kA64StpPre4rrXD), CUSTOM_VARIANTS(0x29800000, 0xa9800000),
+ kFmtRegR, 4, 0, kFmtRegR, 14, 10, kFmtRegXOrSp, 9, 5,
+ kFmtBitBlt, 21, 15, IS_QUAD_OP | REG_DEF2 | REG_USE012 | IS_STORE,
+ "stp", "!0r, !1r, [!2X, #!3D]!!", kFixupNone),
+ ENCODING_MAP(FWIDE(kA64Str3fXD), CUSTOM_VARIANTS(0xbd000000, 0xfd000000),
+ kFmtRegF, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+ "str", "!0f, [!1X, #!2D]", kFixupNone),
+ ENCODING_MAP(FWIDE(kA64Str4fXxG), CUSTOM_VARIANTS(0xbc206800, 0xfc206800),
+ kFmtRegF, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
+ kFmtBitBlt, 12, 12, IS_QUAD_OP | REG_USE012 | IS_STORE,
+ "str", "!0f, [!1X, !2x!3G]", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Str3rXD), SIZE_VARIANTS(0xb9000000),
+ kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+ "str", "!0r, [!1X, #!2D]", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Str4rXxG), SIZE_VARIANTS(0xb8206800),
+ kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
+ kFmtBitBlt, 12, 12, IS_QUAD_OP | REG_USE012 | IS_STORE,
+ "str", "!0r, [!1X, !2x!3G]", kFixupNone),
+ ENCODING_MAP(kA64Strb3wXd, NO_VARIANTS(0x39000000),
+ kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+ "strb", "!0w, [!1X, #!2d]", kFixupNone),
+ ENCODING_MAP(kA64Strb3wXx, NO_VARIANTS(0x38206800),
+ kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE012 | IS_STORE,
+ "strb", "!0w, [!1X, !2x]", kFixupNone),
+ ENCODING_MAP(kA64Strh3wXF, NO_VARIANTS(0x79000000),
+ kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 21, 10,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+ "strh", "!0w, [!1X, #!2F]", kFixupNone),
+ ENCODING_MAP(kA64Strh4wXxd, NO_VARIANTS(0x78206800),
+ kFmtRegW, 4, 0, kFmtRegXOrSp, 9, 5, kFmtRegX, 20, 16,
+ kFmtBitBlt, 12, 12, IS_QUAD_OP | REG_USE012 | IS_STORE,
+ "strh", "!0w, [!1X, !2x, lsl #!3d]", kFixupNone),
+ ENCODING_MAP(WIDE(kA64StrPost3rXd), SIZE_VARIANTS(0xb8000400),
+ kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 20, 12,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | REG_DEF1 | IS_STORE,
+ "str", "!0r, [!1X], #!2d", kFixupNone),
+ ENCODING_MAP(FWIDE(kA64Stur3fXd), CUSTOM_VARIANTS(0xbc000000, 0xfc000000),
+ kFmtRegF, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 20, 12,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+ "stur", "!0f, [!1X, #!2d]", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Stur3rXd), SIZE_VARIANTS(0xb8000000),
+ kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5, kFmtBitBlt, 20, 12,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+ "stur", "!0r, [!1X, #!2d]", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Stxr3wrX), SIZE_VARIANTS(0x88007c00),
+ kFmtRegW, 20, 16, kFmtRegR, 4, 0, kFmtRegXOrSp, 9, 5,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_STORE,
+ "stxr", "!0w, !1r, [!2X]", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Sub4RRdT), SF_VARIANTS(0x51000000),
+ kFmtRegROrSp, 4, 0, kFmtRegROrSp, 9, 5, kFmtBitBlt, 21, 10,
+ kFmtBitBlt, 23, 22, IS_QUAD_OP | REG_DEF0_USE1,
+ "sub", "!0R, !1R, #!2d!3T", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Sub4rrro), SF_VARIANTS(0x4b000000),
+ kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+ kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
+ "sub", "!0r, !1r, !2r!3o", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Subs3rRd), SF_VARIANTS(0x71000000),
+ kFmtRegR, 4, 0, kFmtRegROrSp, 9, 5, kFmtBitBlt, 21, 10,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
+ "subs", "!0r, !1R, #!2d", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Tst3rro), SF_VARIANTS(0x6a00001f),
+ kFmtRegR, 9, 5, kFmtRegR, 20, 16, kFmtShift, -1, -1,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | SETS_CCODES,
+ "tst", "!0r, !1r!2o", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Ubfm4rrdd), SF_N_VARIANTS(0x53000000),
+ kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtBitBlt, 21, 16,
+ kFmtBitBlt, 15, 10, IS_QUAD_OP | REG_DEF0_USE1,
+ "ubfm", "!0r, !1r, !2d, !3d", kFixupNone),
};
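// A minimal sketch of how the variant helpers used in the table above
// could work: each entry keeps a 32-bit and a 64-bit skeleton (the
// wskeleton/xskeleton the encoder selects between), and SF_VARIANTS derives
// the 64-bit form by setting the sf bit, bit 31. All names below are
// illustrative assumptions, not the actual ART macros.
#include <cstdint>

struct SkeletonPair {
  uint32_t wskeleton;  // 32-bit (w-register) encoding.
  uint32_t xskeleton;  // 64-bit (x-register) encoding.
};

// sf selects the operand size, e.g. 0x2a0003e0 (mov wd, wm) becomes
// 0xaa0003e0 (mov xd, xm).
constexpr SkeletonPair SfVariants(uint32_t skeleton) {
  return {skeleton, skeleton | (UINT32_C(1) << 31)};
}

// Instructions with one fixed encoding reuse it for both slots.
constexpr SkeletonPair NoVariants(uint32_t skeleton) {
  return {skeleton, skeleton};
}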
// new_lir replaces orig_lir in the pcrel_fixup list.
@@ -1059,153 +566,159 @@
}
}
-/*
- * The fake NOP of moving r0 to r0 actually will incur data stalls if r0 is
- * not ready. Since r5FP is not updated often, it is less likely to
- * generate unnecessary stall cycles.
- * TUNING: No longer true - find new NOP pattern.
- */
-#define PADDING_MOV_R5_R5 0x1C2D
+/* Nop, used for aligning code. Nop is an alias for hint #0. */
+#define PADDING_NOP (UINT32_C(0xd503201f))
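// Sanity sketch for the constant above: NOP is the alias of "hint #0",
// whose 7-bit immediate (CRm:op2) occupies bits [11:5], so a zero
// immediate leaves the base pattern untouched. The helper is illustrative,
// not ART code.
#include <cstdint>
constexpr uint32_t A64Hint(uint32_t imm7) {
  return UINT32_C(0xd503201f) | (imm7 << 5);
}
static_assert(A64Hint(0) == UINT32_C(0xd503201f), "nop == hint #0");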
uint8_t* Arm64Mir2Lir::EncodeLIRs(uint8_t* write_pos, LIR* lir) {
- for (; lir != NULL; lir = NEXT_LIR(lir)) {
- if (!lir->flags.is_nop) {
- int opcode = lir->opcode;
- if (IsPseudoLirOp(opcode)) {
- if (UNLIKELY(opcode == kPseudoPseudoAlign4)) {
- // Note: size for this opcode will be either 0 or 2 depending on final alignment.
- if (lir->offset & 0x2) {
- write_pos[0] = (PADDING_MOV_R5_R5 & 0xff);
- write_pos[1] = ((PADDING_MOV_R5_R5 >> 8) & 0xff);
- write_pos += 2;
- }
- }
- } else if (LIKELY(!lir->flags.is_nop)) {
- const ArmEncodingMap *encoder = &EncodingMap[lir->opcode];
- uint32_t bits = encoder->skeleton;
- for (int i = 0; i < 4; i++) {
- uint32_t operand;
- uint32_t value;
- operand = lir->operands[i];
- ArmEncodingKind kind = encoder->field_loc[i].kind;
- if (LIKELY(kind == kFmtBitBlt)) {
- value = (operand << encoder->field_loc[i].start) &
- ((1 << (encoder->field_loc[i].end + 1)) - 1);
- bits |= value;
- } else {
- switch (encoder->field_loc[i].kind) {
- case kFmtSkip:
- break; // Nothing to do, but continue to next.
- case kFmtUnused:
- i = 4; // Done, break out of the enclosing loop.
- break;
- case kFmtFPImm:
- value = ((operand & 0xF0) >> 4) << encoder->field_loc[i].end;
- value |= (operand & 0x0F) << encoder->field_loc[i].start;
- bits |= value;
- break;
- case kFmtBrOffset:
- value = ((operand & 0x80000) >> 19) << 26;
- value |= ((operand & 0x40000) >> 18) << 11;
- value |= ((operand & 0x20000) >> 17) << 13;
- value |= ((operand & 0x1f800) >> 11) << 16;
- value |= (operand & 0x007ff);
- bits |= value;
- break;
- case kFmtShift5:
- value = ((operand & 0x1c) >> 2) << 12;
- value |= (operand & 0x03) << 6;
- bits |= value;
- break;
- case kFmtShift:
- value = ((operand & 0x70) >> 4) << 12;
- value |= (operand & 0x0f) << 4;
- bits |= value;
- break;
- case kFmtBWidth:
- value = operand - 1;
- bits |= value;
- break;
- case kFmtLsb:
- value = ((operand & 0x1c) >> 2) << 12;
- value |= (operand & 0x03) << 6;
- bits |= value;
- break;
- case kFmtImm6:
- value = ((operand & 0x20) >> 5) << 9;
- value |= (operand & 0x1f) << 3;
- bits |= value;
- break;
- case kFmtDfp: {
- DCHECK(RegStorage::IsDouble(operand)) << ", Operand = 0x" << std::hex << operand;
- uint32_t reg_num = RegStorage::RegNum(operand);
- /* Snag the 1-bit slice and position it */
- value = ((reg_num & 0x10) >> 4) << encoder->field_loc[i].end;
- /* Extract and position the 4-bit slice */
- value |= (reg_num & 0x0f) << encoder->field_loc[i].start;
- bits |= value;
- break;
+ for (; lir != nullptr; lir = NEXT_LIR(lir)) {
+ bool opcode_is_wide = IS_WIDE(lir->opcode);
+ ArmOpcode opcode = UNWIDE(lir->opcode);
+
+ if (UNLIKELY(IsPseudoLirOp(opcode))) {
+ continue;
+ }
+
+ if (LIKELY(!lir->flags.is_nop)) {
+ const ArmEncodingMap *encoder = &EncodingMap[opcode];
+
+ // Select the right variant of the skeleton.
+ uint32_t bits = opcode_is_wide ? encoder->xskeleton : encoder->wskeleton;
+ DCHECK(!opcode_is_wide || IS_WIDE(encoder->opcode));
+
+ for (int i = 0; i < 4; i++) {
+ ArmEncodingKind kind = encoder->field_loc[i].kind;
+ uint32_t operand = lir->operands[i];
+ uint32_t value;
+
+ if (LIKELY(static_cast<unsigned>(kind) <= kFmtBitBlt)) {
+ // Note: this will handle kFmtReg* and kFmtBitBlt.
+
+ if (static_cast<unsigned>(kind) < kFmtBitBlt) {
+ bool is_zero = A64_REG_IS_ZR(operand);
+
+ if (kIsDebugBuild) {
+ // Register usage checks: First establish register usage requirements based on the
+ // format in `kind'.
+ bool want_float = false;
+ bool want_64_bit = false;
+ bool want_size_match = false;
+ bool want_zero = false;
+ switch (kind) {
+ case kFmtRegX:
+ want_64_bit = true;
+ // Intentional fall-through.
+ case kFmtRegW:
+ want_size_match = true;
+ // Intentional fall-through.
+ case kFmtRegR:
+ want_zero = true;
+ break;
+ case kFmtRegXOrSp:
+ want_64_bit = true;
+ // Intentional fall-through.
+ case kFmtRegWOrSp:
+ want_size_match = true;
+ break;
+ case kFmtRegROrSp:
+ break;
+ case kFmtRegD:
+ want_64_bit = true;
+ // Intentional fall-through.
+ case kFmtRegS:
+ want_size_match = true;
+ // Intentional fall-through.
+ case kFmtRegF:
+ want_float = true;
+ break;
+ default:
+ LOG(FATAL) << "Bad fmt for arg n. " << i << " of " << encoder->name
+ << " (" << kind << ")";
+ break;
}
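// (Summary of the requirements established by the switch above, per format
//  kind: kFmtRegX is core, 64-bit, size checked, reg 31 reads as xzr;
//  kFmtRegW is core, 32-bit, size checked, reg 31 reads as wzr;
//  kFmtRegR is core, either width, reg 31 reads as zr;
//  kFmtRegXOrSp/kFmtRegWOrSp are as X/W but reg 31 means sp;
//  kFmtRegROrSp accepts any core register; kFmtRegD/kFmtRegS are float
//  with size checked; kFmtRegF is float of either width.)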
- case kFmtSfp: {
- DCHECK(RegStorage::IsSingle(operand)) << ", Operand = 0x" << std::hex << operand;
- uint32_t reg_num = RegStorage::RegNum(operand);
- /* Snag the 1-bit slice and position it */
- value = (reg_num & 0x1) << encoder->field_loc[i].end;
- /* Extract and position the 4-bit slice */
- value |= ((reg_num & 0x1e) >> 1) << encoder->field_loc[i].start;
- bits |= value;
- break;
- }
- case kFmtImm12:
- case kFmtModImm:
- value = ((operand & 0x800) >> 11) << 26;
- value |= ((operand & 0x700) >> 8) << 12;
- value |= operand & 0x0ff;
- bits |= value;
- break;
- case kFmtImm16:
- value = ((operand & 0x0800) >> 11) << 26;
- value |= ((operand & 0xf000) >> 12) << 16;
- value |= ((operand & 0x0700) >> 8) << 12;
- value |= operand & 0x0ff;
- bits |= value;
- break;
- case kFmtOff24: {
- uint32_t signbit = (operand >> 31) & 0x1;
- uint32_t i1 = (operand >> 22) & 0x1;
- uint32_t i2 = (operand >> 21) & 0x1;
- uint32_t imm10 = (operand >> 11) & 0x03ff;
- uint32_t imm11 = operand & 0x07ff;
- uint32_t j1 = (i1 ^ signbit) ? 0 : 1;
- uint32_t j2 = (i2 ^ signbit) ? 0 : 1;
- value = (signbit << 26) | (j1 << 13) | (j2 << 11) | (imm10 << 16) |
- imm11;
- bits |= value;
+
+ // Now check that the requirements are satisfied.
+ RegStorage reg(operand);
+ const char *expected = nullptr;
+ if (want_float) {
+ if (!reg.IsFloat()) {
+ expected = "float register";
+ } else if (want_size_match && (reg.IsDouble() != want_64_bit)) {
+ expected = (want_64_bit) ? "double register" : "single register";
}
- break;
- default:
- LOG(FATAL) << "Bad fmt:" << encoder->field_loc[i].kind;
+ } else {
+ if (reg.IsFloat()) {
+ expected = "core register";
+ } else if (want_size_match && (reg.Is64Bit() != want_64_bit)) {
+ expected = (want_64_bit) ? "x-register" : "w-register";
+ } else if (reg.GetRegNum() == 31 && is_zero != want_zero) {
+ expected = (want_zero) ? "zero-register" : "sp-register";
+ }
+ }
+
+ // TODO(Arm64): if !want_size_match, then we still should compare the size of the
+ // register with the size required by the instruction width (kA64Wide).
+
+ // Fail if `expected' contains an unsatisfied requirement.
+ if (expected != nullptr) {
+ // TODO(Arm64): make this FATAL.
+ LOG(WARNING) << "Bad argument n. " << i << " of " << encoder->name
+ << ". Expected " << expected << ", got 0x" << std::hex << operand;
+ }
+ }
+
+ // TODO(Arm64): this may or may not be necessary, depending on how wzr, xzr are
+ // defined.
+ if (is_zero) {
+ operand = 31;
}
}
- }
- if (encoder->size == 4) {
- write_pos[0] = ((bits >> 16) & 0xff);
- write_pos[1] = ((bits >> 24) & 0xff);
- write_pos[2] = (bits & 0xff);
- write_pos[3] = ((bits >> 8) & 0xff);
- write_pos += 4;
+
+ value = (operand << encoder->field_loc[i].start) &
+ ((1 << (encoder->field_loc[i].end + 1)) - 1);
+ bits |= value;
} else {
- DCHECK_EQ(encoder->size, 2);
- write_pos[0] = (bits & 0xff);
- write_pos[1] = ((bits >> 8) & 0xff);
- write_pos += 2;
+ switch (kind) {
+ case kFmtSkip:
+ break; // Nothing to do, but continue to next.
+ case kFmtUnused:
+ i = 4; // Done, break out of the enclosing loop.
+ break;
+ case kFmtShift:
+ // Intentional fallthrough.
+ case kFmtExtend:
+ DCHECK_EQ((operand & (1 << 6)) == 0, kind == kFmtShift);
+ value = (operand & 0x3f) << 10;
+ value |= ((operand & 0x1c0) >> 6) << 21;
+ bits |= value;
+ break;
+ case kFmtImm21:
+ value = (operand & 0x3) << 29;
+ value |= ((operand & 0x1ffffc) >> 2) << 5;
+ bits |= value;
+ break;
+ default:
+ LOG(FATAL) << "Bad fmt for arg. " << i << " in " << encoder->name
+ << " (" << kind << ")";
+ }
}
}
+
+ DCHECK_EQ(encoder->size, 4);
+ write_pos[0] = (bits & 0xff);
+ write_pos[1] = ((bits >> 8) & 0xff);
+ write_pos[2] = ((bits >> 16) & 0xff);
+ write_pos[3] = ((bits >> 24) & 0xff);
+ write_pos += 4;
}
}
+
return write_pos;
}
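// Standalone sketch of the fast-path field insertion performed in
// EncodeLIRs above: for a field spanning bits [end..start], the operand is
// shifted into place and masked so it cannot spill past bit `end'.
#include <cstdint>

static uint32_t InsertBitBlt(uint32_t bits, uint32_t operand, int start, int end) {
  uint32_t value = (operand << start) & ((UINT32_C(1) << (end + 1)) - 1);
  return bits | value;
}

// Example: starting from the mov w-skeleton 0x2a0003e0 (orr wd, wzr, wm),
// inserting rd = 0 in bits [4..0] and rm = 1 in bits [20..16] yields
// 0x2a0103e0, i.e. "mov w0, w1".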
+// Align the data offset on an 8-byte boundary: it will only contain double-word items, as word
+// immediates are better set directly from the code (they will require no more than 2 instructions).
+#define ALIGNED_DATA_OFFSET(offset) (((offset) + 0x7) & ~0x7)
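// Worked values for the macro above (round up to the next multiple of 8):
//   ALIGNED_DATA_OFFSET(0x00) == 0x00
//   ALIGNED_DATA_OFFSET(0x01) == 0x08
//   ALIGNED_DATA_OFFSET(0x08) == 0x08
//   ALIGNED_DATA_OFFSET(0x1c) == 0x20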
+
// Assemble the LIR into binary instruction format.
void Arm64Mir2Lir::AssembleLIR() {
LIR* lir;
@@ -1213,20 +726,25 @@
cu_->NewTimingSplit("Assemble");
int assembler_retries = 0;
CodeOffset starting_offset = LinkFixupInsns(first_lir_insn_, last_lir_insn_, 0);
- data_offset_ = (starting_offset + 0x3) & ~0x3;
+ data_offset_ = ALIGNED_DATA_OFFSET(starting_offset);
int32_t offset_adjustment;
AssignDataOffsets();
/*
- * Note: generation must be 1 on first pass (to distinguish from initialized state of 0 for
- * non-visited nodes). Start at zero here, and bit will be flipped to 1 on entry to the loop.
+ * Note: generation must be 1 on first pass (to distinguish from initialized state of 0
+ * for non-visited nodes). Start at zero here, and bit will be flipped to 1 on entry to the loop.
*/
int generation = 0;
while (true) {
+ // TODO(Arm64): check whether passes and offset adjustments are really necessary.
+ // Currently they aren't, as LIRs are never inserted in the fixups below.
+ // Things can be different if jump ranges above 1 MB need to be supported.
+ // If they are not, then we can get rid of the assembler retry logic.
+
offset_adjustment = 0;
AssemblerStatus res = kSuccess; // Assume success
generation ^= 1;
- // Note: nodes requring possible fixup linked in ascending order.
+ // Note: nodes requiring possible fixup linked in ascending order.
lir = first_fixup_;
prev_lir = NULL;
while (lir != NULL) {
@@ -1243,341 +761,54 @@
switch (static_cast<FixupKind>(lir->flags.fixup)) {
case kFixupLabel:
case kFixupNone:
- break;
case kFixupVLoad:
- if (lir->operands[1] != rs_r15pc.GetReg()) {
- break;
- }
- // NOTE: intentional fallthrough.
- case kFixupLoad: {
- /*
- * PC-relative loads are mostly used to load immediates
- * that are too large to materialize directly in one shot.
- * However, if the load displacement exceeds the limit,
- * we revert to a multiple-instruction materialization sequence.
- */
- LIR *lir_target = lir->target;
- CodeOffset pc = (lir->offset + 4) & ~3;
- CodeOffset target = lir_target->offset +
- ((lir_target->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
- int32_t delta = target - pc;
- if (res != kSuccess) {
- /*
- * In this case, we're just estimating and will do it again for real. Ensure offset
- * is legal.
- */
- delta &= ~0x3;
- }
- DCHECK_EQ((delta & 0x3), 0);
- // First, a sanity check for cases we shouldn't see now
- if (kIsDebugBuild && (((lir->opcode == kThumbAddPcRel) && (delta > 1020)) ||
- ((lir->opcode == kThumbLdrPcRel) && (delta > 1020)))) {
- // Shouldn't happen in current codegen.
- LOG(FATAL) << "Unexpected pc-rel offset " << delta;
- }
- // Now, check for the difficult cases
- if (((lir->opcode == kThumb2LdrPcRel12) && (delta > 4091)) ||
- ((lir->opcode == kThumb2LdrdPcRel8) && (delta > 1020)) ||
- ((lir->opcode == kThumb2Vldrs) && (delta > 1020)) ||
- ((lir->opcode == kThumb2Vldrd) && (delta > 1020))) {
- /*
- * Note: The reason vldrs/vldrd include rARM_LR in their use/def masks is that we
- * sometimes have to use it to fix up out-of-range accesses. This is where that
- * happens.
- */
- int base_reg = ((lir->opcode == kThumb2LdrdPcRel8) ||
- (lir->opcode == kThumb2LdrPcRel12)) ? lir->operands[0] :
- rs_rARM_LR.GetReg();
-
- // Add new Adr to generate the address.
- LIR* new_adr = RawLIR(lir->dalvik_offset, kThumb2Adr,
- base_reg, 0, 0, 0, 0, lir->target);
- new_adr->offset = lir->offset;
- new_adr->flags.fixup = kFixupAdr;
- new_adr->flags.size = EncodingMap[kThumb2Adr].size;
- InsertLIRBefore(lir, new_adr);
- lir->offset += new_adr->flags.size;
- offset_adjustment += new_adr->flags.size;
-
- // lir no longer pcrel, unlink and link in new_adr.
- ReplaceFixup(prev_lir, lir, new_adr);
-
- // Convert to normal load.
- offset_adjustment -= lir->flags.size;
- if (lir->opcode == kThumb2LdrPcRel12) {
- lir->opcode = kThumb2LdrRRI12;
- } else if (lir->opcode == kThumb2LdrdPcRel8) {
- lir->opcode = kThumb2LdrdI8;
- }
- lir->flags.size = EncodingMap[lir->opcode].size;
- offset_adjustment += lir->flags.size;
- // Change the load to be relative to the new Adr base.
- if (lir->opcode == kThumb2LdrdI8) {
- lir->operands[3] = 0;
- lir->operands[2] = base_reg;
- } else {
- lir->operands[2] = 0;
- lir->operands[1] = base_reg;
- }
- prev_lir = new_adr; // Continue scan with new_adr;
- lir = new_adr->u.a.pcrel_next;
- res = kRetryAll;
- continue;
- } else {
- if ((lir->opcode == kThumb2Vldrs) ||
- (lir->opcode == kThumb2Vldrd) ||
- (lir->opcode == kThumb2LdrdPcRel8)) {
- lir->operands[2] = delta >> 2;
- } else {
- lir->operands[1] = (lir->opcode == kThumb2LdrPcRel12) ? delta :
- delta >> 2;
- }
- }
break;
- }
- case kFixupCBxZ: {
- LIR *target_lir = lir->target;
- CodeOffset pc = lir->offset + 4;
- CodeOffset target = target_lir->offset +
- ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
- int32_t delta = target - pc;
- if (delta > 126 || delta < 0) {
- /*
- * Convert to cmp rx,#0 / b[eq/ne] tgt pair
- * Make new branch instruction and insert after
- */
- LIR* new_inst =
- RawLIR(lir->dalvik_offset, kThumbBCond, 0,
- (lir->opcode == kThumb2Cbz) ? kArmCondEq : kArmCondNe,
- 0, 0, 0, lir->target);
- InsertLIRAfter(lir, new_inst);
-
- /* Convert the cb[n]z to a cmp rx, #0 ] */
- // Subtract the old size.
- offset_adjustment -= lir->flags.size;
- lir->opcode = kThumbCmpRI8;
- /* operand[0] is src1 in both cb[n]z & CmpRI8 */
- lir->operands[1] = 0;
- lir->target = 0;
- lir->flags.size = EncodingMap[lir->opcode].size;
- // Add back the new size.
- offset_adjustment += lir->flags.size;
- // Set up the new following inst.
- new_inst->offset = lir->offset + lir->flags.size;
- new_inst->flags.fixup = kFixupCondBranch;
- new_inst->flags.size = EncodingMap[new_inst->opcode].size;
- offset_adjustment += new_inst->flags.size;
-
- // lir no longer pcrel, unlink and link in new_inst.
- ReplaceFixup(prev_lir, lir, new_inst);
- prev_lir = new_inst; // Continue with the new instruction.
- lir = new_inst->u.a.pcrel_next;
- res = kRetryAll;
- continue;
- } else {
- lir->operands[1] = delta >> 1;
- }
- break;
- }
- case kFixupPushPop: {
- if (__builtin_popcount(lir->operands[0]) == 1) {
- /*
- * The standard push/pop multiple instruction
- * requires at least two registers in the list.
- * If we've got just one, switch to the single-reg
- * encoding.
- */
- lir->opcode = (lir->opcode == kThumb2Push) ? kThumb2Push1 :
- kThumb2Pop1;
- int reg = 0;
- while (lir->operands[0]) {
- if (lir->operands[0] & 0x1) {
- break;
- } else {
- reg++;
- lir->operands[0] >>= 1;
- }
- }
- lir->operands[0] = reg;
- // This won't change again, don't bother unlinking, just reset fixup kind
- lir->flags.fixup = kFixupNone;
- }
- break;
- }
- case kFixupCondBranch: {
- LIR *target_lir = lir->target;
- int32_t delta = 0;
- DCHECK(target_lir);
- CodeOffset pc = lir->offset + 4;
- CodeOffset target = target_lir->offset +
- ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
- delta = target - pc;
- if ((lir->opcode == kThumbBCond) && (delta > 254 || delta < -256)) {
- offset_adjustment -= lir->flags.size;
- lir->opcode = kThumb2BCond;
- lir->flags.size = EncodingMap[lir->opcode].size;
- // Fixup kind remains the same.
- offset_adjustment += lir->flags.size;
- res = kRetryAll;
- }
- lir->operands[0] = delta >> 1;
- break;
- }
- case kFixupT2Branch: {
- LIR *target_lir = lir->target;
- CodeOffset pc = lir->offset + 4;
- CodeOffset target = target_lir->offset +
- ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
- int32_t delta = target - pc;
- lir->operands[0] = delta >> 1;
- if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && lir->operands[0] == 0) {
- // Useless branch
- offset_adjustment -= lir->flags.size;
- lir->flags.is_nop = true;
- // Don't unlink - just set to do-nothing.
- lir->flags.fixup = kFixupNone;
- res = kRetryAll;
- }
- break;
- }
case kFixupT1Branch: {
LIR *target_lir = lir->target;
- CodeOffset pc = lir->offset + 4;
+ DCHECK(target_lir);
+ CodeOffset pc = lir->offset;
CodeOffset target = target_lir->offset +
((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
int32_t delta = target - pc;
- if (delta > 2046 || delta < -2048) {
- // Convert to Thumb2BCond w/ kArmCondAl
- offset_adjustment -= lir->flags.size;
- lir->opcode = kThumb2BUncond;
- lir->operands[0] = 0;
- lir->flags.size = EncodingMap[lir->opcode].size;
- lir->flags.fixup = kFixupT2Branch;
- offset_adjustment += lir->flags.size;
- res = kRetryAll;
- } else {
- lir->operands[0] = delta >> 1;
- if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && lir->operands[0] == -1) {
- // Useless branch
- offset_adjustment -= lir->flags.size;
- lir->flags.is_nop = true;
- // Don't unlink - just set to do-nothing.
- lir->flags.fixup = kFixupNone;
- res = kRetryAll;
- }
+ if (!((delta & 0x3) == 0 && IS_SIGNED_IMM19(delta >> 2))) {
+ LOG(FATAL) << "Invalid jump range in kFixupT1Branch";
}
+ lir->operands[0] = delta >> 2;
break;
}
- case kFixupBlx1: {
- DCHECK(NEXT_LIR(lir)->opcode == kThumbBlx2);
- /* cur_pc is Thumb */
- CodeOffset cur_pc = (lir->offset + 4) & ~3;
- CodeOffset target = lir->operands[1];
-
- /* Match bit[1] in target with base */
- if (cur_pc & 0x2) {
- target |= 0x2;
+ case kFixupLoad:
+ case kFixupCBxZ:
+ case kFixupCondBranch: {
+ LIR *target_lir = lir->target;
+ DCHECK(target_lir);
+ CodeOffset pc = lir->offset;
+ CodeOffset target = target_lir->offset +
+ ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
+ int32_t delta = target - pc;
+ if (!((delta & 0x3) == 0 && IS_SIGNED_IMM19(delta >> 2))) {
+ LOG(FATAL) << "Invalid jump range in kFixupLoad";
}
- int32_t delta = target - cur_pc;
- DCHECK((delta >= -(1<<22)) && (delta <= ((1<<22)-2)));
-
- lir->operands[0] = (delta >> 12) & 0x7ff;
- NEXT_LIR(lir)->operands[0] = (delta>> 1) & 0x7ff;
- break;
- }
- case kFixupBl1: {
- DCHECK(NEXT_LIR(lir)->opcode == kThumbBl2);
- /* Both cur_pc and target are Thumb */
- CodeOffset cur_pc = lir->offset + 4;
- CodeOffset target = lir->operands[1];
-
- int32_t delta = target - cur_pc;
- DCHECK((delta >= -(1<<22)) && (delta <= ((1<<22)-2)));
-
- lir->operands[0] = (delta >> 12) & 0x7ff;
- NEXT_LIR(lir)->operands[0] = (delta>> 1) & 0x7ff;
+ lir->operands[1] = delta >> 2;
break;
}
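// The IS_SIGNED_IMM19/IS_SIGNED_IMM21 helpers are not shown in this hunk;
// a generic range check of this shape would match their use above (the
// exact ART definition may differ):
#include <cstdint>

constexpr bool IsSignedImm(int64_t value, int bits) {
  return -(INT64_C(1) << (bits - 1)) <= value &&
         value < (INT64_C(1) << (bits - 1));
}

// A 19-bit field holds a word (4-byte) offset, hence the "delta >> 2":
// the reach is +/-2^18 words, i.e. +/-1 MB, consistent with the 1 MB
// limit mentioned in the kFixupAdr case below.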
case kFixupAdr: {
- EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[2]));
- LIR* target = lir->target;
- int32_t target_disp = (tab_rec != NULL) ? tab_rec->offset + offset_adjustment
- : target->offset + ((target->flags.generation == lir->flags.generation) ? 0 :
- offset_adjustment);
- int32_t disp = target_disp - ((lir->offset + 4) & ~3);
- if (disp < 4096) {
- lir->operands[1] = disp;
+ LIR* target_lir = lir->target;
+ int32_t delta;
+ if (target_lir) {
+ CodeOffset target_offs = ((target_lir->flags.generation == lir->flags.generation) ?
+ 0 : offset_adjustment) + target_lir->offset;
+ delta = target_offs - lir->offset;
+ } else if (lir->operands[2] >= 0) {
+ EmbeddedData* tab = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[2]));
+ delta = tab->offset + offset_adjustment - lir->offset;
} else {
- // convert to ldimm16l, ldimm16h, add tgt, pc, operands[0]
- // TUNING: if this case fires often, it can be improved. Not expected to be common.
- LIR *new_mov16L =
- RawLIR(lir->dalvik_offset, kThumb2MovImm16LST, lir->operands[0], 0,
- WrapPointer(lir), WrapPointer(tab_rec), 0, lir->target);
- new_mov16L->flags.size = EncodingMap[new_mov16L->opcode].size;
- new_mov16L->flags.fixup = kFixupMovImmLST;
- new_mov16L->offset = lir->offset;
- // Link the new instruction, retaining lir.
- InsertLIRBefore(lir, new_mov16L);
- lir->offset += new_mov16L->flags.size;
- offset_adjustment += new_mov16L->flags.size;
- InsertFixupBefore(prev_lir, lir, new_mov16L);
- prev_lir = new_mov16L; // Now we've got a new prev.
- LIR *new_mov16H =
- RawLIR(lir->dalvik_offset, kThumb2MovImm16HST, lir->operands[0], 0,
- WrapPointer(lir), WrapPointer(tab_rec), 0, lir->target);
- new_mov16H->flags.size = EncodingMap[new_mov16H->opcode].size;
- new_mov16H->flags.fixup = kFixupMovImmHST;
- new_mov16H->offset = lir->offset;
- // Link the new instruction, retaining lir.
- InsertLIRBefore(lir, new_mov16H);
- lir->offset += new_mov16H->flags.size;
- offset_adjustment += new_mov16H->flags.size;
- InsertFixupBefore(prev_lir, lir, new_mov16H);
- prev_lir = new_mov16H; // Now we've got a new prev.
-
- offset_adjustment -= lir->flags.size;
- if (RegStorage::RegNum(lir->operands[0]) < 8) {
- lir->opcode = kThumbAddRRLH;
- } else {
- lir->opcode = kThumbAddRRHH;
- }
- lir->operands[1] = rs_rARM_PC.GetReg();
- lir->flags.size = EncodingMap[lir->opcode].size;
- offset_adjustment += lir->flags.size;
- // Must stay in fixup list and have offset updated; will be used by LST/HSP pair.
- lir->flags.fixup = kFixupNone;
- res = kRetryAll;
+ // No fixup: this usage allows retrieving the current PC.
+ delta = lir->operands[1];
}
- break;
- }
- case kFixupMovImmLST: {
- // operands[1] should hold disp, [2] has add, [3] has tab_rec
- LIR *addPCInst = reinterpret_cast<LIR*>(UnwrapPointer(lir->operands[2]));
- EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[3]));
- // If tab_rec is null, this is a literal load. Use target
- LIR* target = lir->target;
- int32_t target_disp = tab_rec ? tab_rec->offset : target->offset;
- lir->operands[1] = (target_disp - (addPCInst->offset + 4)) & 0xffff;
- break;
- }
- case kFixupMovImmHST: {
- // operands[1] should hold disp, [2] has add, [3] has tab_rec
- LIR *addPCInst = reinterpret_cast<LIR*>(UnwrapPointer(lir->operands[2]));
- EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[3]));
- // If tab_rec is null, this is a literal load. Use target
- LIR* target = lir->target;
- int32_t target_disp = tab_rec ? tab_rec->offset : target->offset;
- lir->operands[1] =
- ((target_disp - (addPCInst->offset + 4)) >> 16) & 0xffff;
- break;
- }
- case kFixupAlign4: {
- int32_t required_size = lir->offset & 0x2;
- if (lir->flags.size != required_size) {
- offset_adjustment += required_size - lir->flags.size;
- lir->flags.size = required_size;
- res = kRetryAll;
+ if (!IS_SIGNED_IMM21(delta)) {
+ LOG(FATAL) << "Jump range above 1MB in kFixupAdr";
}
+ lir->operands[1] = delta;
break;
}
default:
@@ -1596,7 +827,7 @@
LOG(FATAL) << "Assembler error - too many retries";
}
starting_offset += offset_adjustment;
- data_offset_ = (starting_offset + 0x3) & ~0x3;
+ data_offset_ = ALIGNED_DATA_OFFSET(starting_offset);
AssignDataOffsets();
}
}
@@ -1609,7 +840,7 @@
write_pos = EncodeLIRs(write_pos, first_lir_insn_);
DCHECK_EQ(static_cast<CodeOffset>(write_pos - &code_buffer_[0]), starting_offset);
- DCHECK_EQ(data_offset_, (code_buffer_.size() + 0x3) & ~0x3);
+ DCHECK_EQ(data_offset_, ALIGNED_DATA_OFFSET(code_buffer_.size()));
// Install literals
InstallLiteralPools();
@@ -1629,8 +860,9 @@
}
int Arm64Mir2Lir::GetInsnSize(LIR* lir) {
- DCHECK(!IsPseudoLirOp(lir->opcode));
- return EncodingMap[lir->opcode].size;
+ ArmOpcode opcode = UNWIDE(lir->opcode);
+ DCHECK(!IsPseudoLirOp(opcode));
+ return EncodingMap[opcode].size;
}
// Encode instruction bit pattern and assign offsets.
@@ -1639,15 +871,14 @@
LIR* last_fixup = NULL;
for (LIR* lir = head_lir; lir != end_lir; lir = NEXT_LIR(lir)) {
+ ArmOpcode opcode = UNWIDE(lir->opcode);
if (!lir->flags.is_nop) {
if (lir->flags.fixup != kFixupNone) {
- if (!IsPseudoLirOp(lir->opcode)) {
- lir->flags.size = EncodingMap[lir->opcode].size;
- lir->flags.fixup = EncodingMap[lir->opcode].fixup;
- } else if (UNLIKELY(lir->opcode == kPseudoPseudoAlign4)) {
- lir->flags.size = (offset & 0x2);
- lir->flags.fixup = kFixupAlign4;
+ if (!IsPseudoLirOp(opcode)) {
+ lir->flags.size = EncodingMap[opcode].size;
+ lir->flags.fixup = EncodingMap[opcode].fixup;
} else {
+ DCHECK_NE(static_cast<int>(opcode), kPseudoPseudoAlign4);
lir->flags.size = 0;
lir->flags.fixup = kFixupLabel;
}
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
index 9dfee6e..c210816 100644
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -23,27 +23,32 @@
namespace art {
+bool Arm64Mir2Lir::GenSpecialCase(BasicBlock* bb, MIR* mir,
+ const InlineMethod& special) {
+ return Mir2Lir::GenSpecialCase(bb, mir, special);
+}
+
/*
* The sparse table in the literal pool is an array of <key,displacement>
- * pairs. For each set, we'll load them as a pair using ldmia.
- * This means that the register number of the temp we use for the key
- * must be lower than the reg for the displacement.
- *
+ * pairs. For each entry, we'll load key and displacement as a pair using ldp.
* The test loop will look something like:
*
* adr r_base, <table>
- * ldr r_val, [rARM_SP, v_reg_off]
+ * ldr r_val, [rA64_SP, v_reg_off]
* mov r_idx, #table_size
- * lp:
- * ldmia r_base!, {r_key, r_disp}
+ * loop:
+ * cbz r_idx, quit
+ * ldp r_key, r_disp, [r_base], #8
* sub r_idx, #1
* cmp r_val, r_key
- * ifeq
- * add rARM_PC, r_disp ; This is the branch from which we compute displacement
- * cbnz r_idx, lp
+ * b.ne loop
+ * adr r_base, #0 ; This is the instruction from which we compute displacements
+ * add r_base, r_disp
+ * br r_base
+ * quit:
*/
void Arm64Mir2Lir::GenSparseSwitch(MIR* mir, uint32_t table_offset,
- RegLocation rl_src) {
+ RegLocation rl_src) {
const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset;
if (cu_->verbose) {
DumpSparseSwitchTable(table);
@@ -60,34 +65,39 @@
// Get the switch value
rl_src = LoadValue(rl_src, kCoreReg);
RegStorage r_base = AllocTemp();
- /* Allocate key and disp temps */
+ // Allocate key and disp temps.
RegStorage r_key = AllocTemp();
RegStorage r_disp = AllocTemp();
- // Make sure r_key's register number is less than r_disp's number for ldmia
- if (r_key.GetReg() > r_disp.GetReg()) {
- RegStorage tmp = r_disp;
- r_disp = r_key;
- r_key = tmp;
- }
// Materialize a pointer to the switch table
- NewLIR3(kThumb2Adr, r_base.GetReg(), 0, WrapPointer(tab_rec));
+ NewLIR3(kA64Adr2xd, r_base.GetReg(), 0, WrapPointer(tab_rec));
// Set up r_idx
RegStorage r_idx = AllocTemp();
LoadConstant(r_idx, size);
- // Establish loop branch target
- LIR* target = NewLIR0(kPseudoTargetLabel);
- // Load next key/disp
- NewLIR2(kThumb2LdmiaWB, r_base.GetReg(), (1 << r_key.GetRegNum()) | (1 << r_disp.GetRegNum()));
+
+ // Entry of loop.
+ LIR* loop_entry = NewLIR0(kPseudoTargetLabel);
+ LIR* branch_out = NewLIR2(kA64Cbz2rt, r_idx.GetReg(), 0);
+
+ // Load next key/disp.
+ NewLIR4(kA64LdpPost4rrXD, r_key.GetReg(), r_disp.GetReg(), r_base.GetReg(), 2);
+ OpRegRegImm(kOpSub, r_idx, r_idx, 1);
+
+ // Go to next case, if key does not match.
OpRegReg(kOpCmp, r_key, rl_src.reg);
- // Go if match. NOTE: No instruction set switch here - must stay Thumb2
- LIR* it = OpIT(kCondEq, "");
- LIR* switch_branch = NewLIR1(kThumb2AddPCR, r_disp.GetReg());
- OpEndIT(it);
- tab_rec->anchor = switch_branch;
- // Needs to use setflags encoding here
- OpRegRegImm(kOpSub, r_idx, r_idx, 1); // For value == 1, this should set flags.
- DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
- OpCondBranch(kCondNe, target);
+ OpCondBranch(kCondNe, loop_entry);
+
+ // Key does match: branch to case label.
+ LIR* switch_label = NewLIR3(kA64Adr2xd, r_base.GetReg(), 0, -1);
+ tab_rec->anchor = switch_label;
+
+ // Add displacement to base branch address and go!
+ OpRegRegRegShift(kOpAdd, r_base.GetReg(), r_base.GetReg(), r_disp.GetReg(),
+ ENCODE_NO_SHIFT, true);
+ NewLIR1(kA64Br1x, r_base.GetReg());
+
+ // Loop exit label.
+ LIR* loop_exit = NewLIR0(kPseudoTargetLabel);
+ branch_out->target = loop_exit;
}
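
The table r_base walks is a literal-pool array of <key, displacement> pairs, which is
why the post-indexed ldp advances by 8 bytes per iteration. A rough sketch of the entry
layout the loop assumes (struct name hypothetical):

  struct SparseSwitchEntry {
    int32_t key;   // case value compared against r_val
    int32_t disp;  // byte displacement from the anchor adr
  };               // sizeof == 8, the ldp post-increment
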
@@ -111,29 +121,35 @@
rl_src = LoadValue(rl_src, kCoreReg);
RegStorage table_base = AllocTemp();
// Materialize a pointer to the switch table
- NewLIR3(kThumb2Adr, table_base.GetReg(), 0, WrapPointer(tab_rec));
+ NewLIR3(kA64Adr2xd, table_base.GetReg(), 0, WrapPointer(tab_rec));
int low_key = s4FromSwitchData(&table[2]);
- RegStorage keyReg;
+ RegStorage key_reg;
// Remove the bias, if necessary
if (low_key == 0) {
- keyReg = rl_src.reg;
+ key_reg = rl_src.reg;
} else {
- keyReg = AllocTemp();
- OpRegRegImm(kOpSub, keyReg, rl_src.reg, low_key);
+ key_reg = AllocTemp();
+ OpRegRegImm(kOpSub, key_reg, rl_src.reg, low_key);
}
// Bounds check - if < 0 or >= size continue following switch
- OpRegImm(kOpCmp, keyReg, size-1);
+ OpRegImm(kOpCmp, key_reg, size - 1);
LIR* branch_over = OpCondBranch(kCondHi, NULL);
// Load the displacement from the switch table
RegStorage disp_reg = AllocTemp();
- LoadBaseIndexed(table_base, keyReg, disp_reg, 2, k32);
+ LoadBaseIndexed(table_base, key_reg, disp_reg, 2, k32);
- // ..and go! NOTE: No instruction set switch here - must stay Thumb2
- LIR* switch_branch = NewLIR1(kThumb2AddPCR, disp_reg.GetReg());
- tab_rec->anchor = switch_branch;
+ // Get base branch address.
+ RegStorage branch_reg = AllocTemp();
+ LIR* switch_label = NewLIR3(kA64Adr2xd, branch_reg.GetReg(), 0, -1);
+ tab_rec->anchor = switch_label;
- /* branch_over target here */
+ // Add displacement to base branch address and go!
+ OpRegRegRegShift(kOpAdd, branch_reg.GetReg(), branch_reg.GetReg(), disp_reg.GetReg(),
+ ENCODE_NO_SHIFT, true);
+ NewLIR1(kA64Br1x, branch_reg.GetReg());
+
+ // branch_over target here
LIR* target = NewLIR0(kPseudoTargetLabel);
branch_over->target = target;
}
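
The single kCondHi (unsigned higher) branch implements both bounds checks at once: a key
below low_key wraps around to a large unsigned value after the subtraction. In C terms:

  // Equivalent bounds check:
  //   if (static_cast<uint32_t>(key - low_key) > static_cast<uint32_t>(size - 1))
  //     goto branch_over;  // covers both key < low_key and index >= size
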
@@ -163,13 +179,13 @@
// Making a call - use explicit registers
FlushAllRegs(); /* Everything to home location */
- LoadValueDirectFixed(rl_src, rs_r0);
- LoadWordDisp(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pHandleFillArrayData).Int32Value(),
- rs_rARM_LR);
+ LoadValueDirectFixed(rl_src, rs_x0);
+ LoadWordDisp(rs_rA64_SELF, A64_QUICK_ENTRYPOINT_INT_OFFS(pHandleFillArrayData),
+ rs_rA64_LR);
// Materialize a pointer to the fill data image
- NewLIR3(kThumb2Adr, rs_r1.GetReg(), 0, WrapPointer(tab_rec));
+ NewLIR3(kA64Adr2xd, rx1, 0, WrapPointer(tab_rec));
ClobberCallerSave();
- LIR* call_inst = OpReg(kOpBlx, rs_rARM_LR);
+ LIR* call_inst = OpReg(kOpBlx, rs_rA64_LR);
MarkSafepointPC(call_inst);
}
@@ -180,7 +196,7 @@
void Arm64Mir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) {
FlushAllRegs();
// FIXME: need separate LoadValues for object references.
- LoadValueDirectFixed(rl_src, rs_r0); // Get obj
+ LoadValueDirectFixed(rl_src, rs_x0); // Get obj
LockCallTemps(); // Prepare for explicit register usage
constexpr bool kArchVariantHasGoodBranchPredictor = false; // TODO: true if cortex-A15.
if (kArchVariantHasGoodBranchPredictor) {
@@ -190,17 +206,15 @@
} else {
// If the null-check fails, it's handled by the slow path to reduce exception-related meta-data.
if (Runtime::Current()->ExplicitNullChecks()) {
- null_check_branch = OpCmpImmBranch(kCondEq, rs_r0, 0, NULL);
+ null_check_branch = OpCmpImmBranch(kCondEq, rs_x0, 0, NULL);
}
}
- Load32Disp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
- NewLIR3(kThumb2Ldrex, rs_r1.GetReg(), rs_r0.GetReg(),
- mirror::Object::MonitorOffset().Int32Value() >> 2);
+ Load32Disp(rs_rA64_SELF, A64_THREAD_THIN_LOCK_ID_OFFSET, rs_x2);
+ NewLIR3(kA64Ldxr2rX, rx1, rx0, mirror::Object::MonitorOffset().Int32Value() >> 2);
MarkPossibleNullPointerException(opt_flags);
- LIR* not_unlocked_branch = OpCmpImmBranch(kCondNe, rs_r1, 0, NULL);
- NewLIR4(kThumb2Strex, rs_r1.GetReg(), rs_r2.GetReg(), rs_r0.GetReg(),
- mirror::Object::MonitorOffset().Int32Value() >> 2);
- LIR* lock_success_branch = OpCmpImmBranch(kCondEq, rs_r1, 0, NULL);
+ LIR* not_unlocked_branch = OpCmpImmBranch(kCondNe, rs_x1, 0, NULL);
+ NewLIR4(kA64Stxr3wrX, rx1, rx2, rx0, mirror::Object::MonitorOffset().Int32Value() >> 2);
+ LIR* lock_success_branch = OpCmpImmBranch(kCondEq, rs_x1, 0, NULL);
LIR* slow_path_target = NewLIR0(kPseudoTargetLabel);
@@ -210,9 +224,9 @@
}
// TODO: move to a slow path.
// Go expensive route - artLockObjectFromCode(obj);
- LoadWordDisp(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pLockObject).Int32Value(), rs_rARM_LR);
+ LoadWordDisp(rs_rA64_SELF, A64_QUICK_ENTRYPOINT_INT_OFFS(pLockObject), rs_rA64_LR);
ClobberCallerSave();
- LIR* call_inst = OpReg(kOpBlx, rs_rARM_LR);
+ LIR* call_inst = OpReg(kOpBlx, rs_rA64_LR);
MarkSafepointPC(call_inst);
LIR* success_target = NewLIR0(kPseudoTargetLabel);
@@ -220,24 +234,19 @@
GenMemBarrier(kLoadLoad);
} else {
// Explicit null-check as slow-path is entered using an IT.
- GenNullCheck(rs_r0, opt_flags);
- Load32Disp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
- NewLIR3(kThumb2Ldrex, rs_r1.GetReg(), rs_r0.GetReg(),
- mirror::Object::MonitorOffset().Int32Value() >> 2);
+ GenNullCheck(rs_x0, opt_flags);
+ Load32Disp(rs_rA64_SELF, A64_THREAD_THIN_LOCK_ID_OFFSET, rs_x2);
MarkPossibleNullPointerException(opt_flags);
- OpRegImm(kOpCmp, rs_r1, 0);
- LIR* it = OpIT(kCondEq, "");
- NewLIR4(kThumb2Strex/*eq*/, rs_r1.GetReg(), rs_r2.GetReg(), rs_r0.GetReg(),
- mirror::Object::MonitorOffset().Int32Value() >> 2);
- OpEndIT(it);
- OpRegImm(kOpCmp, rs_r1, 0);
- it = OpIT(kCondNe, "T");
+ NewLIR3(kA64Ldxr2rX, rx1, rx0, mirror::Object::MonitorOffset().Int32Value() >> 2);
+ OpRegImm(kOpCmp, rs_x1, 0);
+ OpIT(kCondEq, "");
+ NewLIR4(kA64Stxr3wrX/*eq*/, rx1, rx2, rx0, mirror::Object::MonitorOffset().Int32Value() >> 2);
+ OpRegImm(kOpCmp, rs_x1, 0);
+ OpIT(kCondNe, "T");
// Go expensive route - artLockObjectFromCode(self, obj);
- LoadWordDisp/*ne*/(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pLockObject).Int32Value(),
- rs_rARM_LR);
+ LoadWordDisp/*ne*/(rs_rA64_SELF, A64_QUICK_ENTRYPOINT_INT_OFFS(pLockObject), rs_rA64_LR);
ClobberCallerSave();
- LIR* call_inst = OpReg(kOpBlx/*ne*/, rs_rARM_LR);
- OpEndIT(it);
+ LIR* call_inst = OpReg(kOpBlx/*ne*/, rs_rA64_LR);
MarkSafepointPC(call_inst);
GenMemBarrier(kLoadLoad);
}
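
Restated in C-like pseudocode, the thin-lock fast path generated above is roughly this
(a sketch; names simplified):

  // w1 = ldxr(&obj->monitor);        // exclusive load of the lock word
  // if (w1 != 0) goto slow_path;     // already thin-locked or inflated
  // if (stxr(&obj->monitor, thin_lock_id) != 0)
  //   goto slow_path;                // lost the exclusive reservation
  // load-load barrier; lock acquired.
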
@@ -250,10 +259,10 @@
*/
void Arm64Mir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) {
FlushAllRegs();
- LoadValueDirectFixed(rl_src, rs_r0); // Get obj
+ LoadValueDirectFixed(rl_src, rs_x0); // Get obj
LockCallTemps(); // Prepare for explicit register usage
LIR* null_check_branch = nullptr;
- Load32Disp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
+ Load32Disp(rs_rA64_SELF, A64_THREAD_THIN_LOCK_ID_OFFSET, rs_x2);
constexpr bool kArchVariantHasGoodBranchPredictor = false; // TODO: true if cortex-A15.
if (kArchVariantHasGoodBranchPredictor) {
if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
@@ -261,14 +270,14 @@
} else {
// If the null-check fails, it's handled by the slow path to reduce exception-related meta-data.
if (Runtime::Current()->ExplicitNullChecks()) {
- null_check_branch = OpCmpImmBranch(kCondEq, rs_r0, 0, NULL);
+ null_check_branch = OpCmpImmBranch(kCondEq, rs_x0, 0, NULL);
}
}
- Load32Disp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r1);
+ Load32Disp(rs_x0, mirror::Object::MonitorOffset().Int32Value(), rs_x1);
MarkPossibleNullPointerException(opt_flags);
- LoadConstantNoClobber(rs_r3, 0);
- LIR* slow_unlock_branch = OpCmpBranch(kCondNe, rs_r1, rs_r2, NULL);
- Store32Disp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r3);
+ LoadConstantNoClobber(rs_x3, 0);
+ LIR* slow_unlock_branch = OpCmpBranch(kCondNe, rs_x1, rs_x2, NULL);
+ Store32Disp(rs_x0, mirror::Object::MonitorOffset().Int32Value(), rs_x3);
LIR* unlock_success_branch = OpUnconditionalBranch(NULL);
LIR* slow_path_target = NewLIR0(kPseudoTargetLabel);
@@ -278,9 +287,9 @@
}
// TODO: move to a slow path.
// Go expensive route - artUnlockObjectFromCode(obj);
- LoadWordDisp(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pUnlockObject).Int32Value(), rs_rARM_LR);
+ LoadWordDisp(rs_rA64_SELF, A64_QUICK_ENTRYPOINT_INT_OFFS(pUnlockObject), rs_rA64_LR);
ClobberCallerSave();
- LIR* call_inst = OpReg(kOpBlx, rs_rARM_LR);
+ LIR* call_inst = OpReg(kOpBlx, rs_rA64_LR);
MarkSafepointPC(call_inst);
LIR* success_target = NewLIR0(kPseudoTargetLabel);
@@ -288,33 +297,31 @@
GenMemBarrier(kStoreLoad);
} else {
// Explicit null-check as slow-path is entered using an IT.
- GenNullCheck(rs_r0, opt_flags);
- Load32Disp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r1); // Get lock
+ GenNullCheck(rs_x0, opt_flags);
+ Load32Disp(rs_x0, mirror::Object::MonitorOffset().Int32Value(), rs_x1); // Get lock
MarkPossibleNullPointerException(opt_flags);
- Load32Disp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
- LoadConstantNoClobber(rs_r3, 0);
+ Load32Disp(rs_rA64_SELF, A64_THREAD_THIN_LOCK_ID_OFFSET, rs_x2);
+ LoadConstantNoClobber(rs_x3, 0);
// Is lock unheld on lock or held by us (==thread_id) on unlock?
- OpRegReg(kOpCmp, rs_r1, rs_r2);
- LIR* it = OpIT(kCondEq, "EE");
- Store32Disp/*eq*/(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r3);
+ OpRegReg(kOpCmp, rs_x1, rs_x2);
+ OpIT(kCondEq, "EE");
+ Store32Disp/*eq*/(rs_x0, mirror::Object::MonitorOffset().Int32Value(), rs_x3);
// Go expensive route - UnlockObjectFromCode(obj);
- LoadWordDisp/*ne*/(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pUnlockObject).Int32Value(),
- rs_rARM_LR);
+ LoadWordDisp/*ne*/(rs_rA64_SELF, A64_QUICK_ENTRYPOINT_INT_OFFS(pUnlockObject), rs_rA64_LR);
ClobberCallerSave();
- LIR* call_inst = OpReg(kOpBlx/*ne*/, rs_rARM_LR);
- OpEndIT(it);
+ LIR* call_inst = OpReg(kOpBlx/*ne*/, rs_rA64_LR);
MarkSafepointPC(call_inst);
GenMemBarrier(kStoreLoad);
}
}
void Arm64Mir2Lir::GenMoveException(RegLocation rl_dest) {
- int ex_offset = Thread::ExceptionOffset<4>().Int32Value();
+ int ex_offset = A64_THREAD_EXCEPTION_INT_OFFS;
RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
RegStorage reset_reg = AllocTemp();
- Load32Disp(rs_rARM_SELF, ex_offset, rl_result.reg);
+ Load32Disp(rs_rA64_SELF, ex_offset, rl_result.reg);
LoadConstant(reset_reg, 0);
- Store32Disp(rs_rARM_SELF, ex_offset, reset_reg);
+ Store32Disp(rs_rA64_SELF, ex_offset, reset_reg);
FreeTemp(reset_reg);
StoreValue(rl_dest, rl_result);
}
@@ -326,7 +333,7 @@
RegStorage reg_card_base = AllocTemp();
RegStorage reg_card_no = AllocTemp();
LIR* branch_over = OpCmpImmBranch(kCondEq, val_reg, 0, NULL);
- LoadWordDisp(rs_rARM_SELF, Thread::CardTableOffset<4>().Int32Value(), reg_card_base);
+ LoadWordDisp(rs_rA64_SELF, A64_THREAD_CARD_TABLE_INT_OFFS, reg_card_base);
OpRegRegImm(kOpLsr, reg_card_no, tgt_addr_reg, gc::accounting::CardTable::kCardShift);
StoreBaseIndexed(reg_card_base, reg_card_no, reg_card_base, 0, kUnsignedByte);
LIR* target = NewLIR0(kPseudoTargetLabel);
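
The StoreBaseIndexed above reuses reg_card_base as the store value: it writes the low
byte of the card-table base at card_table_base + (tgt_addr >> kCardShift). Any non-zero
byte marks the card dirty, so this saves materializing a separate constant. Conceptually:

  // card_table[tgt_addr >> gc::accounting::CardTable::kCardShift] =
  //     static_cast<uint8_t>(card_table_base);
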
@@ -336,17 +343,16 @@
}
void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) {
- int spill_count = num_core_spills_ + num_fp_spills_;
/*
- * On entry, r0, r1, r2 & r3 are live. Let the register allocation
+ * On entry, x0, x1, x2 & x3 are live. Let the register allocation
* mechanism know so it doesn't try to use any of them when
* expanding the frame or flushing. This leaves the utility
* code with a single temp: r12. This should be enough.
*/
- LockTemp(rs_r0);
- LockTemp(rs_r1);
- LockTemp(rs_r2);
- LockTemp(rs_r3);
+ LockTemp(rs_x0);
+ LockTemp(rs_x1);
+ LockTemp(rs_x2);
+ LockTemp(rs_x3);
/*
* We can safely skip the stack overflow check if we're
@@ -356,14 +362,30 @@
(static_cast<size_t>(frame_size_) <
Thread::kStackOverflowReservedBytes));
NewLIR0(kPseudoMethodEntry);
+
if (!skip_overflow_check) {
+ LoadWordDisp(rs_rA64_SELF, A64_THREAD_STACK_END_INT_OFFS, rs_x12);
+ OpRegImm64(kOpSub, rs_rA64_SP, frame_size_, /*is_wide*/true);
if (Runtime::Current()->ExplicitStackOverflowChecks()) {
/* Load stack limit */
- Load32Disp(rs_rARM_SELF, Thread::StackEndOffset<4>().Int32Value(), rs_r12);
+ // TODO(Arm64): fix the line below:
+ // GenRegRegCheck(kCondUlt, rA64_SP, r12, kThrowStackOverflow);
+ } else {
+ // Implicit stack overflow check.
+ // SP has already been dropped by framesize above, so a load from [sp, #0]
+ // probes old_sp - framesize. If that address is in the stack redzone we
+ // will get a segmentation fault.
+ // TODO(Arm64): does the following really work or do we need a reg != rA64_ZR?
+ Load32Disp(rs_rA64_SP, 0, rs_wzr);
+ MarkPossibleStackOverflowException();
}
+ } else if (frame_size_ > 0) {
+ OpRegImm64(kOpSub, rs_rA64_SP, frame_size_, /*is_wide*/true);
}
+
/* Spill core callee saves */
- NewLIR1(kThumb2Push, core_spill_mask_);
+ if (core_spill_mask_) {
+ SpillCoreRegs(rs_rA64_SP, frame_size_, core_spill_mask_);
+ }
/* Need to spill any FP regs? */
if (num_fp_spills_) {
/*
@@ -371,107 +393,40 @@
* they are pushed as a contiguous block. When promoting from
* the fp set, we must allocate all singles from s16..highest-promoted
*/
- NewLIR1(kThumb2VPushCS, num_fp_spills_);
- }
-
- const int spill_size = spill_count * 4;
- const int frame_size_without_spills = frame_size_ - spill_size;
- if (!skip_overflow_check) {
- if (Runtime::Current()->ExplicitStackOverflowChecks()) {
- class StackOverflowSlowPath : public LIRSlowPath {
- public:
- StackOverflowSlowPath(Mir2Lir* m2l, LIR* branch, bool restore_lr, size_t sp_displace)
- : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch, nullptr), restore_lr_(restore_lr),
- sp_displace_(sp_displace) {
- }
- void Compile() OVERRIDE {
- m2l_->ResetRegPool();
- m2l_->ResetDefTracking();
- GenerateTargetLabel(kPseudoThrowTarget);
- if (restore_lr_) {
- m2l_->LoadWordDisp(rs_rARM_SP, sp_displace_ - 4, rs_rARM_LR);
- }
- m2l_->OpRegImm(kOpAdd, rs_rARM_SP, sp_displace_);
- m2l_->ClobberCallerSave();
- ThreadOffset<4> func_offset = QUICK_ENTRYPOINT_OFFSET(4, pThrowStackOverflow);
- // Load the entrypoint directly into the pc instead of doing a load + branch. Assumes
- // codegen and target are in thumb2 mode.
- // NOTE: native pointer.
- m2l_->LoadWordDisp(rs_rARM_SELF, func_offset.Int32Value(), rs_rARM_PC);
- }
-
- private:
- const bool restore_lr_;
- const size_t sp_displace_;
- };
- if (static_cast<size_t>(frame_size_) > Thread::kStackOverflowReservedUsableBytes) {
- OpRegRegImm(kOpSub, rs_rARM_LR, rs_rARM_SP, frame_size_without_spills);
- LIR* branch = OpCmpBranch(kCondUlt, rs_rARM_LR, rs_r12, nullptr);
- // Need to restore LR since we used it as a temp.
- AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, true, spill_size));
- OpRegCopy(rs_rARM_SP, rs_rARM_LR); // Establish stack
- } else {
- // If the frame is small enough we are guaranteed to have enough space that remains to
- // handle signals on the user stack.
- OpRegRegImm(kOpSub, rs_rARM_SP, rs_rARM_SP, frame_size_without_spills);
- LIR* branch = OpCmpBranch(kCondUlt, rs_rARM_SP, rs_r12, nullptr);
- AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, false, frame_size_));
- }
- } else {
- // Implicit stack overflow check.
- // Generate a load from [sp, #-overflowsize]. If this is in the stack
- // redzone we will get a segmentation fault.
- //
- // Caveat coder: if someone changes the kStackOverflowReservedBytes value
- // we need to make sure that it's loadable in an immediate field of
- // a sub instruction. Otherwise we will get a temp allocation and the
- // code size will increase.
- OpRegRegImm(kOpSub, rs_r12, rs_rARM_SP, Thread::kStackOverflowReservedBytes);
- Load32Disp(rs_r12, 0, rs_r12);
- MarkPossibleStackOverflowException();
- OpRegImm(kOpSub, rs_rARM_SP, frame_size_without_spills);
- }
- } else {
- OpRegImm(kOpSub, rs_rARM_SP, frame_size_without_spills);
+ // TODO(Arm64): SpillFPRegs(rA64_SP, frame_size_, fp_spill_mask_);
}
FlushIns(ArgLocs, rl_method);
- FreeTemp(rs_r0);
- FreeTemp(rs_r1);
- FreeTemp(rs_r2);
- FreeTemp(rs_r3);
+ FreeTemp(rs_x0);
+ FreeTemp(rs_x1);
+ FreeTemp(rs_x2);
+ FreeTemp(rs_x3);
}
void Arm64Mir2Lir::GenExitSequence() {
- int spill_count = num_core_spills_ + num_fp_spills_;
/*
* In the exit path, r0/r1 are live - make sure they aren't
* allocated by the register utilities as temps.
*/
- LockTemp(rs_r0);
- LockTemp(rs_r1);
+ LockTemp(rs_x0);
+ LockTemp(rs_x1);
NewLIR0(kPseudoMethodExit);
- OpRegImm(kOpAdd, rs_rARM_SP, frame_size_ - (spill_count * 4));
/* Need to restore any FP callee saves? */
if (num_fp_spills_) {
- NewLIR1(kThumb2VPopCS, num_fp_spills_);
+ // TODO(Arm64): UnspillFPRegs(num_fp_spills_);
}
- if (core_spill_mask_ & (1 << rs_rARM_LR.GetRegNum())) {
- /* Unspill rARM_LR to rARM_PC */
- core_spill_mask_ &= ~(1 << rs_rARM_LR.GetRegNum());
- core_spill_mask_ |= (1 << rs_rARM_PC.GetRegNum());
+ if (core_spill_mask_) {
+ UnSpillCoreRegs(rs_rA64_SP, frame_size_, core_spill_mask_);
}
- NewLIR1(kThumb2Pop, core_spill_mask_);
- if (!(core_spill_mask_ & (1 << rs_rARM_PC.GetRegNum()))) {
- /* We didn't pop to rARM_PC, so must do a bv rARM_LR */
- NewLIR1(kThumbBx, rs_rARM_LR.GetReg());
- }
+
+ OpRegImm64(kOpAdd, rs_rA64_SP, frame_size_, /*is_wide*/true);
+ NewLIR0(kA64Ret);
}
void Arm64Mir2Lir::GenSpecialExitSequence() {
- NewLIR1(kThumbBx, rs_rARM_LR.GetReg());
+ NewLIR0(kA64Ret);
}
} // namespace art
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
index 4e784c6..903be10 100644
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -22,7 +22,7 @@
namespace art {
-class Arm64Mir2Lir FINAL : public Mir2Lir {
+class Arm64Mir2Lir : public Mir2Lir {
public:
Arm64Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena);
@@ -31,20 +31,21 @@
RegLocation rl_dest, int lit);
bool EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) OVERRIDE;
LIR* CheckSuspendUsingLoad() OVERRIDE;
- RegStorage LoadHelper(ThreadOffset<4> offset);
- LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest, OpSize size,
- int s_reg);
+ RegStorage LoadHelper(A64ThreadOffset offset);
+ LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
+ OpSize size) OVERRIDE;
LIR* LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest, int scale,
- OpSize size);
+ OpSize size) OVERRIDE;
LIR* LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement,
- RegStorage r_dest, OpSize size, int s_reg);
+ RegStorage r_dest, OpSize size) OVERRIDE;
LIR* LoadConstantNoClobber(RegStorage r_dest, int value);
LIR* LoadConstantWide(RegStorage r_dest, int64_t value);
- LIR* StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src, OpSize size);
+ LIR* StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src,
+ OpSize size) OVERRIDE;
LIR* StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src, int scale,
- OpSize size);
+ OpSize size) OVERRIDE;
LIR* StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement,
- RegStorage r_src, OpSize size, int s_reg);
+ RegStorage r_src, OpSize size) OVERRIDE;
void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg);
// Required for target - register utilities.
@@ -74,7 +75,7 @@
uint32_t LinkFixupInsns(LIR* head_lir, LIR* tail_lir, CodeOffset offset);
int AssignInsnOffsets();
void AssignOffsets();
- static uint8_t* EncodeLIRs(uint8_t* write_pos, LIR* lir);
+ uint8_t* EncodeLIRs(uint8_t* write_pos, LIR* lir);
void DumpResourceMask(LIR* lir, uint64_t mask, const char* prefix);
void SetupTargetResourceMasks(LIR* lir, uint64_t flags);
const char* GetTargetInstFmt(int opcode);
@@ -94,6 +95,7 @@
RegLocation rl_src, int scale, bool card_mark);
void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
RegLocation rl_src1, RegLocation rl_shift);
+ void GenLongOp(OpKind op, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
void GenMulLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
RegLocation rl_src2);
void GenAddLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
@@ -140,6 +142,11 @@
void GenNegFloat(RegLocation rl_dest, RegLocation rl_src);
void GenPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src);
void GenSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src);
+ bool GenSpecialCase(BasicBlock* bb, MIR* mir, const InlineMethod& special);
+
+ uint32_t GenPairWise(uint32_t reg_mask, int* reg1, int* reg2);
+ void UnSpillCoreRegs(RegStorage base, int offset, uint32_t reg_mask);
+ void SpillCoreRegs(RegStorage base, int offset, uint32_t reg_mask);
// Required for target - single operation generators.
LIR* OpUnconditionalBranch(LIR* target);
@@ -155,6 +162,7 @@
LIR* OpReg(OpKind op, RegStorage r_dest_src);
void OpRegCopy(RegStorage r_dest, RegStorage r_src);
LIR* OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src);
+ LIR* OpRegImm64(OpKind op, RegStorage r_dest_src1, int64_t value, bool is_wide);
LIR* OpRegImm(OpKind op, RegStorage r_dest_src1, int value);
LIR* OpRegMem(OpKind op, RegStorage r_dest, RegStorage r_base, int offset);
LIR* OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2);
@@ -164,45 +172,50 @@
LIR* OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, int value);
LIR* OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2);
LIR* OpTestSuspend(LIR* target);
- LIR* OpThreadMem(OpKind op, ThreadOffset<4> thread_offset);
+ LIR* OpThreadMem(OpKind op, A64ThreadOffset thread_offset);
LIR* OpVldm(RegStorage r_base, int count);
LIR* OpVstm(RegStorage r_base, int count);
void OpLea(RegStorage r_base, RegStorage reg1, RegStorage reg2, int scale, int offset);
void OpRegCopyWide(RegStorage dest, RegStorage src);
- void OpTlsCmp(ThreadOffset<4> offset, int val);
+ void OpTlsCmp(A64ThreadOffset offset, int val);
- LIR* LoadBaseDispBody(RegStorage r_base, int displacement, RegStorage r_dest, OpSize size,
- int s_reg);
+ LIR* LoadBaseDispBody(RegStorage r_base, int displacement, RegStorage r_dest, OpSize size);
LIR* StoreBaseDispBody(RegStorage r_base, int displacement, RegStorage r_src, OpSize size);
- LIR* OpRegRegRegShift(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2,
- int shift);
- LIR* OpRegRegShift(OpKind op, RegStorage r_dest_src1, RegStorage r_src2, int shift);
- static const ArmEncodingMap EncodingMap[kArmLast];
+ LIR* OpRegRegRegShift(OpKind op, int r_dest, int r_src1, int r_src2, int shift,
+ bool is_wide = false);
+ LIR* OpRegRegShift(OpKind op, int r_dest_src1, int r_src2, int shift, bool is_wide = false);
+ static const ArmEncodingMap EncodingMap[kA64Last];
int EncodeShift(int code, int amount);
- int ModifiedImmediate(uint32_t value);
+ int EncodeExtend(int extend_type, int amount);
+ bool IsExtendEncoding(int encoded_value);
+ int EncodeLogicalImmediate(bool is_wide, uint64_t value);
+ uint64_t DecodeLogicalImmediate(bool is_wide, int value);
+
ArmConditionCode ArmConditionEncoding(ConditionCode code);
bool InexpensiveConstantInt(int32_t value);
bool InexpensiveConstantFloat(int32_t value);
bool InexpensiveConstantLong(int64_t value);
bool InexpensiveConstantDouble(int64_t value);
+ void FlushIns(RegLocation* ArgLocs, RegLocation rl_method);
+ int LoadArgRegs(CallInfo* info, int call_state,
+ NextCallInsn next_call_insn,
+ const MethodReference& target_method,
+ uint32_t vtable_idx,
+ uintptr_t direct_code, uintptr_t direct_method, InvokeType type,
+ bool skip_this);
+
private:
void GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1, int64_t val,
ConditionCode ccode);
- LIR* LoadFPConstantValue(int r_dest, int value);
+ LIR* LoadFPConstantValue(int r_dest, int32_t value);
+ LIR* LoadFPConstantValueWide(int r_dest, int64_t value);
void ReplaceFixup(LIR* prev_lir, LIR* orig_lir, LIR* new_lir);
void InsertFixupBefore(LIR* prev_lir, LIR* orig_lir, LIR* new_lir);
void AssignDataOffsets();
RegLocation GenDivRem(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
bool is_div, bool check_zero);
RegLocation GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit, bool is_div);
- typedef struct {
- OpKind op;
- uint32_t shift;
- } EasyMultiplyOp;
- bool GetEasyMultiplyOp(int lit, EasyMultiplyOp* op);
- bool GetEasyMultiplyTwoOps(int lit, EasyMultiplyOp* ops);
- void GenEasyMultiplyTwoOps(RegStorage r_dest, RegStorage r_src, EasyMultiplyOp* ops);
};
} // namespace art
diff --git a/compiler/dex/quick/arm64/create.sh b/compiler/dex/quick/arm64/create.sh
deleted file mode 100644
index a3833bd..0000000
--- a/compiler/dex/quick/arm64/create.sh
+++ /dev/null
@@ -1,19 +0,0 @@
-#!/bin/bash
-
-set -e
-
-if [ ! -d ./arm ]; then
- echo "Directory ./arm not found."
- exit 1
-fi
-
-mkdir -p arm64
-dst=`cd arm64 && pwd`
-cd arm/
-for f in *; do
- cp $f $dst/`echo $f | sed 's/arm/arm64/g'`
-done
-
-sed -i 's,ART_COMPILER_DEX_QUICK_ARM_ARM_LIR_H_,ART_COMPILER_DEX_QUICK_ARM64_ARM64_LIR_H_,g' $dst/arm64_lir.h
-sed -i 's,ART_COMPILER_DEX_QUICK_ARM_CODEGEN_ARM_H_,ART_COMPILER_DEX_QUICK_ARM64_CODEGEN_ARM64_H_,g' $dst/codegen_arm64.h
-sed -i -e 's,ArmMir2Lir,Arm64Mir2Lir,g' -e 's,arm_lir.h,arm64_lir.h,g' -e 's,codegen_arm.h,codegen_arm64.h,g' $dst/*.h $dst/*.cc
diff --git a/compiler/dex/quick/arm64/fp_arm64.cc b/compiler/dex/quick/arm64/fp_arm64.cc
index 9684283..c2a550e 100644
--- a/compiler/dex/quick/arm64/fp_arm64.cc
+++ b/compiler/dex/quick/arm64/fp_arm64.cc
@@ -21,8 +21,8 @@
namespace art {
void Arm64Mir2Lir::GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest,
- RegLocation rl_src1, RegLocation rl_src2) {
- int op = kThumbBkpt;
+ RegLocation rl_src1, RegLocation rl_src2) {
+ int op = kA64Brk1d;
RegLocation rl_result;
/*
@@ -32,24 +32,24 @@
switch (opcode) {
case Instruction::ADD_FLOAT_2ADDR:
case Instruction::ADD_FLOAT:
- op = kThumb2Vadds;
+ op = kA64Fadd3fff;
break;
case Instruction::SUB_FLOAT_2ADDR:
case Instruction::SUB_FLOAT:
- op = kThumb2Vsubs;
+ op = kA64Fsub3fff;
break;
case Instruction::DIV_FLOAT_2ADDR:
case Instruction::DIV_FLOAT:
- op = kThumb2Vdivs;
+ op = kA64Fdiv3fff;
break;
case Instruction::MUL_FLOAT_2ADDR:
case Instruction::MUL_FLOAT:
- op = kThumb2Vmuls;
+ op = kA64Fmul3fff;
break;
case Instruction::REM_FLOAT_2ADDR:
case Instruction::REM_FLOAT:
FlushAllRegs(); // Send everything to home location
- CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmodf), rl_src1, rl_src2,
+ CallRuntimeHelperRegLocationRegLocation(A64_QUICK_ENTRYPOINT_OFFSET(pFmodf), rl_src1, rl_src2,
false);
rl_result = GetReturn(true);
StoreValue(rl_dest, rl_result);
@@ -68,31 +68,31 @@
}
void Arm64Mir2Lir::GenArithOpDouble(Instruction::Code opcode,
- RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
- int op = kThumbBkpt;
+ RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
+ int op = kA64Brk1d;
RegLocation rl_result;
switch (opcode) {
case Instruction::ADD_DOUBLE_2ADDR:
case Instruction::ADD_DOUBLE:
- op = kThumb2Vaddd;
+ op = kA64Fadd3fff;
break;
case Instruction::SUB_DOUBLE_2ADDR:
case Instruction::SUB_DOUBLE:
- op = kThumb2Vsubd;
+ op = kA64Fsub3fff;
break;
case Instruction::DIV_DOUBLE_2ADDR:
case Instruction::DIV_DOUBLE:
- op = kThumb2Vdivd;
+ op = kA64Fdiv3fff;
break;
case Instruction::MUL_DOUBLE_2ADDR:
case Instruction::MUL_DOUBLE:
- op = kThumb2Vmuld;
+ op = kA64Fmul3fff;
break;
case Instruction::REM_DOUBLE_2ADDR:
case Instruction::REM_DOUBLE:
FlushAllRegs(); // Send everything to home location
- CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmod), rl_src1, rl_src2,
+ CallRuntimeHelperRegLocationRegLocation(A64_QUICK_ENTRYPOINT_OFFSET(pFmod), rl_src1, rl_src2,
false);
rl_result = GetReturnWide(true);
StoreValueWide(rl_dest, rl_result);
@@ -111,98 +111,62 @@
rl_result = EvalLoc(rl_dest, kFPReg, true);
DCHECK(rl_dest.wide);
DCHECK(rl_result.wide);
- NewLIR3(op, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
+ NewLIR3(FWIDE(op), rl_result.reg.GetReg(), rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
StoreValueWide(rl_dest, rl_result);
}
-void Arm64Mir2Lir::GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src) {
- int op = kThumbBkpt;
- int src_reg;
+void Arm64Mir2Lir::GenConversion(Instruction::Code opcode,
+ RegLocation rl_dest, RegLocation rl_src) {
+ int op = kA64Brk1d;
RegLocation rl_result;
switch (opcode) {
case Instruction::INT_TO_FLOAT:
- op = kThumb2VcvtIF;
+ op = kA64Scvtf2fw;
break;
case Instruction::FLOAT_TO_INT:
- op = kThumb2VcvtFI;
+ op = kA64Fcvtzs2wf;
break;
case Instruction::DOUBLE_TO_FLOAT:
- op = kThumb2VcvtDF;
+ op = kA64Fcvt2sS;
break;
case Instruction::FLOAT_TO_DOUBLE:
- op = kThumb2VcvtFd;
+ op = kA64Fcvt2Ss;
break;
case Instruction::INT_TO_DOUBLE:
- op = kThumb2VcvtF64S32;
+ op = FWIDE(kA64Scvtf2fw);
break;
case Instruction::DOUBLE_TO_INT:
- op = kThumb2VcvtDI;
+ op = FWIDE(kA64Fcvtzs2wf);
break;
- case Instruction::LONG_TO_DOUBLE: {
- rl_src = LoadValueWide(rl_src, kFPReg);
- RegStorage src_low = rl_src.reg.DoubleToLowSingle();
- RegStorage src_high = rl_src.reg.DoubleToHighSingle();
- rl_result = EvalLoc(rl_dest, kFPReg, true);
- RegStorage tmp1 = AllocTempDouble();
- RegStorage tmp2 = AllocTempDouble();
-
- NewLIR2(kThumb2VcvtF64S32, tmp1.GetReg(), src_high.GetReg());
- NewLIR2(kThumb2VcvtF64U32, rl_result.reg.GetReg(), src_low.GetReg());
- LoadConstantWide(tmp2, 0x41f0000000000000LL);
- NewLIR3(kThumb2VmlaF64, rl_result.reg.GetReg(), tmp1.GetReg(), tmp2.GetReg());
- FreeTemp(tmp1);
- FreeTemp(tmp2);
- StoreValueWide(rl_dest, rl_result);
- return;
- }
+ case Instruction::LONG_TO_DOUBLE:
+ op = FWIDE(kA64Scvtf2fx);
+ break;
case Instruction::FLOAT_TO_LONG:
- GenConversionCall(QUICK_ENTRYPOINT_OFFSET(4, pF2l), rl_dest, rl_src);
- return;
- case Instruction::LONG_TO_FLOAT: {
- rl_src = LoadValueWide(rl_src, kFPReg);
- RegStorage src_low = rl_src.reg.DoubleToLowSingle();
- RegStorage src_high = rl_src.reg.DoubleToHighSingle();
- rl_result = EvalLoc(rl_dest, kFPReg, true);
- // Allocate temp registers.
- RegStorage high_val = AllocTempDouble();
- RegStorage low_val = AllocTempDouble();
- RegStorage const_val = AllocTempDouble();
- // Long to double.
- NewLIR2(kThumb2VcvtF64S32, high_val.GetReg(), src_high.GetReg());
- NewLIR2(kThumb2VcvtF64U32, low_val.GetReg(), src_low.GetReg());
- LoadConstantWide(const_val, INT64_C(0x41f0000000000000));
- NewLIR3(kThumb2VmlaF64, low_val.GetReg(), high_val.GetReg(), const_val.GetReg());
- // Double to float.
- NewLIR2(kThumb2VcvtDF, rl_result.reg.GetReg(), low_val.GetReg());
- // Free temp registers.
- FreeTemp(high_val);
- FreeTemp(low_val);
- FreeTemp(const_val);
- // Store result.
- StoreValue(rl_dest, rl_result);
- return;
- }
+ op = kA64Fcvtzs2xf;
+ break;
+ case Instruction::LONG_TO_FLOAT:
+ op = kA64Scvtf2fx;
+ break;
case Instruction::DOUBLE_TO_LONG:
- GenConversionCall(QUICK_ENTRYPOINT_OFFSET(4, pD2l), rl_dest, rl_src);
- return;
+ op = FWIDE(kA64Fcvtzs2xf);
+ break;
default:
LOG(FATAL) << "Unexpected opcode: " << opcode;
}
+
if (rl_src.wide) {
rl_src = LoadValueWide(rl_src, kFPReg);
- src_reg = rl_src.reg.GetReg();
} else {
rl_src = LoadValue(rl_src, kFPReg);
- src_reg = rl_src.reg.GetReg();
}
+
+ rl_result = EvalLoc(rl_dest, kFPReg, true);
+ NewLIR2(op, rl_result.reg.GetReg(), rl_src.reg.GetReg());
+
if (rl_dest.wide) {
- rl_result = EvalLoc(rl_dest, kFPReg, true);
- NewLIR2(op, rl_result.reg.GetReg(), src_reg);
StoreValueWide(rl_dest, rl_result);
} else {
- rl_result = EvalLoc(rl_dest, kFPReg, true);
- NewLIR2(op, rl_result.reg.GetReg(), src_reg);
StoreValue(rl_dest, rl_result);
}
}
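
For reference, the opcode table above is intended to select the following A64 conversions
(FWIDE picks the 64-bit/double-precision form):

  //   INT_TO_FLOAT    scvtf  sD, wN      FLOAT_TO_INT    fcvtzs wD, sN
  //   INT_TO_DOUBLE   scvtf  dD, wN      DOUBLE_TO_INT   fcvtzs wD, dN
  //   LONG_TO_FLOAT   scvtf  sD, xN      FLOAT_TO_LONG   fcvtzs xD, sN
  //   LONG_TO_DOUBLE  scvtf  dD, xN      DOUBLE_TO_LONG  fcvtzs xD, dN
  //   FLOAT_TO_DOUBLE fcvt   dD, sN      DOUBLE_TO_FLOAT fcvt   sD, dN
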
@@ -217,15 +181,14 @@
rl_src2 = mir_graph_->GetSrcWide(mir, 2);
rl_src1 = LoadValueWide(rl_src1, kFPReg);
rl_src2 = LoadValueWide(rl_src2, kFPReg);
- NewLIR2(kThumb2Vcmpd, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
+ NewLIR2(FWIDE(kA64Fcmp2ff), rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
} else {
rl_src1 = mir_graph_->GetSrc(mir, 0);
rl_src2 = mir_graph_->GetSrc(mir, 1);
rl_src1 = LoadValue(rl_src1, kFPReg);
rl_src2 = LoadValue(rl_src2, kFPReg);
- NewLIR2(kThumb2Vcmps, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
+ NewLIR2(kA64Fcmp2ff, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
}
- NewLIR0(kThumb2Fmstat);
ConditionCode ccode = mir->meta.ccode;
switch (ccode) {
case kCondEq:
@@ -259,7 +222,7 @@
void Arm64Mir2Lir::GenCmpFP(Instruction::Code opcode, RegLocation rl_dest,
- RegLocation rl_src1, RegLocation rl_src2) {
+ RegLocation rl_src1, RegLocation rl_src2) {
bool is_double = false;
int default_result = -1;
RegLocation rl_result;
@@ -291,7 +254,7 @@
ClobberSReg(rl_dest.s_reg_low);
rl_result = EvalLoc(rl_dest, kCoreReg, true);
LoadConstant(rl_result.reg, default_result);
- NewLIR2(kThumb2Vcmpd, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
+ NewLIR2(FWIDE(kA64Fcmp2ff), rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
} else {
rl_src1 = LoadValue(rl_src1, kFPReg);
rl_src2 = LoadValue(rl_src2, kFPReg);
@@ -299,20 +262,20 @@
ClobberSReg(rl_dest.s_reg_low);
rl_result = EvalLoc(rl_dest, kCoreReg, true);
LoadConstant(rl_result.reg, default_result);
- NewLIR2(kThumb2Vcmps, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
+ NewLIR2(kA64Fcmp2ff, rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
}
DCHECK(!rl_result.reg.IsFloat());
- NewLIR0(kThumb2Fmstat);
- LIR* it = OpIT((default_result == -1) ? kCondGt : kCondMi, "");
- NewLIR2(kThumb2MovI8M, rl_result.reg.GetReg(),
- ModifiedImmediate(-default_result)); // Must not alter ccodes
- OpEndIT(it);
+ // TODO(Arm64): would it be better to do this instead?
+ // csinc wD, wzr, wzr, eq
+ // csneg wD, wD, wD, le
+ // (which requires 2 instructions rather than 3)
- it = OpIT(kCondEq, "");
- LoadConstant(rl_result.reg, 0);
- OpEndIT(it);
-
+ // Rd = if cond then Rd else -Rd.
+ NewLIR4(kA64Csneg4rrrc, rl_result.reg.GetReg(), rl_result.reg.GetReg(),
+ rl_result.reg.GetReg(), (default_result == 1) ? kArmCondPl : kArmCondLe);
+ NewLIR4(kA64Csel4rrrc, rl_result.reg.GetReg(), rwzr, rl_result.reg.GetReg(),
+ kArmCondEq);
StoreValue(rl_dest, rl_result);
}
@@ -320,7 +283,7 @@
RegLocation rl_result;
rl_src = LoadValue(rl_src, kFPReg);
rl_result = EvalLoc(rl_dest, kFPReg, true);
- NewLIR2(kThumb2Vnegs, rl_result.reg.GetReg(), rl_src.reg.GetReg());
+ NewLIR2(kA64Fneg2ff, rl_result.reg.GetReg(), rl_src.reg.GetReg());
StoreValue(rl_dest, rl_result);
}
@@ -328,31 +291,32 @@
RegLocation rl_result;
rl_src = LoadValueWide(rl_src, kFPReg);
rl_result = EvalLoc(rl_dest, kFPReg, true);
- NewLIR2(kThumb2Vnegd, rl_result.reg.GetReg(), rl_src.reg.GetReg());
+ NewLIR2(FWIDE(kA64Fneg2ff), rl_result.reg.GetReg(), rl_src.reg.GetReg());
StoreValueWide(rl_dest, rl_result);
}
bool Arm64Mir2Lir::GenInlinedSqrt(CallInfo* info) {
- DCHECK_EQ(cu_->instruction_set, kThumb2);
+ // TODO(Arm64): implement this.
+ UNIMPLEMENTED(FATAL) << "GenInlinedSqrt not implemented for Arm64";
+
+ DCHECK_EQ(cu_->instruction_set, kArm64);
LIR *branch;
RegLocation rl_src = info->args[0];
RegLocation rl_dest = InlineTargetWide(info); // double place for result
rl_src = LoadValueWide(rl_src, kFPReg);
RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true);
- NewLIR2(kThumb2Vsqrtd, rl_result.reg.GetReg(), rl_src.reg.GetReg());
- NewLIR2(kThumb2Vcmpd, rl_result.reg.GetReg(), rl_result.reg.GetReg());
- NewLIR0(kThumb2Fmstat);
- branch = NewLIR2(kThumbBCond, 0, kArmCondEq);
+ NewLIR2(FWIDE(kA64Fsqrt2ff), rl_result.reg.GetReg(), rl_src.reg.GetReg());
+ NewLIR2(FWIDE(kA64Fcmp2ff), rl_result.reg.GetReg(), rl_result.reg.GetReg());
+ branch = NewLIR2(kA64B2ct, kArmCondEq, 0);
ClobberCallerSave();
LockCallTemps(); // Using fixed registers
- RegStorage r_tgt = LoadHelper(QUICK_ENTRYPOINT_OFFSET(4, pSqrt));
- NewLIR3(kThumb2Fmrrd, rs_r0.GetReg(), rs_r1.GetReg(), rl_src.reg.GetReg());
- NewLIR1(kThumbBlxR, r_tgt.GetReg());
- NewLIR3(kThumb2Fmdrr, rl_result.reg.GetReg(), rs_r0.GetReg(), rs_r1.GetReg());
+ RegStorage r_tgt = LoadHelper(A64_QUICK_ENTRYPOINT_OFFSET(pSqrt));
+ // NewLIR3(kThumb2Fmrrd, r0, r1, rl_src.reg.GetReg());
+ NewLIR1(kA64Blr1x, r_tgt.GetReg());
+ // NewLIR3(kThumb2Fmdrr, rl_result.reg.GetReg(), r0, r1);
branch->target = NewLIR0(kPseudoTargetLabel);
StoreValueWide(rl_dest, rl_result);
return true;
}
-
} // namespace art
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc
index c5a3ab6..709f583 100644
--- a/compiler/dex/quick/arm64/int_arm64.cc
+++ b/compiler/dex/quick/arm64/int_arm64.cc
@@ -29,152 +29,55 @@
return OpCondBranch(cond, target);
}
-/*
- * Generate a Thumb2 IT instruction, which can nullify up to
- * four subsequent instructions based on a condition and its
- * inverse. The condition applies to the first instruction, which
- * is executed if the condition is met. The string "guide" consists
- * of 0 to 3 chars, and applies to the 2nd through 4th instruction.
- * A "T" means the instruction is executed if the condition is
- * met, and an "E" means the instruction is executed if the condition
- * is not met.
- */
+// TODO(Arm64): remove this.
LIR* Arm64Mir2Lir::OpIT(ConditionCode ccode, const char* guide) {
- int mask;
- int mask3 = 0;
- int mask2 = 0;
- int mask1 = 0;
- ArmConditionCode code = ArmConditionEncoding(ccode);
- int cond_bit = code & 1;
- int alt_bit = cond_bit ^ 1;
-
- // Note: case fallthroughs intentional
- switch (strlen(guide)) {
- case 3:
- mask1 = (guide[2] == 'T') ? cond_bit : alt_bit;
- case 2:
- mask2 = (guide[1] == 'T') ? cond_bit : alt_bit;
- case 1:
- mask3 = (guide[0] == 'T') ? cond_bit : alt_bit;
- break;
- case 0:
- break;
- default:
- LOG(FATAL) << "OAT: bad case in OpIT";
- }
- mask = (mask3 << 3) | (mask2 << 2) | (mask1 << 1) |
- (1 << (3 - strlen(guide)));
- return NewLIR2(kThumb2It, code, mask);
+ LOG(FATAL) << "Unexpected use of OpIT for Arm64";
+ return NULL;
}
void Arm64Mir2Lir::OpEndIT(LIR* it) {
- // TODO: use the 'it' pointer to do some checks with the LIR, for example
- // we could check that the number of instructions matches the mask
- // in the IT instruction.
- CHECK(it != nullptr);
- GenBarrier();
+ LOG(FATAL) << "Unexpected use of OpEndIT for Arm64";
}
/*
* 64-bit 3way compare function.
- * mov rX, #-1
- * cmp op1hi, op2hi
- * blt done
- * bgt flip
- * sub rX, op1lo, op2lo (treat as unsigned)
- * beq done
- * ite hi
- * mov(hi) rX, #-1
- * mov(!hi) rX, #1
- * flip:
- * neg rX
- * done:
+ * cmp xA, xB
+ * csinc wC, wzr, wzr, eq
+ * csneg wC, wC, wC, le
*/
-void Arm64Mir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
- LIR* target1;
- LIR* target2;
+void Arm64Mir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1,
+ RegLocation rl_src2) {
+ RegLocation rl_result;
rl_src1 = LoadValueWide(rl_src1, kCoreReg);
rl_src2 = LoadValueWide(rl_src2, kCoreReg);
- RegStorage t_reg = AllocTemp();
- LoadConstant(t_reg, -1);
- OpRegReg(kOpCmp, rl_src1.reg.GetHigh(), rl_src2.reg.GetHigh());
- LIR* branch1 = OpCondBranch(kCondLt, NULL);
- LIR* branch2 = OpCondBranch(kCondGt, NULL);
- OpRegRegReg(kOpSub, t_reg, rl_src1.reg.GetLow(), rl_src2.reg.GetLow());
- LIR* branch3 = OpCondBranch(kCondEq, NULL);
+ rl_result = EvalLoc(rl_dest, kCoreReg, true);
- LIR* it = OpIT(kCondHi, "E");
- NewLIR2(kThumb2MovI8M, t_reg.GetReg(), ModifiedImmediate(-1));
- LoadConstant(t_reg, 1);
- OpEndIT(it);
-
- target2 = NewLIR0(kPseudoTargetLabel);
- OpRegReg(kOpNeg, t_reg, t_reg);
-
- target1 = NewLIR0(kPseudoTargetLabel);
-
- RegLocation rl_temp = LocCReturn(); // Just using as template, will change
- rl_temp.reg.SetReg(t_reg.GetReg());
- StoreValue(rl_dest, rl_temp);
- FreeTemp(t_reg);
-
- branch1->target = target1;
- branch2->target = target2;
- branch3->target = branch1->target;
+ OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
+ NewLIR4(kA64Csinc4rrrc, rl_result.reg.GetReg(), rwzr, rwzr, kArmCondEq);
+ NewLIR4(kA64Csneg4rrrc, rl_result.reg.GetReg(), rl_result.reg.GetReg(),
+ rl_result.reg.GetReg(), kArmCondLe);
+ StoreValue(rl_dest, rl_result);
}
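
For readers new to the A64 conditional selects used here, their semantics (per the ARMv8
reference manual) are:

  //   csinc Rd, Rn, Rm, cond   =>   Rd = cond ? Rn : (Rm + 1)
  //   csneg Rd, Rn, Rm, cond   =>   Rd = cond ? Rn : (-Rm)
  // After the cmp, "csinc wC, wzr, wzr, eq" yields 0 on equality and 1
  // otherwise; the csneg then conditionally flips the sign on "le".
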
void Arm64Mir2Lir::GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1,
- int64_t val, ConditionCode ccode) {
- int32_t val_lo = Low32Bits(val);
- int32_t val_hi = High32Bits(val);
- DCHECK_GE(ModifiedImmediate(val_lo), 0);
- DCHECK_GE(ModifiedImmediate(val_hi), 0);
+ int64_t val, ConditionCode ccode) {
LIR* taken = &block_label_list_[bb->taken];
- LIR* not_taken = &block_label_list_[bb->fall_through];
rl_src1 = LoadValueWide(rl_src1, kCoreReg);
- RegStorage low_reg = rl_src1.reg.GetLow();
- RegStorage high_reg = rl_src1.reg.GetHigh();
if (val == 0 && (ccode == kCondEq || ccode == kCondNe)) {
- RegStorage t_reg = AllocTemp();
- NewLIR4(kThumb2OrrRRRs, t_reg.GetReg(), low_reg.GetReg(), high_reg.GetReg(), 0);
- FreeTemp(t_reg);
+ ArmOpcode opcode = (ccode == kCondEq) ? kA64Cbz2rt : kA64Cbnz2rt;
+ LIR* branch = NewLIR2(WIDE(opcode), rl_src1.reg.GetLowReg(), 0);
+ branch->target = taken;
+ } else {
+ OpRegImm64(kOpCmp, rl_src1.reg, val, /*is_wide*/true);
OpCondBranch(ccode, taken);
- return;
}
-
- switch (ccode) {
- case kCondEq:
- case kCondNe:
- OpCmpImmBranch(kCondNe, high_reg, val_hi, (ccode == kCondEq) ? not_taken : taken);
- break;
- case kCondLt:
- OpCmpImmBranch(kCondLt, high_reg, val_hi, taken);
- OpCmpImmBranch(kCondGt, high_reg, val_hi, not_taken);
- ccode = kCondUlt;
- break;
- case kCondLe:
- OpCmpImmBranch(kCondLt, high_reg, val_hi, taken);
- OpCmpImmBranch(kCondGt, high_reg, val_hi, not_taken);
- ccode = kCondLs;
- break;
- case kCondGt:
- OpCmpImmBranch(kCondGt, high_reg, val_hi, taken);
- OpCmpImmBranch(kCondLt, high_reg, val_hi, not_taken);
- ccode = kCondHi;
- break;
- case kCondGe:
- OpCmpImmBranch(kCondGt, high_reg, val_hi, taken);
- OpCmpImmBranch(kCondLt, high_reg, val_hi, not_taken);
- ccode = kCondUge;
- break;
- default:
- LOG(FATAL) << "Unexpected ccode: " << ccode;
- }
- OpCmpImmBranch(ccode, low_reg, val_lo, taken);
}
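
On A64 the fused long-compare collapses to at most two instructions, since the whole
64-bit value lives in one register; restated (sketch):

  //   val == 0 and cond is eq/ne   ->   single wide cbz/cbnz on the register
  //   any other val or cond        ->   cmp xN, #val ; b.<cond> taken
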
void Arm64Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) {
+ // TODO(Arm64): implement this.
+ UNIMPLEMENTED(FATAL);
+
RegLocation rl_result;
RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
RegLocation rl_dest = mir_graph_->GetDest(mir);
@@ -194,21 +97,21 @@
if (cheap_false_val && ccode == kCondEq && (true_val == 0 || true_val == -1)) {
OpRegRegImm(kOpSub, rl_result.reg, rl_src.reg, -true_val);
DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
- LIR* it = OpIT(true_val == 0 ? kCondNe : kCondUge, "");
+ OpIT(true_val == 0 ? kCondNe : kCondUge, "");
LoadConstant(rl_result.reg, false_val);
- OpEndIT(it); // Add a scheduling barrier to keep the IT shadow intact
+ GenBarrier(); // Add a scheduling barrier to keep the IT shadow intact
} else if (cheap_false_val && ccode == kCondEq && true_val == 1) {
OpRegRegImm(kOpRsub, rl_result.reg, rl_src.reg, 1);
DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
- LIR* it = OpIT(kCondLs, "");
+ OpIT(kCondLs, "");
LoadConstant(rl_result.reg, false_val);
- OpEndIT(it); // Add a scheduling barrier to keep the IT shadow intact
+ GenBarrier(); // Add a scheduling barrier to keep the IT shadow intact
} else if (cheap_false_val && InexpensiveConstantInt(true_val)) {
OpRegImm(kOpCmp, rl_src.reg, 0);
- LIR* it = OpIT(ccode, "E");
+ OpIT(ccode, "E");
LoadConstant(rl_result.reg, true_val);
LoadConstant(rl_result.reg, false_val);
- OpEndIT(it); // Add a scheduling barrier to keep the IT shadow intact
+ GenBarrier(); // Add a scheduling barrier to keep the IT shadow intact
} else {
// Unlikely case - could be tuned.
RegStorage t_reg1 = AllocTemp();
@@ -216,10 +119,10 @@
LoadConstant(t_reg1, true_val);
LoadConstant(t_reg2, false_val);
OpRegImm(kOpCmp, rl_src.reg, 0);
- LIR* it = OpIT(ccode, "E");
+ OpIT(ccode, "E");
OpRegCopy(rl_result.reg, t_reg1);
OpRegCopy(rl_result.reg, t_reg2);
- OpEndIT(it); // Add a scheduling barrier to keep the IT shadow intact
+ GenBarrier(); // Add a scheduling barrier to keep the IT shadow intact
}
} else {
// MOVE case
@@ -229,24 +132,26 @@
rl_false = LoadValue(rl_false, kCoreReg);
rl_result = EvalLoc(rl_dest, kCoreReg, true);
OpRegImm(kOpCmp, rl_src.reg, 0);
- LIR* it = nullptr;
if (rl_result.reg.GetReg() == rl_true.reg.GetReg()) { // Is the "true" case already in place?
- it = OpIT(NegateComparison(ccode), "");
+ OpIT(NegateComparison(ccode), "");
OpRegCopy(rl_result.reg, rl_false.reg);
} else if (rl_result.reg.GetReg() == rl_false.reg.GetReg()) { // False case in place?
- it = OpIT(ccode, "");
+ OpIT(ccode, "");
OpRegCopy(rl_result.reg, rl_true.reg);
} else { // Normal - select between the two.
- it = OpIT(ccode, "E");
+ OpIT(ccode, "E");
OpRegCopy(rl_result.reg, rl_true.reg);
OpRegCopy(rl_result.reg, rl_false.reg);
}
- OpEndIT(it); // Add a scheduling barrier to keep the IT shadow intact
+ GenBarrier(); // Add a scheduling barrier to keep the IT shadow intact
}
StoreValue(rl_dest, rl_result);
}
void Arm64Mir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) {
+ // TODO(Arm64): implement this.
+ UNIMPLEMENTED(FATAL);
+
RegLocation rl_src1 = mir_graph_->GetSrcWide(mir, 0);
RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2);
// Normalize such that if either operand is constant, src2 will be constant.
@@ -259,8 +164,8 @@
RegLocation rl_temp = UpdateLocWide(rl_src2);
// Do special compare/branch against simple const operand if not already in registers.
int64_t val = mir_graph_->ConstantValueWide(rl_src2);
- if ((rl_temp.location != kLocPhysReg) &&
- ((ModifiedImmediate(Low32Bits(val)) >= 0) && (ModifiedImmediate(High32Bits(val)) >= 0))) {
+ if ((rl_temp.location != kLocPhysReg)
+ /*&& ((ModifiedImmediate(Low32Bits(val)) >= 0) && (ModifiedImmediate(High32Bits(val)) >= 0))*/) {
GenFusedLongCmpImmBranch(bb, rl_src1, val, ccode);
return;
}
@@ -308,56 +213,77 @@
* Generate a register comparison to an immediate and branch. Caller
* is responsible for setting branch target field.
*/
-LIR* Arm64Mir2Lir::OpCmpImmBranch(ConditionCode cond, RegStorage reg, int check_value, LIR* target) {
+LIR* Arm64Mir2Lir::OpCmpImmBranch(ConditionCode cond, RegStorage reg, int check_value,
+ LIR* target) {
LIR* branch;
ArmConditionCode arm_cond = ArmConditionEncoding(cond);
- /*
- * A common use of OpCmpImmBranch is for null checks, and using the Thumb 16-bit
- * compare-and-branch if zero is ideal if it will reach. However, because null checks
- * branch forward to a slow path, they will frequently not reach - and thus have to
- * be converted to a long form during assembly (which will trigger another assembly
- * pass). Here we estimate the branch distance for checks, and if large directly
- * generate the long form in an attempt to avoid an extra assembly pass.
- * TODO: consider interspersing slowpaths in code following unconditional branches.
- */
- bool skip = ((target != NULL) && (target->opcode == kPseudoThrowTarget));
- skip &= ((cu_->code_item->insns_size_in_code_units_ - current_dalvik_offset_) > 64);
- if (!skip && reg.Low8() && (check_value == 0) &&
- ((arm_cond == kArmCondEq) || (arm_cond == kArmCondNe))) {
- branch = NewLIR2((arm_cond == kArmCondEq) ? kThumb2Cbz : kThumb2Cbnz,
- reg.GetReg(), 0);
+ if (check_value == 0 && (arm_cond == kArmCondEq || arm_cond == kArmCondNe)) {
+ ArmOpcode opcode = (arm_cond == kArmCondEq) ? kA64Cbz2rt : kA64Cbnz2rt;
+ branch = NewLIR2(opcode, reg.GetReg(), 0);
} else {
OpRegImm(kOpCmp, reg, check_value);
- branch = NewLIR2(kThumbBCond, 0, arm_cond);
+ branch = NewLIR2(kA64B2ct, arm_cond, 0);
}
branch->target = target;
return branch;
}
LIR* Arm64Mir2Lir::OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) {
+ bool dest_is_fp = r_dest.IsFloat();
+ bool src_is_fp = r_src.IsFloat();
+ ArmOpcode opcode = kA64Brk1d;
LIR* res;
- int opcode;
- // If src or dest is a pair, we'll be using low reg.
- if (r_dest.IsPair()) {
- r_dest = r_dest.GetLow();
+
+ if (LIKELY(dest_is_fp == src_is_fp)) {
+ if (LIKELY(!dest_is_fp)) {
+ // Core/core copy.
+ // Copies involving the sp register require a different instruction.
+ opcode = UNLIKELY(A64_REG_IS_SP(r_dest.GetReg())) ? kA64Add4RRdT : kA64Mov2rr;
+
+ // TODO(Arm64): kA64Add4RRdT formally takes 4 arguments, but is used here as a
+ // 2-argument instruction. This currently works because the other arguments default
+ // to 0. We should instead introduce an alias kA64Mov2RR.
+
+ // Do an x/x copy only if both registers are x.
+ if (r_dest.Is64Bit() && r_src.Is64Bit()) {
+ opcode = WIDE(opcode);
+ }
+ } else {
+ // Float/float copy.
+ bool dest_is_double = r_dest.IsDouble();
+ bool src_is_double = r_src.IsDouble();
+
+ // We do not do float/double or double/float casts here.
+ DCHECK_EQ(dest_is_double, src_is_double);
+
+ // Homogeneous float/float copy.
+ opcode = (dest_is_double) ? FWIDE(kA64Fmov2ff) : kA64Fmov2ff;
+ }
+ } else {
+ // Inhomogeneous register copy.
+ if (dest_is_fp) {
+ if (r_dest.IsDouble()) {
+ opcode = kA64Fmov2Sx;
+ } else {
+ DCHECK(r_src.IsSingle());
+ opcode = kA64Fmov2sw;
+ }
+ } else {
+ if (r_src.IsDouble()) {
+ opcode = kA64Fmov2xS;
+ } else {
+ DCHECK(r_dest.Is32Bit());
+ opcode = kA64Fmov2ws;
+ }
+ }
}
- if (r_src.IsPair()) {
- r_src = r_src.GetLow();
- }
- if (r_dest.IsFloat() || r_src.IsFloat())
- return OpFpRegCopy(r_dest, r_src);
- if (r_dest.Low8() && r_src.Low8())
- opcode = kThumbMovRR;
- else if (!r_dest.Low8() && !r_src.Low8())
- opcode = kThumbMovRR_H2H;
- else if (r_dest.Low8())
- opcode = kThumbMovRR_H2L;
- else
- opcode = kThumbMovRR_L2H;
+
res = RawLIR(current_dalvik_offset_, opcode, r_dest.GetReg(), r_src.GetReg());
+
if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) {
res->flags.is_nop = true;
}
+
return res;
}
@@ -369,33 +295,7 @@
}
void Arm64Mir2Lir::OpRegCopyWide(RegStorage r_dest, RegStorage r_src) {
- if (r_dest != r_src) {
- bool dest_fp = r_dest.IsFloat();
- bool src_fp = r_src.IsFloat();
- DCHECK(r_dest.Is64Bit());
- DCHECK(r_src.Is64Bit());
- if (dest_fp) {
- if (src_fp) {
- OpRegCopy(r_dest, r_src);
- } else {
- NewLIR3(kThumb2Fmdrr, r_dest.GetReg(), r_src.GetLowReg(), r_src.GetHighReg());
- }
- } else {
- if (src_fp) {
- NewLIR3(kThumb2Fmrrd, r_dest.GetLowReg(), r_dest.GetHighReg(), r_src.GetReg());
- } else {
- // Handle overlap
- if (r_src.GetHighReg() == r_dest.GetLowReg()) {
- DCHECK_NE(r_src.GetLowReg(), r_dest.GetHighReg());
- OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
- OpRegCopy(r_dest.GetLow(), r_src.GetLow());
- } else {
- OpRegCopy(r_dest.GetLow(), r_src.GetLow());
- OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
- }
- }
- }
- }
+ OpRegCopy(r_dest, r_src);
}
// Table of magic divisors
@@ -427,6 +327,12 @@
// Integer division by constant via reciprocal multiply (Hacker's Delight, 10-4)
bool Arm64Mir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div,
RegLocation rl_src, RegLocation rl_dest, int lit) {
+ // TODO(Arm64): fix this for Arm64. Note: it may be worth revisiting the magic table.
+ // It should be possible to subtract one from all its entries, and to use smaddl to
+ // counteract this. The advantage is that the constants should then be easier to
+ // encode as logical immediates (0x55555555 rather than 0x55555556).
+ UNIMPLEMENTED(FATAL);
+
if ((lit < 0) || (lit >= static_cast<int>(sizeof(magic_table)/sizeof(magic_table[0])))) {
return false;
}
@@ -434,6 +340,10 @@
if (pattern == DivideNone) {
return false;
}
+ // Tuning: add rem patterns
+ if (!is_div) {
+ return false;
+ }
RegStorage r_magic = AllocTemp();
LoadConstant(r_magic, magic_table[lit].magic);
@@ -441,182 +351,43 @@
RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
RegStorage r_hi = AllocTemp();
RegStorage r_lo = AllocTemp();
-
- // rl_dest and rl_src might overlap.
- // Reuse r_hi to save the div result for reminder case.
- RegStorage r_div_result = is_div ? rl_result.reg : r_hi;
-
- NewLIR4(kThumb2Smull, r_lo.GetReg(), r_hi.GetReg(), r_magic.GetReg(), rl_src.reg.GetReg());
+ NewLIR4(kA64Smaddl4xwwx, r_lo.GetReg(), r_magic.GetReg(), rl_src.reg.GetReg(), rxzr);
switch (pattern) {
case Divide3:
- OpRegRegRegShift(kOpSub, r_div_result, r_hi, rl_src.reg, EncodeShift(kArmAsr, 31));
+ OpRegRegRegShift(kOpSub, rl_result.reg.GetReg(), r_hi.GetReg(),
+ rl_src.reg.GetReg(), EncodeShift(kA64Asr, 31));
break;
case Divide5:
OpRegRegImm(kOpAsr, r_lo, rl_src.reg, 31);
- OpRegRegRegShift(kOpRsub, r_div_result, r_lo, r_hi,
- EncodeShift(kArmAsr, magic_table[lit].shift));
+ OpRegRegRegShift(kOpRsub, rl_result.reg.GetReg(), r_lo.GetReg(), r_hi.GetReg(),
+ EncodeShift(kA64Asr, magic_table[lit].shift));
break;
case Divide7:
OpRegReg(kOpAdd, r_hi, rl_src.reg);
OpRegRegImm(kOpAsr, r_lo, rl_src.reg, 31);
- OpRegRegRegShift(kOpRsub, r_div_result, r_lo, r_hi,
- EncodeShift(kArmAsr, magic_table[lit].shift));
+ OpRegRegRegShift(kOpRsub, rl_result.reg.GetReg(), r_lo.GetReg(), r_hi.GetReg(),
+ EncodeShift(kA64Asr, magic_table[lit].shift));
break;
default:
LOG(FATAL) << "Unexpected pattern: " << pattern;
}
-
- if (!is_div) {
- // div_result = src / lit
- // tmp1 = div_result * lit
- // dest = src - tmp1
- RegStorage tmp1 = r_lo;
- EasyMultiplyOp ops[2];
-
- bool canEasyMultiply = GetEasyMultiplyTwoOps(lit, ops);
- DCHECK_NE(canEasyMultiply, false);
-
- GenEasyMultiplyTwoOps(tmp1, r_div_result, ops);
- OpRegRegReg(kOpSub, rl_result.reg, rl_src.reg, tmp1);
- }
-
StoreValue(rl_dest, rl_result);
return true;
}
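
As a concrete instance of the reciprocal-multiply scheme (Hacker's Delight, 10-4) used
above, division by 3 follows the Divide3 pattern with the well-known magic constant
0x55555556; a self-contained sketch (requires <cstdint>):

  // 32-bit signed division by 3 via reciprocal multiply (Divide3 pattern).
  static int32_t Div3(int32_t n) {
    int64_t prod = INT64_C(0x55555556) * n;          // what smaddl computes
    int32_t hi = static_cast<int32_t>(prod >> 32);   // high half of the product
    return hi - (n >> 31);                           // sub ..., src asr #31
  }
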
-// Try to convert *lit to 1 RegRegRegShift/RegRegShift form.
-bool Arm64Mir2Lir::GetEasyMultiplyOp(int lit, Arm64Mir2Lir::EasyMultiplyOp* op) {
- if (IsPowerOfTwo(lit)) {
- op->op = kOpLsl;
- op->shift = LowestSetBit(lit);
- return true;
- }
-
- if (IsPowerOfTwo(lit - 1)) {
- op->op = kOpAdd;
- op->shift = LowestSetBit(lit - 1);
- return true;
- }
-
- if (IsPowerOfTwo(lit + 1)) {
- op->op = kOpRsub;
- op->shift = LowestSetBit(lit + 1);
- return true;
- }
-
- op->op = kOpInvalid;
- op->shift = 0;
- return false;
-}
-
-// Try to convert *lit to 1~2 RegRegRegShift/RegRegShift forms.
-bool Arm64Mir2Lir::GetEasyMultiplyTwoOps(int lit, EasyMultiplyOp* ops) {
- GetEasyMultiplyOp(lit, &ops[0]);
- if (GetEasyMultiplyOp(lit, &ops[0])) {
- ops[1].op = kOpInvalid;
- ops[1].shift = 0;
- return true;
- }
-
- int lit1 = lit;
- uint32_t shift = LowestSetBit(lit1);
- if (GetEasyMultiplyOp(lit1 >> shift, &ops[0])) {
- ops[1].op = kOpLsl;
- ops[1].shift = shift;
- return true;
- }
-
- lit1 = lit - 1;
- shift = LowestSetBit(lit1);
- if (GetEasyMultiplyOp(lit1 >> shift, &ops[0])) {
- ops[1].op = kOpAdd;
- ops[1].shift = shift;
- return true;
- }
-
- lit1 = lit + 1;
- shift = LowestSetBit(lit1);
- if (GetEasyMultiplyOp(lit1 >> shift, &ops[0])) {
- ops[1].op = kOpRsub;
- ops[1].shift = shift;
- return true;
- }
-
- return false;
-}
-
-// Generate instructions to do multiply.
-// Additional temporary register is required,
-// if it need to generate 2 instructions and src/dest overlap.
-void Arm64Mir2Lir::GenEasyMultiplyTwoOps(RegStorage r_dest, RegStorage r_src, EasyMultiplyOp* ops) {
- // tmp1 = ( src << shift1) + [ src | -src | 0 ]
- // dest = (tmp1 << shift2) + [ src | -src | 0 ]
-
- RegStorage r_tmp1;
- if (ops[1].op == kOpInvalid) {
- r_tmp1 = r_dest;
- } else if (r_dest.GetReg() != r_src.GetReg()) {
- r_tmp1 = r_dest;
- } else {
- r_tmp1 = AllocTemp();
- }
-
- switch (ops[0].op) {
- case kOpLsl:
- OpRegRegImm(kOpLsl, r_tmp1, r_src, ops[0].shift);
- break;
- case kOpAdd:
- OpRegRegRegShift(kOpAdd, r_tmp1, r_src, r_src, EncodeShift(kArmLsl, ops[0].shift));
- break;
- case kOpRsub:
- OpRegRegRegShift(kOpRsub, r_tmp1, r_src, r_src, EncodeShift(kArmLsl, ops[0].shift));
- break;
- default:
- DCHECK_EQ(ops[0].op, kOpInvalid);
- break;
- }
-
- switch (ops[1].op) {
- case kOpInvalid:
- return;
- case kOpLsl:
- OpRegRegImm(kOpLsl, r_dest, r_tmp1, ops[1].shift);
- break;
- case kOpAdd:
- OpRegRegRegShift(kOpAdd, r_dest, r_src, r_tmp1, EncodeShift(kArmLsl, ops[1].shift));
- break;
- case kOpRsub:
- OpRegRegRegShift(kOpRsub, r_dest, r_src, r_tmp1, EncodeShift(kArmLsl, ops[1].shift));
- break;
- default:
- LOG(FATAL) << "Unexpected opcode passed to GenEasyMultiplyTwoOps";
- break;
- }
-}
-
bool Arm64Mir2Lir::EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) {
- EasyMultiplyOp ops[2];
-
- if (!GetEasyMultiplyTwoOps(lit, ops)) {
- return false;
- }
-
- rl_src = LoadValue(rl_src, kCoreReg);
- RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-
- GenEasyMultiplyTwoOps(rl_result.reg, rl_src.reg, ops);
- StoreValue(rl_dest, rl_result);
- return true;
+ LOG(FATAL) << "Unexpected use of EasyMultiply for Arm64";
+ return false;
}
RegLocation Arm64Mir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
RegLocation rl_src2, bool is_div, bool check_zero) {
- LOG(FATAL) << "Unexpected use of GenDivRem for Arm";
+ LOG(FATAL) << "Unexpected use of GenDivRem for Arm64";
return rl_dest;
}
RegLocation Arm64Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit, bool is_div) {
- LOG(FATAL) << "Unexpected use of GenDivRemLit for Arm";
+ LOG(FATAL) << "Unexpected use of GenDivRemLit for Arm64";
return rl_dest;
}
@@ -657,6 +428,9 @@
}
bool Arm64Mir2Lir::GenInlinedMinMaxInt(CallInfo* info, bool is_min) {
+ // TODO(Arm64): implement this.
+ UNIMPLEMENTED(FATAL);
+
DCHECK_EQ(cu_->instruction_set, kThumb2);
RegLocation rl_src1 = info->args[0];
RegLocation rl_src2 = info->args[1];
@@ -665,15 +439,18 @@
RegLocation rl_dest = InlineTarget(info);
RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
- LIR* it = OpIT((is_min) ? kCondGt : kCondLt, "E");
+ OpIT((is_min) ? kCondGt : kCondLt, "E");
OpRegReg(kOpMov, rl_result.reg, rl_src2.reg);
OpRegReg(kOpMov, rl_result.reg, rl_src1.reg);
- OpEndIT(it);
+ GenBarrier();
StoreValue(rl_dest, rl_result);
return true;
}
bool Arm64Mir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) {
+ // TODO(Arm64): implement this.
+ UNIMPLEMENTED(WARNING);
+
RegLocation rl_src_address = info->args[0]; // long address
rl_src_address = NarrowRegLoc(rl_src_address); // ignore high half in info->args[1]
RegLocation rl_dest = InlineTarget(info);
@@ -682,23 +459,26 @@
if (size == k64) {
// Fake unaligned LDRD by two unaligned LDR instructions on ARMv7 with SCTLR.A set to 0.
if (rl_address.reg.GetReg() != rl_result.reg.GetLowReg()) {
- Load32Disp(rl_address.reg, 0, rl_result.reg.GetLow());
- Load32Disp(rl_address.reg, 4, rl_result.reg.GetHigh());
+ LoadWordDisp(rl_address.reg, 0, rl_result.reg.GetLow());
+ LoadWordDisp(rl_address.reg, 4, rl_result.reg.GetHigh());
} else {
- Load32Disp(rl_address.reg, 4, rl_result.reg.GetHigh());
- Load32Disp(rl_address.reg, 0, rl_result.reg.GetLow());
+ LoadWordDisp(rl_address.reg, 4, rl_result.reg.GetHigh());
+ LoadWordDisp(rl_address.reg, 0, rl_result.reg.GetLow());
}
StoreValueWide(rl_dest, rl_result);
} else {
DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
// Unaligned load with LDR and LDRSH is allowed on ARMv7 with SCTLR.A set to 0.
- LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, INVALID_SREG);
+ LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size);
StoreValue(rl_dest, rl_result);
}
return true;
}
bool Arm64Mir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) {
+ // TODO(Arm64): implement this.
+ UNIMPLEMENTED(WARNING);
+
RegLocation rl_src_address = info->args[0]; // long address
rl_src_address = NarrowRegLoc(rl_src_address); // ignore high half in info->args[1]
RegLocation rl_src_value = info->args[2]; // [size] value
@@ -718,14 +498,17 @@
}
void Arm64Mir2Lir::OpLea(RegStorage r_base, RegStorage reg1, RegStorage reg2, int scale, int offset) {
- LOG(FATAL) << "Unexpected use of OpLea for Arm";
+ LOG(FATAL) << "Unexpected use of OpLea for Arm64";
}
-void Arm64Mir2Lir::OpTlsCmp(ThreadOffset<4> offset, int val) {
- LOG(FATAL) << "Unexpected use of OpTlsCmp for Arm";
+void Arm64Mir2Lir::OpTlsCmp(A64ThreadOffset offset, int val) {
+ LOG(FATAL) << "Unexpected use of OpTlsCmp for Arm64";
}
bool Arm64Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
+ // TODO(Arm64): implement this.
+ UNIMPLEMENTED(WARNING);
+
DCHECK_EQ(cu_->instruction_set, kThumb2);
// Unused - RegLocation rl_src_unsafe = info->args[0];
RegLocation rl_src_obj = info->args[1]; // Object - known non-null
@@ -745,10 +528,10 @@
// around the potentially locked temp by using LR for r_ptr, unconditionally.
// TODO: Pass information about the need for more temps to the stack frame generation
// code so that we can rely on being able to allocate enough temps.
- DCHECK(!GetRegInfo(rs_rARM_LR)->IsTemp());
- MarkTemp(rs_rARM_LR);
- FreeTemp(rs_rARM_LR);
- LockTemp(rs_rARM_LR);
+ DCHECK(!GetRegInfo(rs_rA64_LR)->IsTemp());
+ MarkTemp(rs_rA64_LR);
+ FreeTemp(rs_rA64_LR);
+ LockTemp(rs_rA64_LR);
bool load_early = true;
if (is_long) {
RegStorage expected_reg = rl_src_expected.reg.IsPair() ? rl_src_expected.reg.GetLow() :
@@ -797,7 +580,7 @@
RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg);
- RegStorage r_ptr = rs_rARM_LR;
+ RegStorage r_ptr = rs_rA64_LR;
OpRegRegReg(kOpAdd, r_ptr, rl_object.reg, rl_offset.reg);
// Free now unneeded rl_object and rl_offset to give more temps.
@@ -813,9 +596,9 @@
rl_expected = LoadValueWide(rl_src_expected, kCoreReg);
} else {
// NOTE: partially defined rl_expected & rl_new_value - but we just want the regs.
- RegStorage low_reg = AllocTemp();
- RegStorage high_reg = AllocTemp();
- rl_new_value.reg = RegStorage::MakeRegPair(low_reg, high_reg);
+ int low_reg = AllocTemp().GetReg();
+ int high_reg = AllocTemp().GetReg();
+ rl_new_value.reg = RegStorage(RegStorage::k64BitPair, low_reg, high_reg);
rl_expected = rl_new_value;
}
@@ -827,42 +610,37 @@
RegStorage r_tmp = AllocTemp();
LIR* target = NewLIR0(kPseudoTargetLabel);
- LIR* it = nullptr;
if (is_long) {
RegStorage r_tmp_high = AllocTemp();
if (!load_early) {
LoadValueDirectWide(rl_src_expected, rl_expected.reg);
}
- NewLIR3(kThumb2Ldrexd, r_tmp.GetReg(), r_tmp_high.GetReg(), r_ptr.GetReg());
+ NewLIR3(kA64Ldxr2rX, r_tmp.GetReg(), r_tmp_high.GetReg(), r_ptr.GetReg());
OpRegReg(kOpSub, r_tmp, rl_expected.reg.GetLow());
OpRegReg(kOpSub, r_tmp_high, rl_expected.reg.GetHigh());
if (!load_early) {
LoadValueDirectWide(rl_src_new_value, rl_new_value.reg);
}
- // Make sure we use ORR that sets the ccode
- if (r_tmp.Low8() && r_tmp_high.Low8()) {
- NewLIR2(kThumbOrr, r_tmp.GetReg(), r_tmp_high.GetReg());
- } else {
- NewLIR4(kThumb2OrrRRRs, r_tmp.GetReg(), r_tmp.GetReg(), r_tmp_high.GetReg(), 0);
- }
+
+ LIR* branch1 = OpCmpImmBranch(kCondNe, r_tmp, 0, NULL);
+ LIR* branch2 = OpCmpImmBranch(kCondNe, r_tmp_high, 0, NULL);
+ NewLIR4(WIDE(kA64Stxr3wrX) /* eq */, r_tmp.GetReg(), rl_new_value.reg.GetReg(),
+ rl_new_value.reg.GetHighReg(), r_ptr.GetReg());
+ LIR* target2 = NewLIR0(kPseudoTargetLabel);
+ branch1->target = target2;
+ branch2->target = target2;
FreeTemp(r_tmp_high); // Now unneeded
- DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
- it = OpIT(kCondEq, "T");
- NewLIR4(kThumb2Strexd /* eq */, r_tmp.GetReg(), rl_new_value.reg.GetLowReg(), rl_new_value.reg.GetHighReg(), r_ptr.GetReg());
-
} else {
- NewLIR3(kThumb2Ldrex, r_tmp.GetReg(), r_ptr.GetReg(), 0);
+ NewLIR3(kA64Ldxr2rX, r_tmp.GetReg(), r_ptr.GetReg(), 0);
OpRegReg(kOpSub, r_tmp, rl_expected.reg);
DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
- it = OpIT(kCondEq, "T");
- NewLIR4(kThumb2Strex /* eq */, r_tmp.GetReg(), rl_new_value.reg.GetReg(), r_ptr.GetReg(), 0);
+ OpIT(kCondEq, "T");
+ NewLIR4(kA64Stxr3wrX /* eq */, r_tmp.GetReg(), rl_new_value.reg.GetReg(), r_ptr.GetReg(), 0);
}
// Still one conditional left from OpIT(kCondEq, "T") from either branch
OpRegImm(kOpCmp /* eq */, r_tmp, 1);
- OpEndIT(it);
-
OpCondBranch(kCondEq, target);
if (!load_early) {
@@ -873,36 +651,37 @@
RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
OpRegRegImm(kOpRsub, rl_result.reg, r_tmp, 1);
DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
- it = OpIT(kCondUlt, "");
+ OpIT(kCondUlt, "");
LoadConstant(rl_result.reg, 0); /* cc */
FreeTemp(r_tmp); // Now unneeded.
- OpEndIT(it); // Barrier to terminate OpIT.
StoreValue(rl_dest, rl_result);
// Now, restore lr to its non-temp status.
- Clobber(rs_rARM_LR);
- UnmarkTemp(rs_rARM_LR);
+ Clobber(rs_rA64_LR);
+ UnmarkTemp(rs_rA64_LR);
return true;
}
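
The ldxr/stxr sequence above is a load-exclusive/store-exclusive retry loop: compare the loaded value with the expected one, attempt the store-exclusive only on a match, and loop back when the exclusive store reports failure. In C++ terms the contract is roughly the following (a sketch of the semantics, not the generated code):

```cpp
#include <atomic>
#include <cassert>
#include <cstdint>

// compare_exchange_weak may fail spuriously, exactly like stxr reporting
// failure even though memory was unchanged -- hence the retry loop, which
// mirrors the branch-back-to-label structure of the emitted code.
bool Cas64(std::atomic<int64_t>* addr, int64_t expected, int64_t new_value) {
  int64_t observed = expected;
  while (!addr->compare_exchange_weak(observed, new_value)) {
    if (observed != expected) {
      return false;  // Value differed: the kCondNe branches taken above.
    }
    observed = expected;  // Spurious failure: loop back to the ldxr label.
  }
  return true;
}

int main() {
  std::atomic<int64_t> v(41);
  assert(Cas64(&v, 41, 42) && v.load() == 42);
  assert(!Cas64(&v, 41, 43) && v.load() == 42);
  return 0;
}
```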
LIR* Arm64Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
- return RawLIR(current_dalvik_offset_, kThumb2LdrPcRel12, reg.GetReg(), 0, 0, 0, 0, target);
+ return RawLIR(current_dalvik_offset_, WIDE(kA64Ldr2rp), reg.GetReg(), 0, 0, 0, 0, target);
}
LIR* Arm64Mir2Lir::OpVldm(RegStorage r_base, int count) {
- return NewLIR3(kThumb2Vldms, r_base.GetReg(), rs_fr0.GetReg(), count);
+ LOG(FATAL) << "Unexpected use of OpVldm for Arm64";
+ return NULL;
}
LIR* Arm64Mir2Lir::OpVstm(RegStorage r_base, int count) {
- return NewLIR3(kThumb2Vstms, r_base.GetReg(), rs_fr0.GetReg(), count);
+ LOG(FATAL) << "Unexpected use of OpVstm for Arm64";
+ return NULL;
}
void Arm64Mir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src,
RegLocation rl_result, int lit,
int first_bit, int second_bit) {
- OpRegRegRegShift(kOpAdd, rl_result.reg, rl_src.reg, rl_src.reg,
- EncodeShift(kArmLsl, second_bit - first_bit));
+ OpRegRegRegShift(kOpAdd, rl_result.reg.GetReg(), rl_src.reg.GetReg(), rl_src.reg.GetReg(),
+ EncodeShift(kA64Lsl, second_bit - first_bit));
if (first_bit != 0) {
OpRegRegImm(kOpLsl, rl_result.reg, rl_result.reg, first_bit);
}
@@ -910,15 +689,14 @@
void Arm64Mir2Lir::GenDivZeroCheckWide(RegStorage reg) {
DCHECK(reg.IsPair()); // TODO: support k64BitSolo.
- RegStorage t_reg = AllocTemp();
- NewLIR4(kThumb2OrrRRRs, t_reg.GetReg(), reg.GetLowReg(), reg.GetHighReg(), 0);
- FreeTemp(t_reg);
+ OpRegImm64(kOpCmp, reg, 0, /*is_wide*/true);
GenDivZeroCheck(kCondEq);
}
+// TODO(Arm64): the function below should go.
// Test suspend flag, return target of taken suspend branch
LIR* Arm64Mir2Lir::OpTestSuspend(LIR* target) {
- NewLIR2(kThumbSubRI8, rs_rARM_SUSPEND.GetReg(), 1);
+ NewLIR3(kA64Subs3rRd, rA64_SUSPEND, rA64_SUSPEND, 1);
return OpCondBranch((target == NULL) ? kCondEq : kCondNe, target);
}
@@ -950,8 +728,8 @@
// If the same barrier already exists, don't generate another.
if (barrier == nullptr
- || (barrier != nullptr && (barrier->opcode != kThumb2Dmb || barrier->operands[0] != dmb_flavor))) {
- barrier = NewLIR1(kThumb2Dmb, dmb_flavor);
+ || (barrier->opcode != kA64Dmb1B || barrier->operands[0] != dmb_flavor)) {
+ barrier = NewLIR1(kA64Dmb1B, dmb_flavor);
}
// At this point we must have a memory barrier. Mark it as a scheduling barrier as well.
@@ -979,136 +757,45 @@
StoreValueWide(rl_dest, rl_result);
}
+void Arm64Mir2Lir::GenLongOp(OpKind op, RegLocation rl_dest, RegLocation rl_src1,
+ RegLocation rl_src2) {
+ RegLocation rl_result;
+ rl_src1 = LoadValueWide(rl_src1, kCoreReg);
+ rl_src2 = LoadValueWide(rl_src2, kCoreReg);
+ rl_result = EvalLocWide(rl_dest, kCoreReg, true);
+ OpRegRegRegShift(op, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), rl_src2.reg.GetReg(),
+ ENCODE_NO_SHIFT, /*is_wide*/ true);
+ StoreValueWide(rl_dest, rl_result);
+}
+
void Arm64Mir2Lir::GenMulLong(Instruction::Code opcode, RegLocation rl_dest,
- RegLocation rl_src1, RegLocation rl_src2) {
- /*
- * tmp1 = src1.hi * src2.lo; // src1.hi is no longer needed
- * dest = src1.lo * src2.lo;
- * tmp1 += src1.lo * src2.hi;
- * dest.hi += tmp1;
- *
- * To pull off inline multiply, we have a worst-case requirement of 7 temporary
- * registers. Normally for Arm, we get 5. We can get to 6 by including
- * lr in the temp set. The only problematic case is all operands and result are
- * distinct, and none have been promoted. In that case, we can succeed by aggressively
- * freeing operand temp registers after they are no longer needed. All other cases
- * can proceed normally. We'll just punt on the case of the result having a misaligned
- * overlap with either operand and send that case to a runtime handler.
- */
- RegLocation rl_result;
- if (BadOverlap(rl_src1, rl_dest) || (BadOverlap(rl_src2, rl_dest))) {
- ThreadOffset<4> func_offset = QUICK_ENTRYPOINT_OFFSET(4, pLmul);
- FlushAllRegs();
- CallRuntimeHelperRegLocationRegLocation(func_offset, rl_src1, rl_src2, false);
- rl_result = GetReturnWide(false);
- StoreValueWide(rl_dest, rl_result);
- return;
- }
-
- rl_src1 = LoadValueWide(rl_src1, kCoreReg);
- rl_src2 = LoadValueWide(rl_src2, kCoreReg);
-
- int reg_status = 0;
- RegStorage res_lo;
- RegStorage res_hi;
- bool dest_promoted = rl_dest.location == kLocPhysReg && rl_dest.reg.Valid() &&
- !IsTemp(rl_dest.reg.GetLow()) && !IsTemp(rl_dest.reg.GetHigh());
- bool src1_promoted = !IsTemp(rl_src1.reg.GetLow()) && !IsTemp(rl_src1.reg.GetHigh());
- bool src2_promoted = !IsTemp(rl_src2.reg.GetLow()) && !IsTemp(rl_src2.reg.GetHigh());
- // Check if rl_dest is *not* either operand and we have enough temp registers.
- if ((rl_dest.s_reg_low != rl_src1.s_reg_low && rl_dest.s_reg_low != rl_src2.s_reg_low) &&
- (dest_promoted || src1_promoted || src2_promoted)) {
- // In this case, we do not need to manually allocate temp registers for result.
- rl_result = EvalLoc(rl_dest, kCoreReg, true);
- res_lo = rl_result.reg.GetLow();
- res_hi = rl_result.reg.GetHigh();
- } else {
- res_lo = AllocTemp();
- if ((rl_src1.s_reg_low == rl_src2.s_reg_low) || src1_promoted || src2_promoted) {
- // In this case, we have enough temp registers to be allocated for result.
- res_hi = AllocTemp();
- reg_status = 1;
- } else {
- // In this case, all temps are now allocated.
- // res_hi will be allocated after we can free src1_hi.
- reg_status = 2;
- }
- }
-
- // Temporarily add LR to the temp pool, and assign it to tmp1
- MarkTemp(rs_rARM_LR);
- FreeTemp(rs_rARM_LR);
- RegStorage tmp1 = rs_rARM_LR;
- LockTemp(rs_rARM_LR);
-
- if (rl_src1.reg == rl_src2.reg) {
- DCHECK(res_hi.Valid());
- DCHECK(res_lo.Valid());
- NewLIR3(kThumb2MulRRR, tmp1.GetReg(), rl_src1.reg.GetLowReg(), rl_src1.reg.GetHighReg());
- NewLIR4(kThumb2Umull, res_lo.GetReg(), res_hi.GetReg(), rl_src1.reg.GetLowReg(),
- rl_src1.reg.GetLowReg());
- OpRegRegRegShift(kOpAdd, res_hi, res_hi, tmp1, EncodeShift(kArmLsl, 1));
- } else {
- NewLIR3(kThumb2MulRRR, tmp1.GetReg(), rl_src2.reg.GetLowReg(), rl_src1.reg.GetHighReg());
- if (reg_status == 2) {
- DCHECK(!res_hi.Valid());
- DCHECK_NE(rl_src1.reg.GetLowReg(), rl_src2.reg.GetLowReg());
- DCHECK_NE(rl_src1.reg.GetHighReg(), rl_src2.reg.GetHighReg());
- FreeTemp(rl_src1.reg.GetHigh());
- res_hi = AllocTemp();
- }
- DCHECK(res_hi.Valid());
- DCHECK(res_lo.Valid());
- NewLIR4(kThumb2Umull, res_lo.GetReg(), res_hi.GetReg(), rl_src2.reg.GetLowReg(),
- rl_src1.reg.GetLowReg());
- NewLIR4(kThumb2Mla, tmp1.GetReg(), rl_src1.reg.GetLowReg(), rl_src2.reg.GetHighReg(),
- tmp1.GetReg());
- NewLIR4(kThumb2AddRRR, res_hi.GetReg(), tmp1.GetReg(), res_hi.GetReg(), 0);
- if (reg_status == 2) {
- // Clobber rl_src1 since it was corrupted.
- FreeTemp(rl_src1.reg);
- Clobber(rl_src1.reg);
- }
- }
-
- // Now, restore lr to its non-temp status.
- FreeTemp(tmp1);
- Clobber(rs_rARM_LR);
- UnmarkTemp(rs_rARM_LR);
-
- if (reg_status != 0) {
- // We had manually allocated registers for rl_result.
- // Now construct a RegLocation.
- rl_result = GetReturnWide(false); // Just using as a template.
- rl_result.reg = RegStorage::MakeRegPair(res_lo, res_hi);
- }
-
- StoreValueWide(rl_dest, rl_result);
+ RegLocation rl_src1, RegLocation rl_src2) {
+ GenLongOp(kOpMul, rl_dest, rl_src1, rl_src2);
}
void Arm64Mir2Lir::GenAddLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
- RegLocation rl_src2) {
- LOG(FATAL) << "Unexpected use of GenAddLong for Arm";
+ RegLocation rl_src2) {
+ GenLongOp(kOpAdd, rl_dest, rl_src1, rl_src2);
}
void Arm64Mir2Lir::GenSubLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
RegLocation rl_src2) {
- LOG(FATAL) << "Unexpected use of GenSubLong for Arm";
+ GenLongOp(kOpSub, rl_dest, rl_src1, rl_src2);
}
void Arm64Mir2Lir::GenAndLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
RegLocation rl_src2) {
- LOG(FATAL) << "Unexpected use of GenAndLong for Arm";
+ GenLongOp(kOpAnd, rl_dest, rl_src1, rl_src2);
}
void Arm64Mir2Lir::GenOrLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
RegLocation rl_src2) {
- LOG(FATAL) << "Unexpected use of GenOrLong for Arm";
+ GenLongOp(kOpOr, rl_dest, rl_src1, rl_src2);
}
void Arm64Mir2Lir::GenXorLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
RegLocation rl_src2) {
- LOG(FATAL) << "Unexpected use of genXoLong for Arm";
+ GenLongOp(kOpXor, rl_dest, rl_src1, rl_src2);
}
/*
@@ -1116,6 +803,9 @@
*/
void Arm64Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
RegLocation rl_index, RegLocation rl_dest, int scale) {
+ // TODO(Arm64): check this.
+ UNIMPLEMENTED(WARNING);
+
RegisterClass reg_class = RegClassBySize(size);
int len_offset = mirror::Array::LengthOffset().Int32Value();
int data_offset;
@@ -1157,7 +847,8 @@
} else {
// No special indexed operation, lea + load w/ displacement
reg_ptr = AllocTemp();
- OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg, rl_index.reg, EncodeShift(kArmLsl, scale));
+ OpRegRegRegShift(kOpAdd, reg_ptr.GetReg(), rl_array.reg.GetReg(), rl_index.reg.GetReg(),
+ EncodeShift(kA64Lsl, scale));
FreeTemp(rl_index.reg);
}
rl_result = EvalLoc(rl_dest, reg_class, true);
@@ -1170,7 +861,7 @@
}
FreeTemp(reg_len);
}
- LoadBaseDisp(reg_ptr, data_offset, rl_result.reg, size, INVALID_SREG);
+ LoadBaseDisp(reg_ptr, data_offset, rl_result.reg, size);
MarkPossibleNullPointerException(opt_flags);
if (!constant_index) {
FreeTemp(reg_ptr);
@@ -1204,6 +895,9 @@
*/
void Arm64Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) {
+ // TODO(Arm64): check this.
+ UNIMPLEMENTED(WARNING);
+
RegisterClass reg_class = RegClassBySize(size);
int len_offset = mirror::Array::LengthOffset().Int32Value();
bool constant_index = rl_index.is_const;
@@ -1259,7 +953,8 @@
rl_src = LoadValue(rl_src, reg_class);
}
if (!constant_index) {
- OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg, rl_index.reg, EncodeShift(kArmLsl, scale));
+ OpRegRegRegShift(kOpAdd, reg_ptr.GetReg(), rl_array.reg.GetReg(), rl_index.reg.GetReg(),
+ EncodeShift(kA64Lsl, scale));
}
if (needs_range_check) {
if (constant_index) {
@@ -1294,6 +989,9 @@
void Arm64Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode,
RegLocation rl_dest, RegLocation rl_src, RegLocation rl_shift) {
+ // TODO(Arm64): check this.
+ UNIMPLEMENTED(WARNING);
+
rl_src = LoadValueWide(rl_src, kCoreReg);
// Per spec, we only care about low 6 bits of shift amount.
int shift_amount = mir_graph_->ConstantValue(rl_shift) & 0x3f;
@@ -1320,8 +1018,8 @@
LoadConstant(rl_result.reg.GetLow(), 0);
} else {
OpRegRegImm(kOpLsl, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), shift_amount);
- OpRegRegRegShift(kOpOr, rl_result.reg.GetHigh(), rl_result.reg.GetHigh(), rl_src.reg.GetLow(),
- EncodeShift(kArmLsr, 32 - shift_amount));
+ OpRegRegRegShift(kOpOr, rl_result.reg.GetHighReg(), rl_result.reg.GetHighReg(), rl_src.reg.GetLowReg(),
+ EncodeShift(kA64Lsr, 32 - shift_amount));
OpRegRegImm(kOpLsl, rl_result.reg.GetLow(), rl_src.reg.GetLow(), shift_amount);
}
break;
@@ -1336,8 +1034,8 @@
} else {
RegStorage t_reg = AllocTemp();
OpRegRegImm(kOpLsr, t_reg, rl_src.reg.GetLow(), shift_amount);
- OpRegRegRegShift(kOpOr, rl_result.reg.GetLow(), t_reg, rl_src.reg.GetHigh(),
- EncodeShift(kArmLsl, 32 - shift_amount));
+ OpRegRegRegShift(kOpOr, rl_result.reg.GetLowReg(), t_reg.GetReg(), rl_src.reg.GetHighReg(),
+ EncodeShift(kA64Lsl, 32 - shift_amount));
FreeTemp(t_reg);
OpRegRegImm(kOpAsr, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), shift_amount);
}
@@ -1353,8 +1051,8 @@
} else {
RegStorage t_reg = AllocTemp();
OpRegRegImm(kOpLsr, t_reg, rl_src.reg.GetLow(), shift_amount);
- OpRegRegRegShift(kOpOr, rl_result.reg.GetLow(), t_reg, rl_src.reg.GetHigh(),
- EncodeShift(kArmLsl, 32 - shift_amount));
+ OpRegRegRegShift(kOpOr, rl_result.reg.GetLowReg(), t_reg.GetReg(), rl_src.reg.GetHighReg(),
+ EncodeShift(kA64Lsl, 32 - shift_amount));
FreeTemp(t_reg);
OpRegRegImm(kOpLsr, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), shift_amount);
}
@@ -1365,8 +1063,11 @@
StoreValueWide(rl_dest, rl_result);
}
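
In the shift-immediate cases above, a 64-bit shift is still composed from two 32-bit halves: for SHL_LONG with 0 < n < 32, the high word is shifted left and then ored with the low word shifted right by 32 - n, which carries over the bits that fall out of the low word. A small self-check of that identity:

```cpp
#include <cassert>
#include <cstdint>

// The pair-register left shift the code above emits for 0 < n < 32:
// new_hi picks up the bits shifted out of the low word (the
// "orr ..., lsr #(32 - n)" step), new_lo is a plain shift.
static uint64_t ShlViaPair(uint32_t lo, uint32_t hi, int n) {
  assert(n > 0 && n < 32);
  uint32_t new_hi = (hi << n) | (lo >> (32 - n));
  uint32_t new_lo = lo << n;
  return (static_cast<uint64_t>(new_hi) << 32) | new_lo;
}

int main() {
  uint64_t v = UINT64_C(0x123456789abcdef0);
  for (int n = 1; n < 32; ++n) {
    assert(ShlViaPair(static_cast<uint32_t>(v),
                      static_cast<uint32_t>(v >> 32), n) == v << n);
  }
  return 0;
}
```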
-void Arm64Mir2Lir::GenArithImmOpLong(Instruction::Code opcode,
- RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
+void Arm64Mir2Lir::GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
+ RegLocation rl_src1, RegLocation rl_src2) {
+ // TODO(Arm64): implement this.
+ UNIMPLEMENTED(WARNING);
+
if ((opcode == Instruction::SUB_LONG_2ADDR) || (opcode == Instruction::SUB_LONG)) {
if (!rl_src2.is_const) {
// Don't bother with special handling for subtract from immediate.
@@ -1385,11 +1086,10 @@
return;
}
DCHECK(rl_src2.is_const);
- int64_t val = mir_graph_->ConstantValueWide(rl_src2);
- uint32_t val_lo = Low32Bits(val);
- uint32_t val_hi = High32Bits(val);
- int32_t mod_imm_lo = ModifiedImmediate(val_lo);
- int32_t mod_imm_hi = ModifiedImmediate(val_hi);
+ // TODO(Arm64): implement this.
+ // int64_t val = mir_graph_->ConstantValueWide(rl_src2);
+ int32_t mod_imm_lo = -1; // ModifiedImmediate(val_lo);
+ int32_t mod_imm_hi = -1; // ModifiedImmediate(val_hi);
// Only a subset of add/sub immediate instructions set carry - so bail if we don't fit
switch (opcode) {
@@ -1409,6 +1109,7 @@
RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
// NOTE: once we've done the EvalLoc on dest, we can no longer bail.
switch (opcode) {
+#if 0
case Instruction::ADD_LONG:
case Instruction::ADD_LONG_2ADDR:
NewLIR3(kThumb2AddRRI8M, rl_result.reg.GetLowReg(), rl_src1.reg.GetLowReg(), mod_imm_lo);
@@ -1442,10 +1143,82 @@
NewLIR3(kThumb2SubRRI8M, rl_result.reg.GetLowReg(), rl_src1.reg.GetLowReg(), mod_imm_lo);
NewLIR3(kThumb2SbcRRI8M, rl_result.reg.GetHighReg(), rl_src1.reg.GetHighReg(), mod_imm_hi);
break;
+#endif
default:
LOG(FATAL) << "Unexpected opcode " << opcode;
}
StoreValueWide(rl_dest, rl_result);
}
+/**
+ * @brief Split a register list into pairs or single registers.
+ *
+ * Given a list of registers in @p reg_mask, split the list into pairs. Use as follows:
+ * @code
+ * int reg1 = -1, reg2 = -1;
+ * while (reg_mask) {
+ * reg_mask = GenPairWise(reg_mask, & reg1, & reg2);
+ * if (UNLIKELY(reg2 < 0)) {
+ * // Single register in reg1.
+ * } else {
+ * // Pair in reg1, reg2.
+ * }
+ * }
+ * @endcode
+ */
+uint32_t Arm64Mir2Lir::GenPairWise(uint32_t reg_mask, int* reg1, int* reg2) {
+ // Find first register.
+ int first_bit_set = __builtin_ctz(reg_mask) + 1;
+ int reg = *reg1 + first_bit_set;
+ reg_mask >>= first_bit_set;
+
+ if (LIKELY(reg_mask)) {
+ // Save the first register, find the second and use the pair opcode.
+ int second_bit_set = __builtin_ctz(reg_mask) + 1;
+ *reg2 = reg;
+ reg_mask >>= second_bit_set;
+ *reg1 = reg + second_bit_set;
+ return reg_mask;
+ }
+
+ // Use the single opcode, as we just have one register.
+ *reg1 = reg;
+ *reg2 = -1;
+ return reg_mask;
+}
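
One subtlety of GenPairWise() is that it returns each register number as a delta accumulated through *reg1, so *reg1 must be seeded with -1 and carried between calls. A standalone replica demonstrating the contract (the mask for x19/x20/x30 is chosen arbitrarily):

```cpp
#include <cassert>

// Replica of GenPairWise() above: *reg1 starts at -1, register numbers
// come out in increasing order across calls, and reg2 == -1 signals a
// leftover single register. Relies on __builtin_ctz, like the original.
static unsigned PairWise(unsigned reg_mask, int* reg1, int* reg2) {
  int first_bit_set = __builtin_ctz(reg_mask) + 1;
  int reg = *reg1 + first_bit_set;
  reg_mask >>= first_bit_set;
  if (reg_mask) {
    int second_bit_set = __builtin_ctz(reg_mask) + 1;
    *reg2 = reg;                    // Lower register of the pair.
    reg_mask >>= second_bit_set;
    *reg1 = reg + second_bit_set;   // Higher register of the pair.
    return reg_mask;
  }
  *reg1 = reg;
  *reg2 = -1;
  return reg_mask;
}

int main() {
  // Mask for {x19, x20, x30}: expect the pair (x20, x19), then x30 alone.
  unsigned mask = (1u << 19) | (1u << 20) | (1u << 30);
  int reg1 = -1, reg2 = -1;
  mask = PairWise(mask, &reg1, &reg2);
  assert(reg1 == 20 && reg2 == 19);
  mask = PairWise(mask, &reg1, &reg2);
  assert(reg1 == 30 && reg2 == -1);
  assert(mask == 0);
  return 0;
}
```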
+
+void Arm64Mir2Lir::UnSpillCoreRegs(RegStorage base, int offset, uint32_t reg_mask) {
+ int reg1 = -1, reg2 = -1;
+ const int pop_log2_size = 3;
+
+ for (offset = (offset >> pop_log2_size) - 1; reg_mask; offset--) {
+ reg_mask = GenPairWise(reg_mask, & reg1, & reg2);
+ if (UNLIKELY(reg2 < 0)) {
+ // TODO(Arm64): replace Solo32 with Solo64, once rxN are defined properly.
+ NewLIR3(WIDE(kA64Ldr3rXD), RegStorage::Solo32(reg1).GetReg(), base.GetReg(), offset);
+ } else {
+ // TODO(Arm64): replace Solo32 with Solo64 (twice below), once rxN are defined properly.
+ NewLIR4(WIDE(kA64Ldp4rrXD), RegStorage::Solo32(reg1).GetReg(),
+ RegStorage::Solo32(reg2).GetReg(), base.GetReg(), offset);
+ }
+ }
+}
+
+void Arm64Mir2Lir::SpillCoreRegs(RegStorage base, int offset, uint32_t reg_mask) {
+ int reg1 = -1, reg2 = -1;
+ const int pop_log2_size = 3;
+
+ for (offset = (offset >> pop_log2_size) - 1; reg_mask; offset--) {
+ reg_mask = GenPairWise(reg_mask, & reg1, & reg2);
+ if (UNLIKELY(reg2 < 0)) {
+ // TODO(Arm64): replace Solo32 with Solo64, once rxN are defined properly.
+ NewLIR3(WIDE(kA64Str3rXD), RegStorage::Solo32(reg1).GetReg(), base.GetReg(), offset);
+ } else {
+ // TODO(Arm64): replace Solo32 with Solo64 (twice below), once rxN are defined properly.
+ NewLIR4(WIDE(kA64Stp4rrXD), RegStorage::Solo32(reg1).GetReg(),
+ RegStorage::Solo32(reg2).GetReg(), base.GetReg(), offset);
+ }
+ }
+}
+
} // namespace art
diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc
index 233e9c2..7e07e15 100644
--- a/compiler/dex/quick/arm64/target_arm64.cc
+++ b/compiler/dex/quick/arm64/target_arm64.cc
@@ -27,39 +27,40 @@
// TODO: rework this when c++11 support allows.
static const RegStorage core_regs_arr[] =
- {rs_r0, rs_r1, rs_r2, rs_r3, rs_rARM_SUSPEND, rs_r5, rs_r6, rs_r7, rs_r8, rs_rARM_SELF,
- rs_r10, rs_r11, rs_r12, rs_rARM_SP, rs_rARM_LR, rs_rARM_PC};
+ {rs_x0, rs_x1, rs_x2, rs_x3, rs_x4, rs_x5, rs_x6, rs_x7,
+ rs_x8, rs_x9, rs_x10, rs_x11, rs_x12, rs_x13, rs_x14, rs_x15,
+ rs_x16, rs_x17, rs_x18, rs_x19, rs_x20, rs_x21, rs_x22, rs_x23,
+ rs_x24, rs_x25, rs_x26, rs_x27, rs_x28, rs_x29, rs_x30, rs_x31};
static const RegStorage sp_regs_arr[] =
- {rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7, rs_fr8, rs_fr9, rs_fr10,
- rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15, rs_fr16, rs_fr17, rs_fr18, rs_fr19, rs_fr20,
- rs_fr21, rs_fr22, rs_fr23, rs_fr24, rs_fr25, rs_fr26, rs_fr27, rs_fr28, rs_fr29, rs_fr30,
- rs_fr31};
+ {rs_f0, rs_f1, rs_f2, rs_f3, rs_f4, rs_f5, rs_f6, rs_f7,
+ rs_f8, rs_f9, rs_f10, rs_f11, rs_f12, rs_f13, rs_f14, rs_f15,
+ rs_f16, rs_f17, rs_f18, rs_f19, rs_f20, rs_f21, rs_f22, rs_f23,
+ rs_f24, rs_f25, rs_f26, rs_f27, rs_f28, rs_f29, rs_f30, rs_f31};
static const RegStorage dp_regs_arr[] =
- {rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7, rs_dr8, rs_dr9, rs_dr10,
- rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15};
+ {rs_d0, rs_d1, rs_d2, rs_d3, rs_d4, rs_d5, rs_d6, rs_d7,
+ rs_d8, rs_d9, rs_d10, rs_d11, rs_d12, rs_d13, rs_d14, rs_d15};
static const RegStorage reserved_regs_arr[] =
- {rs_rARM_SUSPEND, rs_rARM_SELF, rs_rARM_SP, rs_rARM_LR, rs_rARM_PC};
-static const RegStorage core_temps_arr[] = {rs_r0, rs_r1, rs_r2, rs_r3, rs_r12};
+ {rs_rA64_SUSPEND, rs_rA64_SELF, rs_rA64_SP, rs_rA64_LR};
+static const RegStorage core_temps_arr[] =
+ {rs_x0, rs_x1, rs_x2, rs_x3, rs_x12};
static const RegStorage sp_temps_arr[] =
- {rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7, rs_fr8, rs_fr9, rs_fr10,
- rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15};
+ {rs_f0, rs_f1, rs_f2, rs_f3, rs_f4, rs_f5, rs_f6, rs_f7,
+ rs_f8, rs_f9, rs_f10, rs_f11, rs_f12, rs_f13, rs_f14, rs_f15};
static const RegStorage dp_temps_arr[] =
- {rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7};
+ {rs_d0, rs_d1, rs_d2, rs_d3, rs_d4, rs_d5, rs_d6, rs_d7};
static const std::vector<RegStorage> core_regs(core_regs_arr,
- core_regs_arr + sizeof(core_regs_arr) / sizeof(core_regs_arr[0]));
+ core_regs_arr + arraysize(core_regs_arr));
static const std::vector<RegStorage> sp_regs(sp_regs_arr,
- sp_regs_arr + sizeof(sp_regs_arr) / sizeof(sp_regs_arr[0]));
+ sp_regs_arr + arraysize(sp_regs_arr));
static const std::vector<RegStorage> dp_regs(dp_regs_arr,
- dp_regs_arr + sizeof(dp_regs_arr) / sizeof(dp_regs_arr[0]));
+ dp_regs_arr + arraysize(dp_regs_arr));
static const std::vector<RegStorage> reserved_regs(reserved_regs_arr,
- reserved_regs_arr + sizeof(reserved_regs_arr) / sizeof(reserved_regs_arr[0]));
+ reserved_regs_arr + arraysize(reserved_regs_arr));
static const std::vector<RegStorage> core_temps(core_temps_arr,
- core_temps_arr + sizeof(core_temps_arr) / sizeof(core_temps_arr[0]));
-static const std::vector<RegStorage> sp_temps(sp_temps_arr,
- sp_temps_arr + sizeof(sp_temps_arr) / sizeof(sp_temps_arr[0]));
-static const std::vector<RegStorage> dp_temps(dp_temps_arr,
- dp_temps_arr + sizeof(dp_temps_arr) / sizeof(dp_temps_arr[0]));
+ core_temps_arr + arraysize(core_temps_arr));
+static const std::vector<RegStorage> sp_temps(sp_temps_arr, sp_temps_arr + arraysize(sp_temps_arr));
+static const std::vector<RegStorage> dp_temps(dp_temps_arr, dp_temps_arr + arraysize(dp_temps_arr));
RegLocation Arm64Mir2Lir::LocCReturn() {
return arm_loc_c_return;
@@ -79,25 +80,26 @@
// Return a target-dependent special register.
RegStorage Arm64Mir2Lir::TargetReg(SpecialTargetRegister reg) {
+ // TODO(Arm64): this function doesn't work for hard-float ABI.
RegStorage res_reg = RegStorage::InvalidReg();
switch (reg) {
- case kSelf: res_reg = rs_rARM_SELF; break;
- case kSuspend: res_reg = rs_rARM_SUSPEND; break;
- case kLr: res_reg = rs_rARM_LR; break;
- case kPc: res_reg = rs_rARM_PC; break;
- case kSp: res_reg = rs_rARM_SP; break;
- case kArg0: res_reg = rs_r0; break;
- case kArg1: res_reg = rs_r1; break;
- case kArg2: res_reg = rs_r2; break;
- case kArg3: res_reg = rs_r3; break;
- case kFArg0: res_reg = rs_r0; break;
- case kFArg1: res_reg = rs_r1; break;
- case kFArg2: res_reg = rs_r2; break;
- case kFArg3: res_reg = rs_r3; break;
- case kRet0: res_reg = rs_r0; break;
- case kRet1: res_reg = rs_r1; break;
- case kInvokeTgt: res_reg = rs_rARM_LR; break;
- case kHiddenArg: res_reg = rs_r12; break;
+ case kSelf: res_reg = rs_rA64_SELF; break;
+ case kSuspend: res_reg = rs_rA64_SUSPEND; break;
+ case kLr: res_reg = rs_rA64_LR; break;
+ case kPc: res_reg = RegStorage::InvalidReg(); break;
+ case kSp: res_reg = rs_rA64_SP; break;
+ case kArg0: res_reg = rs_x0; break;
+ case kArg1: res_reg = rs_x1; break;
+ case kArg2: res_reg = rs_x2; break;
+ case kArg3: res_reg = rs_x3; break;
+ case kFArg0: res_reg = rs_f0; break;
+ case kFArg1: res_reg = rs_f1; break;
+ case kFArg2: res_reg = rs_f2; break;
+ case kFArg3: res_reg = rs_f3; break;
+ case kRet0: res_reg = rs_x0; break;
+ case kRet1: res_reg = rs_x0; break;
+ case kInvokeTgt: res_reg = rs_rA64_LR; break;
+ case kHiddenArg: res_reg = rs_x12; break;
case kHiddenFpArg: res_reg = RegStorage::InvalidReg(); break;
case kCount: res_reg = RegStorage::InvalidReg(); break;
}
@@ -105,55 +107,37 @@
}
RegStorage Arm64Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) {
- // For the 32-bit internal ABI, the first 3 arguments are passed in registers.
- switch (arg_num) {
- case 0:
- return rs_r1;
- case 1:
- return rs_r2;
- case 2:
- return rs_r3;
- default:
- return RegStorage::InvalidReg();
- }
+ return RegStorage::InvalidReg();
}
/*
- * Decode the register id.
+ * Decode the register id. This routine makes assumptions on the encoding made by RegStorage.
*/
uint64_t Arm64Mir2Lir::GetRegMaskCommon(RegStorage reg) {
- uint64_t seed;
- int shift;
- int reg_id = reg.GetRegNum();
- /* Each double register is equal to a pair of single-precision FP registers */
- if (reg.IsDouble()) {
- seed = 0x3;
- reg_id = reg_id << 1;
- } else {
- seed = 1;
+ // TODO(Arm64): this function depends too much on the internal RegStorage encoding. Refactor.
+
+ int reg_raw = reg.GetRawBits();
+ // Check for the zero register first; it contributes no resource mask.
+ if (UNLIKELY(reg == rs_wzr || reg == rs_xzr)) {
+ // The zero register is not a true register. It is just an immediate zero.
+ return 0;
}
- /* FP register starts at bit position 16 */
- shift = reg.IsFloat() ? kArmFPReg0 : 0;
- /* Expand the double register id into single offset */
- shift += reg_id;
- return (seed << shift);
+
+ return UINT64_C(1) << (reg_raw & RegStorage::kRegTypeMask);
}
uint64_t Arm64Mir2Lir::GetPCUseDefEncoding() {
- return ENCODE_ARM_REG_PC;
+ LOG(FATAL) << "Unexpected call to GetPCUseDefEncoding for Arm64";
+ return 0ULL;
}
-// Thumb2 specific setup. TODO: inline?:
+// Arm64-specific setup. TODO: inline?
void Arm64Mir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags) {
- DCHECK_EQ(cu_->instruction_set, kThumb2);
+ DCHECK_EQ(cu_->instruction_set, kArm64);
DCHECK(!lir->flags.use_def_invalid);
- int opcode = lir->opcode;
-
// These flags are somewhat uncommon - bypass if we can.
- if ((flags & (REG_DEF_SP | REG_USE_SP | REG_DEF_LIST0 | REG_DEF_LIST1 |
- REG_DEF_FPCS_LIST0 | REG_DEF_FPCS_LIST2 | REG_USE_PC | IS_IT | REG_USE_LIST0 |
- REG_USE_LIST1 | REG_USE_FPCS_LIST0 | REG_USE_FPCS_LIST2 | REG_DEF_LR)) != 0) {
+ if ((flags & (REG_DEF_SP | REG_USE_SP | REG_DEF_LR)) != 0) {
if (flags & REG_DEF_SP) {
lir->u.m.def_mask |= ENCODE_ARM_REG_SP;
}
@@ -162,61 +146,6 @@
lir->u.m.use_mask |= ENCODE_ARM_REG_SP;
}
- if (flags & REG_DEF_LIST0) {
- lir->u.m.def_mask |= ENCODE_ARM_REG_LIST(lir->operands[0]);
- }
-
- if (flags & REG_DEF_LIST1) {
- lir->u.m.def_mask |= ENCODE_ARM_REG_LIST(lir->operands[1]);
- }
-
- if (flags & REG_DEF_FPCS_LIST0) {
- lir->u.m.def_mask |= ENCODE_ARM_REG_FPCS_LIST(lir->operands[0]);
- }
-
- if (flags & REG_DEF_FPCS_LIST2) {
- for (int i = 0; i < lir->operands[2]; i++) {
- SetupRegMask(&lir->u.m.def_mask, lir->operands[1] + i);
- }
- }
-
- if (flags & REG_USE_PC) {
- lir->u.m.use_mask |= ENCODE_ARM_REG_PC;
- }
-
- /* Conservatively treat the IT block */
- if (flags & IS_IT) {
- lir->u.m.def_mask = ENCODE_ALL;
- }
-
- if (flags & REG_USE_LIST0) {
- lir->u.m.use_mask |= ENCODE_ARM_REG_LIST(lir->operands[0]);
- }
-
- if (flags & REG_USE_LIST1) {
- lir->u.m.use_mask |= ENCODE_ARM_REG_LIST(lir->operands[1]);
- }
-
- if (flags & REG_USE_FPCS_LIST0) {
- lir->u.m.use_mask |= ENCODE_ARM_REG_FPCS_LIST(lir->operands[0]);
- }
-
- if (flags & REG_USE_FPCS_LIST2) {
- for (int i = 0; i < lir->operands[2]; i++) {
- SetupRegMask(&lir->u.m.use_mask, lir->operands[1] + i);
- }
- }
- /* Fixup for kThumbPush/lr and kThumbPop/pc */
- if (opcode == kThumbPush || opcode == kThumbPop) {
- uint64_t r8Mask = GetRegMaskCommon(rs_r8);
- if ((opcode == kThumbPush) && (lir->u.m.use_mask & r8Mask)) {
- lir->u.m.use_mask &= ~r8Mask;
- lir->u.m.use_mask |= ENCODE_ARM_REG_LR;
- } else if ((opcode == kThumbPop) && (lir->u.m.def_mask & r8Mask)) {
- lir->u.m.def_mask &= ~r8Mask;
- lir->u.m.def_mask |= ENCODE_ARM_REG_PC;
- }
- }
if (flags & REG_DEF_LR) {
lir->u.m.def_mask |= ENCODE_ARM_REG_LR;
}
@@ -251,92 +180,128 @@
return res;
}
-static const char* core_reg_names[16] = {
- "r0",
- "r1",
- "r2",
- "r3",
- "r4",
- "r5",
- "r6",
- "r7",
- "r8",
- "rSELF",
- "r10",
- "r11",
- "r12",
- "sp",
- "lr",
- "pc",
-};
-
-
-static const char* shift_names[4] = {
+static const char *shift_names[4] = {
"lsl",
"lsr",
"asr",
- "ror"};
+ "ror"
+};
-/* Decode and print a ARM register name */
-static char* DecodeRegList(int opcode, int vector, char* buf, size_t buf_size) {
- int i;
- bool printed = false;
- buf[0] = 0;
- for (i = 0; i < 16; i++, vector >>= 1) {
- if (vector & 0x1) {
- int reg_id = i;
- if (opcode == kThumbPush && i == 8) {
- reg_id = rs_rARM_LR.GetRegNum();
- } else if (opcode == kThumbPop && i == 8) {
- reg_id = rs_rARM_PC.GetRegNum();
- }
- if (printed) {
- snprintf(buf + strlen(buf), buf_size - strlen(buf), ", r%d", reg_id);
- } else {
- printed = true;
- snprintf(buf, buf_size, "r%d", reg_id);
+static const char* extend_names[8] = {
+ "uxtb",
+ "uxth",
+ "uxtw",
+ "uxtx",
+ "sxtb",
+ "sxth",
+ "sxtw",
+ "sxtx",
+};
+
+/* Decode and print a register extension or shift (e.g. ", uxtb #1") */
+static void DecodeRegExtendOrShift(int operand, char *buf, size_t buf_size) {
+ if ((operand & (1 << 6)) == 0) {
+ const char *shift_name = shift_names[(operand >> 7) & 0x3];
+ int amount = operand & 0x3f;
+ snprintf(buf, buf_size, ", %s #%d", shift_name, amount);
+ } else {
+ const char *extend_name = extend_names[(operand >> 3) & 0x7];
+ int amount = operand & 0x7;
+ if (amount == 0) {
+ snprintf(buf, buf_size, ", %s", extend_name);
+ } else {
+ snprintf(buf, buf_size, ", %s #%d", extend_name, amount);
+ }
+ }
+}
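
The operand layout decoded above packs two forms into one integer: bit 6 clear means a shift (type in bits 8:7, amount in bits 5:0), bit 6 set means an extend (type in bits 5:3, amount in bits 2:0). A standalone copy with two worked encodings:

```cpp
#include <cassert>
#include <cstdio>
#include <cstring>

static const char* shift_names[4] = {"lsl", "lsr", "asr", "ror"};
static const char* extend_names[8] = {"uxtb", "uxth", "uxtw", "uxtx",
                                      "sxtb", "sxth", "sxtw", "sxtx"};

// Same decode rule as DecodeRegExtendOrShift() above.
static void Decode(int operand, char* buf, size_t buf_size) {
  if ((operand & (1 << 6)) == 0) {
    snprintf(buf, buf_size, ", %s #%d", shift_names[(operand >> 7) & 0x3],
             operand & 0x3f);
  } else {
    const char* name = extend_names[(operand >> 3) & 0x7];
    int amount = operand & 0x7;
    if (amount == 0) {
      snprintf(buf, buf_size, ", %s", name);
    } else {
      snprintf(buf, buf_size, ", %s #%d", name, amount);
    }
  }
}

int main() {
  char buf[32];
  Decode((1 << 7) | 4, buf, sizeof(buf));             // Shift form.
  assert(strcmp(buf, ", lsr #4") == 0);
  Decode((1 << 6) | (2 << 3) | 1, buf, sizeof(buf));  // Extend form.
  assert(strcmp(buf, ", uxtw #1") == 0);
  return 0;
}
```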
+
+#define BIT_MASK(w) ((UINT64_C(1) << (w)) - UINT64_C(1))
+
+static uint64_t RotateRight(uint64_t value, unsigned rotate, unsigned width) {
+ DCHECK_LE(width, 64U);
+ rotate &= 63;
+ value = value & BIT_MASK(width);
+ return ((value & BIT_MASK(rotate)) << (width - rotate)) | (value >> rotate);
+}
+
+static uint64_t RepeatBitsAcrossReg(bool is_wide, uint64_t value, unsigned width) {
+ unsigned i;
+ unsigned reg_size = (is_wide) ? 64 : 32;
+ uint64_t result = value & BIT_MASK(width);
+ DCHECK_NE(width, reg_size);
+ for (i = width; i < reg_size; i *= 2) {
+ result |= (result << i);
+ }
+ DCHECK_EQ(i, reg_size);
+ return result;
+}
+
+/**
+ * @brief Decode an immediate in the form required by logical instructions.
+ *
+ * @param is_wide Whether @p value encodes a 64-bit (as opposed to 32-bit) immediate.
+ * @param value The encoded logical immediate that is to be decoded.
+ * @return The decoded logical immediate.
+ * @note This is the inverse of Arm64Mir2Lir::EncodeLogicalImmediate().
+ */
+uint64_t Arm64Mir2Lir::DecodeLogicalImmediate(bool is_wide, int value) {
+ unsigned n = (value >> 12) & 0x01;
+ unsigned imm_r = (value >> 6) & 0x3f;
+ unsigned imm_s = (value >> 0) & 0x3f;
+
+ // An integer is constructed from the n, imm_s and imm_r bits according to
+ // the following table:
+ //
+ // N imms immr size S R
+ // 1 ssssss rrrrrr 64 UInt(ssssss) UInt(rrrrrr)
+ // 0 0sssss xrrrrr 32 UInt(sssss) UInt(rrrrr)
+ // 0 10ssss xxrrrr 16 UInt(ssss) UInt(rrrr)
+ // 0 110sss xxxrrr 8 UInt(sss) UInt(rrr)
+ // 0 1110ss xxxxrr 4 UInt(ss) UInt(rr)
+ // 0 11110s xxxxxr 2 UInt(s) UInt(r)
+ // (s bits must not be all set)
+ //
+ // A pattern is constructed of size bits, where the least significant S+1
+ // bits are set. The pattern is rotated right by R, and repeated across a
+ // 32 or 64-bit value, depending on destination register width.
+
+ if (n == 1) {
+ DCHECK_NE(imm_s, 0x3fU);
+ uint64_t bits = BIT_MASK(imm_s + 1);
+ return RotateRight(bits, imm_r, 64);
+ } else {
+ DCHECK_NE((imm_s >> 1), 0x1fU);
+ for (unsigned width = 0x20; width >= 0x2; width >>= 1) {
+ if ((imm_s & width) == 0) {
+ unsigned mask = (unsigned)(width - 1);
+ DCHECK_NE((imm_s & mask), mask);
+ uint64_t bits = BIT_MASK((imm_s & mask) + 1);
+ return RepeatBitsAcrossReg(is_wide, RotateRight(bits, imm_r & mask, width), width);
}
}
}
- return buf;
+ return 0;
}
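
The table in the comment is easier to trust with concrete encodings in hand. A hypothetical standalone re-implementation of the same decoder, asserting two worked cases: 0x03c (N=0, imms=0b111100, immr=0) is the 2-bit pattern 0b01 repeated across 32 bits, i.e. 0x55555555, and 0x1107 (N=1, imms=7, immr=4) is eight ones rotated right by four across 64 bits:

```cpp
#include <cassert>
#include <cstdint>

// Rotate-right over an element of `width` bits; guards the shift-by-width
// case that would be undefined behaviour when r == 0.
static uint64_t RotRight(uint64_t v, unsigned r, unsigned width) {
  v &= (width == 64) ? ~UINT64_C(0) : (UINT64_C(1) << width) - 1;
  r &= width - 1;
  if (r == 0) return v;
  return ((v & ((UINT64_C(1) << r) - 1)) << (width - r)) | (v >> r);
}

static uint64_t DecodeBitmaskImm(bool is_wide, int value) {
  unsigned n = (value >> 12) & 0x01;
  unsigned imm_r = (value >> 6) & 0x3f;
  unsigned imm_s = value & 0x3f;
  if (n == 1) {
    return RotRight((UINT64_C(1) << (imm_s + 1)) - 1, imm_r, 64);
  }
  for (unsigned width = 0x20; width >= 0x2; width >>= 1) {
    if ((imm_s & width) == 0) {
      unsigned mask = width - 1;
      uint64_t bits = RotRight((UINT64_C(1) << ((imm_s & mask) + 1)) - 1,
                               imm_r & mask, width);
      // Replicate the width-bit element across the 32- or 64-bit register.
      uint64_t result = bits;
      for (unsigned i = width; i < (is_wide ? 64u : 32u); i *= 2) {
        result |= result << i;
      }
      return result;
    }
  }
  return 0;
}

int main() {
  assert(DecodeBitmaskImm(false, 0x03c) == UINT64_C(0x55555555));
  assert(DecodeBitmaskImm(true, 0x1107) == UINT64_C(0xf00000000000000f));
  return 0;
}
```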
-static char* DecodeFPCSRegList(int count, int base, char* buf, size_t buf_size) {
- snprintf(buf, buf_size, "s%d", base);
- for (int i = 1; i < count; i++) {
- snprintf(buf + strlen(buf), buf_size - strlen(buf), ", s%d", base + i);
- }
- return buf;
+/**
+ * @brief Decode an 8-bit single-precision floating point number encoded with EncodeImmSingle().
+ */
+static float DecodeImmSingle(uint8_t small_float) {
+ int mantissa = (small_float & 0x0f) + 0x10;
+ int sign = ((small_float & 0x80) == 0) ? 1 : -1;
+ float signed_mantissa = static_cast<float>(sign*mantissa);
+ int exponent = (((small_float >> 4) & 0x7) + 4) & 0x7;
+ return signed_mantissa*static_cast<float>(1 << exponent)*0.0078125f;
}
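
A few worked values for the decoder above: the mantissa field is offset by 16 and the result scaled by 0.0078125f = 2^-7, so 0x70 (sign 0, exponent field 7, mantissa field 0) maps to 16 * 2^3 / 128 = 1.0f. A standalone copy with assertions (all values are exact powers of two, so float equality is safe):

```cpp
#include <cassert>
#include <cstdint>

// Same decode rule as DecodeImmSingle() above.
static float DecodeImm8(uint8_t small_float) {
  int mantissa = (small_float & 0x0f) + 0x10;
  int sign = ((small_float & 0x80) == 0) ? 1 : -1;
  int exponent = (((small_float >> 4) & 0x7) + 4) & 0x7;
  return sign * mantissa * static_cast<float>(1 << exponent) * 0.0078125f;
}

int main() {
  assert(DecodeImm8(0x70) == 1.0f);
  assert(DecodeImm8(0xf0) == -1.0f);  // Same value, sign bit set.
  assert(DecodeImm8(0x00) == 2.0f);   // The all-zero encoding.
  return 0;
}
```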
-static int32_t ExpandImmediate(int value) {
- int32_t mode = (value & 0xf00) >> 8;
- uint32_t bits = value & 0xff;
- switch (mode) {
- case 0:
- return bits;
- case 1:
- return (bits << 16) | bits;
- case 2:
- return (bits << 24) | (bits << 8);
- case 3:
- return (bits << 24) | (bits << 16) | (bits << 8) | bits;
- default:
- break;
- }
- bits = (bits | 0x80) << 24;
- return bits >> (((value & 0xf80) >> 7) - 8);
-}
-
-const char* cc_names[] = {"eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
- "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"};
+static const char* cc_names[] = {"eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
+ "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"};
/*
* Interpret a format string and build a string no longer than size
- * See format key in Assemble.c.
+ * See format key in assemble_arm64.cc.
*/
std::string Arm64Mir2Lir::BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr) {
std::string buf;
- int i;
const char* fmt_end = &fmt[strlen(fmt)];
char tbuf[256];
const char* name;
@@ -354,11 +319,24 @@
DCHECK_LT(static_cast<unsigned>(nc-'0'), 4U);
operand = lir->operands[nc-'0'];
switch (*fmt++) {
- case 'H':
- if (operand != 0) {
- snprintf(tbuf, arraysize(tbuf), ", %s %d", shift_names[operand & 0x3], operand >> 2);
- } else {
+ case 'e': {
+ // Omit ", uxtw #0" in strings like "add w0, w1, w3, uxtw #0" and
+ // ", uxtx #0" in strings like "add x0, x1, x3, uxtx #0"
+ int omittable = ((IS_WIDE(lir->opcode)) ? EncodeExtend(kA64Uxtx, 0) :
+ EncodeExtend(kA64Uxtw, 0));
+ if (LIKELY(operand == omittable)) {
+ strcpy(tbuf, "");
+ } else {
+ DecodeRegExtendOrShift(operand, tbuf, arraysize(tbuf));
+ }
+ }
+ break;
+ case 'o':
+ // Omit ", lsl #0"
+ if (LIKELY(operand == EncodeShift(kA64Lsl, 0))) {
strcpy(tbuf, "");
+ } else {
+ DecodeRegExtendOrShift(operand, tbuf, arraysize(tbuf));
}
break;
case 'B':
@@ -387,39 +365,60 @@
}
strcpy(tbuf, name);
break;
- case 'b':
- strcpy(tbuf, "0000");
- for (i = 3; i >= 0; i--) {
- tbuf[i] += operand & 1;
- operand >>= 1;
- }
- break;
- case 'n':
- operand = ~ExpandImmediate(operand);
- snprintf(tbuf, arraysize(tbuf), "%d [%#x]", operand, operand);
- break;
- case 'm':
- operand = ExpandImmediate(operand);
- snprintf(tbuf, arraysize(tbuf), "%d [%#x]", operand, operand);
- break;
case 's':
- snprintf(tbuf, arraysize(tbuf), "s%d", RegStorage::RegNum(operand));
+ snprintf(tbuf, arraysize(tbuf), "s%d", operand & ARM_FP_REG_MASK);
break;
case 'S':
- snprintf(tbuf, arraysize(tbuf), "d%d", RegStorage::RegNum(operand));
+ snprintf(tbuf, arraysize(tbuf), "d%d", operand & ARM_FP_REG_MASK);
break;
- case 'h':
- snprintf(tbuf, arraysize(tbuf), "%04x", operand);
+ case 'f':
+ snprintf(tbuf, arraysize(tbuf), "%c%d", (IS_FWIDE(lir->opcode)) ? 'd' : 's',
+ operand & ARM_FP_REG_MASK);
+ break;
+ case 'l': {
+ bool is_wide = IS_WIDE(lir->opcode);
+ uint64_t imm = DecodeLogicalImmediate(is_wide, operand);
+ snprintf(tbuf, arraysize(tbuf), "%" PRId64 " (%#" PRIx64 ")", imm, imm);
+ }
+ break;
+ case 'I':
+ snprintf(tbuf, arraysize(tbuf), "%f", DecodeImmSingle(operand));
break;
case 'M':
+ if (LIKELY(operand == 0))
+ strcpy(tbuf, "");
+ else
+ snprintf(tbuf, arraysize(tbuf), ", lsl #%d", 16*operand);
+ break;
case 'd':
snprintf(tbuf, arraysize(tbuf), "%d", operand);
break;
- case 'C':
- operand = RegStorage::RegNum(operand);
- DCHECK_LT(operand, static_cast<int>(
- sizeof(core_reg_names)/sizeof(core_reg_names[0])));
- snprintf(tbuf, arraysize(tbuf), "%s", core_reg_names[operand]);
+ case 'w':
+ if (LIKELY(operand != rwzr))
+ snprintf(tbuf, arraysize(tbuf), "w%d", operand & RegStorage::kRegNumMask);
+ else
+ strcpy(tbuf, "wzr");
+ break;
+ case 'W':
+ if (LIKELY(operand != rwsp))
+ snprintf(tbuf, arraysize(tbuf), "w%d", operand & RegStorage::kRegNumMask);
+ else
+ strcpy(tbuf, "wsp");
+ break;
+ case 'x':
+ if (LIKELY(operand != rxzr))
+ snprintf(tbuf, arraysize(tbuf), "x%d", operand & RegStorage::kRegNumMask);
+ else
+ strcpy(tbuf, "xzr");
+ break;
+ case 'X':
+ if (LIKELY(operand != rsp))
+ snprintf(tbuf, arraysize(tbuf), "x%d", operand & RegStorage::kRegNumMask);
+ else
+ strcpy(tbuf, "sp");
+ break;
+ case 'D':
+ snprintf(tbuf, arraysize(tbuf), "%d", operand*((IS_WIDE(lir->opcode)) ? 8 : 4));
break;
case 'E':
snprintf(tbuf, arraysize(tbuf), "%d", operand*4);
@@ -427,37 +426,51 @@
case 'F':
snprintf(tbuf, arraysize(tbuf), "%d", operand*2);
break;
+ case 'G':
+ if (LIKELY(operand == 0))
+ strcpy(tbuf, "");
+ else
+ strcpy(tbuf, (IS_WIDE(lir->opcode)) ? ", lsl #3" : ", lsl #2");
+ break;
case 'c':
strcpy(tbuf, cc_names[operand]);
break;
case 't':
snprintf(tbuf, arraysize(tbuf), "0x%08" PRIxPTR " (L%p)",
- reinterpret_cast<uintptr_t>(base_addr) + lir->offset + 4 + (operand << 1),
+ reinterpret_cast<uintptr_t>(base_addr) + lir->offset + (operand << 2),
lir->target);
break;
- case 'u': {
- int offset_1 = lir->operands[0];
- int offset_2 = NEXT_LIR(lir)->operands[0];
- uintptr_t target =
- (((reinterpret_cast<uintptr_t>(base_addr) + lir->offset + 4) &
- ~3) + (offset_1 << 21 >> 9) + (offset_2 << 1)) &
- 0xfffffffc;
- snprintf(tbuf, arraysize(tbuf), "%p", reinterpret_cast<void *>(target));
+ case 'r': {
+ bool is_wide = IS_WIDE(lir->opcode);
+ if (LIKELY(operand != rwzr && operand != rxzr)) {
+ snprintf(tbuf, arraysize(tbuf), "%c%d", (is_wide) ? 'x' : 'w',
+ operand & RegStorage::kRegNumMask);
+ } else {
+ strcpy(tbuf, (is_wide) ? "xzr" : "wzr");
+ }
+ }
break;
- }
-
- /* Nothing to print for BLX_2 */
- case 'v':
- strcpy(tbuf, "see above");
+ case 'R': {
+ bool is_wide = IS_WIDE(lir->opcode);
+ if (LIKELY(operand != rwsp && operand != rsp)) {
+ snprintf(tbuf, arraysize(tbuf), "%c%d", (is_wide) ? 'x' : 'w',
+ operand & RegStorage::kRegNumMask);
+ } else {
+ strcpy(tbuf, (is_wide) ? "sp" : "wsp");
+ }
+ }
break;
- case 'R':
- DecodeRegList(lir->opcode, operand, tbuf, arraysize(tbuf));
+ case 'p':
+ snprintf(tbuf, arraysize(tbuf), ".+%d (addr %#" PRIxPTR ")", 4*operand,
+ reinterpret_cast<uintptr_t>(base_addr) + lir->offset + 4*operand);
break;
- case 'P':
- DecodeFPCSRegList(operand, 16, tbuf, arraysize(tbuf));
- break;
- case 'Q':
- DecodeFPCSRegList(operand, 0, tbuf, arraysize(tbuf));
+ case 'T':
+ if (LIKELY(operand == 0))
+ strcpy(tbuf, "");
+ else if (operand == 1)
+ strcpy(tbuf, ", lsl #12");
+ else
+ strcpy(tbuf, ", DecodeError3");
break;
default:
strcpy(tbuf, "DecodeError1");
@@ -519,14 +532,14 @@
}
bool Arm64Mir2Lir::IsUnconditionalBranch(LIR* lir) {
- return ((lir->opcode == kThumbBUncond) || (lir->opcode == kThumb2BUncond));
+ return (lir->opcode == kA64B1t);
}
Arm64Mir2Lir::Arm64Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena)
: Mir2Lir(cu, mir_graph, arena) {
// Sanity check - make sure encoding map lines up.
- for (int i = 0; i < kArmLast; i++) {
- if (Arm64Mir2Lir::EncodingMap[i].opcode != i) {
+ for (int i = 0; i < kA64Last; i++) {
+ if (UNWIDE(Arm64Mir2Lir::EncodingMap[i].opcode) != i) {
LOG(FATAL) << "Encoding order for " << Arm64Mir2Lir::EncodingMap[i].name
<< " is wrong: expecting " << i << ", seeing "
<< static_cast<int>(Arm64Mir2Lir::EncodingMap[i].opcode);
@@ -534,8 +547,8 @@
}
}
-Mir2Lir* ArmCodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
- ArenaAllocator* const arena) {
+Mir2Lir* Arm64CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
+ ArenaAllocator* const arena) {
return new Arm64Mir2Lir(cu, mir_graph, arena);
}
@@ -584,7 +597,7 @@
// TODO: re-enable this when we can safely save r4 over the suspension code path.
bool no_suspend = NO_SUSPEND; // || !Runtime::Current()->ExplicitSuspendChecks();
if (no_suspend) {
- GetRegInfo(rs_rARM_SUSPEND)->MarkFree();
+ GetRegInfo(rs_rA64_SUSPEND)->MarkFree();
}
// Don't start allocating temps at r0/s0/d0 or you may clobber return regs in early-exit methods.
@@ -595,15 +608,7 @@
}
void Arm64Mir2Lir::FreeRegLocTemps(RegLocation rl_keep, RegLocation rl_free) {
- DCHECK(rl_keep.wide);
- DCHECK(rl_free.wide);
- if ((rl_free.reg.GetLowReg() != rl_keep.reg.GetLowReg()) &&
- (rl_free.reg.GetLowReg() != rl_keep.reg.GetHighReg()) &&
- (rl_free.reg.GetHighReg() != rl_keep.reg.GetLowReg()) &&
- (rl_free.reg.GetHighReg() != rl_keep.reg.GetHighReg())) {
- // No overlap, free.
- FreeTemp(rl_free.reg);
- }
+ LOG(FATAL) << "Unexpected call to FreeRegLocTemps for Arm64";
}
/*
@@ -613,7 +618,7 @@
*/
void Arm64Mir2Lir::AdjustSpillMask() {
- core_spill_mask_ |= (1 << rs_rARM_LR.GetRegNum());
+ core_spill_mask_ |= (1 << rs_rA64_LR.GetRegNum());
num_core_spills_++;
}
@@ -649,100 +654,96 @@
/* Clobber all regs that might be used by an external C call */
void Arm64Mir2Lir::ClobberCallerSave() {
- // TODO: rework this - it's gotten even more ugly.
- Clobber(rs_r0);
- Clobber(rs_r1);
- Clobber(rs_r2);
- Clobber(rs_r3);
- Clobber(rs_r12);
- Clobber(rs_r14lr);
- Clobber(rs_fr0);
- Clobber(rs_fr1);
- Clobber(rs_fr2);
- Clobber(rs_fr3);
- Clobber(rs_fr4);
- Clobber(rs_fr5);
- Clobber(rs_fr6);
- Clobber(rs_fr7);
- Clobber(rs_fr8);
- Clobber(rs_fr9);
- Clobber(rs_fr10);
- Clobber(rs_fr11);
- Clobber(rs_fr12);
- Clobber(rs_fr13);
- Clobber(rs_fr14);
- Clobber(rs_fr15);
- Clobber(rs_dr0);
- Clobber(rs_dr1);
- Clobber(rs_dr2);
- Clobber(rs_dr3);
- Clobber(rs_dr4);
- Clobber(rs_dr5);
- Clobber(rs_dr6);
- Clobber(rs_dr7);
+ // TODO(Arm64): implement this.
+ UNIMPLEMENTED(WARNING);
+
+ Clobber(rs_x0);
+ Clobber(rs_x1);
+ Clobber(rs_x2);
+ Clobber(rs_x3);
+ Clobber(rs_x12);
+ Clobber(rs_x30);
+ Clobber(rs_f0);
+ Clobber(rs_f1);
+ Clobber(rs_f2);
+ Clobber(rs_f3);
+ Clobber(rs_f4);
+ Clobber(rs_f5);
+ Clobber(rs_f6);
+ Clobber(rs_f7);
+ Clobber(rs_f8);
+ Clobber(rs_f9);
+ Clobber(rs_f10);
+ Clobber(rs_f11);
+ Clobber(rs_f12);
+ Clobber(rs_f13);
+ Clobber(rs_f14);
+ Clobber(rs_f15);
}
RegLocation Arm64Mir2Lir::GetReturnWideAlt() {
RegLocation res = LocCReturnWide();
- res.reg.SetLowReg(rs_r2.GetReg());
- res.reg.SetHighReg(rs_r3.GetReg());
- Clobber(rs_r2);
- Clobber(rs_r3);
- MarkInUse(rs_r2);
- MarkInUse(rs_r3);
+ res.reg.SetReg(rx2);
+ res.reg.SetHighReg(rx3);
+ Clobber(rs_x2);
+ Clobber(rs_x3);
+ MarkInUse(rs_x2);
+ MarkInUse(rs_x3);
MarkWide(res.reg);
return res;
}
RegLocation Arm64Mir2Lir::GetReturnAlt() {
RegLocation res = LocCReturn();
- res.reg.SetReg(rs_r1.GetReg());
- Clobber(rs_r1);
- MarkInUse(rs_r1);
+ res.reg.SetReg(rx1);
+ Clobber(rs_x1);
+ MarkInUse(rs_x1);
return res;
}
/* To be used when explicitly managing register use */
void Arm64Mir2Lir::LockCallTemps() {
- LockTemp(rs_r0);
- LockTemp(rs_r1);
- LockTemp(rs_r2);
- LockTemp(rs_r3);
+ LockTemp(rs_x0);
+ LockTemp(rs_x1);
+ LockTemp(rs_x2);
+ LockTemp(rs_x3);
}
/* To be used when explicitly managing register use */
void Arm64Mir2Lir::FreeCallTemps() {
- FreeTemp(rs_r0);
- FreeTemp(rs_r1);
- FreeTemp(rs_r2);
- FreeTemp(rs_r3);
+ FreeTemp(rs_x0);
+ FreeTemp(rs_x1);
+ FreeTemp(rs_x2);
+ FreeTemp(rs_x3);
}
-RegStorage Arm64Mir2Lir::LoadHelper(ThreadOffset<4> offset) {
- LoadWordDisp(rs_rARM_SELF, offset.Int32Value(), rs_rARM_LR);
- return rs_rARM_LR;
+RegStorage Arm64Mir2Lir::LoadHelper(A64ThreadOffset offset) {
+ // TODO(Arm64): use LoadWordDisp instead.
+ // e.g. LoadWordDisp(rs_rA64_SELF, offset.Int32Value(), rs_rA64_LR);
+ LoadBaseDisp(rs_rA64_SELF, offset.Int32Value(), rs_rA64_LR, k64);
+ return rs_rA64_LR;
}
LIR* Arm64Mir2Lir::CheckSuspendUsingLoad() {
- RegStorage tmp = rs_r0;
- Load32Disp(rs_rARM_SELF, Thread::ThreadSuspendTriggerOffset<4>().Int32Value(), tmp);
- LIR* load2 = Load32Disp(tmp, 0, tmp);
+ RegStorage tmp = rs_x0;
+ LoadWordDisp(rs_rA64_SELF, A64_THREAD_SUSPEND_TRIGGER_OFFSET, tmp);
+ LIR* load2 = LoadWordDisp(tmp, 0, tmp);
return load2;
}
uint64_t Arm64Mir2Lir::GetTargetInstFlags(int opcode) {
DCHECK(!IsPseudoLirOp(opcode));
- return Arm64Mir2Lir::EncodingMap[opcode].flags;
+ return Arm64Mir2Lir::EncodingMap[UNWIDE(opcode)].flags;
}
const char* Arm64Mir2Lir::GetTargetInstName(int opcode) {
DCHECK(!IsPseudoLirOp(opcode));
- return Arm64Mir2Lir::EncodingMap[opcode].name;
+ return Arm64Mir2Lir::EncodingMap[UNWIDE(opcode)].name;
}
const char* Arm64Mir2Lir::GetTargetInstFmt(int opcode) {
DCHECK(!IsPseudoLirOp(opcode));
- return Arm64Mir2Lir::EncodingMap[opcode].fmt;
+ return Arm64Mir2Lir::EncodingMap[UNWIDE(opcode)].fmt;
}
/*
@@ -800,4 +801,140 @@
return res;
}
+// TODO(Arm64): reuse info in QuickArgumentVisitor?
+static RegStorage GetArgPhysicalReg(RegLocation* loc, int* num_gpr_used, int* num_fpr_used,
+ OpSize* op_size) {
+ if (loc->fp) {
+ int n = *num_fpr_used;
+ if (n < 8) {
+ *num_fpr_used = n + 1;
+ RegStorage::RegStorageKind reg_kind;
+ if (loc->wide) {
+ *op_size = kDouble;
+ reg_kind = RegStorage::k64BitSolo;
+ } else {
+ *op_size = kSingle;
+ reg_kind = RegStorage::k32BitSolo;
+ }
+ return RegStorage(RegStorage::kValid | reg_kind | RegStorage::kFloatingPoint | n);
+ }
+ } else {
+ int n = *num_gpr_used;
+ if (n < 7) {
+ *num_gpr_used = n + 1;
+ if (loc->wide) {
+ *op_size = k64;
+ return RegStorage::Solo64(n);
+ } else {
+ *op_size = k32;
+ return RegStorage::Solo32(n);
+ }
+ }
+ }
+
+ return RegStorage::InvalidReg();
+}
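
The helper above implements the AAPCS64-flavoured split used here: up to eight FP arguments take s0/d0..s7/d7, integer arguments take w/x registers while fewer than seven are used (the caller seeds num_gpr_used with 1 because x0 carries the Method*), and everything else stays on the stack. A tiny simulation of that assignment rule; all names are illustrative:

```cpp
#include <cassert>

struct Loc { bool fp; bool wide; };

// Returns the register index (sN/dN for fp, wN/xN otherwise),
// or -1 for a stack argument -- the same rule as GetArgPhysicalReg().
static int AssignArg(const Loc& loc, int* num_gpr_used, int* num_fpr_used) {
  if (loc.fp) {
    return (*num_fpr_used < 8) ? (*num_fpr_used)++ : -1;
  }
  return (*num_gpr_used < 7) ? (*num_gpr_used)++ : -1;
}

int main() {
  int gpr = 1, fpr = 0;  // x0 already holds the Method*.
  Loc args[] = {{false, true}, {true, false}, {false, false}, {true, true}};
  assert(AssignArg(args[0], &gpr, &fpr) == 1);  // long   -> x1
  assert(AssignArg(args[1], &gpr, &fpr) == 0);  // float  -> s0
  assert(AssignArg(args[2], &gpr, &fpr) == 2);  // int    -> w2
  assert(AssignArg(args[3], &gpr, &fpr) == 1);  // double -> d1
  return 0;
}
```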
+
+/*
+ * If there are any ins passed in registers that have not been promoted
+ * to a callee-save register, flush them to the frame. Perform initial
+ * assignment of promoted arguments.
+ *
+ * ArgLocs is an array of location records describing the incoming arguments
+ * with one location record per word of argument.
+ */
+void Arm64Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) {
+ int num_gpr_used = 1;
+ int num_fpr_used = 0;
+
+ /*
+ * Dummy up a RegLocation for the incoming Method*
+ * It will attempt to keep kArg0 live (or copy it to home location
+ * if promoted).
+ */
+ RegLocation rl_src = rl_method;
+ rl_src.location = kLocPhysReg;
+ rl_src.reg = TargetReg(kArg0);
+ rl_src.home = false;
+ MarkLive(rl_src);
+
+ // TODO(Arm64): compress the Method pointer?
+ StoreValueWide(rl_method, rl_src);
+
+ // If Method* has been promoted, explicitly flush
+ if (rl_method.location == kLocPhysReg) {
+ StoreWordDisp(TargetReg(kSp), 0, TargetReg(kArg0));
+ }
+
+ if (cu_->num_ins == 0) {
+ return;
+ }
+
+ int start_vreg = cu_->num_dalvik_registers - cu_->num_ins;
+ for (int i = 0; i < cu_->num_ins; i++) {
+ PromotionMap* v_map = &promotion_map_[start_vreg + i];
+ RegLocation* t_loc = &ArgLocs[i];
+ OpSize op_size;
+ RegStorage reg = GetArgPhysicalReg(t_loc, &num_gpr_used, &num_fpr_used, &op_size);
+
+ if (reg.Valid()) {
+ if ((v_map->core_location == kLocPhysReg) && !t_loc->fp) {
+ OpRegCopy(RegStorage::Solo32(v_map->core_reg), reg);
+ } else if ((v_map->fp_location == kLocPhysReg) && t_loc->fp) {
+ OpRegCopy(RegStorage::Solo32(v_map->FpReg), reg);
+ } else {
+ StoreBaseDisp(TargetReg(kSp), SRegOffset(start_vreg + i), reg, op_size);
+ if (reg.Is64Bit()) {
+ if (SRegOffset(start_vreg + i) + 4 != SRegOffset(start_vreg + i + 1)) {
+ LOG(FATAL) << "64 bit value stored in non-consecutive 4 bytes slots";
+ }
+ i += 1;
+ }
+ }
+ } else {
+ // If arriving in frame & promoted
+ if (v_map->core_location == kLocPhysReg) {
+ LoadWordDisp(TargetReg(kSp), SRegOffset(start_vreg + i),
+ RegStorage::Solo32(v_map->core_reg));
+ }
+ if (v_map->fp_location == kLocPhysReg) {
+ LoadWordDisp(TargetReg(kSp), SRegOffset(start_vreg + i), RegStorage::Solo32(v_map->FpReg));
+ }
+ }
+ }
+}
+
+int Arm64Mir2Lir::LoadArgRegs(CallInfo* info, int call_state,
+ NextCallInsn next_call_insn,
+ const MethodReference& target_method,
+ uint32_t vtable_idx, uintptr_t direct_code,
+ uintptr_t direct_method, InvokeType type, bool skip_this) {
+ int last_arg_reg = TargetReg(kArg3).GetReg();
+ int next_reg = TargetReg(kArg1).GetReg();
+ int next_arg = 0;
+ if (skip_this) {
+ next_reg++;
+ next_arg++;
+ }
+ for (; (next_reg <= last_arg_reg) && (next_arg < info->num_arg_words); next_reg++) {
+ RegLocation rl_arg = info->args[next_arg++];
+ rl_arg = UpdateRawLoc(rl_arg);
+ if (rl_arg.wide && (next_reg <= TargetReg(kArg2).GetReg())) {
+ RegStorage r_tmp(RegStorage::k64BitPair, next_reg, next_reg + 1);
+ LoadValueDirectWideFixed(rl_arg, r_tmp);
+ next_reg++;
+ next_arg++;
+ } else {
+ if (rl_arg.wide) {
+ rl_arg = NarrowRegLoc(rl_arg);
+ rl_arg.is_const = false;
+ }
+ LoadValueDirectFixed(rl_arg, RegStorage::Solo32(next_reg));
+ }
+ call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
+ direct_code, direct_method, type);
+ }
+ return call_state;
+}
+
} // namespace art
diff --git a/compiler/dex/quick/arm64/utility_arm64.cc b/compiler/dex/quick/arm64/utility_arm64.cc
index 8ff1830..e46e201 100644
--- a/compiler/dex/quick/arm64/utility_arm64.cc
+++ b/compiler/dex/quick/arm64/utility_arm64.cc
@@ -20,133 +20,236 @@
namespace art {
-/* This file contains codegen for the Thumb ISA. */
+/* This file contains codegen for the A64 ISA. */
-static int32_t EncodeImmSingle(int32_t value) {
- int32_t res;
- int32_t bit_a = (value & 0x80000000) >> 31;
- int32_t not_bit_b = (value & 0x40000000) >> 30;
- int32_t bit_b = (value & 0x20000000) >> 29;
- int32_t b_smear = (value & 0x3e000000) >> 25;
- int32_t slice = (value & 0x01f80000) >> 19;
- int32_t zeroes = (value & 0x0007ffff);
- if (zeroes != 0)
+static int32_t EncodeImmSingle(uint32_t bits) {
+ /*
+ * Valid values will have the form:
+ *
+ * aBbb.bbbc.defg.h000.0000.0000.0000.0000
+ *
+ * where B = not(b). In other words, if b == 1, then B == 0 and vice versa.
+ */
+
+ // bits[18..0] are cleared.
+ if ((bits & 0x0007ffff) != 0)
return -1;
- if (bit_b) {
- if ((not_bit_b != 0) || (b_smear != 0x1f))
- return -1;
- } else {
- if ((not_bit_b != 1) || (b_smear != 0x0))
- return -1;
- }
- res = (bit_a << 7) | (bit_b << 6) | slice;
- return res;
+
+ // bits[29..25] are all set or all cleared.
+ uint32_t b_pattern = (bits >> 16) & 0x3e00;
+ if (b_pattern != 0 && b_pattern != 0x3e00)
+ return -1;
+
+ // bit[30] and bit[29] are opposite.
+ if (((bits ^ (bits << 1)) & 0x40000000) == 0)
+ return -1;
+
+ // bits: aBbb.bbbc.defg.h000.0000.0000.0000.0000
+ // bit7: a000.0000
+ uint32_t bit7 = ((bits >> 31) & 0x1) << 7;
+ // bit6: 0b00.0000
+ uint32_t bit6 = ((bits >> 29) & 0x1) << 6;
+ // bit5_to_0: 00cd.efgh
+ uint32_t bit5_to_0 = (bits >> 19) & 0x3f;
+ return (bit7 | bit6 | bit5_to_0);
}
-/*
- * Determine whether value can be encoded as a Thumb2 floating point
- * immediate. If not, return -1. If so return encoded 8-bit value.
- */
-static int32_t EncodeImmDouble(int64_t value) {
- int32_t res;
- int32_t bit_a = (value & INT64_C(0x8000000000000000)) >> 63;
- int32_t not_bit_b = (value & INT64_C(0x4000000000000000)) >> 62;
- int32_t bit_b = (value & INT64_C(0x2000000000000000)) >> 61;
- int32_t b_smear = (value & INT64_C(0x3fc0000000000000)) >> 54;
- int32_t slice = (value & INT64_C(0x003f000000000000)) >> 48;
- uint64_t zeroes = (value & INT64_C(0x0000ffffffffffff));
- if (zeroes != 0ull)
+static int32_t EncodeImmDouble(uint64_t bits) {
+ /*
+ * Valid values will have the form:
+ *
+ * aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
+ * 0000.0000.0000.0000.0000.0000.0000.0000
+ *
+ * where B = not(b).
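+ *
+ * Editor's illustration: 1.0 is 0x3ff0000000000000, which encodes to 0x70
+ * just like 1.0f in EncodeImmSingle() above, so "fmov d0, #1.0" and
+ * "fmov s0, #1.0" share the same imm8.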
+ */
+
+ // bits[47..0] are cleared.
+ if ((bits & UINT64_C(0xffffffffffff)) != 0)
return -1;
- if (bit_b) {
- if ((not_bit_b != 0) || (b_smear != 0xff))
- return -1;
- } else {
- if ((not_bit_b != 1) || (b_smear != 0x0))
- return -1;
- }
- res = (bit_a << 7) | (bit_b << 6) | slice;
- return res;
+
+ // bits[61..54] are all set or all cleared.
+ uint32_t b_pattern = (bits >> 48) & 0x3fc0;
+ if (b_pattern != 0 && b_pattern != 0x3fc0)
+ return -1;
+
+ // bit[62] and bit[61] are opposite.
+ if (((bits ^ (bits << 1)) & UINT64_C(0x4000000000000000)) == 0)
+ return -1;
+
+ // bit7: a000.0000
+ uint32_t bit7 = ((bits >> 63) & 0x1) << 7;
+ // bit6: 0b00.0000
+ uint32_t bit6 = ((bits >> 61) & 0x1) << 6;
+ // bit5_to_0: 00cd.efgh
+ uint32_t bit5_to_0 = (bits >> 48) & 0x3f;
+ return (bit7 | bit6 | bit5_to_0);
}
-LIR* Arm64Mir2Lir::LoadFPConstantValue(int r_dest, int value) {
+LIR* Arm64Mir2Lir::LoadFPConstantValue(int r_dest, int32_t value) {
DCHECK(RegStorage::IsSingle(r_dest));
if (value == 0) {
- // TODO: we need better info about the target CPU. a vector exclusive or
- // would probably be better here if we could rely on its existance.
- // Load an immediate +2.0 (which encodes to 0)
- NewLIR2(kThumb2Vmovs_IMM8, r_dest, 0);
- // +0.0 = +2.0 - +2.0
- return NewLIR3(kThumb2Vsubs, r_dest, r_dest, r_dest);
+ return NewLIR2(kA64Fmov2sw, r_dest, rwzr);
} else {
- int encoded_imm = EncodeImmSingle(value);
+ int32_t encoded_imm = EncodeImmSingle((uint32_t)value);
if (encoded_imm >= 0) {
- return NewLIR2(kThumb2Vmovs_IMM8, r_dest, encoded_imm);
+ return NewLIR2(kA64Fmov2fI, r_dest, encoded_imm);
}
}
+
LIR* data_target = ScanLiteralPool(literal_list_, value, 0);
if (data_target == NULL) {
data_target = AddWordData(&literal_list_, value);
}
- LIR* load_pc_rel = RawLIR(current_dalvik_offset_, kThumb2Vldrs,
- r_dest, rs_r15pc.GetReg(), 0, 0, 0, data_target);
+
+ LIR* load_pc_rel = RawLIR(current_dalvik_offset_, kA64Ldr2fp,
+ r_dest, 0, 0, 0, 0, data_target);
SetMemRefType(load_pc_rel, true, kLiteral);
AppendLIR(load_pc_rel);
return load_pc_rel;
}
-static int LeadingZeros(uint32_t val) {
- uint32_t alt;
- int32_t n;
- int32_t count;
-
- count = 16;
- n = 32;
- do {
- alt = val >> count;
- if (alt != 0) {
- n = n - count;
- val = alt;
+LIR* Arm64Mir2Lir::LoadFPConstantValueWide(int r_dest, int64_t value) {
+ DCHECK(RegStorage::IsDouble(r_dest));
+ if (value == 0) {
+ return NewLIR2(kA64Fmov2Sx, r_dest, rwzr);
+ } else {
+ int32_t encoded_imm = EncodeImmDouble(value);
+ if (encoded_imm >= 0) {
+ return NewLIR2(FWIDE(kA64Fmov2fI), r_dest, encoded_imm);
}
- count >>= 1;
- } while (count);
- return n - val;
+ }
+
+ // No short form - load from the literal pool.
+ int32_t val_lo = Low32Bits(value);
+ int32_t val_hi = High32Bits(value);
+ LIR* data_target = ScanLiteralPoolWide(literal_list_, val_lo, val_hi);
+ if (data_target == NULL) {
+ data_target = AddWideData(&literal_list_, val_lo, val_hi);
+ }
+
+ DCHECK(RegStorage::IsFloat(r_dest));
+ LIR* load_pc_rel = RawLIR(current_dalvik_offset_, FWIDE(kA64Ldr2fp),
+ r_dest, 0, 0, 0, 0, data_target);
+ SetMemRefType(load_pc_rel, true, kLiteral);
+ AppendLIR(load_pc_rel);
+ return load_pc_rel;
}
-/*
- * Determine whether value can be encoded as a Thumb2 modified
- * immediate. If not, return -1. If so, return i:imm3:a:bcdefgh form.
- */
-int Arm64Mir2Lir::ModifiedImmediate(uint32_t value) {
- int32_t z_leading;
- int32_t z_trailing;
- uint32_t b0 = value & 0xff;
+static int CountLeadingZeros(bool is_wide, uint64_t value) {
+ return (is_wide) ? __builtin_clzl(value) : __builtin_clz((uint32_t)value);
+}
- /* Note: case of value==0 must use 0:000:0:0000000 encoding */
- if (value <= 0xFF)
- return b0; // 0:000:a:bcdefgh
- if (value == ((b0 << 16) | b0))
- return (0x1 << 8) | b0; /* 0:001:a:bcdefgh */
- if (value == ((b0 << 24) | (b0 << 16) | (b0 << 8) | b0))
- return (0x3 << 8) | b0; /* 0:011:a:bcdefgh */
- b0 = (value >> 8) & 0xff;
- if (value == ((b0 << 24) | (b0 << 8)))
- return (0x2 << 8) | b0; /* 0:010:a:bcdefgh */
- /* Can we do it with rotation? */
- z_leading = LeadingZeros(value);
- z_trailing = 32 - LeadingZeros(~value & (value - 1));
- /* A run of eight or fewer active bits? */
- if ((z_leading + z_trailing) < 24)
- return -1; /* No - bail */
- /* left-justify the constant, discarding msb (known to be 1) */
- value <<= z_leading + 1;
- /* Create bcdefgh */
- value >>= 25;
- /* Put it all together */
- return value | ((0x8 + z_leading) << 7); /* [01000..11111]:bcdefgh */
+static int CountTrailingZeros(bool is_wide, uint64_t value) {
+ return (is_wide) ? __builtin_ctzl(value) : __builtin_ctz((uint32_t)value);
+}
+
+static int CountSetBits(bool is_wide, uint64_t value) {
+ return ((is_wide) ?
+ __builtin_popcountl(value) : __builtin_popcount((uint32_t)value));
+}
+
+/**
+ * @brief Try encoding an immediate in the form required by logical instructions.
+ *
+ * @param is_wide Whether @p value is a 64-bit (as opposed to 32-bit) value.
+ * @param value An integer to be encoded. This is interpreted as 64-bit if @p is_wide is true and as
+ * 32-bit if @p is_wide is false.
+ * @return A non-negative integer containing the encoded immediate or -1 if the encoding failed.
+ * @note This is the inverse of Arm64Mir2Lir::DecodeLogicalImmediate().
+ */
+int Arm64Mir2Lir::EncodeLogicalImmediate(bool is_wide, uint64_t value) {
+ unsigned n, imm_s, imm_r;
+
+ // Logical immediates are encoded using parameters n, imm_s and imm_r using
+ // the following table:
+ //
+ // N imms immr size S R
+ // 1 ssssss rrrrrr 64 UInt(ssssss) UInt(rrrrrr)
+ // 0 0sssss xrrrrr 32 UInt(sssss) UInt(rrrrr)
+ // 0 10ssss xxrrrr 16 UInt(ssss) UInt(rrrr)
+ // 0 110sss xxxrrr 8 UInt(sss) UInt(rrr)
+ // 0 1110ss xxxxrr 4 UInt(ss) UInt(rr)
+ // 0 11110s xxxxxr 2 UInt(s) UInt(r)
+ // (s bits must not be all set)
+ //
+ // A pattern is constructed of size bits, where the least significant S+1
+ // bits are set. The pattern is rotated right by R, and repeated across a
+ // 32 or 64-bit value, depending on destination register width.
+ //
+ // To test if an arbitrary immediate can be encoded using this scheme, an
+ // iterative algorithm is used.
+ //
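+ // Worked example (editor's illustration): value = 0x00ff00ff with
+ // is_wide == false. The two 16-bit halves are equal, so step 5 below
+ // narrows width to 16; then lead_zero (8) + set_bits (halved to 8) == 16,
+ // giving n = 0, imm_r = 0, imm_s = 0b100111: the function returns 0x027.
+ //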
+
+ // 1. If the value has all set or all clear bits, it can't be encoded.
+ if (value == 0 || value == ~UINT64_C(0) ||
+ (!is_wide && (uint32_t)value == ~UINT32_C(0))) {
+ return -1;
+ }
+
+ unsigned lead_zero = CountLeadingZeros(is_wide, value);
+ unsigned lead_one = CountLeadingZeros(is_wide, ~value);
+ unsigned trail_zero = CountTrailingZeros(is_wide, value);
+ unsigned trail_one = CountTrailingZeros(is_wide, ~value);
+ unsigned set_bits = CountSetBits(is_wide, value);
+
+ // The fixed bits in the immediate s field.
+ // If width == 64 (X reg), start at 0xFFFFFF80.
+ // If width == 32 (W reg), start at 0xFFFFFFC0, as the iteration for 64-bit
+ // widths won't be executed.
+ unsigned width = (is_wide) ? 64 : 32;
+ int imm_s_fixed = (is_wide) ? -128 : -64;
+ int imm_s_mask = 0x3f;
+
+ for (;;) {
+ // 2. If the value is two bits wide, it can be encoded.
+ if (width == 2) {
+ n = 0;
+ imm_s = 0x3C;
+ imm_r = (value & 3) - 1;
+ break;
+ }
+
+ n = (width == 64) ? 1 : 0;
+ imm_s = ((imm_s_fixed | (set_bits - 1)) & imm_s_mask);
+ if ((lead_zero + set_bits) == width) {
+ imm_r = 0;
+ } else {
+ imm_r = (lead_zero > 0) ? (width - trail_zero) : lead_one;
+ }
+
+ // 3. If the sum of leading zeros, trailing zeros and set bits is
+ // equal to the bit width of the value, it can be encoded.
+ if (lead_zero + trail_zero + set_bits == width) {
+ break;
+ }
+
+ // 4. If the sum of leading ones, trailing ones and unset bits in the
+ // value is equal to the bit width of the value, it can be encoded.
+ if (lead_one + trail_one + (width - set_bits) == width) {
+ break;
+ }
+
+ // 5. If the most-significant half of the bitwise value is equal to
+ // the least-significant half, return to step 2 using the
+ // least-significant half of the value.
+ uint64_t mask = (UINT64_C(1) << (width >> 1)) - 1;
+ if ((value & mask) == ((value >> (width >> 1)) & mask)) {
+ width >>= 1;
+ set_bits >>= 1;
+ imm_s_fixed >>= 1;
+ continue;
+ }
+
+ // 6. Otherwise, the value can't be encoded.
+ return -1;
+ }
+
+ return (n << 12 | imm_r << 6 | imm_s);
}
bool Arm64Mir2Lir::InexpensiveConstantInt(int32_t value) {
- return (ModifiedImmediate(value) >= 0) || (ModifiedImmediate(~value) >= 0);
+ return false; // (ModifiedImmediate(value) >= 0) || (ModifiedImmediate(~value) >= 0);
}
bool Arm64Mir2Lir::InexpensiveConstantFloat(int32_t value) {
@@ -162,8 +265,8 @@
}
/*
- * Load a immediate using a shortcut if possible; otherwise
- * grab from the per-translation literal pool.
+ * Load an immediate using a single instruction when possible; otherwise
+ * use a pair of movz and movk instructions.
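+ *
+ * Editor's illustration: 0x12340000 loads with a single
+ * "movz w0, #0x1234, lsl #16" and 0xffff1234 with a single
+ * "movn w0, #0xedcb"; 0x00ff00ff is a valid logical immediate and becomes
+ * "orr w0, wzr, #0x00ff00ff"; a value like 0x12345678 needs the full
+ * movz/movk pair.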
*
* No additional register clobbering operation performed. Use this version when
* 1) r_dest is freshly returned from AllocTemp or
@@ -171,204 +274,163 @@
*/
LIR* Arm64Mir2Lir::LoadConstantNoClobber(RegStorage r_dest, int value) {
LIR* res;
- int mod_imm;
if (r_dest.IsFloat()) {
return LoadFPConstantValue(r_dest.GetReg(), value);
}
- /* See if the value can be constructed cheaply */
- if (r_dest.Low8() && (value >= 0) && (value <= 255)) {
- return NewLIR2(kThumbMovImm, r_dest.GetReg(), value);
+ // Loading SP/ZR with an immediate is not supported.
+ DCHECK_NE(r_dest.GetReg(), rwsp);
+ DCHECK_NE(r_dest.GetReg(), rwzr);
+
+ // Compute how many movk, movz instructions are needed to load the value.
+ uint16_t high_bits = High16Bits(value);
+ uint16_t low_bits = Low16Bits(value);
+
+ bool low_fast = ((uint16_t)(low_bits + 1) <= 1);
+ bool high_fast = ((uint16_t)(high_bits + 1) <= 1);
+
+ if (LIKELY(low_fast || high_fast)) {
+ // 1 instruction is enough to load the immediate.
+ if (LIKELY(low_bits == high_bits)) {
+ // Value is either 0 or -1: we can just use wzr.
+ ArmOpcode opcode = LIKELY(low_bits == 0) ? kA64Mov2rr : kA64Mvn2rr;
+ res = NewLIR2(opcode, r_dest.GetReg(), rwzr);
+ } else {
+ uint16_t uniform_bits, useful_bits;
+ int shift;
+
+ if (LIKELY(high_fast)) {
+ shift = 0;
+ uniform_bits = high_bits;
+ useful_bits = low_bits;
+ } else {
+ shift = 1;
+ uniform_bits = low_bits;
+ useful_bits = high_bits;
+ }
+
+ if (UNLIKELY(uniform_bits != 0)) {
+ res = NewLIR3(kA64Movn3rdM, r_dest.GetReg(), ~useful_bits, shift);
+ } else {
+ res = NewLIR3(kA64Movz3rdM, r_dest.GetReg(), useful_bits, shift);
+ }
+ }
+ } else {
+ // A movz/movk pair would take 2 instructions; first try to encode the
+ // value as a logical immediate.
+ int log_imm = EncodeLogicalImmediate(/*is_wide=*/false, value);
+ if (log_imm >= 0) {
+ res = NewLIR3(kA64Orr3Rrl, r_dest.GetReg(), rwzr, log_imm);
+ } else {
+ // Use 2 instructions.
+ res = NewLIR3(kA64Movz3rdM, r_dest.GetReg(), low_bits, 0);
+ NewLIR3(kA64Movk3rdM, r_dest.GetReg(), high_bits, 1);
+ }
}
- /* Check Modified immediate special cases */
- mod_imm = ModifiedImmediate(value);
- if (mod_imm >= 0) {
- res = NewLIR2(kThumb2MovI8M, r_dest.GetReg(), mod_imm);
- return res;
- }
- mod_imm = ModifiedImmediate(~value);
- if (mod_imm >= 0) {
- res = NewLIR2(kThumb2MvnI8M, r_dest.GetReg(), mod_imm);
- return res;
- }
- /* 16-bit immediate? */
- if ((value & 0xffff) == value) {
- res = NewLIR2(kThumb2MovImm16, r_dest.GetReg(), value);
- return res;
- }
- /* Do a low/high pair */
- res = NewLIR2(kThumb2MovImm16, r_dest.GetReg(), Low16Bits(value));
- NewLIR2(kThumb2MovImm16H, r_dest.GetReg(), High16Bits(value));
+
return res;
}
LIR* Arm64Mir2Lir::OpUnconditionalBranch(LIR* target) {
- LIR* res = NewLIR1(kThumbBUncond, 0 /* offset to be patched during assembly */);
+ LIR* res = NewLIR1(kA64B1t, 0 /* offset to be patched during assembly */);
res->target = target;
return res;
}
LIR* Arm64Mir2Lir::OpCondBranch(ConditionCode cc, LIR* target) {
- // This is kThumb2BCond instead of kThumbBCond for performance reasons. The assembly
- // time required for a new pass after kThumbBCond is fixed up to kThumb2BCond is
- // substantial.
- LIR* branch = NewLIR2(kThumb2BCond, 0 /* offset to be patched */,
- ArmConditionEncoding(cc));
+ LIR* branch = NewLIR2(kA64B2ct, ArmConditionEncoding(cc),
+ 0 /* offset to be patched */);
branch->target = target;
return branch;
}
LIR* Arm64Mir2Lir::OpReg(OpKind op, RegStorage r_dest_src) {
- ArmOpcode opcode = kThumbBkpt;
+ ArmOpcode opcode = kA64Brk1d;
switch (op) {
case kOpBlx:
- opcode = kThumbBlxR;
+ opcode = kA64Blr1x;
break;
- case kOpBx:
- opcode = kThumbBx;
- break;
+ // TODO(Arm64): port kThumbBx.
+ // case kOpBx:
+ // opcode = kThumbBx;
+ // break;
default:
LOG(FATAL) << "Bad opcode " << op;
}
return NewLIR1(opcode, r_dest_src.GetReg());
}
-LIR* Arm64Mir2Lir::OpRegRegShift(OpKind op, RegStorage r_dest_src1, RegStorage r_src2,
- int shift) {
- bool thumb_form =
- ((shift == 0) && r_dest_src1.Low8() && r_src2.Low8());
- ArmOpcode opcode = kThumbBkpt;
- switch (op) {
- case kOpAdc:
- opcode = (thumb_form) ? kThumbAdcRR : kThumb2AdcRRR;
- break;
- case kOpAnd:
- opcode = (thumb_form) ? kThumbAndRR : kThumb2AndRRR;
- break;
- case kOpBic:
- opcode = (thumb_form) ? kThumbBicRR : kThumb2BicRRR;
- break;
+LIR* Arm64Mir2Lir::OpRegRegShift(OpKind op, int r_dest_src1, int r_src2,
+ int shift, bool is_wide) {
+ ArmOpcode wide = (is_wide) ? WIDE(0) : UNWIDE(0);
+ ArmOpcode opcode = kA64Brk1d;
+
+ switch (OP_KIND_UNWIDE(op)) {
case kOpCmn:
- DCHECK_EQ(shift, 0);
- opcode = (thumb_form) ? kThumbCmnRR : kThumb2CmnRR;
+ opcode = kA64Cmn3Rro;
break;
case kOpCmp:
- if (thumb_form)
- opcode = kThumbCmpRR;
- else if ((shift == 0) && !r_dest_src1.Low8() && !r_src2.Low8())
- opcode = kThumbCmpHH;
- else if ((shift == 0) && r_dest_src1.Low8())
- opcode = kThumbCmpLH;
- else if (shift == 0)
- opcode = kThumbCmpHL;
- else
- opcode = kThumb2CmpRR;
- break;
- case kOpXor:
- opcode = (thumb_form) ? kThumbEorRR : kThumb2EorRRR;
+ // TODO(Arm64): check this case: "cmp w0, w1" is currently rendered as "cmp w0, w1, uxtb".
+ opcode = kA64Cmp3Rro;
break;
case kOpMov:
- DCHECK_EQ(shift, 0);
- if (r_dest_src1.Low8() && r_src2.Low8())
- opcode = kThumbMovRR;
- else if (!r_dest_src1.Low8() && !r_src2.Low8())
- opcode = kThumbMovRR_H2H;
- else if (r_dest_src1.Low8())
- opcode = kThumbMovRR_H2L;
- else
- opcode = kThumbMovRR_L2H;
- break;
- case kOpMul:
- DCHECK_EQ(shift, 0);
- opcode = (thumb_form) ? kThumbMul : kThumb2MulRRR;
+ opcode = kA64Mov2rr;
break;
case kOpMvn:
- opcode = (thumb_form) ? kThumbMvn : kThumb2MnvRR;
+ opcode = kA64Mvn2rr;
break;
case kOpNeg:
- DCHECK_EQ(shift, 0);
- opcode = (thumb_form) ? kThumbNeg : kThumb2NegRR;
- break;
- case kOpOr:
- opcode = (thumb_form) ? kThumbOrr : kThumb2OrrRRR;
- break;
- case kOpSbc:
- opcode = (thumb_form) ? kThumbSbc : kThumb2SbcRRR;
+ opcode = kA64Neg3rro;
break;
case kOpTst:
- opcode = (thumb_form) ? kThumbTst : kThumb2TstRR;
- break;
- case kOpLsl:
- DCHECK_EQ(shift, 0);
- opcode = (thumb_form) ? kThumbLslRR : kThumb2LslRRR;
- break;
- case kOpLsr:
- DCHECK_EQ(shift, 0);
- opcode = (thumb_form) ? kThumbLsrRR : kThumb2LsrRRR;
- break;
- case kOpAsr:
- DCHECK_EQ(shift, 0);
- opcode = (thumb_form) ? kThumbAsrRR : kThumb2AsrRRR;
- break;
- case kOpRor:
- DCHECK_EQ(shift, 0);
- opcode = (thumb_form) ? kThumbRorRR : kThumb2RorRRR;
- break;
- case kOpAdd:
- opcode = (thumb_form) ? kThumbAddRRR : kThumb2AddRRR;
- break;
- case kOpSub:
- opcode = (thumb_form) ? kThumbSubRRR : kThumb2SubRRR;
+ opcode = kA64Tst3rro;
break;
case kOpRev:
DCHECK_EQ(shift, 0);
- if (!thumb_form) {
- // Binary, but rm is encoded twice.
- return NewLIR3(kThumb2RevRR, r_dest_src1.GetReg(), r_src2.GetReg(), r_src2.GetReg());
- }
- opcode = kThumbRev;
+ // Binary, but rm is encoded twice.
+ return NewLIR3(kA64Rev2rr | wide, r_dest_src1, r_src2, r_src2);
break;
case kOpRevsh:
- DCHECK_EQ(shift, 0);
- if (!thumb_form) {
- // Binary, but rm is encoded twice.
- return NewLIR3(kThumb2RevshRR, r_dest_src1.GetReg(), r_src2.GetReg(), r_src2.GetReg());
- }
- opcode = kThumbRevsh;
+ // Binary, but rm is encoded twice.
+ return NewLIR3(kA64Rev162rr | wide, r_dest_src1, r_src2, r_src2);
break;
case kOp2Byte:
- DCHECK_EQ(shift, 0);
- return NewLIR4(kThumb2Sbfx, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 8);
+ DCHECK_EQ(shift, ENCODE_NO_SHIFT);
+ // "sbfx r1, r2, #imm1, #imm2" is "sbfm r1, r2, #imm1, #(imm1 + imm2 - 1)".
+ // For now we use sbfm directly.
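+      // Editor's note: with #0 and #7 this is exactly the "sxtb"
+      // (sign-extend byte) alias of sbfm.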
+ return NewLIR4(kA64Sbfm4rrdd | wide, r_dest_src1, r_src2, 0, 7);
case kOp2Short:
- DCHECK_EQ(shift, 0);
- return NewLIR4(kThumb2Sbfx, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 16);
+ DCHECK_EQ(shift, ENCODE_NO_SHIFT);
+ // For now we use sbfm rather than its alias, sbfx.
+ return NewLIR4(kA64Sbfm4rrdd | wide, r_dest_src1, r_src2, 0, 15);
case kOp2Char:
- DCHECK_EQ(shift, 0);
- return NewLIR4(kThumb2Ubfx, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 16);
+ // "ubfx r1, r2, #imm1, #imm2" is "ubfm r1, r2, #imm1, #(imm1 + imm2 - 1)".
+ // For now we use ubfm directly.
+ DCHECK_EQ(shift, ENCODE_NO_SHIFT);
+ return NewLIR4(kA64Ubfm4rrdd | wide, r_dest_src1, r_src2, 0, 15);
default:
- LOG(FATAL) << "Bad opcode: " << op;
- break;
+ return OpRegRegRegShift(op, r_dest_src1, r_dest_src1, r_src2, shift);
}
+
DCHECK(!IsPseudoLirOp(opcode));
if (EncodingMap[opcode].flags & IS_BINARY_OP) {
- return NewLIR2(opcode, r_dest_src1.GetReg(), r_src2.GetReg());
+ DCHECK_EQ(shift, ENCODE_NO_SHIFT);
+ return NewLIR2(opcode | wide, r_dest_src1, r_src2);
} else if (EncodingMap[opcode].flags & IS_TERTIARY_OP) {
- if (EncodingMap[opcode].field_loc[2].kind == kFmtShift) {
- return NewLIR3(opcode, r_dest_src1.GetReg(), r_src2.GetReg(), shift);
- } else {
- return NewLIR3(opcode, r_dest_src1.GetReg(), r_dest_src1.GetReg(), r_src2.GetReg());
+ ArmEncodingKind kind = EncodingMap[opcode].field_loc[2].kind;
+ if (kind == kFmtExtend || kind == kFmtShift) {
+ DCHECK_EQ(kind == kFmtExtend, IsExtendEncoding(shift));
+ return NewLIR3(opcode | wide, r_dest_src1, r_src2, shift);
}
- } else if (EncodingMap[opcode].flags & IS_QUAD_OP) {
- return NewLIR4(opcode, r_dest_src1.GetReg(), r_dest_src1.GetReg(), r_src2.GetReg(), shift);
- } else {
- LOG(FATAL) << "Unexpected encoding operand count";
- return NULL;
}
+
+ LOG(FATAL) << "Unexpected encoding operand count";
+ return NULL;
}
LIR* Arm64Mir2Lir::OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2) {
- return OpRegRegShift(op, r_dest_src1, r_src2, 0);
+ return OpRegRegShift(op, r_dest_src1.GetReg(), r_src2.GetReg(), ENCODE_NO_SHIFT,
+ r_dest_src1.Is64Bit());
}
LIR* Arm64Mir2Lir::OpMovRegMem(RegStorage r_dest, RegStorage r_base, int offset, MoveType move_type) {
@@ -382,207 +444,162 @@
}
LIR* Arm64Mir2Lir::OpCondRegReg(OpKind op, ConditionCode cc, RegStorage r_dest, RegStorage r_src) {
- LOG(FATAL) << "Unexpected use of OpCondRegReg for Arm";
+ LOG(FATAL) << "Unexpected use of OpCondRegReg for Arm64";
return NULL;
}
-LIR* Arm64Mir2Lir::OpRegRegRegShift(OpKind op, RegStorage r_dest, RegStorage r_src1,
- RegStorage r_src2, int shift) {
- ArmOpcode opcode = kThumbBkpt;
- bool thumb_form = (shift == 0) && r_dest.Low8() && r_src1.Low8() && r_src2.Low8();
- switch (op) {
+LIR* Arm64Mir2Lir::OpRegRegRegShift(OpKind op, int r_dest, int r_src1,
+ int r_src2, int shift, bool is_wide) {
+ ArmOpcode opcode = kA64Brk1d;
+
+ switch (OP_KIND_UNWIDE(op)) {
case kOpAdd:
- opcode = (thumb_form) ? kThumbAddRRR : kThumb2AddRRR;
+ opcode = kA64Add4rrro;
break;
case kOpSub:
- opcode = (thumb_form) ? kThumbSubRRR : kThumb2SubRRR;
+ opcode = kA64Sub4rrro;
break;
- case kOpRsub:
- opcode = kThumb2RsubRRR;
- break;
+ // case kOpRsub:
+ // opcode = kA64RsubWWW;
+ // break;
case kOpAdc:
- opcode = kThumb2AdcRRR;
+ opcode = kA64Adc3rrr;
break;
case kOpAnd:
- opcode = kThumb2AndRRR;
- break;
- case kOpBic:
- opcode = kThumb2BicRRR;
+ opcode = kA64And4rrro;
break;
case kOpXor:
- opcode = kThumb2EorRRR;
+ opcode = kA64Eor4rrro;
break;
case kOpMul:
- DCHECK_EQ(shift, 0);
- opcode = kThumb2MulRRR;
+ opcode = kA64Mul3rrr;
break;
case kOpDiv:
- DCHECK_EQ(shift, 0);
- opcode = kThumb2SdivRRR;
+ opcode = kA64Sdiv3rrr;
break;
case kOpOr:
- opcode = kThumb2OrrRRR;
+ opcode = kA64Orr4rrro;
break;
case kOpSbc:
- opcode = kThumb2SbcRRR;
+ opcode = kA64Sbc3rrr;
break;
case kOpLsl:
- DCHECK_EQ(shift, 0);
- opcode = kThumb2LslRRR;
+ opcode = kA64Lsl3rrr;
break;
case kOpLsr:
- DCHECK_EQ(shift, 0);
- opcode = kThumb2LsrRRR;
+ opcode = kA64Lsr3rrr;
break;
case kOpAsr:
- DCHECK_EQ(shift, 0);
- opcode = kThumb2AsrRRR;
+ opcode = kA64Asr3rrr;
break;
case kOpRor:
- DCHECK_EQ(shift, 0);
- opcode = kThumb2RorRRR;
+ opcode = kA64Ror3rrr;
break;
default:
LOG(FATAL) << "Bad opcode: " << op;
break;
}
- DCHECK(!IsPseudoLirOp(opcode));
+
+ // The instructions above belong to two kinds:
+ // - 4-operands instructions, where the last operand is a shift/extend immediate,
+ // - 3-operands instructions with no shift/extend.
+ ArmOpcode widened_opcode = (is_wide) ? WIDE(opcode) : opcode;
if (EncodingMap[opcode].flags & IS_QUAD_OP) {
- return NewLIR4(opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg(), shift);
+ DCHECK_EQ(shift, ENCODE_NO_SHIFT);
+ return NewLIR4(widened_opcode, r_dest, r_src1, r_src2, shift);
} else {
DCHECK(EncodingMap[opcode].flags & IS_TERTIARY_OP);
- return NewLIR3(opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg());
+ DCHECK_EQ(shift, ENCODE_NO_SHIFT);
+ return NewLIR3(widened_opcode, r_dest, r_src1, r_src2);
}
}
LIR* Arm64Mir2Lir::OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2) {
- return OpRegRegRegShift(op, r_dest, r_src1, r_src2, 0);
+ return OpRegRegRegShift(op, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg(), ENCODE_NO_SHIFT);
}
LIR* Arm64Mir2Lir::OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, int value) {
LIR* res;
bool neg = (value < 0);
- int32_t abs_value = (neg) ? -value : value;
- ArmOpcode opcode = kThumbBkpt;
- ArmOpcode alt_opcode = kThumbBkpt;
- bool all_low_regs = r_dest.Low8() && r_src1.Low8();
- int32_t mod_imm = ModifiedImmediate(value);
+ int64_t abs_value = (neg) ? -value : value;
+ ArmOpcode opcode = kA64Brk1d;
+ ArmOpcode alt_opcode = kA64Brk1d;
+ int32_t log_imm = -1;
+ bool is_wide = OP_KIND_IS_WIDE(op);
+ ArmOpcode wide = (is_wide) ? WIDE(0) : UNWIDE(0);
- switch (op) {
- case kOpLsl:
- if (all_low_regs)
- return NewLIR3(kThumbLslRRI5, r_dest.GetReg(), r_src1.GetReg(), value);
- else
- return NewLIR3(kThumb2LslRRI5, r_dest.GetReg(), r_src1.GetReg(), value);
+ switch (OP_KIND_UNWIDE(op)) {
+ case kOpLsl: {
+ // "lsl w1, w2, #imm" is an alias of "ubfm w1, w2, #(-imm MOD 32), #(31-imm)"
+ // and "lsl x1, x2, #imm" of "ubfm x1, x2, #(-imm MOD 32), #(31-imm)".
+ // For now, we just use ubfm directly.
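+      // Editor's illustration: "lsl w0, w1, #4" is "ubfm w0, w1, #28, #27".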
+ int max_value = (is_wide) ? 64 : 32;
+ return NewLIR4(kA64Ubfm4rrdd | wide, r_dest.GetReg(), r_src1.GetReg(),
+                     (-value) & (max_value - 1), max_value - value - 1);
+ }
case kOpLsr:
- if (all_low_regs)
- return NewLIR3(kThumbLsrRRI5, r_dest.GetReg(), r_src1.GetReg(), value);
- else
- return NewLIR3(kThumb2LsrRRI5, r_dest.GetReg(), r_src1.GetReg(), value);
+ return NewLIR3(kA64Lsr3rrd | wide, r_dest.GetReg(), r_src1.GetReg(), value);
case kOpAsr:
- if (all_low_regs)
- return NewLIR3(kThumbAsrRRI5, r_dest.GetReg(), r_src1.GetReg(), value);
- else
- return NewLIR3(kThumb2AsrRRI5, r_dest.GetReg(), r_src1.GetReg(), value);
+ return NewLIR3(kA64Asr3rrd | wide, r_dest.GetReg(), r_src1.GetReg(), value);
case kOpRor:
- return NewLIR3(kThumb2RorRRI5, r_dest.GetReg(), r_src1.GetReg(), value);
+ // "ror r1, r2, #imm" is an alias of "extr r1, r2, r2, #imm".
+ // For now, we just use extr directly.
+ return NewLIR4(kA64Extr4rrrd | wide, r_dest.GetReg(), r_src1.GetReg(), r_src1.GetReg(),
+ value);
case kOpAdd:
- if (r_dest.Low8() && (r_src1 == rs_r13sp) && (value <= 1020) && ((value & 0x3) == 0)) {
- return NewLIR3(kThumbAddSpRel, r_dest.GetReg(), r_src1.GetReg(), value >> 2);
- } else if (r_dest.Low8() && (r_src1 == rs_r15pc) &&
- (value <= 1020) && ((value & 0x3) == 0)) {
- return NewLIR3(kThumbAddPcRel, r_dest.GetReg(), r_src1.GetReg(), value >> 2);
- }
+ neg = !neg;
// Note: intentional fallthrough
case kOpSub:
- if (all_low_regs && ((abs_value & 0x7) == abs_value)) {
- if (op == kOpAdd)
- opcode = (neg) ? kThumbSubRRI3 : kThumbAddRRI3;
- else
- opcode = (neg) ? kThumbAddRRI3 : kThumbSubRRI3;
- return NewLIR3(opcode, r_dest.GetReg(), r_src1.GetReg(), abs_value);
- }
- if (mod_imm < 0) {
- mod_imm = ModifiedImmediate(-value);
- if (mod_imm >= 0) {
- op = (op == kOpAdd) ? kOpSub : kOpAdd;
- }
- }
- if (mod_imm < 0 && (abs_value & 0x3ff) == abs_value) {
- // This is deliberately used only if modified immediate encoding is inadequate since
- // we sometimes actually use the flags for small values but not necessarily low regs.
- if (op == kOpAdd)
- opcode = (neg) ? kThumb2SubRRI12 : kThumb2AddRRI12;
- else
- opcode = (neg) ? kThumb2AddRRI12 : kThumb2SubRRI12;
- return NewLIR3(opcode, r_dest.GetReg(), r_src1.GetReg(), abs_value);
- }
- if (op == kOpSub) {
- opcode = kThumb2SubRRI8M;
- alt_opcode = kThumb2SubRRR;
+ // Add and sub below read/write sp rather than xzr.
+ if (abs_value < 0x1000) {
+ opcode = (neg) ? kA64Add4RRdT : kA64Sub4RRdT;
+ return NewLIR4(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), abs_value, 0);
+ } else if ((abs_value & UINT64_C(0xfff)) == 0 && ((abs_value >> 12) < 0x1000)) {
+ opcode = (neg) ? kA64Add4RRdT : kA64Sub4RRdT;
+ return NewLIR4(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), abs_value >> 12, 1);
} else {
- opcode = kThumb2AddRRI8M;
- alt_opcode = kThumb2AddRRR;
+ log_imm = -1;
+ alt_opcode = (neg) ? kA64Add4rrro : kA64Sub4rrro;
}
break;
- case kOpRsub:
- opcode = kThumb2RsubRRI8M;
- alt_opcode = kThumb2RsubRRR;
- break;
+ // case kOpRsub:
+ // opcode = kThumb2RsubRRI8M;
+ // alt_opcode = kThumb2RsubRRR;
+ // break;
case kOpAdc:
- opcode = kThumb2AdcRRI8M;
- alt_opcode = kThumb2AdcRRR;
+ log_imm = -1;
+ alt_opcode = kA64Adc3rrr;
break;
case kOpSbc:
- opcode = kThumb2SbcRRI8M;
- alt_opcode = kThumb2SbcRRR;
+ log_imm = -1;
+ alt_opcode = kA64Sbc3rrr;
break;
case kOpOr:
- opcode = kThumb2OrrRRI8M;
- alt_opcode = kThumb2OrrRRR;
+ log_imm = EncodeLogicalImmediate(is_wide, value);
+ opcode = kA64Orr3Rrl;
+ alt_opcode = kA64Orr4rrro;
break;
case kOpAnd:
- if (mod_imm < 0) {
- mod_imm = ModifiedImmediate(~value);
- if (mod_imm >= 0) {
- return NewLIR3(kThumb2BicRRI8M, r_dest.GetReg(), r_src1.GetReg(), mod_imm);
- }
- }
- opcode = kThumb2AndRRI8M;
- alt_opcode = kThumb2AndRRR;
+ log_imm = EncodeLogicalImmediate(is_wide, value);
+ opcode = kA64And3Rrl;
+ alt_opcode = kA64And4rrro;
break;
case kOpXor:
- opcode = kThumb2EorRRI8M;
- alt_opcode = kThumb2EorRRR;
+ log_imm = EncodeLogicalImmediate(is_wide, value);
+ opcode = kA64Eor3Rrl;
+ alt_opcode = kA64Eor4rrro;
break;
case kOpMul:
// TUNING: power of 2, shift & add
- mod_imm = -1;
- alt_opcode = kThumb2MulRRR;
+ log_imm = -1;
+ alt_opcode = kA64Mul3rrr;
break;
- case kOpCmp: {
- LIR* res;
- if (mod_imm >= 0) {
- res = NewLIR2(kThumb2CmpRI8M, r_src1.GetReg(), mod_imm);
- } else {
- mod_imm = ModifiedImmediate(-value);
- if (mod_imm >= 0) {
- res = NewLIR2(kThumb2CmnRI8M, r_src1.GetReg(), mod_imm);
- } else {
- RegStorage r_tmp = AllocTemp();
- res = LoadConstant(r_tmp, value);
- OpRegReg(kOpCmp, r_src1, r_tmp);
- FreeTemp(r_tmp);
- }
- }
- return res;
- }
default:
LOG(FATAL) << "Bad opcode: " << op;
}
- if (mod_imm >= 0) {
- return NewLIR3(opcode, r_dest.GetReg(), r_src1.GetReg(), mod_imm);
+ if (log_imm >= 0) {
+ return NewLIR3(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), log_imm);
} else {
RegStorage r_scratch = AllocTemp();
LoadConstant(r_scratch, value);
@@ -595,226 +612,209 @@
}
}
-/* Handle Thumb-only variants here - otherwise punt to OpRegRegImm */
LIR* Arm64Mir2Lir::OpRegImm(OpKind op, RegStorage r_dest_src1, int value) {
+ return OpRegImm64(op, r_dest_src1, static_cast<int64_t>(value), /*is_wide*/false);
+}
+
+LIR* Arm64Mir2Lir::OpRegImm64(OpKind op, RegStorage r_dest_src1, int64_t value, bool is_wide) {
+ ArmOpcode wide = (is_wide) ? WIDE(0) : UNWIDE(0);
+ ArmOpcode opcode = kA64Brk1d;
+ ArmOpcode neg_opcode = kA64Brk1d;
+ bool shift;
bool neg = (value < 0);
- int32_t abs_value = (neg) ? -value : value;
- bool short_form = (((abs_value & 0xff) == abs_value) && r_dest_src1.Low8());
- ArmOpcode opcode = kThumbBkpt;
- switch (op) {
+ uint64_t abs_value = (neg) ? -value : value;
+
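+  // A64 add/sub/cmp immediates are 12 bits wide, optionally shifted left
+  // by 12. Editor's illustration: 0x5000 encodes as #5 with the shift
+  // (e.g. "add x0, x0, #5, lsl #12"), while 0x1001 fits neither form and
+  // goes through a temporary register below.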
+ if (LIKELY(abs_value < 0x1000)) {
+ // abs_value is a 12-bit immediate.
+ shift = false;
+ } else if ((abs_value & UINT64_C(0xfff)) == 0 && ((abs_value >> 12) < 0x1000)) {
+ // abs_value is a shifted 12-bit immediate.
+ shift = true;
+ abs_value >>= 12;
+ } else {
+ RegStorage r_tmp = AllocTemp();
+ LIR* res = LoadConstant(r_tmp, value);
+ OpRegReg(op, r_dest_src1, r_tmp);
+ FreeTemp(r_tmp);
+ return res;
+ }
+
+ switch (OP_KIND_UNWIDE(op)) {
case kOpAdd:
- if (!neg && (r_dest_src1 == rs_r13sp) && (value <= 508)) { /* sp */
- DCHECK_EQ((value & 0x3), 0);
- return NewLIR1(kThumbAddSpI7, value >> 2);
- } else if (short_form) {
- opcode = (neg) ? kThumbSubRI8 : kThumbAddRI8;
- }
+ neg_opcode = kA64Sub4RRdT;
+ opcode = kA64Add4RRdT;
break;
case kOpSub:
- if (!neg && (r_dest_src1 == rs_r13sp) && (value <= 508)) { /* sp */
- DCHECK_EQ((value & 0x3), 0);
- return NewLIR1(kThumbSubSpI7, value >> 2);
- } else if (short_form) {
- opcode = (neg) ? kThumbAddRI8 : kThumbSubRI8;
- }
+ neg_opcode = kA64Add4RRdT;
+ opcode = kA64Sub4RRdT;
break;
case kOpCmp:
- if (!neg && short_form) {
- opcode = kThumbCmpRI8;
- } else {
- short_form = false;
- }
+ neg_opcode = kA64Cmn3RdT;
+ opcode = kA64Cmp3RdT;
break;
default:
- /* Punt to OpRegRegImm - if bad case catch it there */
- short_form = false;
+ LOG(FATAL) << "Bad op-kind in OpRegImm: " << op;
break;
}
- if (short_form) {
- return NewLIR2(opcode, r_dest_src1.GetReg(), abs_value);
- } else {
- return OpRegRegImm(op, r_dest_src1, r_dest_src1, value);
- }
+
+ if (UNLIKELY(neg))
+ opcode = neg_opcode;
+
+ if (EncodingMap[opcode].flags & IS_QUAD_OP)
+ return NewLIR4(opcode | wide, r_dest_src1.GetReg(), r_dest_src1.GetReg(), abs_value,
+ (shift) ? 1 : 0);
+ else
+ return NewLIR3(opcode | wide, r_dest_src1.GetReg(), abs_value, (shift) ? 1 : 0);
}
LIR* Arm64Mir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) {
- LIR* res = NULL;
- int32_t val_lo = Low32Bits(value);
- int32_t val_hi = High32Bits(value);
if (r_dest.IsFloat()) {
- DCHECK(!r_dest.IsPair());
- if ((val_lo == 0) && (val_hi == 0)) {
- // TODO: we need better info about the target CPU. a vector exclusive or
- // would probably be better here if we could rely on its existance.
- // Load an immediate +2.0 (which encodes to 0)
- NewLIR2(kThumb2Vmovd_IMM8, r_dest.GetReg(), 0);
- // +0.0 = +2.0 - +2.0
- res = NewLIR3(kThumb2Vsubd, r_dest.GetReg(), r_dest.GetReg(), r_dest.GetReg());
- } else {
- int encoded_imm = EncodeImmDouble(value);
- if (encoded_imm >= 0) {
- res = NewLIR2(kThumb2Vmovd_IMM8, r_dest.GetReg(), encoded_imm);
- }
- }
+ return LoadFPConstantValueWide(r_dest.GetReg(), value);
} else {
- // NOTE: Arm32 assumption here.
- DCHECK(r_dest.IsPair());
- if ((InexpensiveConstantInt(val_lo) && (InexpensiveConstantInt(val_hi)))) {
- res = LoadConstantNoClobber(r_dest.GetLow(), val_lo);
- LoadConstantNoClobber(r_dest.GetHigh(), val_hi);
- }
- }
- if (res == NULL) {
+ // TODO(Arm64): check whether we can load the immediate with a short form.
+ // e.g. via movz, movk or via logical immediate.
+
// No short form - load from the literal pool.
+ int32_t val_lo = Low32Bits(value);
+ int32_t val_hi = High32Bits(value);
LIR* data_target = ScanLiteralPoolWide(literal_list_, val_lo, val_hi);
if (data_target == NULL) {
data_target = AddWideData(&literal_list_, val_lo, val_hi);
}
- if (r_dest.IsFloat()) {
- res = RawLIR(current_dalvik_offset_, kThumb2Vldrd,
- r_dest.GetReg(), rs_r15pc.GetReg(), 0, 0, 0, data_target);
- } else {
- DCHECK(r_dest.IsPair());
- res = RawLIR(current_dalvik_offset_, kThumb2LdrdPcRel8,
- r_dest.GetLowReg(), r_dest.GetHighReg(), rs_r15pc.GetReg(), 0, 0, data_target);
- }
+
+ LIR* res = RawLIR(current_dalvik_offset_, WIDE(kA64Ldr2rp),
+ r_dest.GetReg(), 0, 0, 0, 0, data_target);
SetMemRefType(res, true, kLiteral);
AppendLIR(res);
+ return res;
}
- return res;
}
-int Arm64Mir2Lir::EncodeShift(int code, int amount) {
- return ((amount & 0x1f) << 2) | code;
+int Arm64Mir2Lir::EncodeShift(int shift_type, int amount) {
+ return ((shift_type & 0x3) << 7) | (amount & 0x1f);
+}
+
+int Arm64Mir2Lir::EncodeExtend(int extend_type, int amount) {
+ return (1 << 6) | ((extend_type & 0x7) << 3) | (amount & 0x7);
+}
+
+bool Arm64Mir2Lir::IsExtendEncoding(int encoded_value) {
+ return ((1 << 6) & encoded_value) != 0;
}
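+
+// Editor's illustration: EncodeShift(0, 3) (an LSL by 3) yields 0x003,
+// while EncodeExtend() always sets bit 6 (e.g. EncodeExtend(3, 0) yields
+// 0x58); IsExtendEncoding() tests that bit to tell the two forms apart.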
LIR* Arm64Mir2Lir::LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest,
- int scale, OpSize size) {
- bool all_low_regs = r_base.Low8() && r_index.Low8() && r_dest.Low8();
+ int scale, OpSize size) {
LIR* load;
- ArmOpcode opcode = kThumbBkpt;
- bool thumb_form = (all_low_regs && (scale == 0));
- RegStorage reg_ptr;
+ ArmOpcode opcode = kA64Brk1d;
+ ArmOpcode wide = kA64NotWide;
+
+ DCHECK(scale == 0 || scale == 1);
if (r_dest.IsFloat()) {
- if (r_dest.IsSingle()) {
- DCHECK((size == k32) || (size == kSingle) || (size == kReference));
- opcode = kThumb2Vldrs;
- size = kSingle;
- } else {
- DCHECK(r_dest.IsDouble());
- DCHECK((size == k64) || (size == kDouble));
- opcode = kThumb2Vldrd;
- size = kDouble;
- }
- } else {
- if (size == kSingle)
- size = k32;
+ bool is_double = r_dest.IsDouble();
+ bool is_single = !is_double;
+ DCHECK_EQ(is_single, r_dest.IsSingle());
+
+ // If r_dest is a single, then size must be either k32 or kSingle.
+ // If r_dest is a double, then size must be either k64 or kDouble.
+ DCHECK(!is_single || size == k32 || size == kSingle);
+ DCHECK(!is_double || size == k64 || size == kDouble);
+ return NewLIR4((is_double) ? FWIDE(kA64Ldr4fXxG) : kA64Ldr4fXxG,
+ r_dest.GetReg(), r_base.GetReg(), r_index.GetReg(), scale);
}
switch (size) {
- case kDouble: // fall-through
- // Intentional fall-though.
+ case kDouble:
+ case kWord:
+ case k64:
+ wide = kA64Wide;
+      // Intentional fall-through.
case kSingle:
- reg_ptr = AllocTemp();
- if (scale) {
- NewLIR4(kThumb2AddRRR, reg_ptr.GetReg(), r_base.GetReg(), r_index.GetReg(),
- EncodeShift(kArmLsl, scale));
- } else {
- OpRegRegReg(kOpAdd, reg_ptr, r_base, r_index);
- }
- load = NewLIR3(opcode, r_dest.GetReg(), reg_ptr.GetReg(), 0);
- FreeTemp(reg_ptr);
- return load;
case k32:
- // Intentional fall-though.
case kReference:
- opcode = (thumb_form) ? kThumbLdrRRR : kThumb2LdrRRR;
+ opcode = kA64Ldr4rXxG;
break;
case kUnsignedHalf:
- opcode = (thumb_form) ? kThumbLdrhRRR : kThumb2LdrhRRR;
+ opcode = kA64Ldrh4wXxd;
break;
case kSignedHalf:
- opcode = (thumb_form) ? kThumbLdrshRRR : kThumb2LdrshRRR;
+ opcode = kA64Ldrsh4rXxd;
break;
case kUnsignedByte:
- opcode = (thumb_form) ? kThumbLdrbRRR : kThumb2LdrbRRR;
+ opcode = kA64Ldrb3wXx;
break;
case kSignedByte:
- opcode = (thumb_form) ? kThumbLdrsbRRR : kThumb2LdrsbRRR;
+ opcode = kA64Ldrsb3rXx;
break;
default:
LOG(FATAL) << "Bad size: " << size;
}
- if (thumb_form)
- load = NewLIR3(opcode, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg());
- else
- load = NewLIR4(opcode, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg(), scale);
+
+ if (UNLIKELY((EncodingMap[opcode].flags & IS_TERTIARY_OP) != 0)) {
+ // Tertiary ops (e.g. ldrb, ldrsb) do not support scale.
+ DCHECK_EQ(scale, 0);
+ load = NewLIR3(opcode | wide, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg());
+ } else {
+ DCHECK(scale == 0 || scale == ((wide == kA64Wide) ? 3 : 2));
+ load = NewLIR4(opcode | wide, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg(),
+ (scale != 0) ? 1 : 0);
+ }
return load;
}
LIR* Arm64Mir2Lir::StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src,
- int scale, OpSize size) {
- bool all_low_regs = r_base.Low8() && r_index.Low8() && r_src.Low8();
- LIR* store = NULL;
- ArmOpcode opcode = kThumbBkpt;
- bool thumb_form = (all_low_regs && (scale == 0));
- RegStorage reg_ptr;
+ int scale, OpSize size) {
+ LIR* store;
+ ArmOpcode opcode = kA64Brk1d;
+ ArmOpcode wide = kA64NotWide;
+
+ DCHECK(scale == 0 || scale == 1);
if (r_src.IsFloat()) {
- if (r_src.IsSingle()) {
- DCHECK((size == k32) || (size == kSingle) || (size == kReference));
- opcode = kThumb2Vstrs;
- size = kSingle;
- } else {
- DCHECK(r_src.IsDouble());
- DCHECK((size == k64) || (size == kDouble));
- DCHECK_EQ((r_src.GetReg() & 0x1), 0);
- opcode = kThumb2Vstrd;
- size = kDouble;
- }
- } else {
- if (size == kSingle)
- size = k32;
+ bool is_double = r_src.IsDouble();
+ bool is_single = !is_double;
+ DCHECK_EQ(is_single, r_src.IsSingle());
+
+ // If r_src is a single, then size must be either k32 or kSingle.
+ // If r_src is a double, then size must be either k64 or kDouble.
+ DCHECK(!is_single || size == k32 || size == kSingle);
+ DCHECK(!is_double || size == k64 || size == kDouble);
+ return NewLIR4((is_double) ? FWIDE(kA64Str4fXxG) : kA64Str4fXxG,
+ r_src.GetReg(), r_base.GetReg(), r_index.GetReg(), scale);
}
switch (size) {
- case kDouble: // fall-through
- // Intentional fall-though.
- case kSingle:
- reg_ptr = AllocTemp();
- if (scale) {
- NewLIR4(kThumb2AddRRR, reg_ptr.GetReg(), r_base.GetReg(), r_index.GetReg(),
- EncodeShift(kArmLsl, scale));
- } else {
- OpRegRegReg(kOpAdd, reg_ptr, r_base, r_index);
- }
- store = NewLIR3(opcode, r_src.GetReg(), reg_ptr.GetReg(), 0);
- FreeTemp(reg_ptr);
- return store;
- case k32:
- // Intentional fall-though.
+    case kDouble:     // Intentional fall-through.
+    case kWord:       // Intentional fall-through.
+ case k64:
+ opcode = kA64Str4rXxG;
+ wide = kA64Wide;
+ break;
+    case kSingle:     // Intentional fall-through.
+    case k32:         // Intentional fall-through.
case kReference:
- opcode = (thumb_form) ? kThumbStrRRR : kThumb2StrRRR;
+ opcode = kA64Str4rXxG;
break;
case kUnsignedHalf:
- // Intentional fall-though.
case kSignedHalf:
- opcode = (thumb_form) ? kThumbStrhRRR : kThumb2StrhRRR;
+ opcode = kA64Strh4wXxd;
break;
case kUnsignedByte:
- // Intentional fall-though.
case kSignedByte:
- opcode = (thumb_form) ? kThumbStrbRRR : kThumb2StrbRRR;
+ opcode = kA64Strb3wXx;
break;
default:
LOG(FATAL) << "Bad size: " << size;
}
- if (thumb_form)
- store = NewLIR3(opcode, r_src.GetReg(), r_base.GetReg(), r_index.GetReg());
- else
+
+ if (UNLIKELY((EncodingMap[opcode].flags & IS_TERTIARY_OP) != 0)) {
+ // Tertiary ops (e.g. strb) do not support scale.
+ DCHECK_EQ(scale, 0);
+ store = NewLIR3(opcode | wide, r_src.GetReg(), r_base.GetReg(), r_index.GetReg());
+ } else {
-    store = NewLIR4(opcode, r_src.GetReg(), r_base.GetReg(), r_index.GetReg(), scale);
+    store = NewLIR4(opcode | wide, r_src.GetReg(), r_base.GetReg(), r_index.GetReg(), scale);
+ }
return store;
}
@@ -825,313 +825,247 @@
* performing null check, incoming MIR can be null.
*/
LIR* Arm64Mir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorage r_dest,
- OpSize size, int s_reg) {
+ OpSize size) {
LIR* load = NULL;
- ArmOpcode opcode = kThumbBkpt;
+ ArmOpcode opcode = kA64Brk1d;
bool short_form = false;
- bool thumb2Form = (displacement < 4092 && displacement >= 0);
- bool all_low = r_dest.Is32Bit() && r_base.Low8() && r_dest.Low8();
int encoded_disp = displacement;
- bool already_generated = false;
- bool null_pointer_safepoint = false;
switch (size) {
- case kDouble:
- // Intentional fall-though.
+ case kDouble: // Intentional fall-through.
+ case kWord: // Intentional fall-through.
case k64:
+ DCHECK_EQ(encoded_disp & 0x3, 0);
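+      // Editor's note: A64 offers two immediate forms below: "ldr" with an
+      // unsigned 12-bit offset scaled by the access size (up to 32760 for
+      // 8-byte accesses, encoded as #4095) and "ldur" with a signed,
+      // unscaled 9-bit offset in [-256, 255].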
if (r_dest.IsFloat()) {
- DCHECK(!r_dest.IsPair());
- opcode = kThumb2Vldrd;
- if (displacement <= 1020) {
+ // Currently double values may be misaligned.
+ if ((displacement & 0x7) == 0 && displacement >= 0 && displacement <= 32760) {
+ // Can use scaled load.
+ opcode = FWIDE(kA64Ldr3fXD);
+ encoded_disp >>= 3;
short_form = true;
- encoded_disp >>= 2;
+ } else if (IS_SIGNED_IMM9(displacement)) {
+ // Can use unscaled load.
+ opcode = FWIDE(kA64Ldur3fXd);
+ short_form = true;
+ } else {
+ short_form = false;
}
} else {
- if (displacement <= 1020) {
- load = NewLIR4(kThumb2LdrdI8, r_dest.GetLowReg(), r_dest.GetHighReg(), r_base.GetReg(),
- displacement >> 2);
- } else {
- load = LoadBaseDispBody(r_base, displacement, r_dest.GetLow(), k32, s_reg);
- null_pointer_safepoint = true;
- LoadBaseDispBody(r_base, displacement + 4, r_dest.GetHigh(), k32, INVALID_SREG);
- }
- already_generated = true;
+ // Currently long values may be misaligned.
+ if ((displacement & 0x7) == 0 && displacement >= 0 && displacement <= 32760) {
+        // Can use scaled load.
+ opcode = FWIDE(kA64Ldr3rXD);
+ encoded_disp >>= 3;
+ short_form = true;
+ } else if (IS_SIGNED_IMM9(displacement)) {
+        // Can use unscaled load.
+ opcode = FWIDE(kA64Ldur3rXd);
+ short_form = true;
+ } // else: use long sequence (short_form = false).
}
break;
- case kSingle:
- // Intentional fall-though.
- case k32:
- // Intentional fall-though.
+ case kSingle: // Intentional fall-through.
+    case k32:         // Intentional fall-through.
case kReference:
if (r_dest.IsFloat()) {
- opcode = kThumb2Vldrs;
+ opcode = kA64Ldr3fXD;
if (displacement <= 1020) {
short_form = true;
encoded_disp >>= 2;
}
break;
}
- if (r_dest.Low8() && (r_base == rs_rARM_PC) && (displacement <= 1020) &&
- (displacement >= 0)) {
- short_form = true;
- encoded_disp >>= 2;
- opcode = kThumbLdrPcRel;
- } else if (r_dest.Low8() && (r_base == rs_rARM_SP) && (displacement <= 1020) &&
- (displacement >= 0)) {
- short_form = true;
- encoded_disp >>= 2;
- opcode = kThumbLdrSpRel;
- } else if (all_low && displacement < 128 && displacement >= 0) {
+ if (displacement <= 16380 && displacement >= 0) {
DCHECK_EQ((displacement & 0x3), 0);
short_form = true;
encoded_disp >>= 2;
- opcode = kThumbLdrRRI5;
- } else if (thumb2Form) {
- short_form = true;
- opcode = kThumb2LdrRRI12;
+ opcode = kA64Ldr3rXD;
}
break;
case kUnsignedHalf:
- if (all_low && displacement < 64 && displacement >= 0) {
+ if (displacement < 64 && displacement >= 0) {
DCHECK_EQ((displacement & 0x1), 0);
short_form = true;
encoded_disp >>= 1;
- opcode = kThumbLdrhRRI5;
+ opcode = kA64Ldrh3wXF;
} else if (displacement < 4092 && displacement >= 0) {
short_form = true;
- opcode = kThumb2LdrhRRI12;
+        DCHECK_EQ((displacement & 0x1), 0);
+        encoded_disp >>= 1;
+        opcode = kA64Ldrh3wXF;
}
break;
case kSignedHalf:
- if (thumb2Form) {
- short_form = true;
- opcode = kThumb2LdrshRRI12;
- }
+      DCHECK_EQ((displacement & 0x1), 0);
+      short_form = true;
+      encoded_disp >>= 1;
+      opcode = kA64Ldrsh3rXF;
break;
case kUnsignedByte:
- if (all_low && displacement < 32 && displacement >= 0) {
- short_form = true;
- opcode = kThumbLdrbRRI5;
- } else if (thumb2Form) {
- short_form = true;
- opcode = kThumb2LdrbRRI12;
- }
+ short_form = true;
+ opcode = kA64Ldrb3wXd;
break;
case kSignedByte:
- if (thumb2Form) {
- short_form = true;
- opcode = kThumb2LdrsbRRI12;
- }
+ short_form = true;
+ opcode = kA64Ldrsb3rXd;
break;
default:
LOG(FATAL) << "Bad size: " << size;
}
- if (!already_generated) {
- if (short_form) {
- load = NewLIR3(opcode, r_dest.GetReg(), r_base.GetReg(), encoded_disp);
+ if (short_form) {
+ load = NewLIR3(opcode, r_dest.GetReg(), r_base.GetReg(), encoded_disp);
+ } else {
+ RegStorage reg_offset = AllocTemp();
+ LoadConstant(reg_offset, encoded_disp);
+ if (r_dest.IsFloat()) {
+ // No index ops - must use a long sequence. Turn the offset into a direct pointer.
+ OpRegReg(kOpAdd, reg_offset, r_base);
+ load = LoadBaseDispBody(reg_offset, 0, r_dest, size);
} else {
- RegStorage reg_offset = AllocTemp();
- LoadConstant(reg_offset, encoded_disp);
- if (r_dest.IsFloat()) {
- // No index ops - must use a long sequence. Turn the offset into a direct pointer.
- OpRegReg(kOpAdd, reg_offset, r_base);
- load = LoadBaseDispBody(reg_offset, 0, r_dest, size, s_reg);
- } else {
- load = LoadBaseIndexed(r_base, reg_offset, r_dest, 0, size);
- }
- FreeTemp(reg_offset);
+ load = LoadBaseIndexed(r_base, reg_offset, r_dest, 0, size);
}
+ FreeTemp(reg_offset);
}
// TODO: in future may need to differentiate Dalvik accesses w/ spills
- if (r_base == rs_rARM_SP) {
+ if (r_base == rs_rA64_SP) {
AnnotateDalvikRegAccess(load, displacement >> 2, true /* is_load */, r_dest.Is64Bit());
- } else {
- // We might need to generate a safepoint if we have two store instructions (wide or double).
- if (!Runtime::Current()->ExplicitNullChecks() && null_pointer_safepoint) {
- MarkSafepointPC(load);
- }
}
return load;
}
-LIR* Arm64Mir2Lir::LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest, OpSize size,
- int s_reg) {
- // TODO: base this on target.
- if (size == kWord) {
- size = k32;
- }
- return LoadBaseDispBody(r_base, displacement, r_dest, size, s_reg);
+LIR* Arm64Mir2Lir::LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
+ OpSize size) {
+ return LoadBaseDispBody(r_base, displacement, r_dest, size);
}
LIR* Arm64Mir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStorage r_src,
- OpSize size) {
+ OpSize size) {
LIR* store = NULL;
- ArmOpcode opcode = kThumbBkpt;
+ ArmOpcode opcode = kA64Brk1d;
bool short_form = false;
- bool thumb2Form = (displacement < 4092 && displacement >= 0);
- bool all_low = r_src.Is32Bit() && r_base.Low8() && r_src.Low8();
int encoded_disp = displacement;
- bool already_generated = false;
- bool null_pointer_safepoint = false;
switch (size) {
+ case kDouble: // Intentional fall-through.
+ case kWord: // Intentional fall-through.
case k64:
- case kDouble:
- if (!r_src.IsFloat()) {
- if (displacement <= 1020) {
- store = NewLIR4(kThumb2StrdI8, r_src.GetLowReg(), r_src.GetHighReg(), r_base.GetReg(),
- displacement >> 2);
- } else {
- store = StoreBaseDispBody(r_base, displacement, r_src.GetLow(), k32);
- null_pointer_safepoint = true;
- StoreBaseDispBody(r_base, displacement + 4, r_src.GetHigh(), k32);
- }
- already_generated = true;
- } else {
- DCHECK(!r_src.IsPair());
- opcode = kThumb2Vstrd;
- if (displacement <= 1020) {
+ DCHECK_EQ(encoded_disp & 0x3, 0);
+ if (r_src.IsFloat()) {
+ // Currently double values may be misaligned.
+ if ((displacement & 0x7) == 0 && displacement >= 0 && displacement <= 32760) {
+ // Can use scaled store.
+ opcode = FWIDE(kA64Str3fXD);
+ encoded_disp >>= 3;
short_form = true;
- encoded_disp >>= 2;
- }
+ } else if (IS_SIGNED_IMM9(displacement)) {
+ // Can use unscaled store.
+ opcode = FWIDE(kA64Stur3fXd);
+ short_form = true;
+ } // else: use long sequence (short_form = false).
+ } else {
+ // Currently long values may be misaligned.
+ if ((displacement & 0x7) == 0 && displacement >= 0 && displacement <= 32760) {
+ // Can use scaled store.
+ opcode = FWIDE(kA64Str3rXD);
+ encoded_disp >>= 3;
+ short_form = true;
+ } else if (IS_SIGNED_IMM9(displacement)) {
+ // Can use unscaled store.
+ opcode = FWIDE(kA64Stur3rXd);
+ short_form = true;
+ } // else: use long sequence (short_form = false).
}
break;
- case kSingle:
- // Intentional fall-through.
- case k32:
- // Intentional fall-through.
+ case kSingle: // Intentional fall-through.
+    case k32:         // Intentional fall-through.
case kReference:
if (r_src.IsFloat()) {
DCHECK(r_src.IsSingle());
- opcode = kThumb2Vstrs;
+ DCHECK_EQ(encoded_disp & 0x3, 0);
+ opcode = kA64Str3fXD;
if (displacement <= 1020) {
short_form = true;
encoded_disp >>= 2;
}
break;
}
- if (r_src.Low8() && (r_base == rs_r13sp) && (displacement <= 1020) && (displacement >= 0)) {
- short_form = true;
- encoded_disp >>= 2;
- opcode = kThumbStrSpRel;
- } else if (all_low && displacement < 128 && displacement >= 0) {
+
+ if (displacement <= 16380 && displacement >= 0) {
DCHECK_EQ((displacement & 0x3), 0);
short_form = true;
encoded_disp >>= 2;
- opcode = kThumbStrRRI5;
- } else if (thumb2Form) {
- short_form = true;
- opcode = kThumb2StrRRI12;
+ opcode = kA64Str3rXD;
}
break;
case kUnsignedHalf:
case kSignedHalf:
- if (all_low && displacement < 64 && displacement >= 0) {
- DCHECK_EQ((displacement & 0x1), 0);
- short_form = true;
- encoded_disp >>= 1;
- opcode = kThumbStrhRRI5;
- } else if (thumb2Form) {
- short_form = true;
- opcode = kThumb2StrhRRI12;
- }
+ DCHECK_EQ((displacement & 0x1), 0);
+ short_form = true;
+ encoded_disp >>= 1;
+ opcode = kA64Strh3wXF;
break;
case kUnsignedByte:
case kSignedByte:
- if (all_low && displacement < 32 && displacement >= 0) {
- short_form = true;
- opcode = kThumbStrbRRI5;
- } else if (thumb2Form) {
- short_form = true;
- opcode = kThumb2StrbRRI12;
- }
+ short_form = true;
+ opcode = kA64Strb3wXd;
break;
default:
LOG(FATAL) << "Bad size: " << size;
}
- if (!already_generated) {
- if (short_form) {
- store = NewLIR3(opcode, r_src.GetReg(), r_base.GetReg(), encoded_disp);
+
+ if (short_form) {
+ store = NewLIR3(opcode, r_src.GetReg(), r_base.GetReg(), encoded_disp);
+ } else {
+ RegStorage r_scratch = AllocTemp();
+ LoadConstant(r_scratch, encoded_disp);
+ if (r_src.IsFloat()) {
+ // No index ops - must use a long sequence. Turn the offset into a direct pointer.
+ OpRegReg(kOpAdd, r_scratch, r_base);
+ store = StoreBaseDispBody(r_scratch, 0, r_src, size);
} else {
- RegStorage r_scratch = AllocTemp();
- LoadConstant(r_scratch, encoded_disp);
- if (r_src.IsFloat()) {
- // No index ops - must use a long sequence. Turn the offset into a direct pointer.
- OpRegReg(kOpAdd, r_scratch, r_base);
- store = StoreBaseDispBody(r_scratch, 0, r_src, size);
- } else {
- store = StoreBaseIndexed(r_base, r_scratch, r_src, 0, size);
- }
- FreeTemp(r_scratch);
+ store = StoreBaseIndexed(r_base, r_scratch, r_src, 0, size);
}
+ FreeTemp(r_scratch);
}
// TODO: In future, may need to differentiate Dalvik & spill accesses
- if (r_base == rs_rARM_SP) {
+ if (r_base == rs_rA64_SP) {
AnnotateDalvikRegAccess(store, displacement >> 2, false /* is_load */, r_src.Is64Bit());
- } else {
- // We might need to generate a safepoint if we have two store instructions (wide or double).
- if (!Runtime::Current()->ExplicitNullChecks() && null_pointer_safepoint) {
- MarkSafepointPC(store);
- }
}
return store;
}
LIR* Arm64Mir2Lir::StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src,
OpSize size) {
- // TODO: base this on target.
- if (size == kWord) {
- size = k32;
- }
return StoreBaseDispBody(r_base, displacement, r_src, size);
}
LIR* Arm64Mir2Lir::OpFpRegCopy(RegStorage r_dest, RegStorage r_src) {
- int opcode;
- DCHECK_EQ(r_dest.IsDouble(), r_src.IsDouble());
- if (r_dest.IsDouble()) {
- opcode = kThumb2Vmovd;
- } else {
- if (r_dest.IsSingle()) {
- opcode = r_src.IsSingle() ? kThumb2Vmovs : kThumb2Fmsr;
- } else {
- DCHECK(r_src.IsSingle());
- opcode = kThumb2Fmrs;
- }
- }
- LIR* res = RawLIR(current_dalvik_offset_, opcode, r_dest.GetReg(), r_src.GetReg());
- if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) {
- res->flags.is_nop = true;
- }
- return res;
+ LOG(FATAL) << "Unexpected use of OpFpRegCopy for Arm64";
+ return NULL;
}
-LIR* Arm64Mir2Lir::OpThreadMem(OpKind op, ThreadOffset<4> thread_offset) {
- LOG(FATAL) << "Unexpected use of OpThreadMem for Arm";
+LIR* Arm64Mir2Lir::OpThreadMem(OpKind op, A64ThreadOffset thread_offset) {
+ LOG(FATAL) << "Unexpected use of OpThreadMem for Arm64";
return NULL;
}
LIR* Arm64Mir2Lir::OpMem(OpKind op, RegStorage r_base, int disp) {
- LOG(FATAL) << "Unexpected use of OpMem for Arm";
+ LOG(FATAL) << "Unexpected use of OpMem for Arm64";
return NULL;
}
LIR* Arm64Mir2Lir::StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale,
- int displacement, RegStorage r_src, OpSize size, int s_reg) {
- LOG(FATAL) << "Unexpected use of StoreBaseIndexedDisp for Arm";
+ int displacement, RegStorage r_src, OpSize size) {
+ LOG(FATAL) << "Unexpected use of StoreBaseIndexedDisp for Arm64";
return NULL;
}
LIR* Arm64Mir2Lir::OpRegMem(OpKind op, RegStorage r_dest, RegStorage r_base, int offset) {
- LOG(FATAL) << "Unexpected use of OpRegMem for Arm";
+ LOG(FATAL) << "Unexpected use of OpRegMem for Arm64";
return NULL;
}
LIR* Arm64Mir2Lir::LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale,
- int displacement, RegStorage r_dest, OpSize size, int s_reg) {
- LOG(FATAL) << "Unexpected use of LoadBaseIndexedDisp for Arm";
+ int displacement, RegStorage r_dest, OpSize size) {
+ LOG(FATAL) << "Unexpected use of LoadBaseIndexedDisp for Arm64";
return NULL;
}
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index fbf8a0c..784dfaf 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -497,6 +497,7 @@
case kX86_64:
bx_offset = 0;
break;
+ case kArm64:
case kMips:
bx_offset = tab_rec->anchor->offset;
break;
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 395cff7..83d5045 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -642,7 +642,7 @@
RegLocation rl_result = EvalLoc(rl_dest, result_reg_kind, true);
OpSize size = LoadStoreOpSize(is_long_or_double, rl_result.ref);
- LoadBaseDisp(r_base, field_info.FieldOffset().Int32Value(), rl_result.reg, size, INVALID_SREG);
+ LoadBaseDisp(r_base, field_info.FieldOffset().Int32Value(), rl_result.reg, size);
FreeTemp(r_base);
if (field_info.IsVolatile()) {
@@ -704,8 +704,7 @@
result_reg_kind = kFPReg;
}
rl_result = EvalLoc(rl_dest, result_reg_kind, true);
- LoadBaseDisp(rl_obj.reg, field_info.FieldOffset().Int32Value(), rl_result.reg,
- size, rl_obj.s_reg_low);
+ LoadBaseDisp(rl_obj.reg, field_info.FieldOffset().Int32Value(), rl_result.reg, size);
MarkPossibleNullPointerException(opt_flags);
if (field_info.IsVolatile()) {
// Without context sensitive analysis, we must issue the most conservative barriers.
@@ -717,7 +716,7 @@
RegStorage reg_ptr = AllocTemp();
OpRegRegImm(kOpAdd, reg_ptr, rl_obj.reg, field_info.FieldOffset().Int32Value());
rl_result = EvalLoc(rl_dest, reg_class, true);
- LoadBaseDisp(reg_ptr, 0, rl_result.reg, size, INVALID_SREG);
+ LoadBaseDisp(reg_ptr, 0, rl_result.reg, size);
MarkPossibleNullPointerException(opt_flags);
if (field_info.IsVolatile()) {
// Without context sensitive analysis, we must issue the most conservative barriers.
@@ -731,8 +730,7 @@
} else {
rl_result = EvalLoc(rl_dest, reg_class, true);
GenNullCheck(rl_obj.reg, opt_flags);
- LoadBaseDisp(rl_obj.reg, field_info.FieldOffset().Int32Value(), rl_result.reg, k32,
- rl_obj.s_reg_low);
+ LoadBaseDisp(rl_obj.reg, field_info.FieldOffset().Int32Value(), rl_result.reg, k32);
MarkPossibleNullPointerException(opt_flags);
if (field_info.IsVolatile()) {
// Without context sensitive analysis, we must issue the most conservative barriers.
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index 960ac10..d321b00 100644
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -346,7 +346,7 @@
/*
* If there are any ins passed in registers that have not been promoted
- * to a callee-save register, flush them to the frame. Perform intial
+ * to a callee-save register, flush them to the frame. Perform initial
* assignment of promoted arguments.
*
* ArgLocs is an array of location records describing the incoming arguments
@@ -1133,8 +1133,7 @@
if (cu_->instruction_set != kX86 && cu_->instruction_set != kX86_64) {
LoadBaseIndexed(reg_ptr, reg_off, rl_result.reg, 1, kUnsignedHalf);
} else {
- LoadBaseIndexedDisp(reg_ptr, reg_off, 1, data_offset, rl_result.reg, kUnsignedHalf,
- INVALID_SREG);
+ LoadBaseIndexedDisp(reg_ptr, reg_off, 1, data_offset, rl_result.reg, kUnsignedHalf);
}
FreeTemp(reg_off);
FreeTemp(reg_ptr);
@@ -1429,11 +1428,11 @@
RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
if (is_long) {
if (cu_->instruction_set == kX86) {
- LoadBaseIndexedDisp(rl_object.reg, rl_offset.reg, 0, 0, rl_result.reg, k64, INVALID_SREG);
+ LoadBaseIndexedDisp(rl_object.reg, rl_offset.reg, 0, 0, rl_result.reg, k64);
} else {
RegStorage rl_temp_offset = AllocTemp();
OpRegRegReg(kOpAdd, rl_temp_offset, rl_object.reg, rl_offset.reg);
- LoadBaseDisp(rl_temp_offset, 0, rl_result.reg, k64, INVALID_SREG);
+ LoadBaseDisp(rl_temp_offset, 0, rl_result.reg, k64);
FreeTemp(rl_temp_offset);
}
} else {
@@ -1476,7 +1475,7 @@
if (is_long) {
rl_value = LoadValueWide(rl_src_value, kCoreReg);
if (cu_->instruction_set == kX86) {
- StoreBaseIndexedDisp(rl_object.reg, rl_offset.reg, 0, 0, rl_value.reg, k64, INVALID_SREG);
+ StoreBaseIndexedDisp(rl_object.reg, rl_offset.reg, 0, 0, rl_value.reg, k64);
} else {
RegStorage rl_temp_offset = AllocTemp();
OpRegRegReg(kOpAdd, rl_temp_offset, rl_object.reg, rl_offset.reg);
diff --git a/compiler/dex/quick/gen_loadstore.cc b/compiler/dex/quick/gen_loadstore.cc
index 6fe1e31..fc6af29 100644
--- a/compiler/dex/quick/gen_loadstore.cc
+++ b/compiler/dex/quick/gen_loadstore.cc
@@ -123,7 +123,7 @@
} else {
DCHECK((rl_src.location == kLocDalvikFrame) ||
(rl_src.location == kLocCompilerTemp));
- LoadBaseDisp(TargetReg(kSp), SRegOffset(rl_src.s_reg_low), r_dest, k64, INVALID_SREG);
+ LoadBaseDisp(TargetReg(kSp), SRegOffset(rl_src.s_reg_low), r_dest, k64);
}
}
diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h
index cdabf8e..20fd4b1 100644
--- a/compiler/dex/quick/mips/codegen_mips.h
+++ b/compiler/dex/quick/mips/codegen_mips.h
@@ -32,20 +32,20 @@
bool EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) OVERRIDE;
LIR* CheckSuspendUsingLoad() OVERRIDE;
RegStorage LoadHelper(ThreadOffset<4> offset);
- LIR* LoadBaseDisp(int r_base, int displacement, int r_dest, OpSize size, int s_reg);
- LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest, OpSize size,
- int s_reg);
+ LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
+ OpSize size) OVERRIDE;
LIR* LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest, int scale,
- OpSize size);
+ OpSize size) OVERRIDE;
LIR* LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement,
- RegStorage r_dest, OpSize size, int s_reg);
+ RegStorage r_dest, OpSize size) OVERRIDE;
LIR* LoadConstantNoClobber(RegStorage r_dest, int value);
LIR* LoadConstantWide(RegStorage r_dest, int64_t value);
- LIR* StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src, OpSize size);
+ LIR* StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src,
+ OpSize size) OVERRIDE;
LIR* StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src, int scale,
- OpSize size);
+ OpSize size) OVERRIDE;
LIR* StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement,
- RegStorage r_src, OpSize size, int s_reg);
+ RegStorage r_src, OpSize size) OVERRIDE;
void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg);
// Required for target - register utilities.
@@ -171,7 +171,7 @@
// TODO: collapse r_dest.
LIR* LoadBaseDispBody(RegStorage r_base, int displacement, RegStorage r_dest,
- RegStorage r_dest_hi, OpSize size, int s_reg);
+ RegStorage r_dest_hi, OpSize size);
// TODO: collapse r_src.
LIR* StoreBaseDispBody(RegStorage r_base, int displacement, RegStorage r_src,
RegStorage r_src_hi, OpSize size);
diff --git a/compiler/dex/quick/mips/int_mips.cc b/compiler/dex/quick/mips/int_mips.cc
index fe2e495..fdfe7fe 100644
--- a/compiler/dex/quick/mips/int_mips.cc
+++ b/compiler/dex/quick/mips/int_mips.cc
@@ -290,7 +290,7 @@
RegLocation rl_address = LoadValue(rl_src_address, kCoreReg);
RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
DCHECK(size == kSignedByte);
- LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, INVALID_SREG);
+ LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size);
StoreValue(rl_dest, rl_result);
return true;
}
@@ -511,7 +511,7 @@
GenArrayBoundsCheck(rl_index.reg, reg_len);
FreeTemp(reg_len);
}
- LoadBaseDisp(reg_ptr, 0, rl_result.reg, size, INVALID_SREG);
+ LoadBaseDisp(reg_ptr, 0, rl_result.reg, size);
FreeTemp(reg_ptr);
StoreValueWide(rl_dest, rl_result);
diff --git a/compiler/dex/quick/mips/utility_mips.cc b/compiler/dex/quick/mips/utility_mips.cc
index 9aa929c..8397411 100644
--- a/compiler/dex/quick/mips/utility_mips.cc
+++ b/compiler/dex/quick/mips/utility_mips.cc
@@ -448,7 +448,7 @@
// FIXME: don't split r_dest into 2 containers.
LIR* MipsMir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorage r_dest,
- RegStorage r_dest_hi, OpSize size, int s_reg) {
+ RegStorage r_dest_hi, OpSize size) {
/*
* Load value from base + displacement. Optionally perform null check
* on base (which must have an associated s_reg and MIR). If not
@@ -546,16 +546,15 @@
}
LIR* MipsMir2Lir::LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
- OpSize size, int s_reg) {
+ OpSize size) {
// TODO: base this on target.
if (size == kWord) {
size = k32;
}
if (size == k64 || size == kDouble) {
- return LoadBaseDispBody(r_base, displacement, r_dest.GetLow(), r_dest.GetHigh(), k64, s_reg);
+ return LoadBaseDispBody(r_base, displacement, r_dest.GetLow(), r_dest.GetHigh(), size);
} else {
- return LoadBaseDispBody(r_base, displacement, r_dest, RegStorage::InvalidReg(), size,
- s_reg);
+ return LoadBaseDispBody(r_base, displacement, r_dest, RegStorage::InvalidReg(), size);
}
}
@@ -665,7 +664,7 @@
}
LIR* MipsMir2Lir::StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale,
- int displacement, RegStorage r_src, OpSize size, int s_reg) {
+ int displacement, RegStorage r_src, OpSize size) {
LOG(FATAL) << "Unexpected use of StoreBaseIndexedDisp for MIPS";
return NULL;
}
@@ -676,7 +675,7 @@
}
LIR* MipsMir2Lir::LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale,
- int displacement, RegStorage r_dest, OpSize size, int s_reg) {
+ int displacement, RegStorage r_dest, OpSize size) {
LOG(FATAL) << "Unexpected use of LoadBaseIndexedDisp for MIPS";
return NULL;
}
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index 9915ff6..d10296f 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -59,7 +59,7 @@
RegStorage new_regs = AllocTypedTempWide(false, kAnyReg);
reg_arg_low = new_regs.GetLow();
reg_arg_high = new_regs.GetHigh();
- LoadBaseDisp(TargetReg(kSp), offset, new_regs, k64, INVALID_SREG);
+ LoadBaseDisp(TargetReg(kSp), offset, new_regs, k64);
} else {
reg_arg_high = AllocTemp();
int offset_high = offset + sizeof(uint32_t);
@@ -112,7 +112,7 @@
OpRegCopy(rl_dest.reg.GetHigh(), reg_arg_high);
Load32Disp(TargetReg(kSp), offset, rl_dest.reg.GetLow());
} else {
- LoadBaseDisp(TargetReg(kSp), offset, rl_dest.reg, k64, INVALID_SREG);
+ LoadBaseDisp(TargetReg(kSp), offset, rl_dest.reg, k64);
}
}
}
@@ -137,7 +137,7 @@
LockArg(data.object_arg);
RegLocation rl_dest = wide ? GetReturnWide(double_or_float) : GetReturn(double_or_float);
RegStorage reg_obj = LoadArg(data.object_arg);
- LoadBaseDisp(reg_obj, data.field_offset, rl_dest.reg, size, INVALID_SREG);
+ LoadBaseDisp(reg_obj, data.field_offset, rl_dest.reg, size);
if (data.is_volatile) {
// Without context sensitive analysis, we must issue the most conservative barriers.
// In this case, either a load or store may follow so we issue both barriers.
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index cc6532c..4b1de4b 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -91,6 +91,7 @@
// Common combo register usage patterns.
#define REG_DEF01 (REG_DEF0 | REG_DEF1)
+#define REG_DEF012 (REG_DEF0 | REG_DEF1 | REG_DEF2)
#define REG_DEF01_USE2 (REG_DEF0 | REG_DEF1 | REG_USE2)
#define REG_DEF0_USE01 (REG_DEF0 | REG_USE01)
#define REG_DEF0_USE0 (REG_DEF0 | REG_USE0)
@@ -167,6 +168,8 @@
// Target-specific initialization.
Mir2Lir* ArmCodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
ArenaAllocator* const arena);
+Mir2Lir* Arm64CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
+ ArenaAllocator* const arena);
Mir2Lir* MipsCodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
ArenaAllocator* const arena);
Mir2Lir* X86CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
@@ -783,7 +786,7 @@
bool safepoint_pc);
void GenInvoke(CallInfo* info);
void GenInvokeNoInline(CallInfo* info);
- void FlushIns(RegLocation* ArgLocs, RegLocation rl_method);
+ virtual void FlushIns(RegLocation* ArgLocs, RegLocation rl_method);
int GenDalvikArgsNoRange(CallInfo* info, int call_state, LIR** pcrLabel,
NextCallInsn next_call_insn,
const MethodReference& target_method,
@@ -830,7 +833,7 @@
bool GenInlinedUnsafeGet(CallInfo* info, bool is_long, bool is_volatile);
bool GenInlinedUnsafePut(CallInfo* info, bool is_long, bool is_object,
bool is_volatile, bool is_ordered);
- int LoadArgRegs(CallInfo* info, int call_state,
+ virtual int LoadArgRegs(CallInfo* info, int call_state,
NextCallInsn next_call_insn,
const MethodReference& target_method,
uint32_t vtable_idx,
@@ -843,15 +846,15 @@
LIR* LoadConstant(RegStorage r_dest, int value);
// Natural word size.
LIR* LoadWordDisp(RegStorage r_base, int displacement, RegStorage r_dest) {
- return LoadBaseDisp(r_base, displacement, r_dest, kWord, INVALID_SREG);
+ return LoadBaseDisp(r_base, displacement, r_dest, kWord);
}
// Load 32 bits, regardless of target.
LIR* Load32Disp(RegStorage r_base, int displacement, RegStorage r_dest) {
- return LoadBaseDisp(r_base, displacement, r_dest, k32, INVALID_SREG);
+ return LoadBaseDisp(r_base, displacement, r_dest, k32);
}
// Load a reference at base + displacement and decompress into register.
LIR* LoadRefDisp(RegStorage r_base, int displacement, RegStorage r_dest) {
- return LoadBaseDisp(r_base, displacement, r_dest, kReference, INVALID_SREG);
+ return LoadBaseDisp(r_base, displacement, r_dest, kReference);
}
// Load Dalvik value with 32-bit memory storage. If compressed object reference, decompress.
RegLocation LoadValue(RegLocation rl_src, RegisterClass op_kind);
@@ -975,13 +978,12 @@
virtual bool EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) = 0;
virtual LIR* CheckSuspendUsingLoad() = 0;
virtual RegStorage LoadHelper(ThreadOffset<4> offset) = 0;
- virtual LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest, OpSize size,
- int s_reg) = 0;
+ virtual LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
+ OpSize size) = 0;
virtual LIR* LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest,
int scale, OpSize size) = 0;
virtual LIR* LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale,
- int displacement, RegStorage r_dest, OpSize size,
- int s_reg) = 0;
+ int displacement, RegStorage r_dest, OpSize size) = 0;
virtual LIR* LoadConstantNoClobber(RegStorage r_dest, int value) = 0;
virtual LIR* LoadConstantWide(RegStorage r_dest, int64_t value) = 0;
virtual LIR* StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src,
@@ -989,8 +991,7 @@
virtual LIR* StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src,
int scale, OpSize size) = 0;
virtual LIR* StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale,
- int displacement, RegStorage r_src, OpSize size,
- int s_reg) = 0;
+ int displacement, RegStorage r_src, OpSize size) = 0;
virtual void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg) = 0;
// Required for target - register utilities.
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 1898738..a03e5f2 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -32,19 +32,20 @@
bool EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) OVERRIDE;
LIR* CheckSuspendUsingLoad() OVERRIDE;
RegStorage LoadHelper(ThreadOffset<4> offset);
- LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest, OpSize size,
- int s_reg);
+ LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
+ OpSize size) OVERRIDE;
LIR* LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest, int scale,
- OpSize size);
+ OpSize size) OVERRIDE;
LIR* LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement,
- RegStorage r_dest, OpSize size, int s_reg);
+ RegStorage r_dest, OpSize size) OVERRIDE;
LIR* LoadConstantNoClobber(RegStorage r_dest, int value);
LIR* LoadConstantWide(RegStorage r_dest, int64_t value);
- LIR* StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src, OpSize size);
+ LIR* StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src,
+ OpSize size) OVERRIDE;
LIR* StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src, int scale,
- OpSize size);
+ OpSize size) OVERRIDE;
LIR* StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement,
- RegStorage r_src, OpSize size, int s_reg);
+ RegStorage r_src, OpSize size) OVERRIDE;
void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg);
// Required for target - register utilities.
diff --git a/compiler/dex/quick/x86/fp_x86.cc b/compiler/dex/quick/x86/fp_x86.cc
index 74828c7..d1c2e70 100644
--- a/compiler/dex/quick/x86/fp_x86.cc
+++ b/compiler/dex/quick/x86/fp_x86.cc
@@ -183,7 +183,7 @@
if (is_double) {
rl_result = EvalLocWide(rl_dest, kFPReg, true);
- LoadBaseDisp(TargetReg(kSp), dest_v_reg_offset, rl_result.reg, k64, INVALID_SREG);
+ LoadBaseDisp(TargetReg(kSp), dest_v_reg_offset, rl_result.reg, k64);
StoreFinalValueWide(rl_dest, rl_result);
} else {
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index b71a2ce..ce5766f 100644
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -142,8 +142,10 @@
} else {
if (src_fp) {
NewLIR2(kX86MovdrxRR, r_dest.GetLowReg(), r_src.GetReg());
- NewLIR2(kX86PsrlqRI, r_src.GetReg(), 32);
- NewLIR2(kX86MovdrxRR, r_dest.GetHighReg(), r_src.GetReg());
+ RegStorage temp_reg = AllocTempDouble();
+ NewLIR2(kX86MovsdRR, temp_reg.GetReg(), r_src.GetReg());
+ NewLIR2(kX86PsrlqRI, temp_reg.GetReg(), 32);
+ NewLIR2(kX86MovdrxRR, r_dest.GetHighReg(), temp_reg.GetReg());
} else {
DCHECK(r_dest.IsPair());
DCHECK(r_src.IsPair());
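The first int_x86.cc hunk is a correctness fix rather than a signature change: kX86PsrlqRI previously shifted r_src in place, so reading back the high word destroyed the upper 32 bits of the source XMM register. The patch copies into a scratch XMM register and shifts that instead. The same copy-before-shift pattern, sketched standalone with SSE2 intrinsics (illustrative only, not ART code):

#include <emmintrin.h>
#include <cstdint>

// Extract the low and high 32-bit halves of an XMM register without
// clobbering it -- mirrors the fixed movd/movsd/psrlq/movd sequence above.
void ExtractPair(__m128i src, uint32_t* lo, uint32_t* hi) {
  *lo = static_cast<uint32_t>(_mm_cvtsi128_si32(src));  // movd: low 32 bits
  __m128i tmp = src;                                    // copy to a temp (the movsd role)
  tmp = _mm_srli_epi64(tmp, 32);                        // psrlq on the temp only
  *hi = static_cast<uint32_t>(_mm_cvtsi128_si32(tmp));  // movd: high 32 bits
}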
@@ -689,7 +691,7 @@
RegLocation rl_address = LoadValue(rl_src_address, kCoreReg);
RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
// Unaligned access is allowed on x86.
- LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, INVALID_SREG);
+ LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size);
if (size == k64) {
StoreValueWide(rl_dest, rl_result);
} else {
@@ -1005,7 +1007,7 @@
NewLIR2(kX86Xor32RR, dest.GetReg(), dest.GetReg());
break;
case 1:
- LoadBaseDisp(rs_rX86_SP, displacement, dest, k32, sreg);
+ LoadBaseDisp(rs_rX86_SP, displacement, dest, k32);
break;
default:
m = NewLIR4(IS_SIMM8(val) ? kX86Imul32RMI8 : kX86Imul32RMI, dest.GetReg(),
@@ -1109,8 +1111,7 @@
if (src1_in_reg) {
NewLIR2(kX86Mov32RR, rs_r1.GetReg(), rl_src1.reg.GetHighReg());
} else {
- LoadBaseDisp(rs_rX86_SP, SRegOffset(rl_src1.s_reg_low) + HIWORD_OFFSET, rs_r1,
- k32, GetSRegHi(rl_src1.s_reg_low));
+ LoadBaseDisp(rs_rX86_SP, SRegOffset(rl_src1.s_reg_low) + HIWORD_OFFSET, rs_r1, k32);
}
if (is_square) {
@@ -1133,8 +1134,7 @@
if (src2_in_reg) {
NewLIR2(kX86Mov32RR, rs_r0.GetReg(), rl_src2.reg.GetHighReg());
} else {
- LoadBaseDisp(rs_rX86_SP, SRegOffset(rl_src2.s_reg_low) + HIWORD_OFFSET, rs_r0,
- k32, GetSRegHi(rl_src2.s_reg_low));
+ LoadBaseDisp(rs_rX86_SP, SRegOffset(rl_src2.s_reg_low) + HIWORD_OFFSET, rs_r0, k32);
}
// EAX <- EAX * 1L (2H * 1L)
@@ -1167,8 +1167,7 @@
if (src2_in_reg) {
NewLIR2(kX86Mov32RR, rs_r0.GetReg(), rl_src2.reg.GetLowReg());
} else {
- LoadBaseDisp(rs_rX86_SP, SRegOffset(rl_src2.s_reg_low) + LOWORD_OFFSET, rs_r0,
- k32, rl_src2.s_reg_low);
+ LoadBaseDisp(rs_rX86_SP, SRegOffset(rl_src2.s_reg_low) + LOWORD_OFFSET, rs_r0, k32);
}
// EDX:EAX <- 2L * 1L (double precision)
@@ -1417,8 +1416,7 @@
}
}
rl_result = EvalLoc(rl_dest, reg_class, true);
- LoadBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, rl_result.reg, size,
- INVALID_SREG);
+ LoadBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, rl_result.reg, size);
if ((size == k64) || (size == kDouble)) {
StoreValueWide(rl_dest, rl_result);
} else {
@@ -1475,10 +1473,9 @@
rl_src.reg.GetRegNum() >= rs_rX86_SP.GetRegNum()) {
RegStorage temp = AllocTemp();
OpRegCopy(temp, rl_src.reg);
- StoreBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, temp, size, INVALID_SREG);
+ StoreBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, temp, size);
} else {
- StoreBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, rl_src.reg, size,
- INVALID_SREG);
+ StoreBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, rl_src.reg, size);
}
if (card_mark) {
// Free rl_index if it's a temp. Ensures there are 2 free regs for card mark.
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index 7fe0d1f..8423ec4 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -520,7 +520,7 @@
// 4 byte offset. We will fix this up in the assembler later to have the right
// value.
res = LoadBaseDisp(rl_method.reg, 256 /* bogus */, RegStorage::Solo64(low_reg_val),
- kDouble, INVALID_SREG);
+ kDouble);
res->target = data_target;
res->flags.fixup = kFixupLoad;
SetMemRefType(res, true, kLiteral);
@@ -546,7 +546,7 @@
}
LIR* X86Mir2Lir::LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale,
- int displacement, RegStorage r_dest, OpSize size, int s_reg) {
+ int displacement, RegStorage r_dest, OpSize size) {
LIR *load = NULL;
LIR *load2 = NULL;
bool is_array = r_index.Valid();
@@ -663,21 +663,21 @@
/* Load value from base + scaled index. */
LIR* X86Mir2Lir::LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest,
int scale, OpSize size) {
- return LoadBaseIndexedDisp(r_base, r_index, scale, 0, r_dest, size, INVALID_SREG);
+ return LoadBaseIndexedDisp(r_base, r_index, scale, 0, r_dest, size);
}
LIR* X86Mir2Lir::LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
- OpSize size, int s_reg) {
+ OpSize size) {
// TODO: base this on target.
if (size == kWord) {
size = k32;
}
return LoadBaseIndexedDisp(r_base, RegStorage::InvalidReg(), 0, displacement, r_dest,
- size, s_reg);
+ size);
}
LIR* X86Mir2Lir::StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale,
- int displacement, RegStorage r_src, OpSize size, int s_reg) {
+ int displacement, RegStorage r_src, OpSize size) {
LIR *store = NULL;
LIR *store2 = NULL;
bool is_array = r_index.Valid();
@@ -752,7 +752,7 @@
/* Store value to base + scaled index. */
LIR* X86Mir2Lir::StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src,
int scale, OpSize size) {
- return StoreBaseIndexedDisp(r_base, r_index, scale, 0, r_src, size, INVALID_SREG);
+ return StoreBaseIndexedDisp(r_base, r_index, scale, 0, r_src, size);
}
LIR* X86Mir2Lir::StoreBaseDisp(RegStorage r_base, int displacement,
@@ -761,8 +761,7 @@
if (size == kWord) {
size = k32;
}
- return StoreBaseIndexedDisp(r_base, RegStorage::InvalidReg(), 0, displacement, r_src, size,
- INVALID_SREG);
+ return StoreBaseIndexedDisp(r_base, RegStorage::InvalidReg(), 0, displacement, r_src, size);
}
LIR* X86Mir2Lir::OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg, RegStorage base_reg,
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index cdf26f1..7c0befc 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -1010,7 +1010,7 @@
}
if (compiler_filter_string == NULL) {
- if (instruction_set == kX86_64 || instruction_set == kArm64 || instruction_set == kMips) {
+ if (instruction_set == kX86_64 || instruction_set == kMips) {
// TODO: implement/fix compilers for these architectures.
compiler_filter_string = "interpret-only";
} else if (image) {
diff --git a/runtime/base/bit_vector.cc b/runtime/base/bit_vector.cc
index 47e85a3..0e01dc2 100644
--- a/runtime/base/bit_vector.cc
+++ b/runtime/base/bit_vector.cc
@@ -115,23 +115,24 @@
// If the highest bit set is different, we are different.
if (our_highest != src_highest) {
- return true;
+ return false;
}
// If the highest bit set is -1, both are cleared, we are the same.
// If the highest bit set is 0, both have a unique bit set, we are the same.
- if (our_highest >= 0) {
+ if (our_highest <= 0) {
return true;
}
- // Get the highest bit set's cell's index.
- int our_highest_index = (our_highest >> 5);
+ // Get the index of the cell containing the highest set bit.
+ // No need for highest + 1 here: it cannot be 0 at this point, so BitsToWords works.
+ int our_highest_index = BitsToWords(our_highest);
// This memcmp is enough: we know that the highest bit set is the same for both:
// - Therefore, min_size goes up at least that far, so we compare at least what we need to, but not less.
// i.e. we are comparing all storage cells that could differ; if both vectors have cells above our_highest_index,
// they are automatically 0.
- return (memcmp(storage_, src->GetRawStorage(), our_highest_index * sizeof(*storage_)) != 0);
+ return (memcmp(storage_, src->GetRawStorage(), our_highest_index * sizeof(*storage_)) == 0);
}
// Intersect with another bit vector.
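Taken together, the sign and sense fixes above give SameBitsSet the semantics its name promises: a highest-bit mismatch now returns false, the empty and bit-zero cases return true, and the memcmp result is tested for equality rather than inequality. A self-contained restatement of the corrected predicate (assuming 32-bit storage words; the function and parameter names are illustrative):

#include <cstdint>
#include <cstring>

static int BitsToWords(int bits) { return (bits + 31) >> 5; }  // round up to words

// highest_a / highest_b: index of the highest set bit, or -1 when empty.
bool SameBitsSet(const uint32_t* a, int highest_a, const uint32_t* b, int highest_b) {
  if (highest_a != highest_b) {
    return false;  // different highest bit => the sets differ
  }
  if (highest_a <= 0) {
    return true;   // both empty (-1), or both exactly {bit 0}
  }
  // BitsToWords(highest) suffices without +1: if the highest bit starts a new
  // word, that word holds only the shared highest bit in both vectors, so it
  // cannot differ; every word that can differ lies inside the compared range.
  return std::memcmp(a, b, BitsToWords(highest_a) * sizeof(uint32_t)) == 0;
}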
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index f5d6299..a0659e7 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -64,8 +64,8 @@
static constexpr bool kProtectFromSpace = true;
static constexpr bool kStoreStackTraces = false;
-static constexpr bool kUseBytesPromoted = true;
static constexpr size_t kBytesPromotedThreshold = 4 * MB;
+static constexpr size_t kLargeObjectBytesAllocatedThreshold = 16 * MB;
void SemiSpace::BindBitmaps() {
timings_.StartSplit("BindBitmaps");
@@ -104,8 +104,8 @@
last_gc_to_space_end_(nullptr),
bytes_promoted_(0),
bytes_promoted_since_last_whole_heap_collection_(0),
+ large_object_bytes_allocated_at_last_whole_heap_collection_(0),
whole_heap_collection_(true),
- whole_heap_collection_interval_counter_(0),
collector_name_(name_),
swap_semi_spaces_(true) {
}
@@ -187,12 +187,8 @@
if (gc_cause_ == kGcCauseExplicit || gc_cause_ == kGcCauseForNativeAlloc ||
clear_soft_references_) {
// If an explicit, native allocation-triggered, or last attempt
- // collection, collect the whole heap (and reset the interval
- // counter to be consistent.)
+ // collection, collect the whole heap.
whole_heap_collection_ = true;
- if (!kUseBytesPromoted) {
- whole_heap_collection_interval_counter_ = 0;
- }
}
if (whole_heap_collection_) {
VLOG(heap) << "Whole heap collection";
@@ -798,32 +794,27 @@
// only space collection at the next collection by updating
// whole_heap_collection.
if (!whole_heap_collection_) {
- if (!kUseBytesPromoted) {
- // Enable whole_heap_collection once every
- // kDefaultWholeHeapCollectionInterval collections.
- --whole_heap_collection_interval_counter_;
- DCHECK_GE(whole_heap_collection_interval_counter_, 0);
- if (whole_heap_collection_interval_counter_ == 0) {
- whole_heap_collection_ = true;
- }
- } else {
- // Enable whole_heap_collection if the bytes promoted since
- // the last whole heap collection exceeds a threshold.
- bytes_promoted_since_last_whole_heap_collection_ += bytes_promoted_;
- if (bytes_promoted_since_last_whole_heap_collection_ >= kBytesPromotedThreshold) {
- whole_heap_collection_ = true;
- }
+ // Enable whole_heap_collection if the bytes promoted since the
+ // last whole heap collection or the large object bytes
+ // allocated exceeds a threshold.
+ bytes_promoted_since_last_whole_heap_collection_ += bytes_promoted_;
+ bool bytes_promoted_threshold_exceeded =
+ bytes_promoted_since_last_whole_heap_collection_ >= kBytesPromotedThreshold;
+ uint64_t current_los_bytes_allocated = GetHeap()->GetLargeObjectsSpace()->GetBytesAllocated();
+ uint64_t last_los_bytes_allocated =
+ large_object_bytes_allocated_at_last_whole_heap_collection_;
+ bool large_object_bytes_threshold_exceeded =
+ current_los_bytes_allocated >=
+ last_los_bytes_allocated + kLargeObjectBytesAllocatedThreshold;
+ if (bytes_promoted_threshold_exceeded || large_object_bytes_threshold_exceeded) {
+ whole_heap_collection_ = true;
}
} else {
- if (!kUseBytesPromoted) {
- DCHECK_EQ(whole_heap_collection_interval_counter_, 0);
- whole_heap_collection_interval_counter_ = kDefaultWholeHeapCollectionInterval;
- whole_heap_collection_ = false;
- } else {
- // Reset it.
- bytes_promoted_since_last_whole_heap_collection_ = bytes_promoted_;
- whole_heap_collection_ = false;
- }
+ // Reset the counters.
+ bytes_promoted_since_last_whole_heap_collection_ = bytes_promoted_;
+ large_object_bytes_allocated_at_last_whole_heap_collection_ =
+ GetHeap()->GetLargeObjectsSpace()->GetBytesAllocated();
+ whole_heap_collection_ = false;
}
}
// Clear all of the spaces' mark bitmaps.
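The replacement logic above retires the fixed collection-interval counter: a whole-heap collection is now requested when either the bytes promoted since the last whole-heap collection reach kBytesPromotedThreshold, or large-object-space allocation has grown by kLargeObjectBytesAllocatedThreshold since then. A simplified sketch of the two triggers and the post-collection reset (the GenState struct and the function names are placeholders, not ART code):

#include <cstdint>

constexpr uint64_t kMB = 1024 * 1024;
constexpr uint64_t kBytesPromotedThreshold = 4 * kMB;
constexpr uint64_t kLargeObjectBytesAllocatedThreshold = 16 * kMB;

struct GenState {
  uint64_t bytes_promoted_since_last_whole_heap = 0;
  uint64_t los_bytes_at_last_whole_heap = 0;
};

// Decide whether the *next* collection should cover the whole heap.
bool ShouldCollectWholeHeap(GenState* s, uint64_t bytes_promoted_this_gc,
                            uint64_t current_los_bytes_allocated) {
  s->bytes_promoted_since_last_whole_heap += bytes_promoted_this_gc;
  bool promoted_exceeded =
      s->bytes_promoted_since_last_whole_heap >= kBytesPromotedThreshold;
  bool los_exceeded = current_los_bytes_allocated >=
      s->los_bytes_at_last_whole_heap + kLargeObjectBytesAllocatedThreshold;
  return promoted_exceeded || los_exceeded;
}

// After a whole-heap collection, restart both baselines.
void ResetAfterWholeHeap(GenState* s, uint64_t bytes_promoted_this_gc,
                         uint64_t current_los_bytes_allocated) {
  s->bytes_promoted_since_last_whole_heap = bytes_promoted_this_gc;
  s->los_bytes_at_last_whole_heap = current_los_bytes_allocated;
}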
diff --git a/runtime/gc/collector/semi_space.h b/runtime/gc/collector/semi_space.h
index 3b3e1b1..9fdf471 100644
--- a/runtime/gc/collector/semi_space.h
+++ b/runtime/gc/collector/semi_space.h
@@ -234,14 +234,14 @@
// the non-moving space, since the last whole heap collection.
uint64_t bytes_promoted_since_last_whole_heap_collection_;
+ // Used for the generational mode. Keeps track of how many bytes of
+ // large objects were allocated at the last whole heap collection.
+ uint64_t large_object_bytes_allocated_at_last_whole_heap_collection_;
+
// Used for the generational mode. When true, collect the whole
// heap. When false, collect only the bump pointer spaces.
bool whole_heap_collection_;
- // Used for the generational mode. A counter used to enable
- // whole_heap_collection_ once per interval.
- int whole_heap_collection_interval_counter_;
-
// How many objects and bytes we moved, used so that we don't need to get the size of the
// to_space_ when calculating how many objects and bytes we freed.
size_t bytes_moved_;
diff --git a/runtime/globals.h b/runtime/globals.h
index eb52a46..07fadb9 100644
--- a/runtime/globals.h
+++ b/runtime/globals.h
@@ -55,9 +55,8 @@
// but ARM ELF requires 8.
static constexpr size_t kArmAlignment = 8;
-// ARM64 instruction alignment. AArch64 require code to be 4-byte aligned.
-// AArch64 ELF requires at least 4.
-static constexpr size_t kArm64Alignment = 4;
+// ARM64 instruction alignment. This is the recommended alignment for maximum performance.
+static constexpr size_t kArm64Alignment = 16;
// MIPS instruction alignment. MIPS processors require code to be 4-byte aligned.
// TODO: Can this be 4?
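The ARM64 change raises the code alignment from the 4-byte architectural minimum to 16 bytes purely as a performance choice, at the cost of a little padding per method. How such a constant is typically consumed during code layout, sketched with a standard power-of-two round-up (illustrative; not the ART helper):

#include <cstddef>

constexpr size_t kArm64Alignment = 16;

// Round x up to the next multiple of alignment (alignment must be a power of two).
constexpr size_t RoundUp(size_t x, size_t alignment) {
  return (x + alignment - 1) & ~(alignment - 1);
}

static_assert(RoundUp(100, kArm64Alignment) == 112, "100 bytes pad up to 112");
static_assert(RoundUp(112, kArm64Alignment) == 112, "already aligned");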