Inline x86 String.indexOf
Take advantage of the presence of a constant search char or start index
to tune the generated code.
Change-Id: I0adcf184fb91b899a95aa4d8ef044a14deb51d88
Signed-off-by: Mark Mendell <mark.p.mendell@intel.com>
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index fa9a944..ad5b154 100644
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -185,6 +185,14 @@
if (flags & REG_USEB) {
SetupRegMask(&lir->u.m.use_mask, rBX);
}
+
+ // Fixup hard to describe instruction: Uses rAX, rCX, rDI; sets rDI.
+ if (lir->opcode == kX86RepneScasw) {
+ SetupRegMask(&lir->u.m.use_mask, rAX);
+ SetupRegMask(&lir->u.m.use_mask, rCX);
+ SetupRegMask(&lir->u.m.use_mask, rDI);
+ SetupRegMask(&lir->u.m.def_mask, rDI);
+ }
}
/* For dumping instructions */
@@ -936,4 +944,174 @@
Mir2Lir::InstallLiteralPools();
}
+// Offsets within java.lang.String.
+#define STRING_VALUE_OFFSET 8
+#define STRING_COUNT_OFFSET 12
+#define STRING_OFFSET_OFFSET 20
+#define STRING_DATA_OFFSET 12
+
+/*
+ * Fast string.index_of(I) & (II). Inline check for simple case of char <= 0xffff,
+ * otherwise bails to standard library code.
+ */
+bool X86Mir2Lir::GenInlinedIndexOf(CallInfo* info, bool zero_based) {
+ ClobberCallerSave();
+ LockCallTemps(); // Using fixed registers
+
+ // EAX: 16 bit character being searched.
+ // ECX: count: number of words to be searched.
+ // EDI: String being searched.
+ // EDX: temporary during execution.
+ // EBX: temporary during execution.
+
+ RegLocation rl_obj = info->args[0];
+ RegLocation rl_char = info->args[1];
+ RegLocation rl_start = info->args[2];
+
+ uint32_t char_value =
+ rl_char.is_const ? mir_graph_->ConstantValue(rl_char.orig_sreg) : 0;
+
+ if (char_value > 0xFFFF) {
+ // We have to punt to the real String.indexOf.
+ return false;
+ }
+
+ // Okay, we are commited to inlining this.
+ RegLocation rl_return = GetReturn(false);
+ RegLocation rl_dest = InlineTarget(info);
+
+ // Is the string non-NULL?
+ LoadValueDirectFixed(rl_obj, rDX);
+ GenNullCheck(rl_obj.s_reg_low, rDX, info->opt_flags);
+
+ // Record that we have inlined & null checked the object.
+ info->opt_flags |= (MIR_INLINED | MIR_IGNORE_NULL_CHECK);
+
+ // Does the character fit in 16 bits?
+ LIR* launch_pad = nullptr;
+ if (rl_char.is_const) {
+ // We need the value in EAX.
+ LoadConstantNoClobber(rAX, char_value);
+ } else {
+ // Character is not a constant; compare at runtime.
+ LoadValueDirectFixed(rl_char, rAX);
+ launch_pad = RawLIR(0, kPseudoIntrinsicRetry, WrapPointer(info));
+ intrinsic_launchpads_.Insert(launch_pad);
+ OpCmpImmBranch(kCondGt, rAX, 0xFFFF, launch_pad);
+ }
+
+ // From here down, we know that we are looking for a char that fits in 16 bits.
+
+ // Character is in EAX.
+ // Object pointer is in EDX.
+
+ // We need to preserve EDI, but have no spare registers, so push it on the stack.
+ // We have to remember that all stack addresses after this are offset by sizeof(EDI).
+ NewLIR1(kX86Push32R, rDI);
+
+ // Compute the number of words to search in to rCX.
+ LoadWordDisp(rDX, STRING_COUNT_OFFSET, rCX);
+ LIR *length_compare = nullptr;
+ int start_value = 0;
+ if (zero_based) {
+ // We have to handle an empty string. Use special instruction JECXZ.
+ length_compare = NewLIR0(kX86Jecxz8);
+ } else {
+ // We have to offset by the start index.
+ if (rl_start.is_const) {
+ start_value = mir_graph_->ConstantValue(rl_start.orig_sreg);
+ start_value = std::max(start_value, 0);
+
+ // Is the start > count?
+ length_compare = OpCmpImmBranch(kCondLe, rCX, start_value, nullptr);
+
+ if (start_value != 0) {
+ OpRegImm(kOpSub, rCX, start_value);
+ }
+ } else {
+ // Runtime start index.
+ rl_start = UpdateLoc(rl_start);
+ if (rl_start.location == kLocPhysReg) {
+ length_compare = OpCmpBranch(kCondLe, rCX, rl_start.low_reg, nullptr);
+ OpRegReg(kOpSub, rCX, rl_start.low_reg);
+ } else {
+ // Compare to memory to avoid a register load. Handle pushed EDI.
+ int displacement = SRegOffset(rl_start.s_reg_low) + sizeof(uint32_t);
+ OpRegMem(kOpCmp, rDX, rX86_SP, displacement);
+ length_compare = NewLIR2(kX86Jcc8, 0, kX86CondLe);
+ OpRegMem(kOpSub, rCX, rX86_SP, displacement);
+ }
+ }
+ }
+ DCHECK(length_compare != nullptr);
+
+ // ECX now contains the count in words to be searched.
+
+ // Load the address of the string into EBX.
+ // The string starts at VALUE(String) + 2 * OFFSET(String) + STRING_DATA_OFFSET.
+ LoadWordDisp(rDX, STRING_VALUE_OFFSET, rDI);
+ LoadWordDisp(rDX, STRING_OFFSET_OFFSET, rBX);
+ OpLea(rBX, rDI, rBX, 1, STRING_DATA_OFFSET);
+
+ // Now compute into EDI where the search will start.
+ if (zero_based || rl_start.is_const) {
+ if (start_value == 0) {
+ OpRegCopy(rDI, rBX);
+ } else {
+ NewLIR3(kX86Lea32RM, rDI, rBX, 2 * start_value);
+ }
+ } else {
+ if (rl_start.location == kLocPhysReg) {
+ if (rl_start.low_reg == rDI) {
+ // We have a slight problem here. We are already using RDI!
+ // Grab the value from the stack.
+ LoadWordDisp(rX86_SP, 0, rDX);
+ OpLea(rDI, rBX, rDX, 1, 0);
+ } else {
+ OpLea(rDI, rBX, rl_start.low_reg, 1, 0);
+ }
+ } else {
+ OpRegCopy(rDI, rBX);
+ // Load the start index from stack, remembering that we pushed EDI.
+ int displacement = SRegOffset(rl_start.s_reg_low) + sizeof(uint32_t);
+ LoadWordDisp(rX86_SP, displacement, rDX);
+ OpLea(rDI, rBX, rDX, 1, 0);
+ }
+ }
+
+ // EDI now contains the start of the string to be searched.
+ // We are all prepared to do the search for the character.
+ NewLIR0(kX86RepneScasw);
+
+ // Did we find a match?
+ LIR* failed_branch = OpCondBranch(kCondNe, nullptr);
+
+ // yes, we matched. Compute the index of the result.
+ // index = ((curr_ptr - orig_ptr) / 2) - 1.
+ OpRegReg(kOpSub, rDI, rBX);
+ OpRegImm(kOpAsr, rDI, 1);
+ NewLIR3(kX86Lea32RM, rl_return.low_reg, rDI, -1);
+ LIR *all_done = NewLIR1(kX86Jmp8, 0);
+
+ // Failed to match; return -1.
+ LIR *not_found = NewLIR0(kPseudoTargetLabel);
+ length_compare->target = not_found;
+ failed_branch->target = not_found;
+ LoadConstantNoClobber(rl_return.low_reg, -1);
+
+ // And join up at the end.
+ all_done->target = NewLIR0(kPseudoTargetLabel);
+ // Restore EDI from the stack.
+ NewLIR1(kX86Pop32R, rDI);
+
+ // Out of line code returns here.
+ if (launch_pad != nullptr) {
+ LIR *return_point = NewLIR0(kPseudoTargetLabel);
+ launch_pad->operands[2] = WrapPointer(return_point);
+ }
+
+ StoreValue(rl_dest, rl_return);
+ return true;
+}
+
} // namespace art