Merge "ART: Refactor CompileOptimized"
diff --git a/compiler/compiler.h b/compiler/compiler.h
index a04641e..94b0fe3 100644
--- a/compiler/compiler.h
+++ b/compiler/compiler.h
@@ -63,13 +63,6 @@
virtual uintptr_t GetEntryPointOf(mirror::ArtMethod* method) const
SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) = 0;
- virtual bool WriteElf(art::File* file,
- OatWriter* oat_writer,
- const std::vector<const art::DexFile*>& dex_files,
- const std::string& android_root,
- bool is_host) const
- SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) = 0;
-
uint64_t GetMaximumCompilationTimeBeforeWarning() const {
return maximum_compilation_time_before_warning_;
}
@@ -107,9 +100,6 @@
return driver_;
}
- // Whether to produce 64-bit ELF files for 64-bit targets. Leave this off for now.
- static constexpr bool kProduce64BitELFFiles = false;
-
private:
CompilerDriver* const driver_;
const uint64_t maximum_compilation_time_before_warning_;
diff --git a/compiler/dex/quick/quick_compiler.cc b/compiler/dex/quick/quick_compiler.cc
index 2c0bd47..fc3e687 100644
--- a/compiler/dex/quick/quick_compiler.cc
+++ b/compiler/dex/quick/quick_compiler.cc
@@ -793,20 +793,6 @@
InstructionSetPointerSize(GetCompilerDriver()->GetInstructionSet())));
}
-bool QuickCompiler::WriteElf(art::File* file,
- OatWriter* oat_writer,
- const std::vector<const art::DexFile*>& dex_files,
- const std::string& android_root,
- bool is_host) const {
- if (kProduce64BitELFFiles && Is64BitInstructionSet(GetCompilerDriver()->GetInstructionSet())) {
- return art::ElfWriterQuick64::Create(file, oat_writer, dex_files, android_root, is_host,
- *GetCompilerDriver());
- } else {
- return art::ElfWriterQuick32::Create(file, oat_writer, dex_files, android_root, is_host,
- *GetCompilerDriver());
- }
-}
-
Mir2Lir* QuickCompiler::GetCodeGenerator(CompilationUnit* cu, void* compilation_unit) {
UNUSED(compilation_unit);
Mir2Lir* mir_to_lir = nullptr;
diff --git a/compiler/dex/quick/quick_compiler.h b/compiler/dex/quick/quick_compiler.h
index 09b08ac..8d2c324 100644
--- a/compiler/dex/quick/quick_compiler.h
+++ b/compiler/dex/quick/quick_compiler.h
@@ -52,14 +52,6 @@
uintptr_t GetEntryPointOf(mirror::ArtMethod* method) const OVERRIDE
SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
- bool WriteElf(art::File* file,
- OatWriter* oat_writer,
- const std::vector<const art::DexFile*>& dex_files,
- const std::string& android_root,
- bool is_host) const
- OVERRIDE
- SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
static Mir2Lir* GetCodeGenerator(CompilationUnit* cu, void* compilation_unit);
void InitCompilationUnit(CompilationUnit& cu) const OVERRIDE;
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index c2b8375..f263f6d 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -40,6 +40,7 @@
#include "dex/verified_method.h"
#include "dex/quick/dex_file_method_inliner.h"
#include "driver/compiler_options.h"
+#include "elf_writer_quick.h"
#include "jni_internal.h"
#include "object_lock.h"
#include "profiler.h"
@@ -72,6 +73,9 @@
static constexpr bool kTimeCompileMethod = !kIsDebugBuild;
+// Whether to produce 64-bit ELF files for 64-bit targets. Leave this off for now.
+static constexpr bool kProduce64BitELFFiles = false;
+
static double Percentage(size_t x, size_t y) {
return 100.0 * (static_cast<double>(x)) / (static_cast<double>(x + y));
}
@@ -2368,7 +2372,11 @@
OatWriter* oat_writer,
art::File* file)
SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
- return compiler_->WriteElf(file, oat_writer, dex_files, android_root, is_host);
+ if (kProduce64BitELFFiles && Is64BitInstructionSet(GetInstructionSet())) {
+ return art::ElfWriterQuick64::Create(file, oat_writer, dex_files, android_root, is_host, *this);
+ } else {
+ return art::ElfWriterQuick32::Create(file, oat_writer, dex_files, android_root, is_host, *this);
+ }
}
bool CompilerDriver::SkipCompilation(const std::string& method_name) {
diff --git a/compiler/optimizing/code_generator_utils.cc b/compiler/optimizing/code_generator_utils.cc
index 26cab2f..921c1d8 100644
--- a/compiler/optimizing/code_generator_utils.cc
+++ b/compiler/optimizing/code_generator_utils.cc
@@ -18,13 +18,15 @@
#include "base/logging.h"
+namespace art {
+
void CalculateMagicAndShiftForDivRem(int64_t divisor, bool is_long,
int64_t* magic, int* shift) {
// It does not make sense to calculate magic and shift for zero divisor.
DCHECK_NE(divisor, 0);
- /* According to implementation from H.S.Warren's "Hacker's Delight" (Addison Wesley, 2002)
- * Chapter 10 and T,Grablund, P.L.Montogomery's "Division by Invariant Integers Using
+ /* Implementation according to H.S.Warren's "Hacker's Delight" (Addison Wesley, 2002)
+   * Chapter 10 and T. Granlund, P. L. Montgomery's "Division by Invariant Integers Using
* Multiplication" (PLDI 1994).
* The magic number M and shift S can be calculated in the following way:
* Let nc be the most positive value of numerator(n) such that nc = kd - 1,
@@ -39,11 +41,11 @@
* 2^p > nc * (d - 2^p % d), where d >= 2
* 2^p > nc * (d + 2^p % d), where d <= -2.
*
- * The magic number M is calcuated by
+ * The magic number M is calculated by
* M = (2^p + d - 2^p % d) / d, where d >= 2
* M = (2^p - d - 2^p % d) / d, where d <= -2.
*
- * Notice that p is always bigger than or equal to 32 (resp. 64), so we just return 32-p
+   * Notice that p is always bigger than or equal to 32 (resp. 64), so we just return p - 32
    * (resp. p - 64) as the shift number S.
*/
@@ -52,9 +54,10 @@
// Initialize the computations.
uint64_t abs_d = (divisor >= 0) ? divisor : -divisor;
- uint64_t tmp = exp + (is_long ? static_cast<uint64_t>(divisor) >> 63 :
- static_cast<uint32_t>(divisor) >> 31);
- uint64_t abs_nc = tmp - 1 - tmp % abs_d;
+ uint64_t sign_bit = is_long ? static_cast<uint64_t>(divisor) >> 63 :
+ static_cast<uint32_t>(divisor) >> 31;
+ uint64_t tmp = exp + sign_bit;
+ uint64_t abs_nc = tmp - 1 - (tmp % abs_d);
uint64_t quotient1 = exp / abs_nc;
uint64_t remainder1 = exp % abs_nc;
uint64_t quotient2 = exp / abs_d;
@@ -91,3 +94,4 @@
*shift = is_long ? p - 64 : p - 32;
}
+} // namespace art
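
Note: to make the magic-number algorithm above concrete, here is a minimal,
self-contained sketch of the strength reduction it enables. The divisor-7
constants (magic 0x92492493, shift 2) are the well-known Hacker's Delight
values and are assumed here; they are what
CalculateMagicAndShiftForDivRem(7, false /* is_long */, &magic, &shift) is
expected to yield in the 32-bit case, and the add-the-numerator correction
mirrors what the x86 generators emit when the magic is negative and the
divisor positive:

    #include <cassert>
    #include <cstdint>

    // Signed 32-bit division by the constant 7, with no divide instruction.
    int32_t DivBy7(int32_t n) {
      const int32_t magic = static_cast<int32_t>(0x92492493);  // M for d = 7 (M < 0).
      const int shift = 2;                                     // S for d = 7.
      // Take the high 32 bits of the 64-bit product M * n.
      int32_t q = static_cast<int32_t>((static_cast<int64_t>(magic) * n) >> 32);
      q += n;       // Correction for M < 0, d > 0 (cf. GenerateDivRemWithAnyConstant).
      q >>= shift;  // Arithmetic right shift of negative values assumed.
      q += static_cast<uint32_t>(q) >> 31;  // Round negative quotients toward zero.
      return q;
    }

    int main() {
      for (int32_t n : {-100, -7, -1, 0, 1, 6, 7, 100}) {
        assert(DivBy7(n) == n / 7);
      }
      return 0;
    }
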
diff --git a/compiler/optimizing/code_generator_utils.h b/compiler/optimizing/code_generator_utils.h
index 742d675..59b495c 100644
--- a/compiler/optimizing/code_generator_utils.h
+++ b/compiler/optimizing/code_generator_utils.h
@@ -19,7 +19,12 @@
#include <cstdint>
-// Computes the magic number and the shift needed in the div/rem by constant algorithm
+namespace art {
+
+// Computes the magic number and the shift needed in the div/rem-by-constant algorithm and
+// returns them through the out arguments `magic` and `shift`.
void CalculateMagicAndShiftForDivRem(int64_t divisor, bool is_long, int64_t* magic, int* shift);
+} // namespace art
+
#endif // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_UTILS_H_
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 4d74683..a6fb07f 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -2304,10 +2304,11 @@
LocationSummary* locations = instruction->GetLocations();
DCHECK(locations->InAt(1).IsConstant());
+ DCHECK(locations->InAt(1).GetConstant()->IsIntConstant());
Register out_register = locations->Out().AsRegister<Register>();
Register input_register = locations->InAt(0).AsRegister<Register>();
- int imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+ int32_t imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
DCHECK(imm == 1 || imm == -1);
@@ -2322,16 +2323,14 @@
}
-void InstructionCodeGeneratorX86::DivByPowerOfTwo(HBinaryOperation* instruction) {
- DCHECK(instruction->IsDiv());
-
+void InstructionCodeGeneratorX86::DivByPowerOfTwo(HDiv* instruction) {
LocationSummary* locations = instruction->GetLocations();
Register out_register = locations->Out().AsRegister<Register>();
Register input_register = locations->InAt(0).AsRegister<Register>();
- int imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+ int32_t imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
- DCHECK(instruction->IsDiv() && IsPowerOfTwo(std::abs(imm)));
+ DCHECK(IsPowerOfTwo(std::abs(imm)));
Register num = locations->GetTemp(0).AsRegister<Register>();
__ leal(num, Address(input_register, std::abs(imm) - 1));
@@ -2440,15 +2439,15 @@
DCHECK_EQ(EAX, first.AsRegister<Register>());
DCHECK_EQ(is_div ? EAX : EDX, out.AsRegister<Register>());
- if (second.IsConstant()) {
- int imm = second.GetConstant()->AsIntConstant()->GetValue();
+ if (instruction->InputAt(1)->IsIntConstant()) {
+ int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
if (imm == 0) {
// Do not generate anything for 0. DivZeroCheck would forbid any generated code.
} else if (imm == 1 || imm == -1) {
DivRemOneOrMinusOne(instruction);
} else if (is_div && IsPowerOfTwo(std::abs(imm))) {
- DivByPowerOfTwo(instruction);
+ DivByPowerOfTwo(instruction->AsDiv());
} else {
DCHECK(imm <= -2 || imm >= 2);
GenerateDivRemWithAnyConstant(instruction);
@@ -2519,7 +2518,7 @@
// We need to save the numerator while we tweak eax and edx. As we are using imul in a way
// which enforces results to be in EAX and EDX, things are simpler if we use EAX also as
// output and request another temp.
- if (div->InputAt(1)->IsConstant()) {
+ if (div->InputAt(1)->IsIntConstant()) {
locations->AddTemp(Location::RequiresRegister());
}
break;
@@ -2593,7 +2592,7 @@
// We need to save the numerator while we tweak eax and edx. As we are using imul in a way
// which enforces results to be in EAX and EDX, things are simpler if we use EDX also as
// output and request another temp.
- if (rem->InputAt(1)->IsConstant()) {
+ if (rem->InputAt(1)->IsIntConstant()) {
locations->AddTemp(Location::RequiresRegister());
}
break;
@@ -3958,23 +3957,43 @@
}
void ParallelMoveResolverX86::MoveMemoryToMemory32(int dst, int src) {
- ScratchRegisterScope ensure_scratch(
- this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
- Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
- int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
- __ movl(temp_reg, Address(ESP, src + stack_offset));
- __ movl(Address(ESP, dst + stack_offset), temp_reg);
+ ScratchRegisterScope possible_scratch(
+ this, kNoRegister, codegen_->GetNumberOfCoreRegisters());
+ int temp = possible_scratch.GetRegister();
+ if (temp == kNoRegister) {
+ // Use the stack.
+ __ pushl(Address(ESP, src));
+ __ popl(Address(ESP, dst));
+ } else {
+ Register temp_reg = static_cast<Register>(temp);
+ __ movl(temp_reg, Address(ESP, src));
+ __ movl(Address(ESP, dst), temp_reg);
+ }
}
void ParallelMoveResolverX86::MoveMemoryToMemory64(int dst, int src) {
- ScratchRegisterScope ensure_scratch(
- this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
- Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
- int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
- __ movl(temp_reg, Address(ESP, src + stack_offset));
- __ movl(Address(ESP, dst + stack_offset), temp_reg);
- __ movl(temp_reg, Address(ESP, src + stack_offset + kX86WordSize));
- __ movl(Address(ESP, dst + stack_offset + kX86WordSize), temp_reg);
+ ScratchRegisterScope possible_scratch(
+ this, kNoRegister, codegen_->GetNumberOfCoreRegisters());
+ int temp = possible_scratch.GetRegister();
+ if (temp == kNoRegister) {
+ // Use the stack instead.
+ // Push src low word.
+ __ pushl(Address(ESP, src));
+ // Push src high word. Stack offset = 4.
+ __ pushl(Address(ESP, src + 4 /* offset */ + kX86WordSize /* high */));
+
+ // Pop into dst high word. Stack offset = 8.
+ // Pop with ESP address uses the 'after increment' value of ESP.
+ __ popl(Address(ESP, dst + 4 /* offset */ + kX86WordSize /* high */));
+ // Finally dst low word. Stack offset = 4.
+ __ popl(Address(ESP, dst));
+ } else {
+ Register temp_reg = static_cast<Register>(temp);
+ __ movl(temp_reg, Address(ESP, src));
+ __ movl(Address(ESP, dst), temp_reg);
+ __ movl(temp_reg, Address(ESP, src + kX86WordSize));
+ __ movl(Address(ESP, dst + kX86WordSize), temp_reg);
+ }
}
void ParallelMoveResolverX86::EmitMove(size_t index) {
@@ -4039,10 +4058,18 @@
__ xorps(dest, dest);
} else {
ScratchRegisterScope ensure_scratch(
- this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
- Register temp = static_cast<Register>(ensure_scratch.GetRegister());
- __ movl(temp, Immediate(value));
- __ movd(dest, temp);
+ this, kNoRegister, codegen_->GetNumberOfCoreRegisters());
+ int temp_reg = ensure_scratch.GetRegister();
+ if (temp_reg == kNoRegister) {
+ // Avoid spilling/restoring a scratch register by using the stack.
+ __ pushl(Immediate(value));
+ __ movss(dest, Address(ESP, 0));
+ __ addl(ESP, Immediate(4));
+ } else {
+ Register temp = static_cast<Register>(temp_reg);
+ __ movl(temp, Immediate(value));
+ __ movd(dest, temp);
+ }
}
} else {
DCHECK(destination.IsStackSlot()) << destination;
@@ -4091,42 +4118,96 @@
}
}
-void ParallelMoveResolverX86::Exchange(Register reg, int mem) {
- Register suggested_scratch = reg == EAX ? EBX : EAX;
- ScratchRegisterScope ensure_scratch(
- this, reg, suggested_scratch, codegen_->GetNumberOfCoreRegisters());
+void ParallelMoveResolverX86::Exchange(Register reg1, Register reg2) {
+ // Prefer to avoid xchg as it isn't speedy on smaller processors.
+ ScratchRegisterScope possible_scratch(
+ this, reg1, codegen_->GetNumberOfCoreRegisters());
+ int temp_reg = possible_scratch.GetRegister();
+ if (temp_reg == kNoRegister || temp_reg == reg2) {
+ __ pushl(reg1);
+ __ movl(reg1, reg2);
+ __ popl(reg2);
+ } else {
+ Register temp = static_cast<Register>(temp_reg);
+ __ movl(temp, reg1);
+ __ movl(reg1, reg2);
+ __ movl(reg2, temp);
+ }
+}
- int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
- __ movl(static_cast<Register>(ensure_scratch.GetRegister()), Address(ESP, mem + stack_offset));
- __ movl(Address(ESP, mem + stack_offset), reg);
- __ movl(reg, static_cast<Register>(ensure_scratch.GetRegister()));
+void ParallelMoveResolverX86::Exchange(Register reg, int mem) {
+ ScratchRegisterScope possible_scratch(
+ this, reg, codegen_->GetNumberOfCoreRegisters());
+ int temp_reg = possible_scratch.GetRegister();
+ if (temp_reg == kNoRegister) {
+ __ pushl(Address(ESP, mem));
+ __ movl(Address(ESP, mem + kX86WordSize), reg);
+ __ popl(reg);
+ } else {
+ Register temp = static_cast<Register>(temp_reg);
+ __ movl(temp, Address(ESP, mem));
+ __ movl(Address(ESP, mem), reg);
+ __ movl(reg, temp);
+ }
}
void ParallelMoveResolverX86::Exchange32(XmmRegister reg, int mem) {
- ScratchRegisterScope ensure_scratch(
- this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
-
- Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
- int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
- __ movl(temp_reg, Address(ESP, mem + stack_offset));
- __ movss(Address(ESP, mem + stack_offset), reg);
- __ movd(reg, temp_reg);
+ ScratchRegisterScope possible_scratch(
+ this, kNoRegister, codegen_->GetNumberOfCoreRegisters());
+ int temp_reg = possible_scratch.GetRegister();
+ if (temp_reg == kNoRegister) {
+ __ pushl(Address(ESP, mem));
+ __ movss(Address(ESP, mem + kX86WordSize), reg);
+ __ movss(reg, Address(ESP, 0));
+ __ addl(ESP, Immediate(kX86WordSize));
+ } else {
+ Register temp = static_cast<Register>(temp_reg);
+ __ movl(temp, Address(ESP, mem));
+ __ movss(Address(ESP, mem), reg);
+ __ movd(reg, temp);
+ }
}
void ParallelMoveResolverX86::Exchange(int mem1, int mem2) {
- ScratchRegisterScope ensure_scratch1(
- this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
+ ScratchRegisterScope possible_scratch1(
+ this, kNoRegister, codegen_->GetNumberOfCoreRegisters());
+ int temp_reg1 = possible_scratch1.GetRegister();
+ if (temp_reg1 == kNoRegister) {
+ // No free registers. Use the stack.
+ __ pushl(Address(ESP, mem1));
+ __ pushl(Address(ESP, mem2 + kX86WordSize));
+ // Pop with ESP address uses the 'after increment' value of ESP.
+ __ popl(Address(ESP, mem1 + kX86WordSize));
+ __ popl(Address(ESP, mem2));
+ } else {
+ // Got the first one. Try for a second.
+ ScratchRegisterScope possible_scratch2(
+ this, temp_reg1, codegen_->GetNumberOfCoreRegisters());
+ int temp_reg2 = possible_scratch2.GetRegister();
+ if (temp_reg2 == kNoRegister) {
+ Register temp = static_cast<Register>(temp_reg1);
+ // Bummer. Only have one free register to use.
+ // Save mem1 on the stack.
+ __ pushl(Address(ESP, mem1));
- Register suggested_scratch = ensure_scratch1.GetRegister() == EAX ? EBX : EAX;
- ScratchRegisterScope ensure_scratch2(
- this, ensure_scratch1.GetRegister(), suggested_scratch, codegen_->GetNumberOfCoreRegisters());
+ // Copy mem2 into mem1.
+ __ movl(temp, Address(ESP, mem2 + kX86WordSize));
+ __ movl(Address(ESP, mem1 + kX86WordSize), temp);
- int stack_offset = ensure_scratch1.IsSpilled() ? kX86WordSize : 0;
- stack_offset += ensure_scratch2.IsSpilled() ? kX86WordSize : 0;
- __ movl(static_cast<Register>(ensure_scratch1.GetRegister()), Address(ESP, mem1 + stack_offset));
- __ movl(static_cast<Register>(ensure_scratch2.GetRegister()), Address(ESP, mem2 + stack_offset));
- __ movl(Address(ESP, mem2 + stack_offset), static_cast<Register>(ensure_scratch1.GetRegister()));
- __ movl(Address(ESP, mem1 + stack_offset), static_cast<Register>(ensure_scratch2.GetRegister()));
+ // Now pop mem1 into mem2.
+ // Pop with ESP address uses the 'after increment' value of ESP.
+ __ popl(Address(ESP, mem2));
+ } else {
+ // Great. We have 2 registers to play with.
+ Register temp1 = static_cast<Register>(temp_reg1);
+ Register temp2 = static_cast<Register>(temp_reg2);
+ DCHECK_NE(temp1, temp2);
+ __ movl(temp1, Address(ESP, mem1));
+ __ movl(temp2, Address(ESP, mem2));
+ __ movl(Address(ESP, mem2), temp1);
+ __ movl(Address(ESP, mem1), temp2);
+ }
+ }
}
void ParallelMoveResolverX86::EmitSwap(size_t index) {
@@ -4135,7 +4216,7 @@
Location destination = move->GetDestination();
if (source.IsRegister() && destination.IsRegister()) {
- __ xchgl(destination.AsRegister<Register>(), source.AsRegister<Register>());
+ Exchange(destination.AsRegister<Register>(), source.AsRegister<Register>());
} else if (source.IsRegister() && destination.IsStackSlot()) {
Exchange(source.AsRegister<Register>(), destination.GetStackIndex());
} else if (source.IsStackSlot() && destination.IsRegister()) {
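
Note: the displacement arithmetic in the stack fallbacks above is easy to get
wrong, so here is a minimal, self-contained simulation of the no-free-register
path of Exchange(int mem1, int mem2). The word-array memory model and the names
are illustrative only; the push/pop semantics mirror x86, where a push computes
its ESP-relative operand before decrementing ESP and a pop computes it with the
'after increment' value of ESP, as the comments in the patch note:

    #include <cassert>
    #include <cstdint>

    constexpr int kWord = 4;      // Stands in for kX86WordSize.
    int32_t mem[64];              // Word-granular memory, byte-addressed below.
    int esp = 32 * kWord;         // The stack grows toward lower addresses.

    int32_t& word(int addr) { return mem[addr / kWord]; }

    void pushl(int disp) {        // pushl [ESP + disp]
      int32_t v = word(esp + disp);   // Operand address uses the old ESP.
      esp -= kWord;
      word(esp) = v;
    }

    void popl(int disp) {         // popl [ESP + disp]
      int32_t v = word(esp);
      esp += kWord;
      word(esp + disp) = v;           // Operand address uses the incremented ESP.
    }

    int main() {
      const int mem1 = 2 * kWord, mem2 = 5 * kWord;
      word(esp + mem1) = 111;
      word(esp + mem2) = 222;
      // The sequence from the patch:
      pushl(mem1);
      pushl(mem2 + kWord);  // +kWord: the first push moved ESP down one slot.
      popl(mem1 + kWord);   // +kWord: one pushed value is still parked on the stack.
      popl(mem2);
      assert(word(esp + mem1) == 222 && word(esp + mem2) == 111);
      return 0;
    }
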
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index e6e7fb7..8c56e35 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -106,6 +106,7 @@
X86Assembler* GetAssembler() const;
private:
+  void Exchange(Register reg1, Register reg2);
void Exchange(Register reg, int mem);
void Exchange(int mem1, int mem2);
void Exchange32(XmmRegister reg, int mem);
@@ -164,7 +165,7 @@
void HandleBitwiseOperation(HBinaryOperation* instruction);
void GenerateDivRemIntegral(HBinaryOperation* instruction);
void DivRemOneOrMinusOne(HBinaryOperation* instruction);
- void DivByPowerOfTwo(HBinaryOperation* instruction);
+ void DivByPowerOfTwo(HDiv* instruction);
void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
void GenerateRemFP(HRem *rem);
void HandleShift(HBinaryOperation* instruction);
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 5710ec5..01b24ea 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -2348,12 +2348,7 @@
CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
CpuRegister input_register = locations->InAt(0).AsRegister<CpuRegister>();
- int64_t imm;
- if (second.GetConstant()->IsLongConstant()) {
- imm = second.GetConstant()->AsLongConstant()->GetValue();
- } else {
- imm = second.GetConstant()->AsIntConstant()->GetValue();
- }
+ int64_t imm = Int64FromConstant(second.GetConstant());
DCHECK(imm == 1 || imm == -1);
@@ -2383,25 +2378,18 @@
}
default:
- LOG(FATAL) << "Unreachable";
+ LOG(FATAL) << "Unexpected type for div by (-)1 " << instruction->GetResultType();
}
}
-void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HBinaryOperation* instruction) {
- DCHECK(instruction->IsDiv());
-
+void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) {
LocationSummary* locations = instruction->GetLocations();
Location second = locations->InAt(1);
CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
- int64_t imm;
- if (instruction->GetResultType() == Primitive::kPrimLong) {
- imm = second.GetConstant()->AsLongConstant()->GetValue();
- } else {
- imm = second.GetConstant()->AsIntConstant()->GetValue();
- }
+ int64_t imm = Int64FromConstant(second.GetConstant());
DCHECK(IsPowerOfTwo(std::abs(imm)));
@@ -2462,7 +2450,7 @@
int64_t magic;
int shift;
- // TODO: can these branch be written as one?
+ // TODO: can these branches be written as one?
if (instruction->GetResultType() == Primitive::kPrimInt) {
int imm = second.GetConstant()->AsIntConstant()->GetValue();
@@ -2526,7 +2514,7 @@
__ imulq(numerator);
if (imm > 0 && magic < 0) {
- // RDX += numeratorerator
+ // RDX += numerator
__ addq(rdx, numerator);
} else if (imm < 0 && magic > 0) {
// RDX -= numerator
@@ -2576,19 +2564,14 @@
DCHECK_EQ(is_div ? RAX : RDX, out.AsRegister());
if (second.IsConstant()) {
- int64_t imm;
- if (second.GetConstant()->AsLongConstant()) {
- imm = second.GetConstant()->AsLongConstant()->GetValue();
- } else {
- imm = second.GetConstant()->AsIntConstant()->GetValue();
- }
+ int64_t imm = Int64FromConstant(second.GetConstant());
if (imm == 0) {
// Do not generate anything. DivZeroCheck would prevent any code to be executed.
} else if (imm == 1 || imm == -1) {
DivRemOneOrMinusOne(instruction);
} else if (instruction->IsDiv() && IsPowerOfTwo(std::abs(imm))) {
- DivByPowerOfTwo(instruction);
+ DivByPowerOfTwo(instruction->AsDiv());
} else {
DCHECK(imm <= -2 || imm >= 2);
GenerateDivRemWithAnyConstant(instruction);
@@ -3838,15 +3821,27 @@
void ParallelMoveResolverX86_64::Exchange64(int mem1, int mem2) {
ScratchRegisterScope ensure_scratch(
- this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
+ this, TMP, codegen_->GetNumberOfCoreRegisters());
- int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
- __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
- __ movq(CpuRegister(ensure_scratch.GetRegister()),
- Address(CpuRegister(RSP), mem2 + stack_offset));
- __ movq(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
- __ movq(Address(CpuRegister(RSP), mem1 + stack_offset),
- CpuRegister(ensure_scratch.GetRegister()));
+ int temp_reg = ensure_scratch.GetRegister();
+ if (temp_reg == kNoRegister) {
+ // Use the stack as a temporary.
+ // Save mem1 on the stack.
+ __ pushq(Address(CpuRegister(RSP), mem1));
+
+ // Copy mem2 into mem1.
+ __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem2 + kX86_64WordSize));
+ __ movq(Address(CpuRegister(RSP), mem1 + kX86_64WordSize), CpuRegister(TMP));
+
+ // Now pop mem1 into mem2.
+ __ popq(Address(CpuRegister(RSP), mem2));
+ } else {
+ CpuRegister temp = CpuRegister(temp_reg);
+ __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem1));
+ __ movq(temp, Address(CpuRegister(RSP), mem2));
+ __ movq(Address(CpuRegister(RSP), mem2), CpuRegister(TMP));
+ __ movq(Address(CpuRegister(RSP), mem1), temp);
+ }
}
void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) {
@@ -3855,6 +3850,13 @@
__ movd(reg, CpuRegister(TMP));
}
+void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg1, CpuRegister reg2) {
+ // Prefer to avoid xchg as it isn't speedy on smaller processors.
+ __ movq(CpuRegister(TMP), reg1);
+ __ movq(reg1, reg2);
+ __ movq(reg2, CpuRegister(TMP));
+}
+
void ParallelMoveResolverX86_64::Exchange64(XmmRegister reg, int mem) {
__ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
__ movsd(Address(CpuRegister(RSP), mem), reg);
@@ -3867,7 +3869,7 @@
Location destination = move->GetDestination();
if (source.IsRegister() && destination.IsRegister()) {
- __ xchgq(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>());
+ Exchange64(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>());
} else if (source.IsRegister() && destination.IsStackSlot()) {
Exchange32(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
} else if (source.IsStackSlot() && destination.IsRegister()) {
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index aae7de0..61bf6ac 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -118,6 +118,7 @@
void Exchange32(CpuRegister reg, int mem);
void Exchange32(XmmRegister reg, int mem);
void Exchange32(int mem1, int mem2);
+ void Exchange64(CpuRegister reg1, CpuRegister reg2);
void Exchange64(CpuRegister reg, int mem);
void Exchange64(XmmRegister reg, int mem);
void Exchange64(int mem1, int mem2);
@@ -174,7 +175,7 @@
void HandleBitwiseOperation(HBinaryOperation* operation);
void GenerateRemFP(HRem *rem);
void DivRemOneOrMinusOne(HBinaryOperation* instruction);
- void DivByPowerOfTwo(HBinaryOperation* instruction);
+ void DivByPowerOfTwo(HDiv* instruction);
void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
void GenerateDivRemIntegral(HBinaryOperation* instruction);
void HandleShift(HBinaryOperation* operation);
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 56ec8a7..afbc490 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -24,9 +24,21 @@
class InstructionSimplifierVisitor : public HGraphVisitor {
public:
InstructionSimplifierVisitor(HGraph* graph, OptimizingCompilerStats* stats)
- : HGraphVisitor(graph), stats_(stats) {}
+ : HGraphVisitor(graph),
+ stats_(stats) {}
+
+ void Run();
private:
+ void RecordSimplification() {
+ simplification_occurred_ = true;
+ simplifications_at_current_position_++;
+ if (stats_) {
+ stats_->RecordStat(kInstructionSimplifications);
+ }
+ }
+
+ bool TryMoveNegOnInputsAfterBinop(HBinaryOperation* binop);
void VisitShift(HBinaryOperation* shift);
void VisitSuspendCheck(HSuspendCheck* check) OVERRIDE;
@@ -40,6 +52,8 @@
void VisitAnd(HAnd* instruction) OVERRIDE;
void VisitDiv(HDiv* instruction) OVERRIDE;
void VisitMul(HMul* instruction) OVERRIDE;
+ void VisitNeg(HNeg* instruction) OVERRIDE;
+ void VisitNot(HNot* instruction) OVERRIDE;
void VisitOr(HOr* instruction) OVERRIDE;
void VisitShl(HShl* instruction) OVERRIDE;
void VisitShr(HShr* instruction) OVERRIDE;
@@ -48,11 +62,38 @@
void VisitXor(HXor* instruction) OVERRIDE;
OptimizingCompilerStats* stats_;
+ bool simplification_occurred_ = false;
+ int simplifications_at_current_position_ = 0;
+  // We ensure we do not loop infinitely. The value is a finger-in-the-air guess
+ // that should allow enough simplification.
+ static constexpr int kMaxSamePositionSimplifications = 10;
};
void InstructionSimplifier::Run() {
InstructionSimplifierVisitor visitor(graph_, stats_);
- visitor.VisitInsertionOrder();
+ visitor.Run();
+}
+
+void InstructionSimplifierVisitor::Run() {
+ for (HReversePostOrderIterator it(*GetGraph()); !it.Done();) {
+ // The simplification of an instruction to another instruction may yield
+ // possibilities for other simplifications. So although we perform a reverse
+ // post order visit, we sometimes need to revisit an instruction index.
+ simplification_occurred_ = false;
+ VisitBasicBlock(it.Current());
+ if (simplification_occurred_ &&
+ (simplifications_at_current_position_ < kMaxSamePositionSimplifications)) {
+ // New simplifications may be applicable to the instruction at the
+ // current index, so don't advance the iterator.
+ continue;
+ }
+ if (simplifications_at_current_position_ >= kMaxSamePositionSimplifications) {
+ LOG(WARNING) << "Too many simplifications (" << simplifications_at_current_position_
+ << ") occurred at the current position.";
+ }
+ simplifications_at_current_position_ = 0;
+ it.Advance();
+ }
}
namespace {
@@ -63,6 +104,35 @@
} // namespace
+// Returns true if the code was simplified to use only one negation operation
+// after the binary operation instead of one on each of the inputs.
+bool InstructionSimplifierVisitor::TryMoveNegOnInputsAfterBinop(HBinaryOperation* binop) {
+ DCHECK(binop->IsAdd() || binop->IsSub());
+ DCHECK(binop->GetLeft()->IsNeg() && binop->GetRight()->IsNeg());
+ HNeg* left_neg = binop->GetLeft()->AsNeg();
+ HNeg* right_neg = binop->GetRight()->AsNeg();
+ if (!left_neg->HasOnlyOneNonEnvironmentUse() ||
+ !right_neg->HasOnlyOneNonEnvironmentUse()) {
+ return false;
+ }
+ // Replace code looking like
+ // NEG tmp1, a
+ // NEG tmp2, b
+ // ADD dst, tmp1, tmp2
+ // with
+ // ADD tmp, a, b
+ // NEG dst, tmp
+ binop->ReplaceInput(left_neg->GetInput(), 0);
+ binop->ReplaceInput(right_neg->GetInput(), 1);
+ left_neg->GetBlock()->RemoveInstruction(left_neg);
+ right_neg->GetBlock()->RemoveInstruction(right_neg);
+ HNeg* neg = new (GetGraph()->GetArena()) HNeg(binop->GetType(), binop);
+ binop->GetBlock()->InsertInstructionBefore(neg, binop->GetNext());
+ binop->ReplaceWithExceptInReplacementAtIndex(neg, 0);
+ RecordSimplification();
+ return true;
+}
+
void InstructionSimplifierVisitor::VisitShift(HBinaryOperation* instruction) {
DCHECK(instruction->IsShl() || instruction->IsShr() || instruction->IsUShr());
HConstant* input_cst = instruction->GetConstantRight();
@@ -182,6 +252,36 @@
// src
instruction->ReplaceWith(input_other);
instruction->GetBlock()->RemoveInstruction(instruction);
+ return;
+ }
+
+ HInstruction* left = instruction->GetLeft();
+ HInstruction* right = instruction->GetRight();
+ bool left_is_neg = left->IsNeg();
+ bool right_is_neg = right->IsNeg();
+
+ if (left_is_neg && right_is_neg) {
+ if (TryMoveNegOnInputsAfterBinop(instruction)) {
+ return;
+ }
+ }
+
+ HNeg* neg = left_is_neg ? left->AsNeg() : right->AsNeg();
+ if ((left_is_neg ^ right_is_neg) && neg->HasOnlyOneNonEnvironmentUse()) {
+ // Replace code looking like
+ // NEG tmp, b
+ // ADD dst, a, tmp
+ // with
+ // SUB dst, a, b
+ // We do not perform the optimization if the input negation has environment
+ // uses or multiple non-environment uses as it could lead to worse code. In
+ // particular, we do not want the live range of `b` to be extended if we are
+ // not sure the initial 'NEG' instruction can be removed.
+ HInstruction* other = left_is_neg ? right : left;
+    HSub* sub = new (GetGraph()->GetArena()) HSub(instruction->GetType(), other, neg->GetInput());
+ instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, sub);
+ RecordSimplification();
+ neg->GetBlock()->RemoveInstruction(neg);
}
}
@@ -201,7 +301,7 @@
// We assume that GVN has run before, so we only perform a pointer comparison.
// If for some reason the values are equal but the pointers are different, we
- // are still correct and only miss an optimisation opportunity.
+ // are still correct and only miss an optimization opportunity.
if (instruction->GetLeft() == instruction->GetRight()) {
// Replace code looking like
// AND dst, src, src
@@ -235,6 +335,7 @@
// NEG dst, src
instruction->GetBlock()->ReplaceAndRemoveInstructionWith(
instruction, (new (GetGraph()->GetArena()) HNeg(type, input_other)));
+ RecordSimplification();
}
}
@@ -267,6 +368,7 @@
// NEG dst, src
HNeg* neg = new (allocator) HNeg(type, input_other);
block->ReplaceAndRemoveInstructionWith(instruction, neg);
+ RecordSimplification();
return;
}
@@ -280,6 +382,7 @@
// The 'int' and 'long' cases are handled below.
block->ReplaceAndRemoveInstructionWith(instruction,
new (allocator) HAdd(type, input_other, input_other));
+ RecordSimplification();
return;
}
@@ -295,10 +398,75 @@
HIntConstant* shift = GetGraph()->GetIntConstant(WhichPowerOf2(factor));
HShl* shl = new(allocator) HShl(type, input_other, shift);
block->ReplaceAndRemoveInstructionWith(instruction, shl);
+ RecordSimplification();
}
}
}
+void InstructionSimplifierVisitor::VisitNeg(HNeg* instruction) {
+ HInstruction* input = instruction->GetInput();
+ if (input->IsNeg()) {
+ // Replace code looking like
+ // NEG tmp, src
+ // NEG dst, tmp
+ // with
+ // src
+ HNeg* previous_neg = input->AsNeg();
+ instruction->ReplaceWith(previous_neg->GetInput());
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ // We perform the optimization even if the input negation has environment
+ // uses since it allows removing the current instruction. But we only delete
+    // the input negation if it does not have any uses left.
+ if (!previous_neg->HasUses()) {
+ previous_neg->GetBlock()->RemoveInstruction(previous_neg);
+ }
+ RecordSimplification();
+ return;
+ }
+
+ if (input->IsSub() && input->HasOnlyOneNonEnvironmentUse()) {
+ // Replace code looking like
+ // SUB tmp, a, b
+ // NEG dst, tmp
+ // with
+ // SUB dst, b, a
+ // We do not perform the optimization if the input subtraction has
+ // environment uses or multiple non-environment uses as it could lead to
+ // worse code. In particular, we do not want the live ranges of `a` and `b`
+ // to be extended if we are not sure the initial 'SUB' instruction can be
+ // removed.
+ HSub* sub = input->AsSub();
+ HSub* new_sub =
+ new (GetGraph()->GetArena()) HSub(instruction->GetType(), sub->GetRight(), sub->GetLeft());
+ instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, new_sub);
+ if (!sub->HasUses()) {
+ sub->GetBlock()->RemoveInstruction(sub);
+ }
+ RecordSimplification();
+ }
+}
+
+void InstructionSimplifierVisitor::VisitNot(HNot* instruction) {
+ HInstruction* input = instruction->GetInput();
+ if (input->IsNot()) {
+ // Replace code looking like
+ // NOT tmp, src
+ // NOT dst, tmp
+ // with
+ // src
+    // We perform the optimization even if the input not operation has environment
+    // uses since it allows removing the current instruction. But we only delete
+    // the input not operation if it does not have any uses left.
+ HNot* previous_not = input->AsNot();
+ instruction->ReplaceWith(previous_not->GetInput());
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ if (!previous_not->HasUses()) {
+ previous_not->GetBlock()->RemoveInstruction(previous_not);
+ }
+ RecordSimplification();
+ }
+}
+
void InstructionSimplifierVisitor::VisitOr(HOr* instruction) {
HConstant* input_cst = instruction->GetConstantRight();
HInstruction* input_other = instruction->GetLeastConstantLeft();
@@ -315,7 +483,7 @@
// We assume that GVN has run before, so we only perform a pointer comparison.
// If for some reason the values are equal but the pointers are different, we
- // are still correct and only miss an optimisation opportunity.
+ // are still correct and only miss an optimization opportunity.
if (instruction->GetLeft() == instruction->GetRight()) {
// Replace code looking like
// OR dst, src, src
@@ -356,20 +524,61 @@
HBasicBlock* block = instruction->GetBlock();
ArenaAllocator* allocator = GetGraph()->GetArena();
- if (instruction->GetLeft()->IsConstant()) {
- int64_t left = Int64FromConstant(instruction->GetLeft()->AsConstant());
- if (left == 0) {
+ HInstruction* left = instruction->GetLeft();
+ HInstruction* right = instruction->GetRight();
+ if (left->IsConstant()) {
+ if (Int64FromConstant(left->AsConstant()) == 0) {
// Replace code looking like
// SUB dst, 0, src
// with
// NEG dst, src
- // Note that we cannot optimise `0.0 - x` to `-x` for floating-point. When
+ // Note that we cannot optimize `0.0 - x` to `-x` for floating-point. When
       // `x` is `0.0`, the former expression yields `0.0`, while the latter
// yields `-0.0`.
- HNeg* neg = new (allocator) HNeg(type, instruction->GetRight());
+ HNeg* neg = new (allocator) HNeg(type, right);
block->ReplaceAndRemoveInstructionWith(instruction, neg);
+ RecordSimplification();
+ return;
}
}
+
+ if (left->IsNeg() && right->IsNeg()) {
+ if (TryMoveNegOnInputsAfterBinop(instruction)) {
+ return;
+ }
+ }
+
+ if (right->IsNeg() && right->HasOnlyOneNonEnvironmentUse()) {
+ // Replace code looking like
+ // NEG tmp, b
+ // SUB dst, a, tmp
+ // with
+ // ADD dst, a, b
+    HAdd* add = new (GetGraph()->GetArena()) HAdd(type, left, right->AsNeg()->GetInput());
+ instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, add);
+ RecordSimplification();
+ right->GetBlock()->RemoveInstruction(right);
+ return;
+ }
+
+ if (left->IsNeg() && left->HasOnlyOneNonEnvironmentUse()) {
+ // Replace code looking like
+ // NEG tmp, a
+ // SUB dst, tmp, b
+ // with
+ // ADD tmp, a, b
+ // NEG dst, tmp
+ // The second version is not intrinsically better, but enables more
+ // transformations.
+    HAdd* add = new (GetGraph()->GetArena()) HAdd(type, left->AsNeg()->GetInput(), right);
+ instruction->GetBlock()->InsertInstructionBefore(add, instruction);
+ HNeg* neg = new (GetGraph()->GetArena()) HNeg(instruction->GetType(), add);
+ instruction->GetBlock()->InsertInstructionBefore(neg, instruction);
+ instruction->ReplaceWith(neg);
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ RecordSimplification();
+ left->GetBlock()->RemoveInstruction(left);
+ }
}
void InstructionSimplifierVisitor::VisitUShr(HUShr* instruction) {
@@ -397,6 +606,7 @@
// NOT dst, src
HNot* bitwise_not = new (GetGraph()->GetArena()) HNot(instruction->GetType(), input_other);
instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, bitwise_not);
+ RecordSimplification();
return;
}
}
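
Note: a concrete multi-step case for the revisit loop in
InstructionSimplifierVisitor::Run() above is exercised by the NegNeg2 test
added below. For `temp + -temp` the builder emits, in the patch's own notation:

    NEG tmp1, arg
    NEG tmp2, tmp1
    ADD dst, tmp1, tmp2

VisitNeg folds the double negation, turning the add into `ADD dst, tmp1, arg`
(tmp1 keeps its remaining use, so it stays). That rewrite sets
simplification_occurred_, and when the add is next visited it has a single-use
NEG input and becomes `SUB dst, arg, arg`. Whether the second rewrite lands in
the same pass or in the revisit depends on instruction order; the revisit
guarantees that a pattern exposed for an already-visited instruction is still
found before the pass gives up on the block.
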
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index f764eb4..5f50494 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1177,6 +1177,9 @@
bool HasUses() const { return !uses_.IsEmpty() || !env_uses_.IsEmpty(); }
bool HasEnvironmentUses() const { return !env_uses_.IsEmpty(); }
bool HasNonEnvironmentUses() const { return !uses_.IsEmpty(); }
+ bool HasOnlyOneNonEnvironmentUse() const {
+ return !HasEnvironmentUses() && GetUses().HasOnlyOneUse();
+ }
// Does this instruction strictly dominate `other_instruction`?
// Returns false if this instruction and `other_instruction` are the same.
@@ -1214,6 +1217,13 @@
void ReplaceWith(HInstruction* instruction);
void ReplaceInput(HInstruction* replacement, size_t index);
+ // This is almost the same as doing `ReplaceWith()`. But in this helper, the
+ // uses of this instruction by `other` are *not* updated.
+ void ReplaceWithExceptInReplacementAtIndex(HInstruction* other, size_t use_index) {
+ ReplaceWith(other);
+ other->ReplaceInput(this, use_index);
+ }
+
// Move `this` instruction before `cursor`.
void MoveBefore(HInstruction* cursor);
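
Note: the helper above is tailored to TryMoveNegOnInputsAfterBinop in this same
change, where the replacement instruction itself consumes the instruction being
replaced. From that hunk:

    HNeg* neg = new (GetGraph()->GetArena()) HNeg(binop->GetType(), binop);
    binop->GetBlock()->InsertInstructionBefore(neg, binop->GetNext());
    // Every use of `binop` moves to `neg`, except `neg`'s own input at
    // index 0, which must keep pointing at `binop`.
    binop->ReplaceWithExceptInReplacementAtIndex(neg, 0);
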
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index f642dfc..c2b5c99 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -200,20 +200,6 @@
InstructionSetPointerSize(GetCompilerDriver()->GetInstructionSet())));
}
- bool WriteElf(art::File* file,
- OatWriter* oat_writer,
- const std::vector<const art::DexFile*>& dex_files,
- const std::string& android_root,
- bool is_host) const OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
- if (kProduce64BitELFFiles && Is64BitInstructionSet(GetCompilerDriver()->GetInstructionSet())) {
- return art::ElfWriterQuick64::Create(file, oat_writer, dex_files, android_root, is_host,
- *GetCompilerDriver());
- } else {
- return art::ElfWriterQuick32::Create(file, oat_writer, dex_files, android_root, is_host,
- *GetCompilerDriver());
- }
- }
-
void InitCompilationUnit(CompilationUnit& cu) const OVERRIDE;
void Init() OVERRIDE;
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index b97a667..4d5b8d0 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -47,6 +47,7 @@
kNotCompiledUnhandledInstruction,
kRemovedCheckedCast,
kRemovedNullCheck,
+ kInstructionSimplifications,
kLastStat
};
@@ -110,6 +111,7 @@
case kNotCompiledUnhandledInstruction : return "kNotCompiledUnhandledInstruction";
case kRemovedCheckedCast: return "kRemovedCheckedCast";
case kRemovedNullCheck: return "kRemovedNullCheck";
+ case kInstructionSimplifications: return "kInstructionSimplifications";
default: LOG(FATAL) << "invalid stat";
}
return "";
diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc
index 9df8f56..4936685 100644
--- a/compiler/optimizing/parallel_move_resolver.cc
+++ b/compiler/optimizing/parallel_move_resolver.cc
@@ -269,6 +269,20 @@
}
+int ParallelMoveResolver::AllocateScratchRegister(int blocked,
+ int register_count) {
+ int scratch = -1;
+ for (int reg = 0; reg < register_count; ++reg) {
+ if ((blocked != reg) && IsScratchLocation(Location::RegisterLocation(reg))) {
+ scratch = reg;
+ break;
+ }
+ }
+
+ return scratch;
+}
+
+
ParallelMoveResolver::ScratchRegisterScope::ScratchRegisterScope(
ParallelMoveResolver* resolver, int blocked, int if_scratch, int number_of_registers)
: resolver_(resolver),
@@ -282,6 +296,16 @@
}
+ParallelMoveResolver::ScratchRegisterScope::ScratchRegisterScope(
+ ParallelMoveResolver* resolver, int blocked, int number_of_registers)
+ : resolver_(resolver),
+ reg_(kNoRegister),
+ spilled_(false) {
+  // If no register is free, do not spill one; GetRegister() will return kNoRegister.
+ reg_ = resolver_->AllocateScratchRegister(blocked, number_of_registers);
+}
+
+
ParallelMoveResolver::ScratchRegisterScope::~ScratchRegisterScope() {
if (spilled_) {
resolver_->RestoreScratch(reg_);
diff --git a/compiler/optimizing/parallel_move_resolver.h b/compiler/optimizing/parallel_move_resolver.h
index 3fa1b37..173cffc 100644
--- a/compiler/optimizing/parallel_move_resolver.h
+++ b/compiler/optimizing/parallel_move_resolver.h
@@ -42,10 +42,15 @@
protected:
class ScratchRegisterScope : public ValueObject {
public:
+ // Spill a scratch register if no regs are free.
ScratchRegisterScope(ParallelMoveResolver* resolver,
int blocked,
int if_scratch,
int number_of_registers);
+ // Grab a scratch register only if available.
+ ScratchRegisterScope(ParallelMoveResolver* resolver,
+ int blocked,
+ int number_of_registers);
~ScratchRegisterScope();
int GetRegister() const { return reg_; }
@@ -62,6 +67,8 @@
// Allocate a scratch register for performing a move. The method will try to use
// a register that is the destination of a move, but that move has not been emitted yet.
int AllocateScratchRegister(int blocked, int if_scratch, int register_count, bool* spilled);
+ // As above, but return -1 if no free register.
+ int AllocateScratchRegister(int blocked, int register_count);
// Emit a move.
virtual void EmitMove(size_t index) = 0;
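
Note: the intended usage of the new two-argument AllocateScratchRegister and
ScratchRegisterScope is the try-a-register-else-use-the-stack pattern adopted
throughout the x86 resolver in this change, e.g. in MoveMemoryToMemory32:

    ScratchRegisterScope possible_scratch(
        this, kNoRegister, codegen_->GetNumberOfCoreRegisters());
    int temp = possible_scratch.GetRegister();
    if (temp == kNoRegister) {
      // No free register: fall back to a push/pop through the stack.
      __ pushl(Address(ESP, src));
      __ popl(Address(ESP, dst));
    } else {
      Register temp_reg = static_cast<Register>(temp);
      __ movl(temp_reg, Address(ESP, src));
      __ movl(Address(ESP, dst), temp_reg);
    }

Unlike the three-argument form, GetRegister() here may return kNoRegister and
never spills, so every caller must provide a register-free fallback.
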
diff --git a/runtime/mirror/art_method-inl.h b/runtime/mirror/art_method-inl.h
index 0ccf5db..fb427dc 100644
--- a/runtime/mirror/art_method-inl.h
+++ b/runtime/mirror/art_method-inl.h
@@ -36,7 +36,7 @@
namespace mirror {
inline uint32_t ArtMethod::ClassSize() {
- uint32_t vtable_entries = Object::kVTableLength + 8;
+ uint32_t vtable_entries = Object::kVTableLength + 7;
return Class::ComputeClassSize(true, vtable_entries, 0, 0, 0, 0, 0);
}
diff --git a/runtime/mirror/art_method.h b/runtime/mirror/art_method.h
index 22481ce..55b8068 100644
--- a/runtime/mirror/art_method.h
+++ b/runtime/mirror/art_method.h
@@ -183,6 +183,10 @@
SetField32<false>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, method_index_), new_method_index);
}
+ static MemberOffset DexMethodIndexOffset() {
+ return OFFSET_OF_OBJECT_MEMBER(ArtMethod, dex_method_index_);
+ }
+
static MemberOffset MethodIndexOffset() {
return OFFSET_OF_OBJECT_MEMBER(ArtMethod, method_index_);
}
@@ -214,6 +218,8 @@
return OFFSET_OF_OBJECT_MEMBER(ArtMethod, dex_cache_resolved_types_);
}
+ ALWAYS_INLINE ObjectArray<ArtMethod>* GetDexCacheResolvedMethods()
+ SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
ALWAYS_INLINE ArtMethod* GetDexCacheResolvedMethod(uint16_t method_idx)
SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
ALWAYS_INLINE void SetDexCacheResolvedMethod(uint16_t method_idx, ArtMethod* new_method)
@@ -434,10 +440,6 @@
EntryPointFromJniOffset(pointer_size), entrypoint, pointer_size);
}
- static MemberOffset GetMethodIndexOffset() {
- return OFFSET_OF_OBJECT_MEMBER(ArtMethod, method_index_);
- }
-
// Is this a CalleSaveMethod or ResolutionMethod and therefore doesn't adhere to normal
// conventions for a method of managed code. Returns false for Proxy methods.
bool IsRuntimeMethod() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -604,9 +606,6 @@
private:
ALWAYS_INLINE void CheckObjectSizeEqualsMirrorSize() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
- ALWAYS_INLINE ObjectArray<ArtMethod>* GetDexCacheResolvedMethods()
- SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
ALWAYS_INLINE ObjectArray<Class>* GetDexCacheResolvedTypes()
SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/runtime/mirror/dex_cache-inl.h b/runtime/mirror/dex_cache-inl.h
index 288e88e..6758e22 100644
--- a/runtime/mirror/dex_cache-inl.h
+++ b/runtime/mirror/dex_cache-inl.h
@@ -27,7 +27,7 @@
namespace mirror {
inline uint32_t DexCache::ClassSize() {
- uint32_t vtable_entries = Object::kVTableLength + 1;
+ uint32_t vtable_entries = Object::kVTableLength + 5;
return Class::ComputeClassSize(true, vtable_entries, 0, 0, 0, 0, 0);
}
diff --git a/runtime/native/java_lang_Class.cc b/runtime/native/java_lang_Class.cc
index c893f0a..b657aec 100644
--- a/runtime/native/java_lang_Class.cc
+++ b/runtime/native/java_lang_Class.cc
@@ -91,6 +91,18 @@
return soa.AddLocalReference<jclass>(c.Get());
}
+static jobject Class_findOverriddenMethodIfProxy(JNIEnv* env, jclass, jobject art_method) {
+ ScopedFastNativeObjectAccess soa(env);
+ mirror::ArtMethod* method = soa.Decode<mirror::ArtMethod*>(art_method);
+ mirror::Class* declaring_klass = method->GetDeclaringClass();
+ if (!declaring_klass->IsProxyClass()) {
+ return art_method;
+ }
+ uint32_t dex_method_index = method->GetDexMethodIndex();
+  mirror::ArtMethod* overridden_method =
+      method->GetDexCacheResolvedMethods()->Get(dex_method_index);
+  return soa.AddLocalReference<jobject>(overridden_method);
+}
+
static jstring Class_getNameNative(JNIEnv* env, jobject javaThis) {
ScopedFastNativeObjectAccess soa(env);
StackHandleScope<1> hs(soa.Self());
@@ -264,6 +276,8 @@
static JNINativeMethod gMethods[] = {
NATIVE_METHOD(Class, classForName, "!(Ljava/lang/String;ZLjava/lang/ClassLoader;)Ljava/lang/Class;"),
+ NATIVE_METHOD(Class, findOverriddenMethodIfProxy,
+ "!(Ljava/lang/reflect/ArtMethod;)Ljava/lang/reflect/ArtMethod;"),
NATIVE_METHOD(Class, getNameNative, "!()Ljava/lang/String;"),
NATIVE_METHOD(Class, getProxyInterfaces, "!()[Ljava/lang/Class;"),
NATIVE_METHOD(Class, getDeclaredFields, "!()[Ljava/lang/reflect/Field;"),
diff --git a/runtime/native/java_lang_DexCache.cc b/runtime/native/java_lang_DexCache.cc
index 27eae46..8944270 100644
--- a/runtime/native/java_lang_DexCache.cc
+++ b/runtime/native/java_lang_DexCache.cc
@@ -48,8 +48,38 @@
args);
}
+static jobject DexCache_getResolvedType(JNIEnv* env, jobject javaDexCache, jint type_index) {
+ ScopedFastNativeObjectAccess soa(env);
+ mirror::DexCache* dex_cache = soa.Decode<mirror::DexCache*>(javaDexCache);
+ return soa.AddLocalReference<jobject>(dex_cache->GetResolvedType(type_index));
+}
+
+static jobject DexCache_getResolvedString(JNIEnv* env, jobject javaDexCache, jint string_index) {
+ ScopedFastNativeObjectAccess soa(env);
+ mirror::DexCache* dex_cache = soa.Decode<mirror::DexCache*>(javaDexCache);
+ return soa.AddLocalReference<jobject>(dex_cache->GetResolvedString(string_index));
+}
+
+static void DexCache_setResolvedType(JNIEnv* env, jobject javaDexCache, jint type_index,
+ jobject type) {
+ ScopedFastNativeObjectAccess soa(env);
+ mirror::DexCache* dex_cache = soa.Decode<mirror::DexCache*>(javaDexCache);
+ dex_cache->SetResolvedType(type_index, soa.Decode<mirror::Class*>(type));
+}
+
+static void DexCache_setResolvedString(JNIEnv* env, jobject javaDexCache, jint string_index,
+ jobject string) {
+ ScopedFastNativeObjectAccess soa(env);
+ mirror::DexCache* dex_cache = soa.Decode<mirror::DexCache*>(javaDexCache);
+ dex_cache->SetResolvedString(string_index, soa.Decode<mirror::String*>(string));
+}
+
static JNINativeMethod gMethods[] = {
NATIVE_METHOD(DexCache, getDexNative, "!()Lcom/android/dex/Dex;"),
+ NATIVE_METHOD(DexCache, getResolvedType, "!(I)Ljava/lang/Class;"),
+ NATIVE_METHOD(DexCache, getResolvedString, "!(I)Ljava/lang/String;"),
+ NATIVE_METHOD(DexCache, setResolvedType, "!(ILjava/lang/Class;)V"),
+ NATIVE_METHOD(DexCache, setResolvedString, "!(ILjava/lang/String;)V"),
};
void register_java_lang_DexCache(JNIEnv* env) {
diff --git a/test/458-checker-instruction-simplification/src/Main.java b/test/458-checker-instruction-simplification/src/Main.java
index 1f0017e..3cbcebb 100644
--- a/test/458-checker-instruction-simplification/src/Main.java
+++ b/test/458-checker-instruction-simplification/src/Main.java
@@ -309,6 +309,457 @@
return arg ^ -1;
}
+ /**
+ * Test that addition or subtraction operation with both inputs negated are
+ * optimized to use a single negation after the operation.
+ * The transformation tested is implemented in
+ * `InstructionSimplifierVisitor::TryMoveNegOnInputsAfterBinop`.
+ */
+
+ // CHECK-START: int Main.AddNegs1(int, int) instruction_simplifier (before)
+ // CHECK-DAG: [[Arg1:i\d+]] ParameterValue
+ // CHECK-DAG: [[Arg2:i\d+]] ParameterValue
+ // CHECK-DAG: [[Neg1:i\d+]] Neg [ [[Arg1]] ]
+ // CHECK-DAG: [[Neg2:i\d+]] Neg [ [[Arg2]] ]
+ // CHECK-DAG: [[Add:i\d+]] Add [ [[Neg1]] [[Neg2]] ]
+ // CHECK-DAG: Return [ [[Add]] ]
+
+ // CHECK-START: int Main.AddNegs1(int, int) instruction_simplifier (after)
+ // CHECK-DAG: [[Arg1:i\d+]] ParameterValue
+ // CHECK-DAG: [[Arg2:i\d+]] ParameterValue
+ // CHECK-NOT: Neg
+ // CHECK-DAG: [[Add:i\d+]] Add [ [[Arg1]] [[Arg2]] ]
+ // CHECK-DAG: [[Neg:i\d+]] Neg [ [[Add]] ]
+ // CHECK-DAG: Return [ [[Neg]] ]
+
+ public static int AddNegs1(int arg1, int arg2) {
+ return -arg1 + -arg2;
+ }
+
+ /**
+ * This is similar to the test-case AddNegs1, but the negations have
+ * multiple uses.
+ * The transformation tested is implemented in
+ * `InstructionSimplifierVisitor::TryMoveNegOnInputsAfterBinop`.
+ * The current code won't perform the previous optimization. The
+ * transformations do not look at other uses of their inputs. As they don't
+ * know what will happen with other uses, they do not take the risk of
+ * increasing the register pressure by creating or extending live ranges.
+ */
+
+ // CHECK-START: int Main.AddNegs2(int, int) instruction_simplifier (before)
+ // CHECK-DAG: [[Arg1:i\d+]] ParameterValue
+ // CHECK-DAG: [[Arg2:i\d+]] ParameterValue
+ // CHECK-DAG: [[Neg1:i\d+]] Neg [ [[Arg1]] ]
+ // CHECK-DAG: [[Neg2:i\d+]] Neg [ [[Arg2]] ]
+ // CHECK-DAG: [[Add1:i\d+]] Add [ [[Neg1]] [[Neg2]] ]
+ // CHECK-DAG: [[Add2:i\d+]] Add [ [[Neg1]] [[Neg2]] ]
+ // CHECK-DAG: [[Or:i\d+]] Or [ [[Add1]] [[Add2]] ]
+ // CHECK-DAG: Return [ [[Or]] ]
+
+ // CHECK-START: int Main.AddNegs2(int, int) instruction_simplifier (after)
+ // CHECK-DAG: [[Arg1:i\d+]] ParameterValue
+ // CHECK-DAG: [[Arg2:i\d+]] ParameterValue
+ // CHECK-DAG: [[Neg1:i\d+]] Neg [ [[Arg1]] ]
+ // CHECK-DAG: [[Neg2:i\d+]] Neg [ [[Arg2]] ]
+ // CHECK-DAG: [[Add1:i\d+]] Add [ [[Neg1]] [[Neg2]] ]
+ // CHECK-DAG: [[Add2:i\d+]] Add [ [[Neg1]] [[Neg2]] ]
+ // CHECK-NOT: Neg
+ // CHECK-DAG: [[Or:i\d+]] Or [ [[Add1]] [[Add2]] ]
+ // CHECK-DAG: Return [ [[Or]] ]
+
+ public static int AddNegs2(int arg1, int arg2) {
+ int temp1 = -arg1;
+ int temp2 = -arg2;
+ return (temp1 + temp2) | (temp1 + temp2);
+ }
+
+ /**
+ * This follows test-cases AddNegs1 and AddNegs2.
+ * The transformation tested is implemented in
+ * `InstructionSimplifierVisitor::TryMoveNegOnInputsAfterBinop`.
+ * The optimization should not happen if it moves an additional instruction in
+ * the loop.
+ */
+
+ // CHECK-START: long Main.AddNegs3(long, long) instruction_simplifier (before)
+ // -------------- Arguments and initial negation operations.
+ // CHECK-DAG: [[Arg1:j\d+]] ParameterValue
+ // CHECK-DAG: [[Arg2:j\d+]] ParameterValue
+ // CHECK-DAG: [[Neg1:j\d+]] Neg [ [[Arg1]] ]
+ // CHECK-DAG: [[Neg2:j\d+]] Neg [ [[Arg2]] ]
+ // CHECK: Goto
+ // -------------- Loop
+ // CHECK: SuspendCheck
+ // CHECK: [[Add:j\d+]] Add [ [[Neg1]] [[Neg2]] ]
+ // CHECK: Goto
+
+ // CHECK-START: long Main.AddNegs3(long, long) instruction_simplifier (after)
+ // -------------- Arguments and initial negation operations.
+ // CHECK-DAG: [[Arg1:j\d+]] ParameterValue
+ // CHECK-DAG: [[Arg2:j\d+]] ParameterValue
+ // CHECK-DAG: [[Neg1:j\d+]] Neg [ [[Arg1]] ]
+ // CHECK-DAG: [[Neg2:j\d+]] Neg [ [[Arg2]] ]
+ // CHECK: Goto
+ // -------------- Loop
+ // CHECK: SuspendCheck
+ // CHECK: [[Add:j\d+]] Add [ [[Neg1]] [[Neg2]] ]
+ // CHECK-NOT: Neg
+ // CHECK: Goto
+
+ public static long AddNegs3(long arg1, long arg2) {
+ long res = 0;
+ long n_arg1 = -arg1;
+ long n_arg2 = -arg2;
+ for (long i = 0; i < 1; i++) {
+ res += n_arg1 + n_arg2 + i;
+ }
+ return res;
+ }
+
+ /**
+ * Test the simplification of an addition with a negated argument into a
+ * subtraction.
+ * The transformation tested is implemented in `InstructionSimplifierVisitor::VisitAdd`.
+ */
+
+ // CHECK-START: long Main.AddNeg1(long, long) instruction_simplifier (before)
+ // CHECK-DAG: [[Arg1:j\d+]] ParameterValue
+ // CHECK-DAG: [[Arg2:j\d+]] ParameterValue
+ // CHECK-DAG: [[Neg:j\d+]] Neg [ [[Arg1]] ]
+ // CHECK-DAG: [[Add:j\d+]] Add [ [[Neg]] [[Arg2]] ]
+ // CHECK-DAG: Return [ [[Add]] ]
+
+ // CHECK-START: long Main.AddNeg1(long, long) instruction_simplifier (after)
+ // CHECK-DAG: [[Arg1:j\d+]] ParameterValue
+ // CHECK-DAG: [[Arg2:j\d+]] ParameterValue
+ // CHECK-DAG: [[Sub:j\d+]] Sub [ [[Arg2]] [[Arg1]] ]
+ // CHECK-DAG: Return [ [[Sub]] ]
+
+ // CHECK-START: long Main.AddNeg1(long, long) instruction_simplifier (after)
+ // CHECK-NOT: Neg
+ // CHECK-NOT: Add
+
+ public static long AddNeg1(long arg1, long arg2) {
+ return -arg1 + arg2;
+ }
+
+ /**
+ * This is similar to the test-case AddNeg1, but the negation has two uses.
+ * The transformation tested is implemented in `InstructionSimplifierVisitor::VisitAdd`.
+ * The current code won't perform the previous optimization. The
+ * transformations do not look at other uses of their inputs. As they don't
+ * know what will happen with other uses, they do not take the risk of
+ * increasing the register pressure by creating or extending live ranges.
+ */
+
+ // CHECK-START: long Main.AddNeg2(long, long) instruction_simplifier (before)
+ // CHECK-DAG: [[Arg1:j\d+]] ParameterValue
+ // CHECK-DAG: [[Arg2:j\d+]] ParameterValue
+ // CHECK-DAG: [[Neg:j\d+]] Neg [ [[Arg2]] ]
+ // CHECK-DAG: [[Add1:j\d+]] Add [ [[Arg1]] [[Neg]] ]
+ // CHECK-DAG: [[Add2:j\d+]] Add [ [[Arg1]] [[Neg]] ]
+ // CHECK-DAG: [[Res:j\d+]] Or [ [[Add1]] [[Add2]] ]
+ // CHECK-DAG: Return [ [[Res]] ]
+
+ // CHECK-START: long Main.AddNeg2(long, long) instruction_simplifier (after)
+ // CHECK-DAG: [[Arg1:j\d+]] ParameterValue
+ // CHECK-DAG: [[Arg2:j\d+]] ParameterValue
+ // CHECK-DAG: [[Neg:j\d+]] Neg [ [[Arg2]] ]
+ // CHECK-DAG: [[Add1:j\d+]] Add [ [[Arg1]] [[Neg]] ]
+ // CHECK-DAG: [[Add2:j\d+]] Add [ [[Arg1]] [[Neg]] ]
+ // CHECK-DAG: [[Res:j\d+]] Or [ [[Add1]] [[Add2]] ]
+ // CHECK-DAG: Return [ [[Res]] ]
+
+ // CHECK-START: long Main.AddNeg2(long, long) instruction_simplifier (after)
+ // CHECK-NOT: Sub
+
+ public static long AddNeg2(long arg1, long arg2) {
+ long temp = -arg2;
+ return (arg1 + temp) | (arg1 + temp);
+ }
+
+ /**
+ * Test simplification of the `-(-var)` pattern.
+ * The transformation tested is implemented in `InstructionSimplifierVisitor::VisitNeg`.
+ */
+
+ // CHECK-START: long Main.NegNeg1(long) instruction_simplifier (before)
+ // CHECK-DAG: [[Arg:j\d+]] ParameterValue
+ // CHECK-DAG: [[Neg1:j\d+]] Neg [ [[Arg]] ]
+ // CHECK-DAG: [[Neg2:j\d+]] Neg [ [[Neg1]] ]
+ // CHECK-DAG: Return [ [[Neg2]] ]
+
+ // CHECK-START: long Main.NegNeg1(long) instruction_simplifier (after)
+ // CHECK-DAG: [[Arg:j\d+]] ParameterValue
+ // CHECK-DAG: Return [ [[Arg]] ]
+
+ // CHECK-START: long Main.NegNeg1(long) instruction_simplifier (after)
+ // CHECK-NOT: Neg
+
+ public static long NegNeg1(long arg) {
+ return -(-arg);
+ }
+
+ /**
+ * Test 'multi-step' simplification, where a first transformation yields a
+ * new simplification possibility for the current instruction.
+ * The transformations tested are implemented in `InstructionSimplifierVisitor::VisitNeg`
+ * and in `InstructionSimplifierVisitor::VisitAdd`.
+ */
+
+ // CHECK-START: int Main.NegNeg2(int) instruction_simplifier (before)
+ // CHECK-DAG: [[Arg:i\d+]] ParameterValue
+ // CHECK-DAG: [[Neg1:i\d+]] Neg [ [[Arg]] ]
+ // CHECK-DAG: [[Neg2:i\d+]] Neg [ [[Neg1]] ]
+ // CHECK-DAG: [[Add:i\d+]] Add [ [[Neg1]] [[Neg2]] ]
+ // CHECK-DAG: Return [ [[Add]] ]
+
+ // CHECK-START: int Main.NegNeg2(int) instruction_simplifier (after)
+ // CHECK-DAG: [[Arg:i\d+]] ParameterValue
+ // CHECK-DAG: [[Sub:i\d+]] Sub [ [[Arg]] [[Arg]] ]
+ // CHECK-DAG: Return [ [[Sub]] ]
+
+ // CHECK-START: int Main.NegNeg2(int) instruction_simplifier (after)
+ // CHECK-NOT: Neg
+ // CHECK-NOT: Add
+
+ public static int NegNeg2(int arg) {
+ int temp = -arg;
+ return temp + -temp;
+ }
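+
+  // The two steps in NegNeg2 are: (1) `VisitNeg` replaces Neg(Neg(arg)) with
+  // the existing `arg`, which is always safe because it creates no new
+  // instruction even though the inner Neg has another use; (2) that leaves
+  // Add(Neg(arg), arg) with a single-use Neg, which `VisitAdd` then folds
+  // into the Sub(arg, arg) checked above.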
+
+ /**
+   * Test another 'multi-step' simplification, where a first transformation
+   * exposes a further simplification opportunity.
+ * The transformations tested are implemented in `InstructionSimplifierVisitor::VisitNeg`
+ * and in `InstructionSimplifierVisitor::VisitSub`.
+ */
+
+ // CHECK-START: long Main.NegNeg3(long) instruction_simplifier (before)
+ // CHECK-DAG: [[Arg:j\d+]] ParameterValue
+ // CHECK-DAG: [[Const0:j\d+]] LongConstant 0
+ // CHECK-DAG: [[Neg:j\d+]] Neg [ [[Arg]] ]
+ // CHECK-DAG: [[Sub:j\d+]] Sub [ [[Const0]] [[Neg]] ]
+ // CHECK-DAG: Return [ [[Sub]] ]
+
+ // CHECK-START: long Main.NegNeg3(long) instruction_simplifier (after)
+ // CHECK-DAG: [[Arg:j\d+]] ParameterValue
+ // CHECK-DAG: Return [ [[Arg]] ]
+
+ // CHECK-START: long Main.NegNeg3(long) instruction_simplifier (after)
+ // CHECK-NOT: Neg
+ // CHECK-NOT: Sub
+
+ public static long NegNeg3(long arg) {
+ return 0 - -arg;
+ }
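+
+  // Here the first step is presumably in `VisitSub`, rewriting the
+  // Sub(0, Neg(arg)) into Neg(Neg(arg)); `VisitNeg` then collapses the
+  // double negation to plain `arg`, as the after-graph above requires.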
+
+ /**
+ * Test that a negated subtraction is simplified to a subtraction with its
+ * arguments reversed.
+ * The transformation tested is implemented in `InstructionSimplifierVisitor::VisitNeg`.
+ */
+
+ // CHECK-START: int Main.NegSub1(int, int) instruction_simplifier (before)
+ // CHECK-DAG: [[Arg1:i\d+]] ParameterValue
+ // CHECK-DAG: [[Arg2:i\d+]] ParameterValue
+ // CHECK-DAG: [[Sub:i\d+]] Sub [ [[Arg1]] [[Arg2]] ]
+ // CHECK-DAG: [[Neg:i\d+]] Neg [ [[Sub]] ]
+ // CHECK-DAG: Return [ [[Neg]] ]
+
+ // CHECK-START: int Main.NegSub1(int, int) instruction_simplifier (after)
+ // CHECK-DAG: [[Arg1:i\d+]] ParameterValue
+ // CHECK-DAG: [[Arg2:i\d+]] ParameterValue
+ // CHECK-DAG: [[Sub:i\d+]] Sub [ [[Arg2]] [[Arg1]] ]
+ // CHECK-DAG: Return [ [[Sub]] ]
+
+ // CHECK-START: int Main.NegSub1(int, int) instruction_simplifier (after)
+ // CHECK-NOT: Neg
+
+ public static int NegSub1(int arg1, int arg2) {
+ return -(arg1 - arg2);
+ }
+
+ /**
+ * This is similar to the test-case NegSub1, but the subtraction has
+ * multiple uses.
+ * The transformation tested is implemented in `InstructionSimplifierVisitor::VisitNeg`.
+   * The optimization from NegSub1 must not fire here: the subtraction has to
+   * stay live for its other use, so reversing it into a new Sub would add an
+   * instruction and could extend live ranges, increasing register pressure.
+   * The simplifier therefore only rewrites when the subtraction has a single
+   * use, without inspecting what the other uses are.
+ */
+
+ // CHECK-START: int Main.NegSub2(int, int) instruction_simplifier (before)
+ // CHECK-DAG: [[Arg1:i\d+]] ParameterValue
+ // CHECK-DAG: [[Arg2:i\d+]] ParameterValue
+ // CHECK-DAG: [[Sub:i\d+]] Sub [ [[Arg1]] [[Arg2]] ]
+ // CHECK-DAG: [[Neg1:i\d+]] Neg [ [[Sub]] ]
+ // CHECK-DAG: [[Neg2:i\d+]] Neg [ [[Sub]] ]
+ // CHECK-DAG: [[Or:i\d+]] Or [ [[Neg1]] [[Neg2]] ]
+ // CHECK-DAG: Return [ [[Or]] ]
+
+ // CHECK-START: int Main.NegSub2(int, int) instruction_simplifier (after)
+ // CHECK-DAG: [[Arg1:i\d+]] ParameterValue
+ // CHECK-DAG: [[Arg2:i\d+]] ParameterValue
+ // CHECK-DAG: [[Sub:i\d+]] Sub [ [[Arg1]] [[Arg2]] ]
+ // CHECK-DAG: [[Neg1:i\d+]] Neg [ [[Sub]] ]
+ // CHECK-DAG: [[Neg2:i\d+]] Neg [ [[Sub]] ]
+ // CHECK-DAG: [[Or:i\d+]] Or [ [[Neg1]] [[Neg2]] ]
+ // CHECK-DAG: Return [ [[Or]] ]
+
+ public static int NegSub2(int arg1, int arg2) {
+ int temp = arg1 - arg2;
+ return -temp | -temp;
+ }
+
+ /**
+ * Test simplification of the `~~var` pattern.
+ * The transformation tested is implemented in `InstructionSimplifierVisitor::VisitNot`.
+ */
+
+ // CHECK-START: long Main.NotNot1(long) instruction_simplifier (before)
+ // CHECK-DAG: [[Arg:j\d+]] ParameterValue
+ // CHECK-DAG: [[ConstF1:j\d+]] LongConstant -1
+ // CHECK-DAG: [[Xor1:j\d+]] Xor [ [[Arg]] [[ConstF1]] ]
+ // CHECK-DAG: [[Xor2:j\d+]] Xor [ [[Xor1]] [[ConstF1]] ]
+ // CHECK-DAG: Return [ [[Xor2]] ]
+
+ // CHECK-START: long Main.NotNot1(long) instruction_simplifier (after)
+ // CHECK-DAG: [[Arg:j\d+]] ParameterValue
+ // CHECK-DAG: Return [ [[Arg]] ]
+
+ // CHECK-START: long Main.NotNot1(long) instruction_simplifier (after)
+ // CHECK-NOT: Xor
+
+ public static long NotNot1(long arg) {
+ return ~~arg;
+ }
+
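+  /**
+   * This is similar to the test-case NotNot1, but the inner Not has a second
+   * use. The `~~` collapse still fires because it replaces the outer Xor
+   * with the already-existing `arg`, creating no new instruction.
+   */
+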
+ // CHECK-START: int Main.NotNot2(int) instruction_simplifier (before)
+ // CHECK-DAG: [[Arg:i\d+]] ParameterValue
+ // CHECK-DAG: [[ConstF1:i\d+]] IntConstant -1
+ // CHECK-DAG: [[Xor1:i\d+]] Xor [ [[Arg]] [[ConstF1]] ]
+ // CHECK-DAG: [[Xor2:i\d+]] Xor [ [[Xor1]] [[ConstF1]] ]
+ // CHECK-DAG: [[Add:i\d+]] Add [ [[Xor1]] [[Xor2]] ]
+ // CHECK-DAG: Return [ [[Add]] ]
+
+ // CHECK-START: int Main.NotNot2(int) instruction_simplifier (after)
+ // CHECK-DAG: [[Arg:i\d+]] ParameterValue
+ // CHECK-DAG: [[Not:i\d+]] Not [ [[Arg]] ]
+ // CHECK-DAG: [[Add:i\d+]] Add [ [[Not]] [[Arg]] ]
+ // CHECK-DAG: Return [ [[Add]] ]
+
+ // CHECK-START: int Main.NotNot2(int) instruction_simplifier (after)
+ // CHECK-NOT: Xor
+
+ public static int NotNot2(int arg) {
+ int temp = ~arg;
+ return temp + ~temp;
+ }
+
+ /**
+ * Test the simplification of a subtraction with a negated argument.
+ * The transformation tested is implemented in `InstructionSimplifierVisitor::VisitSub`.
+ */
+
+ // CHECK-START: int Main.SubNeg1(int, int) instruction_simplifier (before)
+ // CHECK-DAG: [[Arg1:i\d+]] ParameterValue
+ // CHECK-DAG: [[Arg2:i\d+]] ParameterValue
+ // CHECK-DAG: [[Neg:i\d+]] Neg [ [[Arg1]] ]
+ // CHECK-DAG: [[Sub:i\d+]] Sub [ [[Neg]] [[Arg2]] ]
+ // CHECK-DAG: Return [ [[Sub]] ]
+
+ // CHECK-START: int Main.SubNeg1(int, int) instruction_simplifier (after)
+ // CHECK-DAG: [[Arg1:i\d+]] ParameterValue
+ // CHECK-DAG: [[Arg2:i\d+]] ParameterValue
+ // CHECK-DAG: [[Add:i\d+]] Add [ [[Arg1]] [[Arg2]] ]
+ // CHECK-DAG: [[Neg:i\d+]] Neg [ [[Add]] ]
+ // CHECK-DAG: Return [ [[Neg]] ]
+
+ // CHECK-START: int Main.SubNeg1(int, int) instruction_simplifier (after)
+ // CHECK-NOT: Sub
+
+ public static int SubNeg1(int arg1, int arg2) {
+ return -arg1 - arg2;
+ }
+
+ /**
+ * This is similar to the test-case SubNeg1, but the negation has
+ * multiple uses.
+ * The transformation tested is implemented in `InstructionSimplifierVisitor::VisitSub`.
+   * The optimization from SubNeg1 must not fire here: the negation has to
+   * stay live for its other use, so the rewrite would add an instruction and
+   * could extend live ranges, increasing register pressure. The simplifier
+   * therefore only rewrites when the negation has a single use, without
+   * inspecting what the other uses are.
+ */
+
+ // CHECK-START: int Main.SubNeg2(int, int) instruction_simplifier (before)
+ // CHECK-DAG: [[Arg1:i\d+]] ParameterValue
+ // CHECK-DAG: [[Arg2:i\d+]] ParameterValue
+ // CHECK-DAG: [[Neg:i\d+]] Neg [ [[Arg1]] ]
+ // CHECK-DAG: [[Sub1:i\d+]] Sub [ [[Neg]] [[Arg2]] ]
+ // CHECK-DAG: [[Sub2:i\d+]] Sub [ [[Neg]] [[Arg2]] ]
+ // CHECK-DAG: [[Or:i\d+]] Or [ [[Sub1]] [[Sub2]] ]
+ // CHECK-DAG: Return [ [[Or]] ]
+
+ // CHECK-START: int Main.SubNeg2(int, int) instruction_simplifier (after)
+ // CHECK-DAG: [[Arg1:i\d+]] ParameterValue
+ // CHECK-DAG: [[Arg2:i\d+]] ParameterValue
+ // CHECK-DAG: [[Neg:i\d+]] Neg [ [[Arg1]] ]
+ // CHECK-DAG: [[Sub1:i\d+]] Sub [ [[Neg]] [[Arg2]] ]
+ // CHECK-DAG: [[Sub2:i\d+]] Sub [ [[Neg]] [[Arg2]] ]
+ // CHECK-DAG: [[Or:i\d+]] Or [ [[Sub1]] [[Sub2]] ]
+ // CHECK-DAG: Return [ [[Or]] ]
+
+ // CHECK-START: int Main.SubNeg2(int, int) instruction_simplifier (after)
+ // CHECK-NOT: Add
+
+ public static int SubNeg2(int arg1, int arg2) {
+ int temp = -arg1;
+ return (temp - arg2) | (temp - arg2);
+ }
+
+ /**
+ * This follows test-cases SubNeg1 and SubNeg2.
+ * The transformation tested is implemented in `InstructionSimplifierVisitor::VisitSub`.
+   * The optimization must not fire if it would move an additional
+   * instruction into the loop: here the Neg is computed once before the
+   * loop, while the rewritten Add and Neg would both execute on every
+   * iteration.
+ */
+
+ // CHECK-START: long Main.SubNeg3(long, long) instruction_simplifier (before)
+ // -------------- Arguments and initial negation operation.
+ // CHECK-DAG: [[Arg1:j\d+]] ParameterValue
+ // CHECK-DAG: [[Arg2:j\d+]] ParameterValue
+ // CHECK-DAG: [[Neg:j\d+]] Neg [ [[Arg1]] ]
+ // CHECK: Goto
+ // -------------- Loop
+ // CHECK: SuspendCheck
+ // CHECK: [[Sub:j\d+]] Sub [ [[Neg]] [[Arg2]] ]
+ // CHECK: Goto
+
+ // CHECK-START: long Main.SubNeg3(long, long) instruction_simplifier (after)
+ // -------------- Arguments and initial negation operation.
+ // CHECK-DAG: [[Arg1:j\d+]] ParameterValue
+ // CHECK-DAG: [[Arg2:j\d+]] ParameterValue
+ // CHECK-DAG: [[Neg:j\d+]] Neg [ [[Arg1]] ]
+  // CHECK: Goto
+ // -------------- Loop
+ // CHECK: SuspendCheck
+ // CHECK: [[Sub:j\d+]] Sub [ [[Neg]] [[Arg2]] ]
+ // CHECK-NOT: Neg
+ // CHECK: Goto
+
+ public static long SubNeg3(long arg1, long arg2) {
+ long res = 0;
+ long temp = -arg1;
+ for (long i = 0; i < 1; i++) {
+ res += temp - arg2 - i;
+ }
+ return res;
+ }
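+
+  // A plausible guard for this (an assumption about the implementation, not
+  // confirmed by this patch): only rewrite when the Neg and the Sub are in
+  // the same basic block, so that no computation is pushed from the
+  // preheader into the loop body.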
+
public static void main(String[] args) {
int arg = 123456;
@@ -328,5 +779,20 @@
assertLongEquals(UShr0(arg), arg);
assertIntEquals(Xor0(arg), arg);
assertIntEquals(XorAllOnes(arg), ~arg);
+ assertIntEquals(AddNegs1(arg, arg + 1), -(arg + arg + 1));
+ assertIntEquals(AddNegs2(arg, arg + 1), -(arg + arg + 1));
+ assertLongEquals(AddNegs3(arg, arg + 1), -(2 * arg + 1));
+ assertLongEquals(AddNeg1(arg, arg + 1), 1);
+ assertLongEquals(AddNeg2(arg, arg + 1), -1);
+ assertLongEquals(NegNeg1(arg), arg);
+ assertIntEquals(NegNeg2(arg), 0);
+ assertLongEquals(NegNeg3(arg), arg);
+ assertIntEquals(NegSub1(arg, arg + 1), 1);
+ assertIntEquals(NegSub2(arg, arg + 1), 1);
+ assertLongEquals(NotNot1(arg), arg);
+ assertIntEquals(NotNot2(arg), -1);
+ assertIntEquals(SubNeg1(arg, arg + 1), -(arg + arg + 1));
+ assertIntEquals(SubNeg2(arg, arg + 1), -(arg + arg + 1));
+ assertLongEquals(SubNeg3(arg, arg + 1), -(2 * arg + 1));
}
}