Implement register allocator for floating point registers.
Also:
- Fix misuses of emitting the rex prefix in the x86_64 assembler.
- Fix movaps code generation in the x86_64 assembler.
Change-Id: Ib6dcf6e7c4a9c43368cfc46b02ba50f69ae69cbe
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 9df9d41..9e63f8b 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -191,12 +191,24 @@
stream << X86_64ManagedRegister::FromXmmRegister(FloatRegister(reg));
}
-void CodeGeneratorX86_64::SaveCoreRegister(Location stack_location, uint32_t reg_id) {
- __ movq(Address(CpuRegister(RSP), stack_location.GetStackIndex()), CpuRegister(reg_id));
+size_t CodeGeneratorX86_64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
+ __ movq(Address(CpuRegister(RSP), stack_index), CpuRegister(reg_id));
+ return kX86_64WordSize;
}
-void CodeGeneratorX86_64::RestoreCoreRegister(Location stack_location, uint32_t reg_id) {
- __ movq(CpuRegister(reg_id), Address(CpuRegister(RSP), stack_location.GetStackIndex()));
+size_t CodeGeneratorX86_64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
+ __ movq(CpuRegister(reg_id), Address(CpuRegister(RSP), stack_index));
+ return kX86_64WordSize;
+}
+
+size_t CodeGeneratorX86_64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
+ __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
+ return kX86_64WordSize;
+}
+
+size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
+ __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
+ return kX86_64WordSize;
}
CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph)
@@ -727,6 +739,26 @@
// Will be generated at use site.
}
+void LocationsBuilderX86_64::VisitFloatConstant(HFloatConstant* constant) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
+ locations->SetOut(Location::ConstantLocation(constant));
+}
+
+void InstructionCodeGeneratorX86_64::VisitFloatConstant(HFloatConstant* constant) {
+ // Will be generated at use site.
+}
+
+void LocationsBuilderX86_64::VisitDoubleConstant(HDoubleConstant* constant) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
+ locations->SetOut(Location::ConstantLocation(constant));
+}
+
+void InstructionCodeGeneratorX86_64::VisitDoubleConstant(HDoubleConstant* constant) {
+ // Will be generated at use site.
+}
+
void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) {
ret->SetLocations(nullptr);
}
@@ -995,7 +1027,7 @@
case Primitive::kPrimDouble:
case Primitive::kPrimFloat: {
locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::Any());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
locations->SetOut(Location::SameAsFirstInput());
break;
}
@@ -1032,21 +1064,12 @@
}
case Primitive::kPrimFloat: {
- if (second.IsFpuRegister()) {
- __ addss(first.As<XmmRegister>(), second.As<XmmRegister>());
- } else {
- __ addss(first.As<XmmRegister>(),
- Address(CpuRegister(RSP), second.GetStackIndex()));
- }
+ __ addss(first.As<XmmRegister>(), second.As<XmmRegister>());
break;
}
case Primitive::kPrimDouble: {
- if (second.IsFpuRegister()) {
- __ addsd(first.As<XmmRegister>(), second.As<XmmRegister>());
- } else {
- __ addsd(first.As<XmmRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
- }
+ __ addsd(first.As<XmmRegister>(), second.As<XmmRegister>());
break;
}
@@ -1482,10 +1505,30 @@
break;
}
- case Primitive::kPrimFloat:
- case Primitive::kPrimDouble:
- LOG(FATAL) << "Unimplemented register type " << instruction->GetType();
- UNREACHABLE();
+ case Primitive::kPrimFloat: {
+ uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
+ XmmRegister out = locations->Out().As<XmmRegister>();
+ if (index.IsConstant()) {
+ __ movss(out, Address(obj,
+ (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset));
+ } else {
+ __ movss(out, Address(obj, index.As<CpuRegister>(), TIMES_4, data_offset));
+ }
+ break;
+ }
+
+ case Primitive::kPrimDouble: {
+ uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
+ XmmRegister out = locations->Out().As<XmmRegister>();
+ if (index.IsConstant()) {
+ __ movsd(out, Address(obj,
+ (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset));
+ } else {
+ __ movsd(out, Address(obj, index.As<CpuRegister>(), TIMES_8, data_offset));
+ }
+ break;
+ }
+
case Primitive::kPrimVoid:
LOG(FATAL) << "Unreachable type " << instruction->GetType();
UNREACHABLE();
@@ -1509,6 +1552,8 @@
locations->SetInAt(2, Location::RequiresRegister());
if (value_type == Primitive::kPrimLong) {
locations->SetInAt(2, Location::RequiresRegister());
+ } else if (value_type == Primitive::kPrimFloat || value_type == Primitive::kPrimDouble) {
+ locations->SetInAt(2, Location::RequiresFpuRegister());
} else {
locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
}
@@ -1581,6 +1626,7 @@
__ movl(Address(obj, index.As<CpuRegister>(), TIMES_4, data_offset),
value.As<CpuRegister>());
} else {
+ DCHECK(value.IsConstant()) << value;
__ movl(Address(obj, index.As<CpuRegister>(), TIMES_4, data_offset),
Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
}
@@ -1609,10 +1655,34 @@
break;
}
- case Primitive::kPrimFloat:
- case Primitive::kPrimDouble:
- LOG(FATAL) << "Unimplemented register type " << instruction->GetType();
- UNREACHABLE();
+ case Primitive::kPrimFloat: {
+ uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
+ if (index.IsConstant()) {
+ size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+ DCHECK(value.IsFpuRegister());
+ __ movss(Address(obj, offset), value.As<XmmRegister>());
+ } else {
+ DCHECK(value.IsFpuRegister());
+ __ movss(Address(obj, index.As<CpuRegister>(), TIMES_4, data_offset),
+ value.As<XmmRegister>());
+ }
+ break;
+ }
+
+ case Primitive::kPrimDouble: {
+ uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
+ if (index.IsConstant()) {
+ size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
+ DCHECK(value.IsFpuRegister());
+ __ movsd(Address(obj, offset), value.As<XmmRegister>());
+ } else {
+ DCHECK(value.IsFpuRegister());
+ __ movsd(Address(obj, index.As<CpuRegister>(), TIMES_8, data_offset),
+ value.As<XmmRegister>());
+ }
+ break;
+ }
+
case Primitive::kPrimVoid:
LOG(FATAL) << "Unreachable type " << instruction->GetType();
UNREACHABLE();
@@ -1746,6 +1816,9 @@
if (destination.IsRegister()) {
__ movl(destination.As<CpuRegister>(),
Address(CpuRegister(RSP), source.GetStackIndex()));
+ } else if (destination.IsFpuRegister()) {
+ __ movss(destination.As<XmmRegister>(),
+ Address(CpuRegister(RSP), source.GetStackIndex()));
} else {
DCHECK(destination.IsStackSlot());
__ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
@@ -1755,6 +1828,8 @@
if (destination.IsRegister()) {
__ movq(destination.As<CpuRegister>(),
Address(CpuRegister(RSP), source.GetStackIndex()));
+ } else if (destination.IsFpuRegister()) {
+ __ movsd(destination.As<XmmRegister>(), Address(CpuRegister(RSP), source.GetStackIndex()));
} else {
DCHECK(destination.IsDoubleStackSlot());
__ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
@@ -1767,6 +1842,7 @@
if (destination.IsRegister()) {
__ movl(destination.As<CpuRegister>(), imm);
} else {
+ DCHECK(destination.IsStackSlot()) << destination;
__ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm);
}
} else if (constant->IsLongConstant()) {
@@ -1774,14 +1850,42 @@
if (destination.IsRegister()) {
__ movq(destination.As<CpuRegister>(), Immediate(value));
} else {
+ DCHECK(destination.IsDoubleStackSlot()) << destination;
__ movq(CpuRegister(TMP), Immediate(value));
__ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
}
+ } else if (constant->IsFloatConstant()) {
+ Immediate imm(bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue()));
+ if (destination.IsFpuRegister()) {
+ __ movl(CpuRegister(TMP), imm);
+ __ movd(destination.As<XmmRegister>(), CpuRegister(TMP));
+ } else {
+ DCHECK(destination.IsStackSlot()) << destination;
+ __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm);
+ }
} else {
- LOG(FATAL) << "Unimplemented constant type";
+ DCHECK(constant->IsDoubleConstant()) << constant->DebugName();
+ Immediate imm(bit_cast<double, int64_t>(constant->AsDoubleConstant()->GetValue()));
+ if (destination.IsFpuRegister()) {
+ __ movq(CpuRegister(TMP), imm);
+ __ movd(destination.As<XmmRegister>(), CpuRegister(TMP));
+ } else {
+ DCHECK(destination.IsDoubleStackSlot()) << destination;
+ __ movq(CpuRegister(TMP), imm);
+ __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
+ }
}
- } else {
- LOG(FATAL) << "Unimplemented";
+ } else if (source.IsFpuRegister()) {
+ if (destination.IsFpuRegister()) {
+ __ movaps(destination.As<XmmRegister>(), source.As<XmmRegister>());
+ } else if (destination.IsStackSlot()) {
+ __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
+ source.As<XmmRegister>());
+ } else {
+ DCHECK(destination.IsDoubleStackSlot());
+ __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
+ source.As<XmmRegister>());
+ }
}
}
@@ -1823,6 +1927,18 @@
CpuRegister(ensure_scratch.GetRegister()));
}
+void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) {
+ __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
+ __ movss(Address(CpuRegister(RSP), mem), reg);
+ __ movd(reg, CpuRegister(TMP));
+}
+
+void ParallelMoveResolverX86_64::Exchange64(XmmRegister reg, int mem) {
+ __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
+ __ movsd(Address(CpuRegister(RSP), mem), reg);
+ __ movd(reg, CpuRegister(TMP));
+}
+
void ParallelMoveResolverX86_64::EmitSwap(size_t index) {
MoveOperands* move = moves_.Get(index);
Location source = move->GetSource();
@@ -1842,8 +1958,20 @@
Exchange64(destination.As<CpuRegister>(), source.GetStackIndex());
} else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) {
Exchange64(destination.GetStackIndex(), source.GetStackIndex());
+ } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
+ __ movd(CpuRegister(TMP), source.As<XmmRegister>());
+ __ movaps(source.As<XmmRegister>(), destination.As<XmmRegister>());
+ __ movd(destination.As<XmmRegister>(), CpuRegister(TMP));
+ } else if (source.IsFpuRegister() && destination.IsStackSlot()) {
+ Exchange32(source.As<XmmRegister>(), destination.GetStackIndex());
+ } else if (source.IsStackSlot() && destination.IsFpuRegister()) {
+ Exchange32(destination.As<XmmRegister>(), source.GetStackIndex());
+ } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) {
+ Exchange64(source.As<XmmRegister>(), destination.GetStackIndex());
+ } else if (source.IsDoubleStackSlot() && destination.IsFpuRegister()) {
+ Exchange64(destination.As<XmmRegister>(), source.GetStackIndex());
} else {
- LOG(FATAL) << "Unimplemented";
+ LOG(FATAL) << "Unimplemented swap between " << source << " and " << destination;
}
}