X86: Use short forward jumps if possible

The optimizing compiler uses 32-bit relative jumps for all forward jumps, just in case the offset is too large to fit in one byte. Some of the generated code knows that the jumps will in fact fit.

Use the 'NearLabel' class for the code generator and intrinsics.

Use the jecxz/jrcxz instruction for string intrinsics.

Unfortunately, conditional jumps to basic blocks don't know enough to use this, as we don't know how much code will be generated.

This saves a whopping 0.24% for core.oat and boot.oat sizes, but every little bit helps, and it reduces icache footprint slightly.

Change-Id: I633fe3b2e0e810b4ce12fdad8c02135644b63506
Signed-off-by: Mark Mendell <mark.p.mendell@intel.com>

commit: 0c9497da9485ba688c592e5f452b7b1305a519c0 [log] [tgz]
author: Mark Mendell <mark.p.mendell@intel.com> Fri Aug 21 09:30:05 2015 -0400
committer: Mark Mendell <mark.p.mendell@intel.com> Wed Aug 26 11:18:59 2015 -0400
tree: 7ebcf9c4ebea923131bf6064fd5924d54306e2b7
parent: 73f455ecb76d063846a82735eb80596ceee8cee3 [diff] [blame]
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 820ec78..4aae037 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc

@@ -1303,7 +1303,7 @@
   }
 
   // Convert the jumps into the result.
-  Label done_label;
+  NearLabel done_label;
 
   // False case: result = 0.
   __ Bind(&false_label);
@@ -1392,7 +1392,7 @@
   Location left = locations->InAt(0);
   Location right = locations->InAt(1);
 
-  Label less, greater, done;
+  NearLabel less, greater, done;
   Primitive::Type type = compare->InputAt(0)->GetType();
   switch (type) {
     case Primitive::kPrimLong: {
@@ -2117,7 +2117,7 @@
           // Processing a Dex `float-to-int' instruction.
           XmmRegister input = in.AsFpuRegister<XmmRegister>();
           CpuRegister output = out.AsRegister<CpuRegister>();
-          Label done, nan;
+          NearLabel done, nan;
 
           __ movl(output, Immediate(kPrimIntMax));
           // if input >= (float)INT_MAX goto done
@@ -2139,7 +2139,7 @@
           // Processing a Dex `double-to-int' instruction.
           XmmRegister input = in.AsFpuRegister<XmmRegister>();
           CpuRegister output = out.AsRegister<CpuRegister>();
-          Label done, nan;
+          NearLabel done, nan;
 
           __ movl(output, Immediate(kPrimIntMax));
           // if input >= (double)INT_MAX goto done
@@ -2181,7 +2181,7 @@
           // Processing a Dex `float-to-long' instruction.
           XmmRegister input = in.AsFpuRegister<XmmRegister>();
           CpuRegister output = out.AsRegister<CpuRegister>();
-          Label done, nan;
+          NearLabel done, nan;
 
           codegen_->Load64BitValue(output, kPrimLongMax);
           // if input >= (float)LONG_MAX goto done
@@ -2203,7 +2203,7 @@
           // Processing a Dex `double-to-long' instruction.
           XmmRegister input = in.AsFpuRegister<XmmRegister>();
           CpuRegister output = out.AsRegister<CpuRegister>();
-          Label done, nan;
+          NearLabel done, nan;
 
           codegen_->Load64BitValue(output, kPrimLongMax);
           // if input >= (double)LONG_MAX goto done
@@ -2766,7 +2766,7 @@
   PushOntoFPStack(first, 0, 2 * elem_size, is_float);
 
   // Loop doing FPREM until we stabilize.
-  Label retry;
+  NearLabel retry;
   __ Bind(&retry);
   __ fprem();
 
@@ -2920,8 +2920,8 @@
 
     __ movl(numerator, eax);
 
-    Label no_div;
-    Label end;
+    NearLabel no_div;
+    NearLabel end;
     __ testl(eax, eax);
     __ j(kNotEqual, &no_div);
 
@@ -4235,7 +4235,7 @@
                                      CpuRegister object,
                                      CpuRegister value,
                                      bool value_can_be_null) {
-  Label is_null;
+  NearLabel is_null;
   if (value_can_be_null) {
     __ testl(value, value);
     __ j(kEqual, &is_null);
@@ -4662,7 +4662,7 @@
   Location cls = locations->InAt(1);
   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-  Label done, zero;
+  NearLabel done, zero;
   SlowPathCodeX86_64* slow_path = nullptr;
 
   // Return 0 if `obj` is null.
commit	0c9497da9485ba688c592e5f452b7b1305a519c0	[log] [tgz]
author	Mark Mendell <mark.p.mendell@intel.com>	Fri Aug 21 09:30:05 2015 -0400
committer	Mark Mendell <mark.p.mendell@intel.com>	Wed Aug 26 11:18:59 2015 -0400
tree	7ebcf9c4ebea923131bf6064fd5924d54306e2b7
parent	73f455ecb76d063846a82735eb80596ceee8cee3 [diff] [blame]