Optimizing: Simplify consecutive type conversions.

Merge two consecutive type conversions into one if the result
of such merged conversion is guaranteed to be the same and
remove all implicit conversions, not just conversions to the
same type. Improve codegens to handle conversions from long
to integral types smaller than int.

This will make it easier to simplify `(byte) (x & 0xffL)` to
`(byte) x` where the conversion from long to byte is done by
two dex instructions, long-to-int and int-to-byte.

Bug: 23965701
Change-Id: I833f193556671136ad2cd3f5b31cdfbc2d99c19d
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 005b6c1..87f52c6 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -2079,6 +2079,8 @@
   switch (result_type) {
     case Primitive::kPrimByte:
       switch (input_type) {
+        case Primitive::kPrimLong:
+          // Type conversion from long to byte is a result of code transformations.
         case Primitive::kPrimBoolean:
           // Boolean input is a result of code transformations.
         case Primitive::kPrimShort:
@@ -2097,6 +2099,8 @@
 
     case Primitive::kPrimShort:
       switch (input_type) {
+        case Primitive::kPrimLong:
+          // Type conversion from long to short is a result of code transformations.
         case Primitive::kPrimBoolean:
           // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
@@ -2181,6 +2185,8 @@
 
     case Primitive::kPrimChar:
       switch (input_type) {
+        case Primitive::kPrimLong:
+          // Type conversion from long to char is a result of code transformations.
         case Primitive::kPrimBoolean:
           // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
@@ -2280,6 +2286,10 @@
   switch (result_type) {
     case Primitive::kPrimByte:
       switch (input_type) {
+        case Primitive::kPrimLong:
+          // Type conversion from long to byte is a result of code transformations.
+          __ sbfx(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>(), 0, 8);
+          break;
         case Primitive::kPrimBoolean:
           // Boolean input is a result of code transformations.
         case Primitive::kPrimShort:
@@ -2297,6 +2307,10 @@
 
     case Primitive::kPrimShort:
       switch (input_type) {
+        case Primitive::kPrimLong:
+          // Type conversion from long to short is a result of code transformations.
+          __ sbfx(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>(), 0, 16);
+          break;
         case Primitive::kPrimBoolean:
           // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
@@ -2398,6 +2412,10 @@
 
     case Primitive::kPrimChar:
       switch (input_type) {
+        case Primitive::kPrimLong:
+          // Type conversion from long to char is a result of code transformations.
+          __ ubfx(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>(), 0, 16);
+          break;
         case Primitive::kPrimBoolean:
           // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 71d65e8..119084e 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -3943,18 +3943,26 @@
         __ Andi(dst, src, 0xFFFF);
         break;
       case Primitive::kPrimByte:
-        // long is never converted into types narrower than int directly,
-        // so SEB and SEH can be used without ever causing unpredictable results
-        // on 64-bit inputs
-        DCHECK(input_type != Primitive::kPrimLong);
-        __ Seb(dst, src);
+        if (input_type == Primitive::kPrimLong) {
+          // Type conversion from long to types narrower than int is a result of code
+          // transformations. To avoid unpredictable results for SEB and SEH, we first
+          // need to sign-extend the low 32-bit value into bits 32 through 63.
+          __ Sll(dst, src, 0);
+          __ Seb(dst, dst);
+        } else {
+          __ Seb(dst, src);
+        }
         break;
       case Primitive::kPrimShort:
-        // long is never converted into types narrower than int directly,
-        // so SEB and SEH can be used without ever causing unpredictable results
-        // on 64-bit inputs
-        DCHECK(input_type != Primitive::kPrimLong);
-        __ Seh(dst, src);
+        if (input_type == Primitive::kPrimLong) {
+          // Type conversion from long to types narrower than int is a result of code
+          // transformations. To avoid unpredictable results for SEB and SEH, we first
+          // need to sign-extend the low 32-bit value into bits 32 through 63.
+          __ Sll(dst, src, 0);
+          __ Seh(dst, dst);
+        } else {
+          __ Seh(dst, src);
+        }
         break;
       case Primitive::kPrimInt:
       case Primitive::kPrimLong:
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 3713690..07edd97 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -2145,6 +2145,18 @@
   switch (result_type) {
     case Primitive::kPrimByte:
       switch (input_type) {
+        case Primitive::kPrimLong: {
+          // Type conversion from long to byte is a result of code transformations.
+          HInstruction* input = conversion->InputAt(0);
+          Location input_location = input->IsConstant()
+              ? Location::ConstantLocation(input->AsConstant())
+              : Location::RegisterPairLocation(EAX, EDX);
+          locations->SetInAt(0, input_location);
+          // Make the output overlap to please the register allocator. This greatly simplifies
+          // the validation of the linear scan implementation
+          locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+          break;
+        }
         case Primitive::kPrimBoolean:
           // Boolean input is a result of code transformations.
         case Primitive::kPrimShort:
@@ -2165,6 +2177,8 @@
 
     case Primitive::kPrimShort:
       switch (input_type) {
+        case Primitive::kPrimLong:
+          // Type conversion from long to short is a result of code transformations.
         case Primitive::kPrimBoolean:
           // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
@@ -2242,6 +2256,8 @@
 
     case Primitive::kPrimChar:
       switch (input_type) {
+        case Primitive::kPrimLong:
+          // Type conversion from long to char is a result of code transformations.
         case Primitive::kPrimBoolean:
           // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
@@ -2336,6 +2352,16 @@
   switch (result_type) {
     case Primitive::kPrimByte:
       switch (input_type) {
+        case Primitive::kPrimLong:
+          // Type conversion from long to byte is a result of code transformations.
+          if (in.IsRegisterPair()) {
+            __ movsxb(out.AsRegister<Register>(), in.AsRegisterPairLow<ByteRegister>());
+          } else {
+            DCHECK(in.GetConstant()->IsLongConstant());
+            int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
+            __ movl(out.AsRegister<Register>(), Immediate(static_cast<int8_t>(value)));
+          }
+          break;
         case Primitive::kPrimBoolean:
           // Boolean input is a result of code transformations.
         case Primitive::kPrimShort:
@@ -2359,6 +2385,18 @@
 
     case Primitive::kPrimShort:
       switch (input_type) {
+        case Primitive::kPrimLong:
+          // Type conversion from long to short is a result of code transformations.
+          if (in.IsRegisterPair()) {
+            __ movsxw(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
+          } else if (in.IsDoubleStackSlot()) {
+            __ movsxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
+          } else {
+            DCHECK(in.GetConstant()->IsLongConstant());
+            int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
+            __ movl(out.AsRegister<Register>(), Immediate(static_cast<int16_t>(value)));
+          }
+          break;
         case Primitive::kPrimBoolean:
           // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
@@ -2495,6 +2533,18 @@
 
     case Primitive::kPrimChar:
       switch (input_type) {
+        case Primitive::kPrimLong:
+          // Type conversion from long to char is a result of code transformations.
+          if (in.IsRegisterPair()) {
+            __ movzxw(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
+          } else if (in.IsDoubleStackSlot()) {
+            __ movzxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
+          } else {
+            DCHECK(in.GetConstant()->IsLongConstant());
+            int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
+            __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint16_t>(value)));
+          }
+          break;
         case Primitive::kPrimBoolean:
           // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 35603aa..a5c386d 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -2363,6 +2363,8 @@
   switch (result_type) {
     case Primitive::kPrimByte:
       switch (input_type) {
+        case Primitive::kPrimLong:
+          // Type conversion from long to byte is a result of code transformations.
         case Primitive::kPrimBoolean:
           // Boolean input is a result of code transformations.
         case Primitive::kPrimShort:
@@ -2381,6 +2383,8 @@
 
     case Primitive::kPrimShort:
       switch (input_type) {
+        case Primitive::kPrimLong:
+          // Type conversion from long to short is a result of code transformations.
         case Primitive::kPrimBoolean:
           // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
@@ -2458,6 +2462,8 @@
 
     case Primitive::kPrimChar:
       switch (input_type) {
+        case Primitive::kPrimLong:
+          // Type conversion from long to char is a result of code transformations.
         case Primitive::kPrimBoolean:
           // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
@@ -2552,6 +2558,8 @@
   switch (result_type) {
     case Primitive::kPrimByte:
       switch (input_type) {
+        case Primitive::kPrimLong:
+          // Type conversion from long to byte is a result of code transformations.
         case Primitive::kPrimBoolean:
           // Boolean input is a result of code transformations.
         case Primitive::kPrimShort:
@@ -2560,13 +2568,12 @@
           // Processing a Dex `int-to-byte' instruction.
           if (in.IsRegister()) {
             __ movsxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
-          } else if (in.IsStackSlot()) {
+          } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
             __ movsxb(out.AsRegister<CpuRegister>(),
                       Address(CpuRegister(RSP), in.GetStackIndex()));
           } else {
-            DCHECK(in.GetConstant()->IsIntConstant());
             __ movl(out.AsRegister<CpuRegister>(),
-                    Immediate(static_cast<int8_t>(in.GetConstant()->AsIntConstant()->GetValue())));
+                    Immediate(static_cast<int8_t>(Int64FromConstant(in.GetConstant()))));
           }
           break;
 
@@ -2578,6 +2585,8 @@
 
     case Primitive::kPrimShort:
       switch (input_type) {
+        case Primitive::kPrimLong:
+          // Type conversion from long to short is a result of code transformations.
         case Primitive::kPrimBoolean:
           // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
@@ -2586,13 +2595,12 @@
           // Processing a Dex `int-to-short' instruction.
           if (in.IsRegister()) {
             __ movsxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
-          } else if (in.IsStackSlot()) {
+          } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
             __ movsxw(out.AsRegister<CpuRegister>(),
                       Address(CpuRegister(RSP), in.GetStackIndex()));
           } else {
-            DCHECK(in.GetConstant()->IsIntConstant());
             __ movl(out.AsRegister<CpuRegister>(),
-                    Immediate(static_cast<int16_t>(in.GetConstant()->AsIntConstant()->GetValue())));
+                    Immediate(static_cast<int16_t>(Int64FromConstant(in.GetConstant()))));
           }
           break;
 
@@ -2735,6 +2743,8 @@
 
     case Primitive::kPrimChar:
       switch (input_type) {
+        case Primitive::kPrimLong:
+          // Type conversion from long to char is a result of code transformations.
         case Primitive::kPrimBoolean:
           // Boolean input is a result of code transformations.
         case Primitive::kPrimByte:
@@ -2743,14 +2753,12 @@
           // Processing a Dex `int-to-char' instruction.
           if (in.IsRegister()) {
             __ movzxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
-          } else if (in.IsStackSlot()) {
+          } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
             __ movzxw(out.AsRegister<CpuRegister>(),
                       Address(CpuRegister(RSP), in.GetStackIndex()));
           } else {
-            DCHECK(in.GetConstant()->IsIntConstant());
             __ movl(out.AsRegister<CpuRegister>(),
-                    Immediate(static_cast<uint16_t>(
-                        in.GetConstant()->AsIntConstant()->GetValue())));
+                    Immediate(static_cast<uint16_t>(Int64FromConstant(in.GetConstant()))));
           }
           break;
 
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 0029cc3..98f8009 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -757,11 +757,79 @@
   }
 }
 
+static bool IsTypeConversionImplicit(Primitive::Type input_type, Primitive::Type result_type) {
+  // Besides conversion to the same type, widening integral conversions are implicit,
+  // excluding conversions to long and the byte->char conversion where we need to
+  // clear the high 16 bits of the 32-bit sign-extended representation of byte.
+  return result_type == input_type ||
+      (result_type == Primitive::kPrimInt && input_type == Primitive::kPrimByte) ||
+      (result_type == Primitive::kPrimInt && input_type == Primitive::kPrimShort) ||
+      (result_type == Primitive::kPrimInt && input_type == Primitive::kPrimChar) ||
+      (result_type == Primitive::kPrimShort && input_type == Primitive::kPrimByte);
+}
+
+static bool IsTypeConversionLossless(Primitive::Type input_type, Primitive::Type result_type) {
+  // The conversion to a larger type is loss-less with the exception of two cases,
+  //   - conversion to char, the only unsigned type, where we may lose some bits, and
+  //   - conversion from float to long, the only FP to integral conversion with smaller FP type.
+  // For integral to FP conversions this holds because the FP mantissa is large enough.
+  DCHECK_NE(input_type, result_type);
+  return Primitive::ComponentSize(result_type) > Primitive::ComponentSize(input_type) &&
+      result_type != Primitive::kPrimChar &&
+      !(result_type == Primitive::kPrimLong && input_type == Primitive::kPrimFloat);
+}
+
 void InstructionSimplifierVisitor::VisitTypeConversion(HTypeConversion* instruction) {
-  if (instruction->GetResultType() == instruction->GetInputType()) {
-    // Remove the instruction if it's converting to the same type.
-    instruction->ReplaceWith(instruction->GetInput());
+  HInstruction* input = instruction->GetInput();
+  Primitive::Type input_type = input->GetType();
+  Primitive::Type result_type = instruction->GetResultType();
+  if (IsTypeConversionImplicit(input_type, result_type)) {
+    // Remove the implicit conversion; this includes conversion to the same type.
+    instruction->ReplaceWith(input);
     instruction->GetBlock()->RemoveInstruction(instruction);
+    RecordSimplification();
+    return;
+  }
+
+  if (input->IsTypeConversion()) {
+    HTypeConversion* input_conversion = input->AsTypeConversion();
+    HInstruction* original_input = input_conversion->GetInput();
+    Primitive::Type original_type = original_input->GetType();
+
+    // When the first conversion is lossless, a direct conversion from the original type
+    // to the final type yields the same result, even for a lossy second conversion, for
+    // example float->double->int or int->double->float.
+    bool is_first_conversion_lossless = IsTypeConversionLossless(original_type, input_type);
+
+    // For integral conversions, see if the first conversion loses only bits that the second
+    // doesn't need, i.e. the final type is no wider than the intermediate. If so, direct
+    // conversion yields the same result, for example long->int->short or int->char->short.
+    bool integral_conversions_with_non_widening_second =
+        Primitive::IsIntegralType(input_type) &&
+        Primitive::IsIntegralType(original_type) &&
+        Primitive::IsIntegralType(result_type) &&
+        Primitive::ComponentSize(result_type) <= Primitive::ComponentSize(input_type);
+
+    if (is_first_conversion_lossless || integral_conversions_with_non_widening_second) {
+      // If the merged conversion is implicit, do the simplification unconditionally.
+      if (IsTypeConversionImplicit(original_type, result_type)) {
+        instruction->ReplaceWith(original_input);
+        instruction->GetBlock()->RemoveInstruction(instruction);
+        if (!input_conversion->HasUses()) {
+          // Don't wait for DCE.
+          input_conversion->GetBlock()->RemoveInstruction(input_conversion);
+        }
+        RecordSimplification();
+        return;
+      }
+      // Otherwise simplify only if the first conversion has no other use.
+      if (input_conversion->HasOnlyOneNonEnvironmentUse()) {
+        input_conversion->ReplaceWith(original_input);
+        input_conversion->GetBlock()->RemoveInstruction(input_conversion);
+        RecordSimplification();
+        return;
+      }
+    }
   }
 }