ARM32: Combine LSR into ADD for Div by 2^n

Combining LSR into ADD reduces a number of used registers from three to
two and a needed number of instructions.

This CL implements this optimization.

Test: 411-checker-hdiv-hrem-pow2
Test: test.py --host --optimizing --jit --gtest --interpreter
Test: test.py --32 --target --optimizing --jit --interpreter
Test: run-gtests.sh
Change-Id: I230d2623e745884fe4278a860740829ee63750c6
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index cafb601..551de55 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -4222,18 +4222,16 @@
 
   vixl32::Register out = OutputRegister(instruction);
   vixl32::Register dividend = InputRegisterAt(instruction, 0);
-  vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
   int32_t imm = Int32ConstantFrom(second);
   uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
   int ctz_imm = CTZ(abs_imm);
 
-  if (ctz_imm == 1) {
-    __ Lsr(temp, dividend, 32 - ctz_imm);
-  } else {
-    __ Asr(temp, dividend, 31);
-    __ Lsr(temp, temp, 32 - ctz_imm);
+  vixl32::Register add_right_input = dividend;
+  if (ctz_imm > 1) {
+    __ Asr(out, dividend, 31);
+    add_right_input = out;
   }
-  __ Add(out, temp, dividend);
+  __ Add(out, dividend, Operand(add_right_input, vixl32::LSR, 32 - ctz_imm));
 
   if (instruction->IsDiv()) {
     __ Asr(out, out, ctz_imm);
@@ -4241,8 +4239,8 @@
       __ Rsb(out, out, 0);
     }
   } else {
-    __ Ubfx(out, out, 0, ctz_imm);
-    __ Sub(out, out, temp);
+    __ Bfc(out, 0, ctz_imm);
+    __ Sub(out, dividend, out);
   }
 }
 
@@ -4329,16 +4327,17 @@
       if (div->InputAt(1)->IsConstant()) {
         locations->SetInAt(0, Location::RequiresRegister());
         locations->SetInAt(1, Location::ConstantLocation(div->InputAt(1)->AsConstant()));
-        locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
         int32_t value = Int32ConstantFrom(div->InputAt(1));
+        Location::OutputOverlap out_overlaps = Location::kNoOutputOverlap;
         if (value == 1 || value == 0 || value == -1) {
           // No temp register required.
+        } else if (IsPowerOfTwo(AbsOrMin(value))) {
+          // The "out" register is used as a temporary, so it overlaps with the inputs.
+          out_overlaps = Location::kOutputOverlap;
         } else {
-          locations->AddTemp(Location::RequiresRegister());
-          if (!IsPowerOfTwo(AbsOrMin(value))) {
-            locations->AddTemp(Location::RequiresRegister());
-          }
+          locations->AddRegisterTemps(2);
         }
+        locations->SetOut(Location::RequiresRegister(), out_overlaps);
       } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
         locations->SetInAt(0, Location::RequiresRegister());
         locations->SetInAt(1, Location::RequiresRegister());
@@ -4442,16 +4441,17 @@
       if (rem->InputAt(1)->IsConstant()) {
         locations->SetInAt(0, Location::RequiresRegister());
         locations->SetInAt(1, Location::ConstantLocation(rem->InputAt(1)->AsConstant()));
-        locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
         int32_t value = Int32ConstantFrom(rem->InputAt(1));
+        Location::OutputOverlap out_overlaps = Location::kNoOutputOverlap;
         if (value == 1 || value == 0 || value == -1) {
           // No temp register required.
+        } else if (IsPowerOfTwo(AbsOrMin(value))) {
+          // The "out" register is used as a temporary, so it overlaps with the inputs.
+          out_overlaps = Location::kOutputOverlap;
         } else {
-          locations->AddTemp(Location::RequiresRegister());
-          if (!IsPowerOfTwo(AbsOrMin(value))) {
-            locations->AddTemp(Location::RequiresRegister());
-          }
+          locations->AddRegisterTemps(2);
         }
+        locations->SetOut(Location::RequiresRegister(), out_overlaps);
       } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
         locations->SetInAt(0, Location::RequiresRegister());
         locations->SetInAt(1, Location::RequiresRegister());
diff --git a/test/411-checker-hdiv-hrem-pow2/src/DivTest.java b/test/411-checker-hdiv-hrem-pow2/src/DivTest.java
index 1a086ef..e9e338c 100644
--- a/test/411-checker-hdiv-hrem-pow2/src/DivTest.java
+++ b/test/411-checker-hdiv-hrem-pow2/src/DivTest.java
@@ -91,9 +91,14 @@
     expectEquals(0, $noinline$IntDivByIntMin(Integer.MAX_VALUE));
   }
 
+  /// CHECK-START-ARM:   java.lang.Integer DivTest.$noinline$IntDivBy2(int) disassembly (after)
+  /// CHECK:                 add       r{{\d+}}, r{{\d+}}, r{{\d+}}, lsr #31
+  /// CHECK:                 asr{{s?}} r{{\d+}}, #1
+  //
   /// CHECK-START-ARM64: java.lang.Integer DivTest.$noinline$IntDivBy2(int) disassembly (after)
   /// CHECK:                 add w{{\d+}}, w{{\d+}}, w{{\d+}}, lsr #31
   /// CHECK:                 asr w{{\d+}}, w{{\d+}}, #1
+  //
   /// CHECK-START-X86_64: java.lang.Integer DivTest.$noinline$IntDivBy2(int) disassembly (after)
   /// CHECK-NOT:             cmovnl/geq
   /// CHECK:                 add
@@ -102,9 +107,15 @@
     return r;
   }
 
+  /// CHECK-START-ARM:   java.lang.Integer DivTest.$noinline$IntDivByMinus2(int) disassembly (after)
+  /// CHECK:                 add       r{{\d+}}, r{{\d+}}, r{{\d+}}, lsr #31
+  /// CHECK:                 asr{{s?}} r{{\d+}}, #1
+  /// CHECK:                 rsb{{s?}} r{{\d+}}, #0
+  //
   /// CHECK-START-ARM64: java.lang.Integer DivTest.$noinline$IntDivByMinus2(int) disassembly (after)
   /// CHECK:                 add w{{\d+}}, w{{\d+}}, w{{\d+}}, lsr #31
   /// CHECK:                 neg w{{\d+}}, w{{\d+}}, asr #1
+  //
   /// CHECK-START-X86_64: java.lang.Integer DivTest.$noinline$IntDivByMinus2(int) disassembly (after)
   /// CHECK-NOT:             cmovnl/geq
   /// CHECK:                 add
@@ -113,6 +124,11 @@
     return r;
   }
 
+  /// CHECK-START-ARM:   java.lang.Integer DivTest.$noinline$IntDivBy16(int) disassembly (after)
+  /// CHECK:                 asr{{s?}} r{{\d+}}, r{{\d+}}, #31
+  /// CHECK:                 add       r{{\d+}}, r{{\d+}}, r{{\d+}}, lsr #28
+  /// CHECK:                 asr{{s?}} r{{\d+}}, #4
+  //
   /// CHECK-START-ARM64: java.lang.Integer DivTest.$noinline$IntDivBy16(int) disassembly (after)
   /// CHECK:                add w{{\d+}}, w{{\d+}}, #0xf
   /// CHECK:                cmp w{{\d+}}, #0x0
@@ -123,6 +139,12 @@
     return r;
   }
 
+  /// CHECK-START-ARM:   java.lang.Integer DivTest.$noinline$IntDivByMinus16(int) disassembly (after)
+  /// CHECK:                 asr{{s?}} r{{\d+}}, r{{\d+}}, #31
+  /// CHECK:                 add       r{{\d+}}, r{{\d+}}, r{{\d+}}, lsr #28
+  /// CHECK:                 asr{{s?}} r{{\d+}}, #4
+  /// CHECK:                 rsb{{s?}} r{{\d+}}, #0
+  //
   /// CHECK-START-ARM64: java.lang.Integer DivTest.$noinline$IntDivByMinus16(int) disassembly (after)
   /// CHECK:                add w{{\d+}}, w{{\d+}}, #0xf
   /// CHECK:                cmp w{{\d+}}, #0x0
@@ -133,6 +155,12 @@
     return r;
   }
 
+  /// CHECK-START-ARM:   java.lang.Integer DivTest.$noinline$IntDivByIntMin(int) disassembly (after)
+  /// CHECK:                 asr{{s?}} r{{\d+}}, r{{\d+}}, #31
+  /// CHECK:                 add       r{{\d+}}, r{{\d+}}, r{{\d+}}, lsr #1
+  /// CHECK:                 asr{{s?}} r{{\d+}}, #31
+  /// CHECK:                 rsb{{s?}} r{{\d+}}, #0
+  //
   /// CHECK-START-ARM64: java.lang.Integer DivTest.$noinline$IntDivByIntMin(int) disassembly (after)
   /// CHECK:                mov w{{\d+}}, #0x7fffffff
   /// CHECK:                add w{{\d+}}, w{{\d+}}, w{{\d+}}
diff --git a/test/411-checker-hdiv-hrem-pow2/src/RemTest.java b/test/411-checker-hdiv-hrem-pow2/src/RemTest.java
index 54d7847..1d67c33 100644
--- a/test/411-checker-hdiv-hrem-pow2/src/RemTest.java
+++ b/test/411-checker-hdiv-hrem-pow2/src/RemTest.java
@@ -88,10 +88,16 @@
     expectEquals(Integer.MAX_VALUE, $noinline$IntModIntMin(Integer.MAX_VALUE));
   }
 
+  /// CHECK-START-ARM:   java.lang.Integer RemTest.$noinline$IntMod2(int) disassembly (after)
+  /// CHECK:                 add       r{{\d+}}, r{{\d+}}, r{{\d+}}, lsr #31
+  /// CHECK:                 bfc       r{{\d+}}, #0, #1
+  /// CHECK:                 sub{{s?}} r{{\d+}}, r{{\d+}}, r{{\d+}}
+  //
   /// CHECK-START-ARM64: java.lang.Integer RemTest.$noinline$IntMod2(int) disassembly (after)
   /// CHECK:                 cmp w{{\d+}}, #0x0
   /// CHECK:                 and w{{\d+}}, w{{\d+}}, #0x1
   /// CHECK:                 cneg w{{\d+}}, w{{\d+}}, lt
+  //
   /// CHECK-START-X86_64: java.lang.Integer RemTest.$noinline$IntMod2(int) disassembly (after)
   /// CHECK:          Rem [{{i\d+}},{{i\d+}}]
   /// CHECK-NOT:      imul
@@ -108,10 +114,16 @@
     return r;
   }
 
+  /// CHECK-START-ARM:   java.lang.Integer RemTest.$noinline$IntModMinus2(int) disassembly (after)
+  /// CHECK:                 add       r{{\d+}}, r{{\d+}}, r{{\d+}}, lsr #31
+  /// CHECK:                 bfc       r{{\d+}}, #0, #1
+  /// CHECK:                 sub{{s?}} r{{\d+}}, r{{\d+}}, r{{\d+}}
+  //
   /// CHECK-START-ARM64: java.lang.Integer RemTest.$noinline$IntModMinus2(int) disassembly (after)
   /// CHECK:                 cmp w{{\d+}}, #0x0
   /// CHECK:                 and w{{\d+}}, w{{\d+}}, #0x1
   /// CHECK:                 cneg w{{\d+}}, w{{\d+}}, lt
+  //
   /// CHECK-START-X86_64: java.lang.Integer RemTest.$noinline$IntModMinus2(int) disassembly (after)
   /// CHECK:          Rem [{{i\d+}},{{i\d+}}]
   /// CHECK-NOT:      imul
@@ -128,11 +140,18 @@
     return r;
   }
 
+  /// CHECK-START-ARM:   java.lang.Integer RemTest.$noinline$IntMod16(int) disassembly (after)
+  /// CHECK:                 asr{{s?}} r{{\d+}}, r{{\d+}}, #31
+  /// CHECK:                 add       r{{\d+}}, r{{\d+}}, r{{\d+}}, lsr #28
+  /// CHECK:                 bfc       r{{\d+}}, #0, #4
+  /// CHECK:                 sub{{s?}} r{{\d+}}, r{{\d+}}, r{{\d+}}
+  //
   /// CHECK-START-ARM64: java.lang.Integer RemTest.$noinline$IntMod16(int) disassembly (after)
   /// CHECK:                 negs w{{\d+}}, w{{\d+}}
   /// CHECK:                 and w{{\d+}}, w{{\d+}}, #0xf
   /// CHECK:                 and w{{\d+}}, w{{\d+}}, #0xf
   /// CHECK:                 csneg w{{\d+}}, w{{\d+}}, mi
+  //
   /// CHECK-START-X86_64: java.lang.Integer RemTest.$noinline$IntMod16(int) disassembly (after)
   /// CHECK:          Rem [{{i\d+}},{{i\d+}}]
   /// CHECK-NOT:      imul
@@ -149,11 +168,18 @@
     return r;
   }
 
+  /// CHECK-START-ARM:   java.lang.Integer RemTest.$noinline$IntModMinus16(int) disassembly (after)
+  /// CHECK:                 asr{{s?}} r{{\d+}}, r{{\d+}}, #31
+  /// CHECK:                 add       r{{\d+}}, r{{\d+}}, r{{\d+}}, lsr #28
+  /// CHECK:                 bfc       r{{\d+}}, #0, #4
+  /// CHECK:                 sub{{s?}} r{{\d+}}, r{{\d+}}, r{{\d+}}
+  //
   /// CHECK-START-ARM64: java.lang.Integer RemTest.$noinline$IntModMinus16(int) disassembly (after)
   /// CHECK:                 negs w{{\d+}}, w{{\d+}}
   /// CHECK:                 and w{{\d+}}, w{{\d+}}, #0xf
   /// CHECK:                 and w{{\d+}}, w{{\d+}}, #0xf
   /// CHECK:                 csneg w{{\d+}}, w{{\d+}}, mi
+  //
   /// CHECK-START-X86_64: java.lang.Integer RemTest.$noinline$IntModMinus16(int) disassembly (after)
   /// CHECK:          Rem [{{i\d+}},{{i\d+}}]
   /// CHECK-NOT:      imul
@@ -170,11 +196,18 @@
     return r;
   }
 
+  /// CHECK-START-ARM:   java.lang.Integer RemTest.$noinline$IntModIntMin(int) disassembly (after)
+  /// CHECK:                 asr{{s?}} r{{\d+}}, r{{\d+}}, #31
+  /// CHECK:                 add       r{{\d+}}, r{{\d+}}, r{{\d+}}, lsr #1
+  /// CHECK:                 bfc       r{{\d+}}, #0, #31
+  /// CHECK:                 sub{{s?}} r{{\d+}}, r{{\d+}}, r{{\d+}}
+  //
   /// CHECK-START-ARM64: java.lang.Integer RemTest.$noinline$IntModIntMin(int) disassembly (after)
   /// CHECK:                 negs w{{\d+}}, w{{\d+}}
   /// CHECK:                 and w{{\d+}}, w{{\d+}}, #0x7fffffff
   /// CHECK:                 and w{{\d+}}, w{{\d+}}, #0x7fffffff
   /// CHECK:                 csneg w{{\d+}}, w{{\d+}}, mi
+  //
   /// CHECK-START-X86_64: java.lang.Integer RemTest.$noinline$IntModIntMin(int) disassembly (after)
   /// CHECK:          Rem [{{i\d+}},{{i\d+}}]
   /// CHECK-NOT:      imul