No unrolling for large loop bodies.

Rationale:
should yield 1, not 0

Test: test-art-host test-art-target
Change-Id: I0ca68b2a5a4dba1c3e41248376002d9635716840
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index e150b65..baa0453 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -34,6 +34,9 @@
 // All current SIMD targets want 16-byte alignment.
 static constexpr size_t kAlignedBase = 16;
 
+// No loop unrolling factor (just one copy of the loop-body).
+static constexpr uint32_t kNoUnrollingFactor = 1;
+
 // Remove the instruction from the graph. A bit more elaborate than the usual
 // instruction removal, since there may be a cycle in the use structure.
 static void RemoveFromCycle(HInstruction* instruction) {
@@ -791,7 +794,7 @@
                     vector_index_,
                     ptc,
                     graph_->GetIntConstant(1),
-                    /*unroll*/ 1);
+                    kNoUnrollingFactor);
   }
 
   // Generate vector loop, possibly further unrolled:
@@ -818,7 +821,7 @@
                     vector_index_,
                     stc,
                     graph_->GetIntConstant(1),
-                    /*unroll*/ 1);
+                    kNoUnrollingFactor);
   }
 
   // Link reductions to their final uses.
@@ -1767,14 +1770,17 @@
 uint32_t HLoopOptimization::GetUnrollingFactor(HBasicBlock* block, int64_t trip_count) {
   switch (compiler_driver_->GetInstructionSet()) {
     case kArm64: {
-      DCHECK_NE(vector_length_, 0u);
+      // Don't unroll with insufficient iterations.
       // TODO: Unroll loops with unknown trip count.
+      DCHECK_NE(vector_length_, 0u);
       if (trip_count < 2 * vector_length_) {
-        return 1;
+        return kNoUnrollingFactor;
       }
-
+      // Don't unroll for large loop body size.
       uint32_t instruction_count = block->GetInstructions().CountSize();
-
+      if (instruction_count >= ARM64_SIMD_HEURISTIC_MAX_BODY_SIZE) {
+        return kNoUnrollingFactor;
+      }
       // Find a beneficial unroll factor with the following restrictions:
       //  - At least one iteration of the transformed loop should be executed.
       //  - The loop body shouldn't be "too big" (heuristic).
@@ -1783,13 +1789,12 @@
       uint32_t unroll_factor =
           TruncToPowerOfTwo(std::min({uf1, uf2, ARM64_SIMD_MAXIMUM_UNROLL_FACTOR}));
       DCHECK_GE(unroll_factor, 1u);
-
       return unroll_factor;
     }
     case kX86:
     case kX86_64:
     default:
-      return 1;
+      return kNoUnrollingFactor;
   }
 }
 
diff --git a/test/623-checker-loop-regressions/src/Main.java b/test/623-checker-loop-regressions/src/Main.java
index 9229d81..418be30 100644
--- a/test/623-checker-loop-regressions/src/Main.java
+++ b/test/623-checker-loop-regressions/src/Main.java
@@ -485,6 +485,18 @@
     return sum;
   }
 
+  // Large loop body should not break unrolling computation.
+  static void largeBody(int[] x) {
+    for (int i = 0; i < 100; i++) {
+      x[i] = x[i] * 1 + x[i] * 2 + x[i] * 3 + x[i] * 4 + x[i] * 5 + x[i] * 6 +
+          x[i] * 7 + x[i] * 8 + x[i] * 9 + x[i] * 10 + x[i] * 11 + x[i] * 12 +
+          x[i] * 13 + x[i] * 14 + x[i] * 15 + x[i] * 1 + x[i] * 2 + x[i] * 3 + x[i] * 4 +
+          x[i] * 5 + x[i] * 6 + x[i] * 7 + x[i] * 8 + x[i] * 9 + x[i] * 10 + x[i] * 11 +
+          x[i] * 12 + x[i] * 13 + x[i] * 14 + x[i] * 15 + x[i] * 1 + x[i] * 2 + x[i] * 3 +
+          x[i] * 4 + x[i] * 5;
+    }
+  }
+
   public static void main(String[] args) {
     expectEquals(10, earlyExitFirst(-1));
     for (int i = 0; i <= 10; i++) {
@@ -633,6 +645,11 @@
     }
     expectEquals(-2080768, sum(x));
 
+    largeBody(f);
+    for (int i = 0; i < 100; i++) {
+      expectEquals(2805, f[i]);
+    }
+
     System.out.println("passed");
   }