ARM64: Workaround for the callee saved FP registers and SIMD.
Treat as scheduling barriers those vector instructions whose live
ranges exceed the vectorized loop boundaries. This is a workaround
for the lack of notion of SIMD register in the compiler; around a
call we have to save/restore all live SIMD&FP registers (only
lower 64 bits of SIMD&FP registers are callee saved) so don't
reorder such vector instructions.
Test: 706-checker-scheduler, test-art-host, test-art-target
Bug: 69667779
Change-Id: I31e57518339d41545a0c519f7299afe381a8286c
diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h
index 096349f..87dff84 100644
--- a/compiler/optimizing/nodes_vector.h
+++ b/compiler/optimizing/nodes_vector.h
@@ -109,6 +109,16 @@
// Assumes vector nodes cannot be moved by default. Each concrete implementation
// that can be moved should override this method and return true.
+ //
+ // Note: similar approach is used for instruction scheduling (if it is turned on for the target):
+ // by default HScheduler::IsSchedulable returns false for a particular HVecOperation.
+ // HScheduler${ARCH}::IsSchedulable can be overridden to return true for an instruction (see
+ // scheduler_arm64.h for example) if it is safe to schedule it; in this case one *must* also
+ // look at/update HScheduler${ARCH}::IsSchedulingBarrier for this instruction.
+ //
+ // Note: For newly introduced vector instructions HScheduler${ARCH}::IsSchedulingBarrier must be
+ // altered to return true if the instruction might reside outside the SIMD loop body since SIMD
+ // registers are not kept alive across vector loop boundaries (yet).
bool CanBeMoved() const OVERRIDE { return false; }
// Tests if all data of a vector node (vector length and packed type) is equal.
diff --git a/compiler/optimizing/scheduler.h b/compiler/optimizing/scheduler.h
index bb7c353..dfa077f 100644
--- a/compiler/optimizing/scheduler.h
+++ b/compiler/optimizing/scheduler.h
@@ -462,6 +462,11 @@
// containing basic block from being scheduled.
// This method is used to restrict scheduling to instructions that we know are
// safe to handle.
+ //
+ // For newly introduced instructions by default HScheduler::IsSchedulable returns false.
+ // HScheduler${ARCH}::IsSchedulable can be overridden to return true for an instruction (see
+ // scheduler_arm64.h for example) if it is safe to schedule it; in this case one *must* also
+ // look at/update HScheduler${ARCH}::IsSchedulingBarrier for this instruction.
virtual bool IsSchedulable(const HInstruction* instruction) const;
bool IsSchedulable(const HBasicBlock* block) const;
diff --git a/compiler/optimizing/scheduler_arm64.h b/compiler/optimizing/scheduler_arm64.h
index 32f161f..f71cb5b 100644
--- a/compiler/optimizing/scheduler_arm64.h
+++ b/compiler/optimizing/scheduler_arm64.h
@@ -151,6 +151,20 @@
#undef CASE_INSTRUCTION_KIND
}
+ // Treat as scheduling barriers those vector instructions whose live ranges exceed the vectorized
+ // loop boundaries. This is a workaround for the lack of notion of SIMD register in the compiler;
+ // around a call we have to save/restore all live SIMD&FP registers (only lower 64 bits of
+ // SIMD&FP registers are callee saved) so don't reorder such vector instructions.
+ //
+ // TODO: remove this when a proper support of SIMD registers is introduced to the compiler.
+ bool IsSchedulingBarrier(const HInstruction* instr) const OVERRIDE {
+ return HScheduler::IsSchedulingBarrier(instr) ||
+ instr->IsVecReduce() ||
+ instr->IsVecExtractScalar() ||
+ instr->IsVecSetScalars() ||
+ instr->IsVecReplicateScalar();
+ }
+
private:
SchedulingLatencyVisitorARM64 arm64_latency_visitor_;
DISALLOW_COPY_AND_ASSIGN(HSchedulerARM64);
diff --git a/test/706-checker-scheduler/src/Main.java b/test/706-checker-scheduler/src/Main.java
index d21596d..25e4fad 100644
--- a/test/706-checker-scheduler/src/Main.java
+++ b/test/706-checker-scheduler/src/Main.java
@@ -523,7 +523,71 @@
return res;
}
+ private static void expectEquals(int expected, int result) {
+ if (expected != result) {
+ throw new Error("Expected: " + expected + ", found: " + result);
+ }
+ }
+
+ private static final int ARRAY_SIZE = 32;
+
+ // Check that VecReplicateScalar is not reordered.
+ /// CHECK-START-ARM64: void Main.testVecReplicateScalar() scheduler (before)
+ /// CHECK: Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK: NewArray loop:<<Loop>> outer_loop:none
+ /// CHECK: VecReplicateScalar loop:<<Loop>> outer_loop:none
+
+ /// CHECK-START-ARM64: void Main.testVecReplicateScalar() scheduler (after)
+ /// CHECK: Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK: NewArray loop:<<Loop>> outer_loop:none
+ /// CHECK: VecReplicateScalar loop:<<Loop>> outer_loop:none
+ private static void testVecReplicateScalar() {
+ for (int j = 0; j <= 8; j++) {
+ int[] a = new int[ARRAY_SIZE];
+ for (int i = 0; i < a.length; i++) {
+ a[i] += 1;
+ }
+ for (int i = 0; i < a.length; i++) {
+ expectEquals(1, a[i]);
+ }
+ }
+ }
+
+ // Check that VecSetScalars, VecReduce, VecExtractScalar are not reordered.
+ /// CHECK-START-ARM64: void Main.testVecSetScalars() scheduler (before)
+ /// CHECK: Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK: NewArray loop:<<Loop>> outer_loop:none
+ /// CHECK: VecSetScalars loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK: VecReduce loop:<<Loop>> outer_loop:none
+ /// CHECK: VecExtractScalar loop:<<Loop>> outer_loop:none
+ /// CHECK: InvokeStaticOrDirect loop:<<Loop>> outer_loop:none
+ /// CHECK: InvokeStaticOrDirect loop:<<Loop>> outer_loop:none
+
+ /// CHECK-START-ARM64: void Main.testVecSetScalars() scheduler (after)
+ /// CHECK: Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK: NewArray loop:<<Loop>> outer_loop:none
+ /// CHECK: VecSetScalars loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK: VecReduce loop:<<Loop>> outer_loop:none
+ /// CHECK: VecExtractScalar loop:<<Loop>> outer_loop:none
+ /// CHECK: InvokeStaticOrDirect loop:<<Loop>> outer_loop:none
+ /// CHECK: InvokeStaticOrDirect loop:<<Loop>> outer_loop:none
+ private static void testVecSetScalars() {
+ for (int j = 0; j <= 8; j++) {
+ int[] a = new int[ARRAY_SIZE];
+ int s = 5;
+ for (int i = 0; i < ARRAY_SIZE; i++) {
+ s+=a[i];
+ }
+ expectEquals(a[0], 0);
+ expectEquals(s, 5);
+ }
+ }
+
public static void main(String[] args) {
+ testVecSetScalars();
+ testVecReplicateScalar();
if ((arrayAccess() + intDiv(10)) != -35) {
System.out.println("FAIL");
}