Generalized zero/sign-ext analysis. Generalized SAD.
Rationale:
The more, the better. Some of the analysis was
overly conservative (e.g. extension does not
need to happen from terminals only as long
as vectorized guarantees higher order bits
don't contribute). Also, added hidden-SUB for SAD.
Test: test-art-host test-art-target
Bug: 64091002
Change-Id: I66afd8fb4292ce5cf14f98f9c5ce2bf2b8c98488
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 7e37018..fec64e2 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -74,19 +74,16 @@
// Forward declaration.
static bool IsZeroExtensionAndGet(HInstruction* instruction,
DataType::Type type,
- /*out*/ HInstruction** operand,
- bool to64 = false);
+ /*out*/ HInstruction** operand);
-// Detect a sign extension in instruction from the given type. The to64 parameter
-// denotes if result is long, and thus sign extension from int can be included.
+// Detect a sign extension in instruction from the given type.
// Returns the promoted operand on success.
static bool IsSignExtensionAndGet(HInstruction* instruction,
DataType::Type type,
- /*out*/ HInstruction** operand,
- bool to64 = false) {
+ /*out*/ HInstruction** operand) {
// Accept any already wider constant that would be handled properly by sign
// extension when represented in the *width* of the given narrower data type
- // (the fact that char normally zero extends does not matter here).
+ // (the fact that Uint16 normally zero extends does not matter here).
int64_t value = 0;
if (IsInt64AndGet(instruction, /*out*/ &value)) {
switch (type) {
@@ -103,43 +100,39 @@
return true;
}
return false;
- case DataType::Type::kInt32:
- if (IsInt<32>(value)) {
- *operand = instruction;
- return to64;
- }
- return false;
default:
return false;
}
}
- // An implicit widening conversion of a signed integer to an integral type sign-extends
- // the two's-complement representation of the integer value to fill the wider format.
- if (instruction->GetType() == type && (instruction->IsArrayGet() ||
- instruction->IsStaticFieldGet() ||
- instruction->IsInstanceFieldGet())) {
+ // An implicit widening conversion of any signed expression sign-extends.
+ if (instruction->GetType() == type) {
switch (type) {
case DataType::Type::kInt8:
case DataType::Type::kInt16:
*operand = instruction;
return true;
- case DataType::Type::kInt32:
- *operand = instruction;
- return to64;
default:
return false;
}
}
- // Explicit type conversions.
+ // An explicit widening conversion of a signed expression sign-extends.
if (instruction->IsTypeConversion()) {
- DataType::Type from = instruction->InputAt(0)->GetType();
+ HInstruction* conv = instruction->InputAt(0);
+ DataType::Type from = conv->GetType();
switch (instruction->GetType()) {
+ case DataType::Type::kInt32:
case DataType::Type::kInt64:
- return IsSignExtensionAndGet(instruction->InputAt(0), type, /*out*/ operand, /*to64*/ true);
+ if (type == from && (from == DataType::Type::kInt8 ||
+ from == DataType::Type::kInt16 ||
+ from == DataType::Type::kInt32)) {
+ *operand = conv;
+ return true;
+ }
+ return false;
case DataType::Type::kInt16:
return type == DataType::Type::kUint16 &&
from == DataType::Type::kUint16 &&
- IsZeroExtensionAndGet(instruction->InputAt(0), type, /*out*/ operand, to64);
+ IsZeroExtensionAndGet(instruction->InputAt(0), type, /*out*/ operand);
default:
return false;
}
@@ -147,16 +140,14 @@
return false;
}
-// Detect a zero extension in instruction from the given type. The to64 parameter
-// denotes if result is long, and thus zero extension from int can be included.
+// Detect a zero extension in instruction from the given type.
// Returns the promoted operand on success.
static bool IsZeroExtensionAndGet(HInstruction* instruction,
DataType::Type type,
- /*out*/ HInstruction** operand,
- bool to64) {
+ /*out*/ HInstruction** operand) {
// Accept any already wider constant that would be handled properly by zero
// extension when represented in the *width* of the given narrower data type
- // (the fact that byte/short/int normally sign extend does not matter here).
+ // (the fact that Int8/Int16 normally sign extend does not matter here).
int64_t value = 0;
if (IsInt64AndGet(instruction, /*out*/ &value)) {
switch (type) {
@@ -173,21 +164,12 @@
return true;
}
return false;
- case DataType::Type::kInt32:
- if (IsUint<32>(value)) {
- *operand = instruction;
- return to64;
- }
- return false;
default:
return false;
}
}
- // An implicit widening conversion of a char to an integral type zero-extends
- // the representation of the char value to fill the wider format.
- if (instruction->GetType() == type && (instruction->IsArrayGet() ||
- instruction->IsStaticFieldGet() ||
- instruction->IsInstanceFieldGet())) {
+ // An implicit widening conversion of any unsigned expression zero-extends.
+ if (instruction->GetType() == type) {
if (type == DataType::Type::kUint16) {
*operand = instruction;
return true;
@@ -195,6 +177,9 @@
}
// A sign (or zero) extension followed by an explicit removal of just the
// higher sign bits is equivalent to a zero extension of the underlying operand.
+ //
+ // TODO: move this into simplifier and use new type system instead.
+ //
if (instruction->IsAnd()) {
int64_t mask = 0;
HInstruction* a = instruction->InputAt(0);
@@ -210,22 +195,26 @@
case DataType::Type::kUint16:
case DataType::Type::kInt16:
return mask == std::numeric_limits<uint16_t>::max();
- case DataType::Type::kInt32:
- return mask == std::numeric_limits<uint32_t>::max() && to64;
default: return false;
}
}
}
- // Explicit type conversions.
+ // An explicit widening conversion of an unsigned expression zero-extends.
if (instruction->IsTypeConversion()) {
- DataType::Type from = instruction->InputAt(0)->GetType();
+ HInstruction* conv = instruction->InputAt(0);
+ DataType::Type from = conv->GetType();
switch (instruction->GetType()) {
+ case DataType::Type::kInt32:
case DataType::Type::kInt64:
- return IsZeroExtensionAndGet(instruction->InputAt(0), type, /*out*/ operand, /*to64*/ true);
+ if (type == from && from == DataType::Type::kUint16) {
+ *operand = conv;
+ return true;
+ }
+ return false;
case DataType::Type::kUint16:
return type == DataType::Type::kInt16 &&
from == DataType::Type::kInt16 &&
- IsSignExtensionAndGet(instruction->InputAt(0), type, /*out*/ operand, to64);
+ IsSignExtensionAndGet(instruction->InputAt(0), type, /*out*/ operand);
default:
return false;
}
@@ -360,6 +349,22 @@
return false;
}
+// Detect a + c for constant c.
+static bool IsAddConst(HInstruction* instruction,
+ /*out*/ HInstruction** a,
+ /*out*/ int64_t* c) {
+ if (instruction->IsAdd()) {
+ if (IsInt64AndGet(instruction->InputAt(0), c)) {
+ *a = instruction->InputAt(1);
+ return true;
+ } else if (IsInt64AndGet(instruction->InputAt(1), c)) {
+ *a = instruction->InputAt(0);
+ return true;
+ }
+ }
+ return false;
+}
+
// Detect reductions of the following forms,
// x = x_phi + ..
// x = x_phi - ..
@@ -1148,6 +1153,7 @@
size_t size_vec = DataType::Size(type);
size_t size_from = DataType::Size(from);
size_t size_to = DataType::Size(to);
+ DataType::Type ctype = size_from == size_vec ? from : type;
// Accept an integral conversion
// (1a) narrowing into vector type, "wider" operations cannot bring in higher order bits, or
// (1b) widening from at least vector type, and
@@ -1157,7 +1163,7 @@
VectorizeUse(node, opa, generate_code, type, restrictions | kNoHiBits)) ||
(size_to >= size_from &&
size_from >= size_vec &&
- VectorizeUse(node, opa, generate_code, type, restrictions))) {
+ VectorizeUse(node, opa, generate_code, ctype, restrictions))) {
if (generate_code) {
if (vector_mode_ == kVector) {
vector_map_->Put(instruction, vector_map_->Get(opa)); // operand pass-through
@@ -1896,9 +1902,14 @@
(v->AsInvokeStaticOrDirect()->GetIntrinsic() == Intrinsics::kMathAbsInt ||
v->AsInvokeStaticOrDirect()->GetIntrinsic() == Intrinsics::kMathAbsLong)) {
HInstruction* x = v->InputAt(0);
- if (x->IsSub() && x->GetType() == reduction_type) {
- a = x->InputAt(0);
- b = x->InputAt(1);
+ if (x->GetType() == reduction_type) {
+ int64_t c = 0;
+ if (x->IsSub()) {
+ a = x->InputAt(0);
+ b = x->InputAt(1);
+ } else if (IsAddConst(x, /*out*/ &a, /*out*/ &c)) {
+ b = graph_->GetConstant(reduction_type, -c); // hidden SUB!
+ }
}
}
if (a == nullptr || b == nullptr) {
@@ -1906,22 +1917,21 @@
}
// Accept same-type or consistent sign extension for narrower-type on operands a and b.
// The same-type or narrower operands are called r (a or lower) and s (b or lower).
+ // We inspect the operands carefully to pick the most suited type.
HInstruction* r = a;
HInstruction* s = b;
bool is_unsigned = false;
DataType::Type sub_type = a->GetType();
- if (a->IsTypeConversion()) {
- HInstruction* hunt = a;
- while (hunt->IsTypeConversion()) {
- hunt = hunt->InputAt(0);
- }
- sub_type = hunt->GetType();
- } else if (b->IsTypeConversion()) {
- HInstruction* hunt = a;
- while (hunt->IsTypeConversion()) {
- hunt = hunt->InputAt(0);
- }
- sub_type = hunt->GetType();
+ if (DataType::Size(b->GetType()) < DataType::Size(sub_type)) {
+ sub_type = b->GetType();
+ }
+ if (a->IsTypeConversion() &&
+ DataType::Size(a->InputAt(0)->GetType()) < DataType::Size(sub_type)) {
+ sub_type = a->InputAt(0)->GetType();
+ }
+ if (b->IsTypeConversion() &&
+ DataType::Size(b->InputAt(0)->GetType()) < DataType::Size(sub_type)) {
+ sub_type = b->InputAt(0)->GetType();
}
if (reduction_type != sub_type &&
(!IsNarrowerOperands(a, b, sub_type, &r, &s, &is_unsigned) || is_unsigned)) {
diff --git a/test/623-checker-loop-regressions/src/Main.java b/test/623-checker-loop-regressions/src/Main.java
index 418be30..f6d3bba 100644
--- a/test/623-checker-loop-regressions/src/Main.java
+++ b/test/623-checker-loop-regressions/src/Main.java
@@ -497,6 +497,13 @@
}
}
+ // Mixed of 16-bit and 8-bit array references.
+ static void castAndNarrow(byte[] x, char[] y) {
+ for (int i = 0; i < x.length; i++) {
+ x[i] = (byte) ((short) y[i] + 1);
+ }
+ }
+
public static void main(String[] args) {
expectEquals(10, earlyExitFirst(-1));
for (int i = 0; i <= 10; i++) {
@@ -650,6 +657,15 @@
expectEquals(2805, f[i]);
}
+ char[] cx = new char[259];
+ for (int i = 0; i < 259; i++) {
+ cx[i] = (char) (i - 100);
+ }
+ castAndNarrow(b1, cx);
+ for (int i = 0; i < 259; i++) {
+ expectEquals((byte)((short) cx[i] + 1), b1[i]);
+ }
+
System.out.println("passed");
}
diff --git a/test/645-checker-abs-simd/src/Main.java b/test/645-checker-abs-simd/src/Main.java
index c49d85d..57c51a6 100644
--- a/test/645-checker-abs-simd/src/Main.java
+++ b/test/645-checker-abs-simd/src/Main.java
@@ -131,6 +131,28 @@
}
}
+ /// CHECK-START: void Main.doitCastedChar(char[]) loop_optimization (before)
+ /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: InvokeStaticOrDirect intrinsic:MathAbsInt loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: void Main.doitCastedChar(char[]) loop_optimization (after)
+ /// CHECK-DAG: Phi loop:<<Loop1:B\d+>> outer_loop:none
+ /// CHECK-DAG: VecLoad loop:<<Loop1>> outer_loop:none
+ /// CHECK-DAG: VecAbs loop:<<Loop1>> outer_loop:none
+ /// CHECK-DAG: VecStore loop:<<Loop1>> outer_loop:none
+ /// CHECK-DAG: Phi loop:<<Loop2:B\d+>> outer_loop:none
+ /// CHECK-DAG: ArrayGet loop:<<Loop2>> outer_loop:none
+ //
+ /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
+ //
+ private static void doitCastedChar(char[] x) {
+ for (int i = 0; i < x.length; i++) {
+ x[i] = (char) Math.abs((short) x[i]);
+ }
+ }
+
/// CHECK-START: void Main.doitInt(int[]) loop_optimization (before)
/// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none
/// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none
@@ -298,7 +320,7 @@
xc[i] = (char) i;
}
doitChar(xc);
- for (int i = 0; i < 1024 *64; i++) {
+ for (int i = 0; i < 1024 * 64; i++) {
expectEquals32((char) Math.abs((char) i), xc[i]);
}
short[] xs = new short[1024 * 64];
@@ -309,6 +331,13 @@
for (int i = 0; i < 1024 * 64; i++) {
expectEquals32((short) Math.abs((short) i), xs[i]);
}
+ for (int i = 0; i < 1024 * 64; i++) {
+ xc[i] = (char) i;
+ }
+ doitCastedChar(xc);
+ for (int i = 0; i < 1024 * 64; i++) {
+ expectEquals32((char) Math.abs((short) i), xc[i]);
+ }
// Set up minint32, maxint32 and some others.
int[] xi = new int[8];
xi[0] = 0x80000000;
diff --git a/test/660-checker-simd-sad-short3/expected.txt b/test/660-checker-simd-sad-short3/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/660-checker-simd-sad-short3/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/660-checker-simd-sad-short3/info.txt b/test/660-checker-simd-sad-short3/info.txt
new file mode 100644
index 0000000..b56c119
--- /dev/null
+++ b/test/660-checker-simd-sad-short3/info.txt
@@ -0,0 +1 @@
+Functional tests on SAD vectorization.
diff --git a/test/660-checker-simd-sad-short3/src/Main.java b/test/660-checker-simd-sad-short3/src/Main.java
new file mode 100644
index 0000000..c8850b4
--- /dev/null
+++ b/test/660-checker-simd-sad-short3/src/Main.java
@@ -0,0 +1,351 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tests for SAD (sum of absolute differences).
+ *
+ * Some special cases: parameters, constants, invariants, casted computations.
+ */
+public class Main {
+
+ /// CHECK-START: int Main.sadShort2IntParamRight(short[], short) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<Param:s\d+>> ParameterValue loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub:i\d+>> Sub [<<Get>>,<<Param>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Intrin:i\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsInt loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: int Main.sadShort2IntParamRight(short[], short) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons8:i\d+>> IntConstant 8 loop:none
+ /// CHECK-DAG: <<Param:s\d+>> ParameterValue loop:none
+ /// CHECK-DAG: <<Rep:d\d+>> VecReplicateScalar [<<Param>>] loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load>>,<<Rep>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons8>>] loop:<<Loop>> outer_loop:none
+ private static int sadShort2IntParamRight(short[] s, short param) {
+ int sad = 0;
+ for (int i = 0; i < s.length; i++) {
+ sad += Math.abs(s[i] - param);
+ }
+ return sad;
+ }
+
+ /// CHECK-START: int Main.sadShort2IntParamLeft(short[], short) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<Param:s\d+>> ParameterValue loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub:i\d+>> Sub [<<Param>>,<<Get>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Intrin:i\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsInt loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: int Main.sadShort2IntParamLeft(short[], short) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons8:i\d+>> IntConstant 8 loop:none
+ /// CHECK-DAG: <<Param:s\d+>> ParameterValue loop:none
+ /// CHECK-DAG: <<Rep:d\d+>> VecReplicateScalar [<<Param>>] loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Rep>>,<<Load>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons8>>] loop:<<Loop>> outer_loop:none
+ private static int sadShort2IntParamLeft(short[] s, short param) {
+ int sad = 0;
+ for (int i = 0; i < s.length; i++) {
+ sad += Math.abs(param - s[i]);
+ }
+ return sad;
+ }
+
+ /// CHECK-START: int Main.sadShort2IntConstRight(short[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<ConsI:i\d+>> IntConstant -32767 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Add:i\d+>> Add [<<Get>>,<<ConsI>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Intrin:i\d+>> InvokeStaticOrDirect [<<Add>>] intrinsic:MathAbsInt loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: int Main.sadShort2IntConstRight(short[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons8:i\d+>> IntConstant 8 loop:none
+ /// CHECK-DAG: <<ConsI:i\d+>> IntConstant 32767 loop:none
+ /// CHECK-DAG: <<Rep:d\d+>> VecReplicateScalar [<<ConsI>>] loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load>>,<<Rep>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons8>>] loop:<<Loop>> outer_loop:none
+ private static int sadShort2IntConstRight(short[] s) {
+ int sad = 0;
+ for (int i = 0; i < s.length; i++) {
+ sad += Math.abs(s[i] - 32767);
+ }
+ return sad;
+ }
+
+ /// CHECK-START: int Main.sadShort2IntConstLeft(short[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<ConsI:i\d+>> IntConstant 32767 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub:i\d+>> Sub [<<ConsI>>,<<Get>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Intrin:i\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsInt loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: int Main.sadShort2IntConstLeft(short[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons8:i\d+>> IntConstant 8 loop:none
+ /// CHECK-DAG: <<ConsI:i\d+>> IntConstant 32767 loop:none
+ /// CHECK-DAG: <<Rep:d\d+>> VecReplicateScalar [<<ConsI>>] loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Rep>>,<<Load>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons8>>] loop:<<Loop>> outer_loop:none
+ private static int sadShort2IntConstLeft(short[] s) {
+ int sad = 0;
+ for (int i = 0; i < s.length; i++) {
+ sad += Math.abs(32767 - s[i]);
+ }
+ return sad;
+ }
+
+ /// CHECK-START: int Main.sadShort2IntInvariantRight(short[], int) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<Conv:s\d+>> TypeConversion [{{i\d+}}] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub:i\d+>> Sub [<<Get>>,<<Conv>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Intrin:i\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsInt loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: int Main.sadShort2IntInvariantRight(short[], int) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons8:i\d+>> IntConstant 8 loop:none
+ /// CHECK-DAG: <<Conv:s\d+>> TypeConversion [{{i\d+}}] loop:none
+ /// CHECK-DAG: <<Rep:d\d+>> VecReplicateScalar [<<Conv>>] loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load>>,<<Rep>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons8>>] loop:<<Loop>> outer_loop:none
+ private static int sadShort2IntInvariantRight(short[] s, int val) {
+ int sad = 0;
+ short x = (short) (val + 1);
+ for (int i = 0; i < s.length; i++) {
+ sad += Math.abs(s[i] - x);
+ }
+ return sad;
+ }
+
+ /// CHECK-START: int Main.sadShort2IntInvariantLeft(short[], int) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<Conv:s\d+>> TypeConversion [{{i\d+}}] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub:i\d+>> Sub [<<Conv>>,<<Get>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Intrin:i\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsInt loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: int Main.sadShort2IntInvariantLeft(short[], int) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons8:i\d+>> IntConstant 8 loop:none
+ /// CHECK-DAG: <<Conv:s\d+>> TypeConversion [{{i\d+}}] loop:none
+ /// CHECK-DAG: <<Rep:d\d+>> VecReplicateScalar [<<Conv>>] loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Rep>>,<<Load>>] loop:<<Loop>> outer_loop:none
+ private static int sadShort2IntInvariantLeft(short[] s, int val) {
+ int sad = 0;
+ short x = (short) (val + 1);
+ for (int i = 0; i < s.length; i++) {
+ sad += Math.abs(x - s[i]);
+ }
+ return sad;
+ }
+
+ /// CHECK-START: int Main.sadShort2IntCastedExprRight(short[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<ConsI:i\d+>> IntConstant 110 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Add:i\d+>> [<<Get>>,<<ConsI>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Conv:s\d+>> TypeConversion [<<Add>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub:i\d+>> Sub [<<Get>>,<<Conv>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Intrin:i\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsInt loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: int Main.sadShort2IntCastedExprRight(short[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons8:i\d+>> IntConstant 8 loop:none
+ /// CHECK-DAG: <<ConsI:i\d+>> IntConstant 110 loop:none
+ /// CHECK-DAG: <<Rep:d\d+>> VecReplicateScalar [<<ConsI>>] loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Add:d\d+>> VecAdd [<<Load>>,<<Rep>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load>>,<<Add>>] loop:<<Loop>> outer_loop:none
+ private static int sadShort2IntCastedExprRight(short[] s) {
+ int sad = 0;
+ for (int i = 0; i < s.length; i++) {
+ short x = (short) (s[i] + 110); // narrower part sign extends
+ sad += Math.abs(s[i] - x);
+ }
+ return sad;
+ }
+
+ /// CHECK-START: int Main.sadShort2IntCastedExprLeft(short[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<ConsI:i\d+>> IntConstant 110 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Add:i\d+>> [<<Get>>,<<ConsI>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Conv:s\d+>> TypeConversion [<<Add>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub:i\d+>> Sub [<<Conv>>,<<Get>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Intrin:i\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsInt loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: int Main.sadShort2IntCastedExprLeft(short[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons8:i\d+>> IntConstant 8 loop:none
+ /// CHECK-DAG: <<ConsI:i\d+>> IntConstant 110 loop:none
+ /// CHECK-DAG: <<Rep:d\d+>> VecReplicateScalar [<<ConsI>>] loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Add:d\d+>> VecAdd [<<Load>>,<<Rep>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Add>>,<<Load>>] loop:<<Loop>> outer_loop:none
+ private static int sadShort2IntCastedExprLeft(short[] s) {
+ int sad = 0;
+ for (int i = 0; i < s.length; i++) {
+ short x = (short) (s[i] + 110); // narrower part sign extends
+ sad += Math.abs(x - s[i]);
+ }
+ return sad;
+ }
+
+ public static void main(String[] args) {
+ short[] interesting = {
+ (short) 0x0000,
+ (short) 0x0001,
+ (short) 0x0002,
+ (short) 0x0003,
+ (short) 0x0004,
+ (short) 0x1234,
+ (short) 0x8000,
+ (short) 0x8001,
+ (short) 0x8002,
+ (short) 0x8003,
+ (short) 0x8004,
+ (short) 0x8004,
+ (short) 0x7000,
+ (short) 0x7fff,
+ (short) 0xf000,
+ (short) 0xffff
+ };
+ short[] s = new short[64];
+ for (int i = 0; i < 64; i++) {
+ s[i] = interesting[i % interesting.length];
+ }
+
+ expectEquals(1067200, sadShort2IntParamRight(s, (short)-1));
+ expectEquals(1067200, sadShort2IntParamRight(s, (short) 0));
+ expectEquals(1067208, sadShort2IntParamRight(s, (short) 1));
+ expectEquals(1067224, sadShort2IntParamRight(s, (short) 2));
+ expectEquals(2635416, sadShort2IntParamRight(s, (short) 0x7fff));
+ expectEquals(1558824, sadShort2IntParamRight(s, (short) 0x8000));
+
+ expectEquals(1067200, sadShort2IntParamLeft(s, (short)-1));
+ expectEquals(1067200, sadShort2IntParamLeft(s, (short) 0));
+ expectEquals(1067208, sadShort2IntParamLeft(s, (short) 1));
+ expectEquals(1067224, sadShort2IntParamLeft(s, (short) 2));
+ expectEquals(2635416, sadShort2IntParamLeft(s, (short) 0x7fff));
+ expectEquals(1558824, sadShort2IntParamLeft(s, (short) 0x8000));
+
+ expectEquals(2635416, sadShort2IntConstRight(s));
+ expectEquals(2635416, sadShort2IntConstLeft(s));
+
+ expectEquals(1067200, sadShort2IntInvariantRight(s, -2));
+ expectEquals(1067200, sadShort2IntInvariantRight(s, -1));
+ expectEquals(1067208, sadShort2IntInvariantRight(s, 0));
+ expectEquals(1067224, sadShort2IntInvariantRight(s, 1));
+ expectEquals(2635416, sadShort2IntInvariantRight(s, 0x7ffe));
+ expectEquals(1558824, sadShort2IntInvariantRight(s, 0x7fff));
+
+ expectEquals(1067200, sadShort2IntInvariantLeft(s, -2));
+ expectEquals(1067200, sadShort2IntInvariantLeft(s, -1));
+ expectEquals(1067208, sadShort2IntInvariantLeft(s, 0));
+ expectEquals(1067224, sadShort2IntInvariantLeft(s, 1));
+ expectEquals(2635416, sadShort2IntInvariantLeft(s, 0x7ffe));
+ expectEquals(1558824, sadShort2IntInvariantLeft(s, 0x7fff));
+
+ expectEquals(268304, sadShort2IntCastedExprLeft(s));
+ expectEquals(268304, sadShort2IntCastedExprRight(s));
+
+ System.out.println("passed");
+ }
+
+ private static void expectEquals(int expected, int result) {
+ if (expected != result) {
+ throw new Error("Expected: " + expected + ", found: " + result);
+ }
+ }
+
+ private static void expectEquals(long expected, long result) {
+ if (expected != result) {
+ throw new Error("Expected: " + expected + ", found: " + result);
+ }
+ }
+}