Revert "Revert^2 "Implement Dot Product Vectorization for x86""
This reverts commit 7cf5607f472020711e36eedbbfebb25b40d3f90e.
Bug: 144947842
Reason for revert: Seems to have broken android.jvmti.cts.JvmtiHostTest1936#testJvmti
Change-Id: Ied6ff6ddf1cb2e3e76adcaa0fda5e36af254b7c5
diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc
index 1390af2..68aef77 100644
--- a/compiler/optimizing/code_generator_vector_x86.cc
+++ b/compiler/optimizing/code_generator_vector_x86.cc
@@ -1201,38 +1201,11 @@
}
void LocationsBuilderX86::VisitVecDotProd(HVecDotProd* instruction) {
- LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
- locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
- locations->SetInAt(2, Location::RequiresFpuRegister());
- locations->SetOut(Location::SameAsFirstInput());
- locations->AddTemp(Location::RequiresFpuRegister());
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
void InstructionCodeGeneratorX86::VisitVecDotProd(HVecDotProd* instruction) {
- bool cpu_has_avx = CpuHasAvxFeatureFlag();
- LocationSummary* locations = instruction->GetLocations();
- XmmRegister acc = locations->InAt(0).AsFpuRegister<XmmRegister>();
- XmmRegister left = locations->InAt(1).AsFpuRegister<XmmRegister>();
- XmmRegister right = locations->InAt(2).AsFpuRegister<XmmRegister>();
- switch (instruction->GetPackedType()) {
- case DataType::Type::kInt32: {
- DCHECK_EQ(4u, instruction->GetVectorLength());
- XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
- if (!cpu_has_avx) {
- __ movaps(tmp, right);
- __ pmaddwd(tmp, left);
- __ paddd(acc, tmp);
- } else {
- __ vpmaddwd(tmp, left, right);
- __ vpaddd(acc, acc, tmp);
- }
- break;
- }
- default:
- LOG(FATAL) << "Unsupported SIMD Type" << instruction->GetPackedType();
- UNREACHABLE();
- }
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
// Helper to set up locations for vector memory operations.
diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc
index 7fac44d..19dfd1d 100644
--- a/compiler/optimizing/code_generator_vector_x86_64.cc
+++ b/compiler/optimizing/code_generator_vector_x86_64.cc
@@ -1174,38 +1174,11 @@
}
void LocationsBuilderX86_64::VisitVecDotProd(HVecDotProd* instruction) {
- LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
- locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
- locations->SetInAt(2, Location::RequiresFpuRegister());
- locations->SetOut(Location::SameAsFirstInput());
- locations->AddTemp(Location::RequiresFpuRegister());
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
void InstructionCodeGeneratorX86_64::VisitVecDotProd(HVecDotProd* instruction) {
- bool cpu_has_avx = CpuHasAvxFeatureFlag();
- LocationSummary* locations = instruction->GetLocations();
- XmmRegister acc = locations->InAt(0).AsFpuRegister<XmmRegister>();
- XmmRegister left = locations->InAt(1).AsFpuRegister<XmmRegister>();
- XmmRegister right = locations->InAt(2).AsFpuRegister<XmmRegister>();
- switch (instruction->GetPackedType()) {
- case DataType::Type::kInt32: {
- DCHECK_EQ(4u, instruction->GetVectorLength());
- XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
- if (!cpu_has_avx) {
- __ movaps(tmp, right);
- __ pmaddwd(tmp, left);
- __ paddd(acc, tmp);
- } else {
- __ vpmaddwd(tmp, left, right);
- __ vpaddd(acc, acc, tmp);
- }
- break;
- }
- default:
- LOG(FATAL) << "Unsupported SIMD Type" << instruction->GetPackedType();
- UNREACHABLE();
- }
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
// Helper to set up locations for vector memory operations.
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 567a41e..9c4e9d2 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -1623,19 +1623,13 @@
kNoDotProd;
return TrySetVectorLength(16);
case DataType::Type::kUint16:
- *restrictions |= kNoDiv |
- kNoAbs |
- kNoSignedHAdd |
- kNoUnroundedHAdd |
- kNoSAD |
- kNoDotProd;
- return TrySetVectorLength(8);
case DataType::Type::kInt16:
*restrictions |= kNoDiv |
kNoAbs |
kNoSignedHAdd |
kNoUnroundedHAdd |
- kNoSAD;
+ kNoSAD|
+ kNoDotProd;
return TrySetVectorLength(8);
case DataType::Type::kInt32:
*restrictions |= kNoDiv | kNoSAD;
@@ -2172,7 +2166,7 @@
bool generate_code,
DataType::Type reduction_type,
uint64_t restrictions) {
- if (!instruction->IsAdd() || reduction_type != DataType::Type::kInt32) {
+ if (!instruction->IsAdd() || (reduction_type != DataType::Type::kInt32)) {
return false;
}
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 55f7691..166aec8 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -2268,20 +2268,6 @@
}
-void X86Assembler::vpmaddwd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
- DCHECK(CpuHasAVXorAVX2FeatureFlag());
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- uint8_t ByteZero = 0x00, ByteOne = 0x00;
- ByteZero = EmitVexPrefixByteZero(/* is_twobyte_form=*/ true);
- X86ManagedRegister vvvv_reg = X86ManagedRegister::FromXmmRegister(src1);
- ByteOne = EmitVexPrefixByteOne(/*R=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
- EmitUint8(ByteZero);
- EmitUint8(ByteOne);
- EmitUint8(0xF5);
- EmitXmmRegisterOperand(dst, src2);
-}
-
-
void X86Assembler::phaddw(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x66);
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 27fde26..1b6941c 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -577,7 +577,6 @@
void pavgw(XmmRegister dst, XmmRegister src);
void psadbw(XmmRegister dst, XmmRegister src);
void pmaddwd(XmmRegister dst, XmmRegister src);
- void vpmaddwd(XmmRegister dst, XmmRegister src1, XmmRegister src2);
void phaddw(XmmRegister dst, XmmRegister src);
void phaddd(XmmRegister dst, XmmRegister src);
void haddps(XmmRegister dst, XmmRegister src);
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index 9253730..12d9646 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -965,11 +965,6 @@
DriverStr(RepeatFF(&x86::X86Assembler::pmaddwd, "pmaddwd %{reg2}, %{reg1}"), "pmaddwd");
}
-TEST_F(AssemblerX86AVXTest, VPMAddWD) {
- DriverStr(
- RepeatFFF(&x86::X86Assembler::vpmaddwd, "vpmaddwd %{reg3}, %{reg2}, %{reg1}"), "vpmaddwd");
-}
-
TEST_F(AssemblerX86Test, PHAddW) {
DriverStr(RepeatFF(&x86::X86Assembler::phaddw, "phaddw %{reg2}, %{reg1}"), "phaddw");
}
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 2c5dd9e..64246aa 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -71,7 +71,6 @@
return false;
}
-
void X86_64Assembler::call(CpuRegister reg) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitOptionalRex32(reg);
@@ -759,6 +758,7 @@
EmitOperand(src.LowBits(), Operand(dst));
}
+
void X86_64Assembler::addss(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0xF3);
@@ -768,6 +768,7 @@
EmitXmmRegisterOperand(dst.LowBits(), src);
}
+
void X86_64Assembler::addss(XmmRegister dst, const Address& src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0xF3);
@@ -2632,6 +2633,7 @@
EmitXmmRegisterOperand(dst.LowBits(), src);
}
+
void X86_64Assembler::pxor(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x66);
@@ -3143,35 +3145,6 @@
EmitXmmRegisterOperand(dst.LowBits(), src);
}
-void X86_64Assembler::vpmaddwd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
- DCHECK(CpuHasAVXorAVX2FeatureFlag());
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- bool is_twobyte_form = false;
- uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
- if (!src2.NeedsRex()) {
- is_twobyte_form = true;
- }
- ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
- X86_64ManagedRegister vvvv_reg =
- X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
- if (is_twobyte_form) {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
- } else {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
- /*X=*/ false,
- src2.NeedsRex(),
- SET_VEX_M_0F);
- ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
- }
- EmitUint8(ByteZero);
- EmitUint8(ByteOne);
- if (!is_twobyte_form) {
- EmitUint8(ByteTwo);
- }
- EmitUint8(0xF5);
- EmitXmmRegisterOperand(dst.LowBits(), src2);
-}
-
void X86_64Assembler::phaddw(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x66);
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 70072d9..15f3ab9 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -615,7 +615,6 @@
void pavgw(XmmRegister dst, XmmRegister src);
void psadbw(XmmRegister dst, XmmRegister src);
void pmaddwd(XmmRegister dst, XmmRegister src);
- void vpmaddwd(XmmRegister dst, XmmRegister src1, XmmRegister src2);
void phaddw(XmmRegister dst, XmmRegister src);
void phaddd(XmmRegister dst, XmmRegister src);
void haddps(XmmRegister dst, XmmRegister src);
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index 3921c4a..e3b8390 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -1740,11 +1740,6 @@
DriverStr(RepeatFF(&x86_64::X86_64Assembler::pmaddwd, "pmaddwd %{reg2}, %{reg1}"), "pmadwd");
}
-TEST_F(AssemblerX86_64AVXTest, VPmaddwd) {
- DriverStr(RepeatFFF(&x86_64::X86_64Assembler::vpmaddwd,
- "vpmaddwd %{reg3}, %{reg2}, %{reg1}"), "vpmaddwd");
-}
-
TEST_F(AssemblerX86_64Test, Phaddw) {
DriverStr(RepeatFF(&x86_64::X86_64Assembler::phaddw, "phaddw %{reg2}, %{reg1}"), "phaddw");
}
diff --git a/test/684-checker-simd-dotprod/src/Main.java b/test/684-checker-simd-dotprod/src/Main.java
index aa03d1e..e0c8716 100644
--- a/test/684-checker-simd-dotprod/src/Main.java
+++ b/test/684-checker-simd-dotprod/src/Main.java
@@ -17,7 +17,6 @@
import other.TestByte;
import other.TestCharShort;
import other.TestVarious;
-import other.TestFloatDouble;
/**
* Tests for dot product idiom vectorization.
@@ -27,7 +26,6 @@
TestByte.run();
TestCharShort.run();
TestVarious.run();
- TestFloatDouble.run();
System.out.println("passed");
}
}
diff --git a/test/684-checker-simd-dotprod/src/other/TestFloatDouble.java b/test/684-checker-simd-dotprod/src/other/TestFloatDouble.java
deleted file mode 100644
index b155ae1..0000000
--- a/test/684-checker-simd-dotprod/src/other/TestFloatDouble.java
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * Copyright (C) 2019 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package other;
-
-/**
- * Tests for dot product idiom vectorization: char and short case.
- */
-public class TestFloatDouble {
-
- public static final int ARRAY_SIZE = 1024;
-
-
- /// CHECK-START-{X86_64}: float other.TestFloatDouble.testDotProdSimpleFloat(float[], float[]) loop_optimization (after)
- /// CHECK-NOT: VecDotProd
- public static final float testDotProdSimpleFloat(float[] a, float[] b) {
- float sum = 0;
- for (int i = 0; i < b.length; i++) {
- sum += a[i] * b[i];
- }
- return sum;
- }
-
-
- /// CHECK-START-{X86_64}: double other.TestFloatDouble.testDotProdSimpleDouble(double[], double[]) loop_optimization (after)
- /// CHECK-NOT: VecDotProd
-
- public static final double testDotProdSimpleDouble(double[] a, double[] b) {
- double sum = 0;
- for (int i = 0; i < b.length; i++) {
- sum += a[i] * b[i];
- }
- return sum;
- }
-
- private static void expectEquals(float expected, float result) {
- if (Float.compare(expected, result) != 0) {
- throw new Error("Expected: " + expected + ", found: " + result);
- }
- }
-
- private static void expectEquals(double expected, double result) {
- if (Double.compare(expected, result) != 0) {
- throw new Error("Expected: " + expected + ", found: " + result);
- }
- }
-
- public static void run() {
- final float MAX_F = Float.MAX_VALUE;
- final float MIN_F = Float.MIN_VALUE;
- final double MAX_D = Double.MAX_VALUE;
- final double MIN_D = Double.MIN_VALUE;
-
- double[] a = new double[1024];
- for (int i = 0; i != 1024; ++i) a[i] = MAX_D;
- double[] b = new double[1024];
- for (int i = 0; i != 1024; ++i) b[i] = ((i & 1) == 0) ? 1.0 : -1.0;
- expectEquals(0.0, testDotProdSimpleDouble(a,b));
-
- float[] f1_1 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3.33f, 0.125f, 3.0f, 0.25f};
- float[] f2_1 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6.125f, 2.25f, 1.213f, 0.5f};
- expectEquals(24.4415f, testDotProdSimpleFloat(f1_1, f2_1));
-
- float [] f1_2 = { 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0.63671875f, 0.76953125f, 0.22265625f, 1.0f};
- float [] f2_2 = { 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, MIN_F, MAX_F, MAX_F, MIN_F };
- expectEquals(3.376239E38f, testDotProdSimpleFloat(f1_2, f2_2));
-
- float[] f1_3 = { 0xc0000000, 0xc015c28f, 0x411dd42c, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, MIN_F, MIN_F };
- float[] f2_3 = { 0x3f4c779a, 0x408820c5, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0x00000000, 0, MAX_F, MAX_F };
- expectEquals(-2.30124471E18f, testDotProdSimpleFloat(f1_3, f2_3));
- }
-
- public static void main(String[] args) {
- run();
- }
-}