Revert^4 "Implement Dot Product Vectorization for x86"

This reverts commit 8e895008a3e2f2813bb46cb0c6bc76884e46e9ac.

Reason for revert: The test failure seems unrelated.
Bug: 144947842

Change-Id: I7b437f0443d71a5c762e1a8372564ed989971cc9
diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc
index 68aef77..1390af2 100644
--- a/compiler/optimizing/code_generator_vector_x86.cc
+++ b/compiler/optimizing/code_generator_vector_x86.cc
@@ -1201,11 +1201,38 @@
 }
 
 void LocationsBuilderX86::VisitVecDotProd(HVecDotProd* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->SetInAt(1, Location::RequiresFpuRegister());
+  locations->SetInAt(2, Location::RequiresFpuRegister());
+  locations->SetOut(Location::SameAsFirstInput());
+  locations->AddTemp(Location::RequiresFpuRegister());
 }
 
 void InstructionCodeGeneratorX86::VisitVecDotProd(HVecDotProd* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  bool cpu_has_avx = CpuHasAvxFeatureFlag();
+  LocationSummary* locations = instruction->GetLocations();
+  XmmRegister acc = locations->InAt(0).AsFpuRegister<XmmRegister>();
+  XmmRegister left = locations->InAt(1).AsFpuRegister<XmmRegister>();
+  XmmRegister right = locations->InAt(2).AsFpuRegister<XmmRegister>();
+  switch (instruction->GetPackedType()) {
+    case DataType::Type::kInt32: {
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+      if (!cpu_has_avx) {
+        __ movaps(tmp, right);
+        __ pmaddwd(tmp, left);
+        __ paddd(acc, tmp);
+      } else {
+        __ vpmaddwd(tmp, left, right);
+        __ vpaddd(acc, acc, tmp);
+      }
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unsupported SIMD Type" << instruction->GetPackedType();
+      UNREACHABLE();
+  }
 }
 
 // Helper to set up locations for vector memory operations.
diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc
index 19dfd1d..7fac44d 100644
--- a/compiler/optimizing/code_generator_vector_x86_64.cc
+++ b/compiler/optimizing/code_generator_vector_x86_64.cc
@@ -1174,11 +1174,38 @@
 }
 
 void LocationsBuilderX86_64::VisitVecDotProd(HVecDotProd* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->SetInAt(1, Location::RequiresFpuRegister());
+  locations->SetInAt(2, Location::RequiresFpuRegister());
+  locations->SetOut(Location::SameAsFirstInput());
+  locations->AddTemp(Location::RequiresFpuRegister());
 }
 
 void InstructionCodeGeneratorX86_64::VisitVecDotProd(HVecDotProd* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  bool cpu_has_avx = CpuHasAvxFeatureFlag();
+  LocationSummary* locations = instruction->GetLocations();
+  XmmRegister acc = locations->InAt(0).AsFpuRegister<XmmRegister>();
+  XmmRegister left = locations->InAt(1).AsFpuRegister<XmmRegister>();
+  XmmRegister right = locations->InAt(2).AsFpuRegister<XmmRegister>();
+  switch (instruction->GetPackedType()) {
+    case DataType::Type::kInt32: {
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+      if (!cpu_has_avx) {
+        __ movaps(tmp, right);
+        __ pmaddwd(tmp, left);
+        __ paddd(acc, tmp);
+      } else {
+        __ vpmaddwd(tmp, left, right);
+        __ vpaddd(acc, acc, tmp);
+      }
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unsupported SIMD Type" << instruction->GetPackedType();
+      UNREACHABLE();
+  }
 }
 
 // Helper to set up locations for vector memory operations.
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 9c4e9d2..567a41e 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -1623,13 +1623,19 @@
                              kNoDotProd;
             return TrySetVectorLength(16);
           case DataType::Type::kUint16:
+            *restrictions |= kNoDiv |
+                             kNoAbs |
+                             kNoSignedHAdd |
+                             kNoUnroundedHAdd |
+                             kNoSAD |
+                             kNoDotProd;
+            return TrySetVectorLength(8);
           case DataType::Type::kInt16:
             *restrictions |= kNoDiv |
                              kNoAbs |
                              kNoSignedHAdd |
                              kNoUnroundedHAdd |
-                             kNoSAD|
-                             kNoDotProd;
+                             kNoSAD;
             return TrySetVectorLength(8);
           case DataType::Type::kInt32:
             *restrictions |= kNoDiv | kNoSAD;
@@ -2166,7 +2172,7 @@
                                               bool generate_code,
                                               DataType::Type reduction_type,
                                               uint64_t restrictions) {
-  if (!instruction->IsAdd() || (reduction_type != DataType::Type::kInt32)) {
+  if (!instruction->IsAdd() || reduction_type != DataType::Type::kInt32) {
     return false;
   }