Merge "ART: Add HADDPS/HADDPD/SHUFPS/SHUFPD instruction generation"
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index 1470e8c..7baf2d9 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -402,6 +402,8 @@
   EXT_0F_ENCODING_MAP(Pxor,      0x66, 0xEF, REG_DEF0_USE0),
   EXT_0F_ENCODING2_MAP(Phaddw,   0x66, 0x38, 0x01, REG_DEF0_USE0),
   EXT_0F_ENCODING2_MAP(Phaddd,   0x66, 0x38, 0x02, REG_DEF0_USE0),
+  EXT_0F_ENCODING_MAP(Haddpd,    0x66, 0x7C, REG_DEF0_USE0),  // 0x66 prefix selects the packed-double form of opcode 0x7C
+  EXT_0F_ENCODING_MAP(Haddps,    0xF2, 0x7C, REG_DEF0_USE0),  // 0xF2 prefix selects the packed-single form of opcode 0x7C
 
   { kX86PextrbRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0x3A, 0x14, 0, 0, 1, false }, "PextbRRI", "!0r,!1r,!2d" },
   { kX86PextrwRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0xC5, 0x00, 0, 0, 1, false }, "PextwRRI", "!0r,!1r,!2d" },
@@ -410,6 +412,9 @@
   { kX86PshuflwRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0xF2, 0, 0x0F, 0x70, 0, 0, 0, 1, false }, "PshuflwRRI", "!0r,!1r,!2d" },
   { kX86PshufdRRI,  kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0x70, 0, 0, 0, 1, false }, "PshuffRRI", "!0r,!1r,!2d" },
 
+  { kX86ShufpsRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0_USE0 | REG_USE1, { 0x00, 0, 0x0F, 0xC6, 0, 0, 0, 1, false }, "kX86ShufpsRRI", "!0r,!1r,!2d" },  // dest supplies the low result lanes, so it is a use as well as a def
+  { kX86ShufpdRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0_USE0 | REG_USE1, { 0x66, 0, 0x0F, 0xC6, 0, 0, 0, 1, false }, "kX86ShufpdRRI", "!0r,!1r,!2d" },  // dest supplies the low result lane, so it is a use as well as a def
+
   { kX86PsrawRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x71, 0, 4, 0, 1, false }, "PsrawRI", "!0r,!1d" },
   { kX86PsradRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x72, 0, 4, 0, 1, false }, "PsradRI", "!0r,!1d" },
   { kX86PsrlwRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x71, 0, 2, 0, 1, false }, "PsrlwRI", "!0r,!1d" },
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index ff243ce..e271e9d 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -564,11 +564,15 @@
   Binary0fOpCode(kX86Pxor),     // parallel XOR 128 bits x 1
   Binary0fOpCode(kX86Phaddw),   // parallel horizontal addition 16 bits x 8
   Binary0fOpCode(kX86Phaddd),   // parallel horizontal addition 32 bits x 4
+  Binary0fOpCode(kX86Haddpd),   // parallel FP horizontal addition 64 bits x 2
+  Binary0fOpCode(kX86Haddps),   // parallel FP horizontal addition 32 bits x 4
   kX86PextrbRRI,                // Extract 8 bits from XMM into GPR
   kX86PextrwRRI,                // Extract 16 bits from XMM into GPR
   kX86PextrdRRI,                // Extract 32 bits from XMM into GPR
   kX86PshuflwRRI,               // Shuffle 16 bits in lower 64 bits of XMM.
   kX86PshufdRRI,                // Shuffle 32 bits in XMM.
+  kX86ShufpsRRI,                // FP Shuffle 32 bits in XMM.
+  kX86ShufpdRRI,                // FP Shuffle 64 bits in XMM.
   kX86PsrawRI,                  // signed right shift of floating point registers 16 bits x 8
   kX86PsradRI,                  // signed right shift of floating point registers 32 bits x 4
   kX86PsrlwRI,                  // logical right shift of floating point registers 16 bits x 8
diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc
index 14a5b5f..e6cbf05 100644
--- a/disassembler/disassembler_x86.cc
+++ b/disassembler/disassembler_x86.cc
@@ -641,6 +641,21 @@
         store = true;
         immediate_bytes = 1;
         break;
+      case 0x7C:  // haddps / haddpd, selected by the 0xF2 / 0x66 prefix
+        if (prefix[0] == 0xF2) {
+          opcode << "haddps";
+          prefix[0] = 0;  // clear prefix now it's served its purpose as part of the opcode
+        } else if (prefix[2] == 0x66) {
+          opcode << "haddpd";
+          prefix[2] = 0;  // clear prefix now it's served its purpose as part of the opcode
+        } else {
+          opcode << StringPrintf("unknown opcode '0F %02X'", *instr);
+          break;  // neither prefix present: skip the operand setup below
+        }
+        src_reg_file = dst_reg_file = SSE;  // source and destination both use the SSE register file
+        has_modrm = true;
+        load = true;
+        break;
       case 0x7E:
         if (prefix[2] == 0x66) {
           src_reg_file = SSE;
@@ -733,6 +748,18 @@
           opcode << StringPrintf("unknown opcode '0F %02X'", *instr);
         }
         break;
+      case 0xC6:  // shufpd with the 0x66 prefix, shufps otherwise
+        if (prefix[2] == 0x66) {
+          opcode << "shufpd";
+          prefix[2] = 0;  // clear prefix now it's served its purpose as part of the opcode
+        } else {
+          opcode << "shufps";  // no other prefix is checked here; everything else decodes as shufps
+        }
+        has_modrm = true;
+        store = true;  // NOTE(review): the 0x7C (haddps/haddpd) case sets load instead for what looks like the same operand form - confirm store is intended
+        src_reg_file = dst_reg_file = SSE;  // source and destination both use the SSE register file
+        immediate_bytes = 1;  // one immediate byte follows the operands
+        break;
       case 0xC7:
         static const char* x0FxC7_opcodes[] = { "unknown-0f-c7", "cmpxchg8b", "unknown-0f-c7", "unknown-0f-c7", "unknown-0f-c7", "unknown-0f-c7", "unknown-0f-c7", "unknown-0f-c7" };
         modrm_opcodes = x0FxC7_opcodes;