Merge "ART: Add HADDPS/HADDPD/SHUFPS/SHUFPD instruction generation"
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index 1470e8c..7baf2d9 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -402,6 +402,8 @@
EXT_0F_ENCODING_MAP(Pxor, 0x66, 0xEF, REG_DEF0_USE0),
EXT_0F_ENCODING2_MAP(Phaddw, 0x66, 0x38, 0x01, REG_DEF0_USE0),
EXT_0F_ENCODING2_MAP(Phaddd, 0x66, 0x38, 0x02, REG_DEF0_USE0),
+ EXT_0F_ENCODING_MAP(Haddpd, 0x66, 0x7C, REG_DEF0_USE0),  // 66 0F 7C /r
+ EXT_0F_ENCODING_MAP(Haddps, 0xF2, 0x7C, REG_DEF0_USE0),  // F2 0F 7C /r
{ kX86PextrbRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0x3A, 0x14, 0, 0, 1, false }, "PextbRRI", "!0r,!1r,!2d" },
{ kX86PextrwRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0xC5, 0x00, 0, 0, 1, false }, "PextwRRI", "!0r,!1r,!2d" },
@@ -410,6 +412,9 @@
{ kX86PshuflwRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0xF2, 0, 0x0F, 0x70, 0, 0, 0, 1, false }, "PshuflwRRI", "!0r,!1r,!2d" },
{ kX86PshufdRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0x70, 0, 0, 0, 1, false }, "PshuffRRI", "!0r,!1r,!2d" },
+ { kX86ShufpsRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0_USE0 | REG_USE1, { 0x00, 0, 0x0F, 0xC6, 0, 0, 0, 1, false }, "ShufpsRRI", "!0r,!1r,!2d" },  // 0F C6 /r ib; operand 0 is read as well as written (result mixes dst and src elements)
+ { kX86ShufpdRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0_USE0 | REG_USE1, { 0x66, 0, 0x0F, 0xC6, 0, 0, 0, 1, false }, "ShufpdRRI", "!0r,!1r,!2d" },  // 66 0F C6 /r ib; operand 0 is read as well as written (result mixes dst and src elements)
+
{ kX86PsrawRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x71, 0, 4, 0, 1, false }, "PsrawRI", "!0r,!1d" },
{ kX86PsradRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x72, 0, 4, 0, 1, false }, "PsradRI", "!0r,!1d" },
{ kX86PsrlwRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x71, 0, 2, 0, 1, false }, "PsrlwRI", "!0r,!1d" },
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index ff243ce..e271e9d 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -564,11 +564,15 @@
Binary0fOpCode(kX86Pxor), // parallel XOR 128 bits x 1
Binary0fOpCode(kX86Phaddw), // parallel horizontal addition 16 bits x 8
Binary0fOpCode(kX86Phaddd), // parallel horizontal addition 32 bits x 4
+ Binary0fOpCode(kX86Haddpd), // parallel FP horizontal addition 64 bits x 2
+ Binary0fOpCode(kX86Haddps), // parallel FP horizontal addition 32 bits x 4
kX86PextrbRRI, // Extract 8 bits from XMM into GPR
kX86PextrwRRI, // Extract 16 bits from XMM into GPR
kX86PextrdRRI, // Extract 32 bits from XMM into GPR
kX86PshuflwRRI, // Shuffle 16 bits in lower 64 bits of XMM.
kX86PshufdRRI, // Shuffle 32 bits in XMM.
+ kX86ShufpsRRI, // FP Shuffle 32 bits in XMM.
+ kX86ShufpdRRI, // FP Shuffle 64 bits in XMM.
kX86PsrawRI, // signed right shift of floating point registers 16 bits x 8
kX86PsradRI, // signed right shift of floating point registers 32 bits x 4
kX86PsrlwRI, // logical right shift of floating point registers 16 bits x 8
diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc
index 14a5b5f..e6cbf05 100644
--- a/disassembler/disassembler_x86.cc
+++ b/disassembler/disassembler_x86.cc
@@ -641,6 +641,21 @@
store = true;
immediate_bytes = 1;
break;
+ case 0x7C:  // 0F 7C: the prefix selects between haddps (F2) and haddpd (66)
+ if (prefix[0] == 0xF2) {
+ opcode << "haddps";
+ prefix[0] = 0; // clear prefix now it's served its purpose as part of the opcode
+ } else if (prefix[2] == 0x66) {
+ opcode << "haddpd";
+ prefix[2] = 0; // clear prefix now it's served its purpose as part of the opcode
+ } else {
+ opcode << StringPrintf("unknown opcode '0F %02X'", *instr);  // neither the F2 nor the 66 prefix was seen
+ break;  // skips the operand setup below
+ }
+ src_reg_file = dst_reg_file = SSE;  // both operands use the SSE register file
+ has_modrm = true;
+ load = true;
+ break;
case 0x7E:
if (prefix[2] == 0x66) {
src_reg_file = SSE;
@@ -733,6 +748,18 @@
opcode << StringPrintf("unknown opcode '0F %02X'", *instr);
}
break;
+ case 0xC6:  // 0F C6: shufpd when the 66 prefix is present, shufps otherwise
+ if (prefix[2] == 0x66) {
+ opcode << "shufpd";
+ prefix[2] = 0;  // clear prefix now it's served its purpose as part of the opcode
+ } else {
+ opcode << "shufps";  // no prefix is consumed for this form
+ }
+ has_modrm = true;
+ store = true;  // NOTE(review): the 0F 7C case sets `load` for its SSE operands; confirm `store` gives the intended operand order here
+ src_reg_file = dst_reg_file = SSE;  // both operands use the SSE register file
+ immediate_bytes = 1;  // the instruction carries a 1-byte immediate
+ break;
case 0xC7:
static const char* x0FxC7_opcodes[] = { "unknown-0f-c7", "cmpxchg8b", "unknown-0f-c7", "unknown-0f-c7", "unknown-0f-c7", "unknown-0f-c7", "unknown-0f-c7", "unknown-0f-c7" };
modrm_opcodes = x0FxC7_opcodes;