Reduce x86 sequence for GP pair to XMM

Added support for punpckldq, which is useful for interleaving
32-bit values from two XMM registers.

The instruction is now used for transfers from GP register pairs
to XMM registers in order to reduce path length.

Change-Id: I70d9b69449dfcfb9a94a628deb74a7cffe96bac7
Signed-off-by: Razvan A Lupusoru <razvan.a.lupusoru@intel.com>
diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc
index 6d82f0a..ef83498 100644
--- a/disassembler/disassembler_x86.cc
+++ b/disassembler/disassembler_x86.cc
@@ -392,6 +392,17 @@
         has_modrm = true;
         src_reg_file = dst_reg_file = SSE;
         break;
+      case 0x62:
+        if (prefix[2] == 0x66) {
+          src_reg_file = dst_reg_file = SSE;
+          prefix[2] = 0;  // Clear prefix now. It has served its purpose as part of the opcode.
+        } else {
+          src_reg_file = dst_reg_file = MMX;
+        }
+        opcode << "punpckldq";
+        load = true;
+        has_modrm = true;
+        break;
       case 0x6E:
         if (prefix[2] == 0x66) {
           dst_reg_file = SSE;