Support mmxext (integer sse) subset on i386 (athlon).

Some processors like the AMD Athlon "Classic" support mmxext,
a sse1 subset. This subset is not properly detected by VEX.
The subset uses the same encoding as the sse1 instructions.

The subset is described at:
  http://support.amd.com/us/Embedded_TechDocs/22466.pdf
  https://en.wikipedia.org/wiki/3DNow!#3DNow.21_extensions

This introduces a new VEX_HWCAPS_X86_MMXEXT that sits between
the baseline (0) and VEX_HWCAPS_X86_SSE1. There is also a new
x86g_dirtyhelper_CPUID_mmxext to mimics a Athlon "Classic"
(Model 2, K75 "Pluto/Orion").

Groups all mmxext instructions together in one block.

git-svn-id: svn://svn.valgrind.org/vex/trunk@2745 8f6e269a-dfd6-0310-a8e1-e2731360e62c
diff --git a/priv/host_x86_defs.c b/priv/host_x86_defs.c
index 21a05a9..693eaa2 100644
--- a/priv/host_x86_defs.c
+++ b/priv/host_x86_defs.c
@@ -727,7 +727,8 @@
    X86Instr* i          = LibVEX_Alloc(sizeof(X86Instr));
    i->tag               = Xin_MFence;
    i->Xin.MFence.hwcaps = hwcaps;
-   vassert(0 == (hwcaps & ~(VEX_HWCAPS_X86_SSE1
+   vassert(0 == (hwcaps & ~(VEX_HWCAPS_X86_MMXEXT
+                            |VEX_HWCAPS_X86_SSE1
                             |VEX_HWCAPS_X86_SSE2
                             |VEX_HWCAPS_X86_SSE3
                             |VEX_HWCAPS_X86_LZCNT)));
@@ -2695,7 +2696,7 @@
          *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF0;
          goto done;
       }
-      if (i->Xin.MFence.hwcaps & VEX_HWCAPS_X86_SSE1) {
+      if (i->Xin.MFence.hwcaps & VEX_HWCAPS_X86_MMXEXT) {
          /* sfence */
          *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF8;
          /* lock addl $0,0(%esp) */