Patch from Tom Hughes, for bug 72643:

  Patch to improve SSE/SS2 support

  This patch should implement most of the missing SSE/SSE2 opcodes. About
  the only ones it doesn't do are the MASKMOVxxx ones as they are quite
  horrible and involved an implicit reference to EDI so I need to think
  about them a bit more.

  The patch also includes a set of tests for the MMX/SSE/SSE2 opcodes to
  validate that they have the same effect under valgrind as they do when
  run normally. In one or two cases this wasn't actually the case even
  for some of the implemented opcodes, so I fixed those as well ;-)


git-svn-id: svn://svn.valgrind.org/valgrind/trunk@2202 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/cachegrind/cg_main.c b/cachegrind/cg_main.c
index 6264abf..4726c90 100644
--- a/cachegrind/cg_main.c
+++ b/cachegrind/cg_main.c
@@ -544,7 +544,7 @@
 
          case SSE2a_MemRd:
          case SSE2a1_MemRd:
-            sk_assert(u_in->size == 4 || u_in->size == 16 || u_in->size == 512);
+            sk_assert(u_in->size == 4 || u_in->size == 8 || u_in->size == 16 || u_in->size == 512);
             t_read = u_in->val3;
             is_FPU_R = True;
             break;
@@ -556,7 +556,7 @@
             break;
 
          case SSE3a1_MemRd:
-            sk_assert(u_in->size == 16);
+            sk_assert(u_in->size == 8 || u_in->size == 16);
             t_read = u_in->val3;
             is_FPU_R = True;
             break;
@@ -577,7 +577,7 @@
             break;
 
          case SSE2a_MemWr:
-            sk_assert(u_in->size == 4 || u_in->size == 16 || u_in->size == 512);
+            sk_assert(u_in->size == 4 || u_in->size == 8 || u_in->size == 16 || u_in->size == 512);
             t_write = u_in->val3;
             is_FPU_W = True;
             break;
@@ -798,7 +798,7 @@
 
          case SSE2a_MemRd:
          case SSE2a1_MemRd:
-            sk_assert(u_in->size == 4 || u_in->size == 16 || u_in->size == 512);
+            sk_assert(u_in->size == 4 || u_in->size == 8 || u_in->size == 16 || u_in->size == 512);
             t_read = u_in->val3;
             t_read_addr = newTemp(cb);
             uInstr2(cb, MOV, 4, TempReg, u_in->val3,  TempReg, t_read_addr);
@@ -821,7 +821,7 @@
             break;
 
          case SSE3a1_MemRd:
-            sk_assert(u_in->size == 16);
+            sk_assert(u_in->size == 8 || u_in->size == 16);
             t_read = u_in->val3;
             t_read_addr = newTemp(cb);
             uInstr2(cb, MOV, 4, TempReg, u_in->val3,  TempReg, t_read_addr);
@@ -861,7 +861,7 @@
             break;
 
          case SSE2a_MemWr:
-            sk_assert(u_in->size == 4 || u_in->size == 16 || u_in->size == 512);
+            sk_assert(u_in->size == 4 || u_in->size == 8 || u_in->size == 16 || u_in->size == 512);
            /* fall through */
          case SSE3a_MemWr:
             sk_assert(u_in->size == 4 || u_in->size == 8 || u_in->size == 16 || u_in->size == 512);