Row functions for Windows use SSSE3 for YUV to RGB; Mac uses SSE3 for RGB to YUV.
Review URL: http://webrtc-codereview.appspot.com/267007

git-svn-id: http://libyuv.googlecode.com/svn/trunk@66 16f28f9a-4ce2-e073-06de-1de4eb20be90
diff --git a/source/row_posix.cc b/source/row_posix.cc
index ad6202e..f355122 100644
--- a/source/row_posix.cc
+++ b/source/row_posix.cc
@@ -253,37 +253,47 @@
 #ifdef HAS_ARGBTOUVROW_SSSE3
 void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
                        uint8* dst_u, uint8* dst_v, int width) {
-  asm volatile(
-  "movdqa     %5,%%xmm7\n"
-  "movdqa     %6,%%xmm6\n"
-  "movdqa     %7,%%xmm5\n"
+ asm volatile(
+  "movdqa     %0,%%xmm4\n"
+  "movdqa     %1,%%xmm3\n"
+  "movdqa     %2,%%xmm5\n"
+  :
+  : "m"(kARGBToU),         // %0
+    "m"(kARGBToV),         // %1
+    "m"(kAddUV128)         // %2
+  :
+#if defined(__SSE2__)
+    "xmm3", "xmm4", "xmm5"
+#endif
+ );
+ asm volatile(
   "sub        %1,%2\n"
 "1:"
   "movdqa     (%0),%%xmm0\n"
   "movdqa     0x10(%0),%%xmm1\n"
   "movdqa     0x20(%0),%%xmm2\n"
-  "movdqa     0x30(%0),%%xmm3\n"
+  "movdqa     0x30(%0),%%xmm6\n"
   "pavgb      (%0,%4,1),%%xmm0\n"
   "pavgb      0x10(%0,%4,1),%%xmm1\n"
   "pavgb      0x20(%0,%4,1),%%xmm2\n"
-  "pavgb      0x30(%0,%4,1),%%xmm3\n"
+  "pavgb      0x30(%0,%4,1),%%xmm6\n"
   "lea        0x40(%0),%0\n"
-  "movdqa     %%xmm0,%%xmm4\n"
+  "movdqa     %%xmm0,%%xmm7\n"
   "shufps     $0x88,%%xmm1,%%xmm0\n"
-  "shufps     $0xdd,%%xmm1,%%xmm4\n"
-  "pavgb      %%xmm4,%%xmm0\n"
-  "movdqa     %%xmm2,%%xmm4\n"
-  "shufps     $0x88,%%xmm3,%%xmm2\n"
-  "shufps     $0xdd,%%xmm3,%%xmm4\n"
-  "pavgb      %%xmm4,%%xmm2\n"
+  "shufps     $0xdd,%%xmm1,%%xmm7\n"
+  "pavgb      %%xmm7,%%xmm0\n"
+  "movdqa     %%xmm2,%%xmm7\n"
+  "shufps     $0x88,%%xmm6,%%xmm2\n"
+  "shufps     $0xdd,%%xmm6,%%xmm7\n"
+  "pavgb      %%xmm7,%%xmm2\n"
   "movdqa     %%xmm0,%%xmm1\n"
-  "movdqa     %%xmm2,%%xmm3\n"
-  "pmaddubsw  %%xmm7,%%xmm0\n"
-  "pmaddubsw  %%xmm7,%%xmm2\n"
-  "pmaddubsw  %%xmm6,%%xmm1\n"
-  "pmaddubsw  %%xmm6,%%xmm3\n"
+  "movdqa     %%xmm2,%%xmm6\n"
+  "pmaddubsw  %%xmm4,%%xmm0\n"
+  "pmaddubsw  %%xmm4,%%xmm2\n"
+  "pmaddubsw  %%xmm3,%%xmm1\n"
+  "pmaddubsw  %%xmm3,%%xmm6\n"
   "phaddw     %%xmm2,%%xmm0\n"
-  "phaddw     %%xmm3,%%xmm1\n"
+  "phaddw     %%xmm6,%%xmm1\n"
   "psraw      $0x8,%%xmm0\n"
   "psraw      $0x8,%%xmm1\n"
   "packsswb   %%xmm1,%%xmm0\n"
@@ -297,13 +307,10 @@
     "+r"(dst_u),           // %1
     "+r"(dst_v),           // %2
     "+rm"(width)           // %3
-  : "r"(static_cast<intptr_t>(src_stride_argb)), // %4
-    "m"(kARGBToU),         // %5
-    "m"(kARGBToV),         // %6
-    "m"(kAddUV128)         // %7
+  : "r"(static_cast<intptr_t>(src_stride_argb))
   : "memory", "cc"
 #if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
+    , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
 #endif
 );
 }