Revert r7241, which broke tests on most platforms.



git-svn-id: http://skia.googlecode.com/svn/trunk@7245 2bbb7eff-a529-9590-31e7-b0007b416f81
diff --git a/bench/Matrix44Bench.cpp b/bench/Matrix44Bench.cpp
index ac39639..ce5d99d 100644
--- a/bench/Matrix44Bench.cpp
+++ b/bench/Matrix44Bench.cpp
@@ -125,34 +125,12 @@
     typedef Matrix44Bench INHERITED;
 };
 
-class SetConcatMatrix44BenchSpecialCase : public Matrix44Bench {
-public:
-    SetConcatMatrix44BenchSpecialCase(void* param) : INHERITED(param, "setconcat_special") {
-        fX = fY = fZ = SkDoubleToMScalar(1.5);
-        fM1.setScale(fX, fY, fZ);
-        fM2.setTranslate(fX, fY, fZ);
-    }
-protected:
-    virtual void performTest() {
-        fM0.reset();    // just to normalize this test with prescale/postscale
-        for (int i = 0; i < 10; ++i) {
-            fM0.setConcat(fM1, fM2);
-        }
-    }
-private:
-    SkMatrix44 fM0, fM1, fM2;
-    SkMScalar  fX, fY, fZ;
-    typedef Matrix44Bench INHERITED;
-};
-
 class SetConcatMatrix44Bench : public Matrix44Bench {
 public:
     SetConcatMatrix44Bench(void* param) : INHERITED(param, "setconcat") {
         fX = fY = fZ = SkDoubleToMScalar(1.5);
         fM1.setScale(fX, fY, fZ);
-        fM1.set(2, 0, 3.0f);
         fM2.setTranslate(fX, fY, fZ);
-        fM2.set(2, 0, 3.0f);
     }
 protected:
     virtual void performTest() {
@@ -189,7 +167,6 @@
 DEF_BENCH( return new PreScaleMatrix44Bench(p); )
 DEF_BENCH( return new PostScaleMatrix44Bench(p); )
 DEF_BENCH( return new InvertMatrix44Bench(p); )
-DEF_BENCH( return new SetConcatMatrix44BenchSpecialCase(p); )
 DEF_BENCH( return new SetConcatMatrix44Bench(p); )
 DEF_BENCH( return new GetTypeMatrix44Bench(p); )
 
diff --git a/include/utils/SkMatrix44.h b/include/utils/SkMatrix44.h
index 83fb7c9..41f1a30 100644
--- a/include/utils/SkMatrix44.h
+++ b/include/utils/SkMatrix44.h
@@ -51,11 +51,6 @@
     static const SkMScalar SK_MScalarPI = 3.14159265f;
 #endif
 
-#if (defined(__x86_64__) || defined(_M_X64) || defined(__SSE2__)) && \
-defined(SK_MSCALAR_IS_DOUBLE)
-#define SK_MATRIX44_USE_SSE2
-#endif
-
 #define SkMScalarToScalar SkMScalarToFloat
 #define SkScalarToMScalar SkFloatToMScalar
 
@@ -104,11 +99,7 @@
     }
 };
 
-class
-#if defined(SK_MATRIX44_USE_SSE2) && defined(_MSC_VER)
-__declspec(align(16))
-#endif
-SK_API SkMatrix44 {
+class SK_API SkMatrix44 {
 public:
 
     enum Uninitialized_Constructor {
@@ -407,10 +398,6 @@
     inline bool isTriviallyIdentity() const {
         return 0 == fTypeMask;
     }
-}
-#if defined(SK_MATRIX44_USE_SSE2) && !defined(_MSC_VER)
-__attribute__ ((aligned (16)))
-#endif
-;
+};
 
 #endif
diff --git a/src/utils/SkMatrix44.cpp b/src/utils/SkMatrix44.cpp
index 5953da6..1906593 100644
--- a/src/utils/SkMatrix44.cpp
+++ b/src/utils/SkMatrix44.cpp
@@ -343,21 +343,6 @@
     return 0 == (value & ~mask);
 }
 
-#if defined(SK_MATRIX44_USE_SSE2)
-#include <emmintrin.h>
-struct MatrixD {
-	__m128d x_xy, x_zw;
-	__m128d y_xy, y_zw;
-	__m128d z_xy, z_zw;
-	__m128d w_xy, w_zw;
-};
-
-#if defined(_MSC_VER)
-inline __m128d operator +(__m128d a, __m128d b) { return _mm_add_pd(a, b); }
-inline __m128d operator *(__m128d a, __m128d b) { return _mm_mul_pd(a, b); }
-#endif
-#endif
-
 void SkMatrix44::setConcat(const SkMatrix44& a, const SkMatrix44& b) {
     const SkMatrix44::TypeMask a_mask = a.getType();
     const SkMatrix44::TypeMask b_mask = b.getType();
@@ -372,70 +357,19 @@
     }
 
     bool useStorage = (this == &a || this == &b);
-#if defined(SK_MATRIX44_USE_SSE2)
-    MatrixD storage;
-    SkMScalar* result = useStorage ? (SkMScalar*)&storage : &fMat[0][0];
-#else
     SkMScalar storage[16];
     SkMScalar* result = useStorage ? storage : &fMat[0][0];
-#endif
 
     if (bits_isonly(a_mask | b_mask, kScale_Mask | kTranslate_Mask)) {
+        sk_bzero(result, sizeof(storage));
         result[0] = a.fMat[0][0] * b.fMat[0][0];
-        result[1] = 0.0;
-        result[2] = 0.0;
-        result[3] = 0.0;
-        result[4] = 0.0;
         result[5] = a.fMat[1][1] * b.fMat[1][1];
-        result[6] = 0.0;
-        result[7] = 0.0;
-        result[8] = 0.0;
-        result[9] = 0.0;
         result[10] = a.fMat[2][2] * b.fMat[2][2];
-        result[11] = 0.0;
         result[12] = a.fMat[0][0] * b.fMat[3][0] + a.fMat[3][0];
         result[13] = a.fMat[1][1] * b.fMat[3][1] + a.fMat[3][1];
         result[14] = a.fMat[2][2] * b.fMat[3][2] + a.fMat[3][2];
         result[15] = 1;
     } else {
-#if defined(SK_MATRIX44_USE_SSE2)
-        MatrixD* p = (MatrixD*)result;
-        const MatrixD* pa = (const MatrixD*)a.fMat;
-        const MatrixD* pb = (const MatrixD*)b.fMat;
-        __m128d x_xy = pa->x_xy;
-        __m128d x_zw = pa->x_zw;
-        __m128d y_xy = pa->y_xy;
-        __m128d y_zw = pa->y_zw;
-        __m128d z_xy = pa->z_xy;
-        __m128d z_zw = pa->z_zw;
-        __m128d w_xy = pa->w_xy;
-        __m128d w_zw = pa->w_zw;
-        __m128d b0, b1, b2, b3;
-        b0 = _mm_set1_pd(((double*)&pb->x_xy)[0]);
-        b1 = _mm_set1_pd(((double*)&pb->x_xy)[1]);
-        b2 = _mm_set1_pd(((double*)&pb->x_zw)[0]);
-        b3 = _mm_set1_pd(((double*)&pb->x_zw)[1]);
-        p->x_xy = b0 * x_xy + b1 * y_xy + b2 * z_xy + b3 * w_xy;
-        p->x_zw = b0 * x_zw + b1 * y_zw + b2 * z_zw + b3 * w_zw;
-        b0 = _mm_set1_pd(((double*)&pb->y_xy)[0]);
-        b1 = _mm_set1_pd(((double*)&pb->y_xy)[1]);
-        b2 = _mm_set1_pd(((double*)&pb->y_zw)[0]);
-        b3 = _mm_set1_pd(((double*)&pb->y_zw)[1]);
-        p->y_xy = b0 * x_xy + b1 * y_xy + b2 * z_xy + b3 * w_xy;
-        p->y_zw = b0 * x_zw + b1 * y_zw + b2 * z_zw + b3 * w_zw;
-        b0 = _mm_set1_pd(((double*)&pb->z_xy)[0]);
-        b1 = _mm_set1_pd(((double*)&pb->z_xy)[1]);
-        b2 = _mm_set1_pd(((double*)&pb->z_zw)[0]);
-        b3 = _mm_set1_pd(((double*)&pb->z_zw)[1]);
-        p->z_xy = b0 * x_xy + b1 * y_xy + b2 * z_xy + b3 * w_xy;
-        p->z_zw = b0 * x_zw + b1 * y_zw + b2 * z_zw + b3 * w_zw;
-        b0 = _mm_set1_pd(((double*)&pb->w_xy)[0]);
-        b1 = _mm_set1_pd(((double*)&pb->w_xy)[1]);
-        b2 = _mm_set1_pd(((double*)&pb->w_zw)[0]);
-        b3 = _mm_set1_pd(((double*)&pb->w_zw)[1]);
-        p->w_xy = b0 * x_xy + b1 * y_xy + b2 * z_xy + b3 * w_xy;
-        p->w_zw = b0 * x_zw + b1 * y_zw + b2 * z_zw + b3 * w_zw;
-#else
         for (int j = 0; j < 4; j++) {
             for (int i = 0; i < 4; i++) {
                 double value = 0;
@@ -445,11 +379,10 @@
                 *result++ = SkDoubleToMScalar(value);
             }
         }
-#endif
     }
 
     if (useStorage) {
-        memcpy(fMat, result, sizeof(storage));
+        memcpy(fMat, storage, sizeof(storage));
     }
     this->dirtyTypeMask();
 }