add getType() to SkMatrix44, to cache how complex the matrix is.
add bench

optimize operator== by performing 4 compares in a row before branching on the result
optimize setconcat by noting when we can write the answer directly into this

At least on this macbook, I had to mark helpers like isIdentity() as inline to get them inlined.
Review URL: https://codereview.appspot.com/6863053

git-svn-id: http://skia.googlecode.com/svn/trunk@6655 2bbb7eff-a529-9590-31e7-b0007b416f81
diff --git a/bench/Matrix44Bench.cpp b/bench/Matrix44Bench.cpp
new file mode 100644
index 0000000..f10870c
--- /dev/null
+++ b/bench/Matrix44Bench.cpp
@@ -0,0 +1,140 @@
+/*
+ * Copyright 2012 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "SkBenchmark.h"
+#include "SkMatrix44.h"
+#include "SkRandom.h"
+#include "SkString.h"
+
+class Matrix44Bench : public SkBenchmark {  // shared driver for the SkMatrix44 benchmarks defined below
+    SkString    fName;                      // benchmark name, "matrix44_" + the suffix passed to the ctor
+    enum { N = 10000 };                     // base performTest() call count, before mulLoopCount()/SkBENCHLOOP
+public:
+    Matrix44Bench(void* param, const char name[]) : INHERITED(param) {
+        fName.printf("matrix44_%s", name);
+        fIsRendering = false;               // NOTE(review): presumably marks this as a non-drawing bench -- confirm in SkBenchmark
+    }
+
+    virtual void performTest() = 0;         // the operation being timed; supplied by each subclass
+
+protected:
+    virtual int mulLoopCount() const { return 1; }  // subclasses may override to multiply the loop count
+
+    virtual const char* onGetName() {
+        return fName.c_str();
+    }
+
+    virtual void onDraw(SkCanvas* canvas) {  // canvas is unused; only performTest() is exercised
+        int n = SkBENCHLOOP(N * this->mulLoopCount());
+        for (int i = 0; i < n; i++) {
+            this->performTest();
+        }
+    }
+
+private:
+    typedef SkBenchmark INHERITED;
+};
+
+class EqualsMatrix44Bench : public Matrix44Bench {  // times SkMatrix44::operator==
+public:
+    EqualsMatrix44Bench(void* param) : INHERITED(param, "equals") {
+        fM1.set(0, 0, 0);   // differs from identity in fMat[0][0], the first element operator== compares
+        fM2.set(3, 3, 0);   // differs from identity in fMat[3][3], the last element operator== compares
+    }
+protected:
+    virtual void performTest() {
+        for (int i = 0; i < 10; ++i) {
+            fM0 == fM1;     // results are intentionally discarded; only the call cost matters
+            fM1 == fM2;
+            fM2 == fM0;
+        }
+    }
+private:
+    SkMatrix44 fM0, fM1, fM2;   // fM0 stays the default-constructed identity
+    typedef Matrix44Bench INHERITED;
+};
+
+class PreScaleMatrix44Bench : public Matrix44Bench {  // times SkMatrix44::preScale(sx, sy, sz)
+public:
+    PreScaleMatrix44Bench(void* param) : INHERITED(param, "prescale") {
+        fX = fY = fZ = SkDoubleToMScalar(1.5);  // same non-unit factor on every axis
+    }
+protected:
+    virtual void performTest() {
+        fM0.reset();                            // restart from identity on every call
+        for (int i = 0; i < 10; ++i) {
+            fM0.preScale(fX, fY, fZ);
+        }
+    }
+private:
+    SkMatrix44 fM0;
+    SkMScalar  fX, fY, fZ;
+    typedef Matrix44Bench INHERITED;
+};
+
+class PostScaleMatrix44Bench : public Matrix44Bench {  // times SkMatrix44::postScale(sx, sy, sz)
+public:
+    PostScaleMatrix44Bench(void* param) : INHERITED(param, "postscale") {
+        fX = fY = fZ = SkDoubleToMScalar(1.5);  // same non-unit factor on every axis
+    }
+protected:
+    virtual void performTest() {
+        fM0.reset();                            // restart from identity on every call
+        for (int i = 0; i < 10; ++i) {
+            fM0.postScale(fX, fY, fZ);
+        }
+    }
+private:
+    SkMatrix44 fM0;
+    SkMScalar  fX, fY, fZ;
+    typedef Matrix44Bench INHERITED;
+};
+
+class SetConcatMatrix44Bench : public Matrix44Bench {  // times SkMatrix44::setConcat(a, b)
+public:
+    SetConcatMatrix44Bench(void* param) : INHERITED(param, "setconcat") {
+        fX = fY = fZ = SkDoubleToMScalar(1.5);
+        fM1.setScale(fX, fY, fZ);       // non-identity scale operand
+        fM2.setTranslate(fX, fY, fZ);   // non-identity translate operand
+    }
+protected:
+    virtual void performTest() {
+        fM0.reset();    // just to normalize this test with prescale/postscale
+        for (int i = 0; i < 10; ++i) {
+            fM0.setConcat(fM1, fM2);    // destination is distinct from both inputs
+        }
+    }
+private:
+    SkMatrix44 fM0, fM1, fM2;
+    SkMScalar  fX, fY, fZ;
+    typedef Matrix44Bench INHERITED;
+};
+
+class GetTypeMatrix44Bench : public Matrix44Bench {  // times the uncached path of SkMatrix44::getType()
+public:
+    GetTypeMatrix44Bench(void* param) : INHERITED(param, "gettype") {}
+protected:
+    // Putting random generation of the matrix inside performTest()
+    // would help us avoid anomalous runs, but takes up 25% or
+    // more of the function time.
+    virtual void performTest() {
+        for (int i = 0; i < 20; ++i) {
+            fMatrix.set(1, 2, 1);   // to invalidate the type-cache
+            fMatrix.getType();      // mask was just marked unknown, so this recomputes it
+        }
+    }
+private:
+    SkMatrix44 fMatrix;
+    typedef Matrix44Bench INHERITED;
+};
+
+DEF_BENCH( return new EqualsMatrix44Bench(p); )
+DEF_BENCH( return new PreScaleMatrix44Bench(p); )
+DEF_BENCH( return new PostScaleMatrix44Bench(p); )
+DEF_BENCH( return new SetConcatMatrix44Bench(p); )
+DEF_BENCH( return new GetTypeMatrix44Bench(p); )
+
diff --git a/gyp/bench.gypi b/gyp/bench.gypi
index bc90c5e..dbb30b6 100644
--- a/gyp/bench.gypi
+++ b/gyp/bench.gypi
@@ -21,6 +21,7 @@
     '../bench/InterpBench.cpp',
     '../bench/LineBench.cpp',
     '../bench/MathBench.cpp',
+    '../bench/Matrix44Bench.cpp',
     '../bench/MatrixBench.cpp',
     '../bench/MatrixConvolutionBench.cpp',
     '../bench/MemoryBench.cpp',
diff --git a/include/core/SkPostConfig.h b/include/core/SkPostConfig.h
index 9a5c54a..12fe87d 100644
--- a/include/core/SkPostConfig.h
+++ b/include/core/SkPostConfig.h
@@ -33,7 +33,9 @@
 #if defined(SK_MSCALAR_IS_DOUBLE) && defined(SK_MSCALAR_IS_FLOAT)
     #error "cannot define both SK_MSCALAR_IS_DOUBLE and SK_MSCALAR_IS_FLOAT"
 #elif !defined(SK_MSCALAR_IS_DOUBLE) && !defined(SK_MSCALAR_IS_FLOAT)
-    #define SK_MSCALAR_IS_FLOAT
+    // default is double, as that is faster given our impl uses doubles
+    // for intermediate calculations.
+    #define SK_MSCALAR_IS_DOUBLE
 #endif
 
 #if defined(SK_CPU_LENDIAN) && defined(SK_CPU_BENDIAN)
diff --git a/include/utils/SkMatrix44.h b/include/utils/SkMatrix44.h
index 67486b3..79bc700 100644
--- a/include/utils/SkMatrix44.h
+++ b/include/utils/SkMatrix44.h
@@ -1,4 +1,3 @@
-
 /*
  * Copyright 2011 Google Inc.
  *
@@ -6,8 +5,6 @@
  * found in the LICENSE file.
  */
 
-
-
 #ifndef SkMatrix44_DEFINED
 #define SkMatrix44_DEFINED
 
@@ -15,7 +12,12 @@
 #include "SkScalar.h"
 
 #ifdef SK_MSCALAR_IS_DOUBLE
+#ifdef SK_MSCALAR_IS_FLOAT
+    #error "can't define MSCALAR both as DOUBLE and FLOAT"
+#endif
     typedef double SkMScalar;
+    typedef int64_t SkMIntScalar;
+
     static inline double SkFloatToMScalar(float x) {
         return static_cast<double>(x);
     }
@@ -30,7 +32,12 @@
     }
     static const SkMScalar SK_MScalarPI = 3.141592653589793;
 #elif defined SK_MSCALAR_IS_FLOAT
+#ifdef SK_MSCALAR_IS_DOUBLE
+    #error "can't define MSCALAR both as DOUBLE and FLOAT"
+#endif
     typedef float SkMScalar;
+    typedef int32_t SkMIntScalar;
+    
     static inline float SkFloatToMScalar(float x) {
         return x;
     }
@@ -96,11 +103,12 @@
 
 class SK_API SkMatrix44 {
 public:
-    SkMatrix44();
+    SkMatrix44() { this->setIdentity(); }
     SkMatrix44(const SkMatrix44&);
     SkMatrix44(const SkMatrix44& a, const SkMatrix44& b);
 
     SkMatrix44& operator=(const SkMatrix44& src) {
+        SkASSERT(sizeof(src) == sizeof(fMat) + sizeof(SkMIntScalar));
         memcpy(this, &src, sizeof(*this));
         return *this;
     }
@@ -114,23 +122,70 @@
     SkMatrix44& operator=(const SkMatrix& src);
     operator SkMatrix() const;
 
-    SkMScalar get(int row, int col) const {
+    /**
+     *  Return a reference to a const identity matrix
+     */
+    static const SkMatrix44& I();
+
+    enum TypeMask {
+        kIdentity_Mask      = 0,
+        kTranslate_Mask     = 0x01,  //!< set if the matrix has translation
+        kScale_Mask         = 0x02,  //!< set if the matrix has any scale != 1
+        kAffine_Mask        = 0x04,  //!< set if the matrix skews or rotates
+        kPerspective_Mask   = 0x08   //!< set if the matrix is in perspective
+    };
+    
+    /**
+     *  Returns a bitfield describing the transformations the matrix may
+     *  perform. The bitfield is computed conservatively, so it may include
+     *  false positives. For example, when kPerspective_Mask is true, all
+     *  other bits may be set to true even in the case of a pure perspective
+     *  transform.
+     */
+    inline TypeMask getType() const {
+        if (fTypeMask & kUnknown_Mask) {
+            fTypeMask = this->computeTypeMask();
+        }
+        SkASSERT(!(fTypeMask & kUnknown_Mask));
+        return (TypeMask)fTypeMask;
+    }
+
+    inline bool isIdentity() const {
+        return 0 == this->getType();
+    }
+    
+    void setIdentity();
+    inline void reset() { this->setIdentity();}
+
+    /**
+     *  get a value from the matrix. The row,col parameters work as follows:
+     *  (0, 0)  scale-x
+     *  (0, 3)  translate-x
+     *  (3, 0)  perspective-x
+     */
+    inline SkMScalar get(int row, int col) const {
         SkASSERT((unsigned)row <= 3);
         SkASSERT((unsigned)col <= 3);
         return fMat[col][row];
     }
 
-    void set(int row, int col, SkMScalar value) {
+    /**
+     *  set a value in the matrix. The row,col parameters work as follows:
+     *  (0, 0)  scale-x
+     *  (0, 3)  translate-x
+     *  (3, 0)  perspective-x
+     */
+    inline void set(int row, int col, SkMScalar value) {
         SkASSERT((unsigned)row <= 3);
         SkASSERT((unsigned)col <= 3);
         fMat[col][row] = value;
-        fIdentity = false;
+        this->dirtyTypeMask();
     }
 
-    double getDouble(int row, int col) const {
+    inline double getDouble(int row, int col) const {
         return SkMScalarToDouble(this->get(row, col));
     }
-    void setDouble(int row, int col, double value) {
+    inline void setDouble(int row, int col, double value) {
         this->set(row, col, SkDoubleToMScalar(value));
     }
 
@@ -154,9 +209,13 @@
     void setRowMajorf(const float[]);
     void setRowMajord(const double[]);
 
-    bool isIdentity() const;
-    void setIdentity();
-    void reset() { this->setIdentity();}
+#ifdef SK_MSCALAR_IS_FLOAT
+    void setColMajor(const SkMScalar data[]) { this->setColMajorf(data); }
+    void setRowMajor(const SkMScalar data[]) { this->setRowMajorf(data); }
+#else
+    void setColMajor(const SkMScalar data[]) { this->setColMajord(data); }
+    void setRowMajor(const SkMScalar data[]) { this->setRowMajord(data); }
+#endif
 
     void set3x3(SkMScalar m00, SkMScalar m01, SkMScalar m02,
                 SkMScalar m10, SkMScalar m11, SkMScalar m12,
@@ -170,13 +229,13 @@
     void preScale(SkMScalar sx, SkMScalar sy, SkMScalar sz);
     void postScale(SkMScalar sx, SkMScalar sy, SkMScalar sz);
 
-    void setScale(SkMScalar scale) {
+    inline void setScale(SkMScalar scale) {
         this->setScale(scale, scale, scale);
     }
-    void preScale(SkMScalar scale) {
+    inline void preScale(SkMScalar scale) {
         this->preScale(scale, scale, scale);
     }
-    void postScale(SkMScalar scale) {
+    inline void postScale(SkMScalar scale) {
         this->postScale(scale, scale, scale);
     }
 
@@ -197,10 +256,10 @@
                             SkMScalar radians);
 
     void setConcat(const SkMatrix44& a, const SkMatrix44& b);
-    void preConcat(const SkMatrix44& m) {
+    inline void preConcat(const SkMatrix44& m) {
         this->setConcat(*this, m);
     }
-    void postConcat(const SkMatrix44& m) {
+    inline void postConcat(const SkMatrix44& m) {
         this->setConcat(m, *this);
     }
 
@@ -220,7 +279,7 @@
         It is legal for src and dst to point to the same memory.
      */
     void mapScalars(const SkScalar src[4], SkScalar dst[4]) const;
-    void mapScalars(SkScalar vec[4]) const {
+    inline void mapScalars(SkScalar vec[4]) const {
         this->mapScalars(vec, vec);
     }
 
@@ -236,11 +295,11 @@
 #ifdef SK_MSCALAR_IS_DOUBLE
     void mapMScalars(const SkMScalar src[4], SkMScalar dst[4]) const;
 #elif defined SK_MSCALAR_IS_FLOAT
-    void mapMScalars(const SkMScalar src[4], SkMScalar dst[4]) const {
+    inline void mapMScalars(const SkMScalar src[4], SkMScalar dst[4]) const {
         this->mapScalars(src, dst);
     }
 #endif
-    void mapMScalars(SkMScalar vec[4]) const {
+    inline void mapMScalars(SkMScalar vec[4]) const {
         this->mapMScalars(vec, vec);
     }
 
@@ -255,14 +314,48 @@
     double determinant() const;
 
 private:
-    /*  Stored in the same order as opengl:
-         [3][0] = tx
-         [3][1] = ty
-         [3][2] = tz
-     */
-    SkMScalar fMat[4][4];
+    SkMScalar               fMat[4][4];
+    // we use SkMIntScalar instead of just int, as we want to ensure that
+    // we are always packed with no extra bits, allowing us to call memcpy
+    // without fear of copying uninitialized bits.
+    mutable SkMIntScalar    fTypeMask;
+    
+    enum {
+        kUnknown_Mask = 0x80,
 
-    bool fIdentity;
+        kAllPublic_Masks = 0xF
+    };
+
+    SkMScalar transX() const { return fMat[3][0]; }
+    SkMScalar transY() const { return fMat[3][1]; }
+    SkMScalar transZ() const { return fMat[3][2]; }
+
+    SkMScalar scaleX() const { return fMat[0][0]; }
+    SkMScalar scaleY() const { return fMat[1][1]; }
+    SkMScalar scaleZ() const { return fMat[2][2]; }
+    
+    SkMScalar perspX() const { return fMat[0][3]; }
+    SkMScalar perspY() const { return fMat[1][3]; }
+    SkMScalar perspZ() const { return fMat[2][3]; }
+
+    int computeTypeMask() const;
+
+    inline void dirtyTypeMask() {
+        fTypeMask = kUnknown_Mask;
+    }
+
+    inline void setTypeMask(int mask) {
+        SkASSERT(0 == (~(kAllPublic_Masks | kUnknown_Mask) & mask));
+        fTypeMask = mask;
+    }
+
+    /**
+     *  Does not take the time to 'compute' the typemask. Only returns true if
+     *  we already know that this matrix is identity.
+     */
+    inline bool isTriviallyIdentity() const {
+        return 0 == fTypeMask;
+    }
 };
 
 #endif
diff --git a/src/utils/SkMatrix44.cpp b/src/utils/SkMatrix44.cpp
index 43caacd..489a550 100644
--- a/src/utils/SkMatrix44.cpp
+++ b/src/utils/SkMatrix44.cpp
@@ -10,10 +10,6 @@
 
 #include "SkMatrix44.h"
 
-SkMatrix44::SkMatrix44() {
-    this->setIdentity();
-}
-
 SkMatrix44::SkMatrix44(const SkMatrix44& src) {
     memcpy(this, &src, sizeof(src));
 }
@@ -22,18 +18,69 @@
     this->setConcat(a, b);
 }
 
-bool SkMatrix44::operator==(const SkMatrix44& other) const {
-    if (fIdentity && other.fIdentity)
-        return true;
+static inline bool eq4(const SkMScalar* SK_RESTRICT a,   // returns true iff a[0..3] equal b[0..3]
+                      const SkMScalar* SK_RESTRICT b) {
+    return (a[0] == b[0]) & (a[1] == b[1]) & (a[2] == b[2]) & (a[3] == b[3]);  // bitwise & (not &&): all four compares are evaluated, no short-circuit
+}
 
-    const SkMScalar* a = &fMat[0][0];
-    const SkMScalar* b = &other.fMat[0][0];
+bool SkMatrix44::operator==(const SkMatrix44& other) const {
+    if (this == &other) {
+        return true;
+    }
+
+    if (this->isTriviallyIdentity() && other.isTriviallyIdentity()) {
+        return true;
+    }
+
+    const SkMScalar* SK_RESTRICT a = &fMat[0][0];
+    const SkMScalar* SK_RESTRICT b = &other.fMat[0][0];
+
+#if 0
     for (int i = 0; i < 16; ++i) {
         if (a[i] != b[i]) {
             return false;
         }
     }
     return true;
+#else
+    // to reduce branch instructions, we compare 4 at a time.
+    // see bench/Matrix44Bench.cpp for test.
+    if (!eq4(&a[0], &b[0])) {
+        return false;
+    }
+    if (!eq4(&a[4], &b[4])) {
+        return false;
+    }
+    if (!eq4(&a[8], &b[8])) {
+        return false;
+    }
+    return eq4(&a[12], &b[12]);
+#endif
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+int SkMatrix44::computeTypeMask() const {  // derives the TypeMask bits from the current fMat contents
+    unsigned mask = 0;
+    // Any non-default perspective entry: conservatively report every public bit.
+    if (0 != perspX() || 0 != perspY() || 0 != perspZ() || 1 != fMat[3][3]) {
+        return kTranslate_Mask | kScale_Mask | kAffine_Mask | kPerspective_Mask;
+    }
+
+    if (0 != transX() || 0 != transY() || 0 != transZ()) {
+        mask |= kTranslate_Mask;
+    }
+
+    if (1 != scaleX() || 1 != scaleY() || 1 != scaleZ()) {  // scale is "present" when a diagonal entry differs from 1
+        mask |= kScale_Mask;
+    }
+
+    if (0 != fMat[1][0] || 0 != fMat[0][1] || 0 != fMat[0][2] ||  // off-diagonal entries of the upper-left 3x3
+        0 != fMat[2][0] || 0 != fMat[1][2] || 0 != fMat[2][1]) {
+            mask |= kAffine_Mask;
+    }
+
+    return mask;  // never contains kUnknown_Mask
 }
 
 ///////////////////////////////////////////////////////////////////////////////
@@ -93,7 +140,8 @@
 #elif defined SK_MSCALAR_IS_FLOAT
     memcpy(dst, src, 16 * sizeof(float));
 #endif
-    fIdentity = false;
+
+    this->dirtyTypeMask();
 }
 
 void SkMatrix44::setColMajord(const double src[]) {
@@ -105,7 +153,8 @@
         dst[i] = SkDoubleToMScalar(src[i]);
     }
 #endif
-    fIdentity = false;
+
+    this->dirtyTypeMask();
 }
 
 void SkMatrix44::setRowMajorf(const float src[]) {
@@ -118,7 +167,7 @@
         src += 4;
         dst += 1;
     }
-    fIdentity = false;
+    this->dirtyTypeMask();
 }
 
 void SkMatrix44::setRowMajord(const double src[]) {
@@ -131,41 +180,30 @@
         src += 4;
         dst += 1;
     }
-    fIdentity = false;
+    this->dirtyTypeMask();
 }
 
 ///////////////////////////////////////////////////////////////////////////////
 
-bool SkMatrix44::isIdentity() const {
-    if (fIdentity)
-        return true;
-
-    static const SkMScalar  sIdentityMat[4][4] = {
-        { 1, 0, 0, 0 },
-        { 0, 1, 0, 0 },
-        { 0, 0, 1, 0 },
-        { 0, 0, 0, 1 },
-    };
-    return !memcmp(fMat, sIdentityMat, sizeof(fMat));
+const SkMatrix44& SkMatrix44::I() {
+    static SkMatrix44 gIdentity;
+    return gIdentity;
 }
 
-///////////////////////////////////////////////////////////////////////////////
-
 void SkMatrix44::setIdentity() {
     sk_bzero(fMat, sizeof(fMat));
     fMat[0][0] = fMat[1][1] = fMat[2][2] = fMat[3][3] = 1;
-    fIdentity = true;
+    this->setTypeMask(kIdentity_Mask);
 }
 
 void SkMatrix44::set3x3(SkMScalar m00, SkMScalar m01, SkMScalar m02,
                         SkMScalar m10, SkMScalar m11, SkMScalar m12,
                         SkMScalar m20, SkMScalar m21, SkMScalar m22) {
-    sk_bzero(fMat, sizeof(fMat));
     fMat[0][0] = m00; fMat[0][1] = m01; fMat[0][2] = m02; fMat[0][3] = 0;
     fMat[1][0] = m10; fMat[1][1] = m11; fMat[1][2] = m12; fMat[1][3] = 0;
     fMat[2][0] = m20; fMat[2][1] = m21; fMat[2][2] = m22; fMat[2][3] = 0;
     fMat[3][0] = 0;   fMat[3][1] = 0;   fMat[3][2] = 0;   fMat[3][3] = 1;
-    fIdentity = false;
+    this->dirtyTypeMask();
 }
 
 ///////////////////////////////////////////////////////////////////////////////
@@ -176,7 +214,12 @@
     fMat[3][1] = ty;
     fMat[3][2] = tz;
     fMat[3][3] = 1;
-    fIdentity = false;
+    
+    int mask = kIdentity_Mask;
+    if (0 != tx || 0 != ty || 0 != tz) {
+        mask |= kTranslate_Mask;
+    }
+    this->setTypeMask(mask);
 }
 
 void SkMatrix44::preTranslate(SkMScalar dx, SkMScalar dy, SkMScalar dz) {
@@ -189,7 +232,7 @@
     fMat[3][0] += dx;
     fMat[3][1] += dy;
     fMat[3][2] += dz;
-    fIdentity = false;
+    this->dirtyTypeMask();
 }
 
 ///////////////////////////////////////////////////////////////////////////////
@@ -200,7 +243,12 @@
     fMat[1][1] = sy;
     fMat[2][2] = sz;
     fMat[3][3] = 1;
-    fIdentity = false;
+    
+    int mask = kIdentity_Mask;
+    if (1 != sx || 1 != sy || 1 != sz) {   // unit scale is identity; any other factor, including 0, is a scale
+        mask |= kScale_Mask;
+    }
+    this->setTypeMask(mask);
 }
 
 void SkMatrix44::preScale(SkMScalar sx, SkMScalar sy, SkMScalar sz) {
@@ -215,16 +263,16 @@
         fMat[i][1] *= sy;
         fMat[i][2] *= sz;
     }
-    fIdentity = false;
+    this->dirtyTypeMask();
 }
 
 ///////////////////////////////////////////////////////////////////////////////
 
 void SkMatrix44::setRotateAbout(SkMScalar x, SkMScalar y, SkMScalar z,
                                 SkMScalar radians) {
-    double len2 = x * x + y * y + z * z;
-    if (len2 != 1) {
-        if (len2 == 0) {
+    double len2 = (double)x * x + (double)y * y + (double)z * z;
+    if (1 != len2) {
+        if (0 == len2) {
             this->setIdentity();
             return;
         }
@@ -268,18 +316,33 @@
 ///////////////////////////////////////////////////////////////////////////////
 
 void SkMatrix44::setConcat(const SkMatrix44& a, const SkMatrix44& b) {
-    SkMScalar result[4][4];
-    for (int i = 0; i < 4; i++) {
-        for (int j = 0; j < 4; j++) {
+    if (a.isIdentity()) {
+        *this = b;
+        return;
+    }
+    if (b.isIdentity()) {
+        *this = a;
+        return;
+    }
+
+    bool useStorage = (this == &a || this == &b);
+    SkMScalar storage[16];
+    SkMScalar* result = useStorage ? storage : &fMat[0][0];
+
+    for (int j = 0; j < 4; j++) {
+        for (int i = 0; i < 4; i++) {
             double value = 0;
             for (int k = 0; k < 4; k++) {
                 value += SkMScalarToDouble(a.fMat[k][i]) * b.fMat[j][k];
             }
-            result[j][i] = SkDoubleToMScalar(value);
+            *result++ = SkDoubleToMScalar(value);
         }
     }
-    memcpy(fMat, result, sizeof(result));
-    fIdentity = false;
+    if (useStorage) {
+        memcpy(fMat, storage, sizeof(storage));
+    }
+
+    this->dirtyTypeMask();
 }
 
 ///////////////////////////////////////////////////////////////////////////////
@@ -328,10 +391,20 @@
 }
 
 bool SkMatrix44::invert(SkMatrix44* inverse) const {
+    if (this->isTriviallyIdentity()) {   // identity is its own inverse, no determinant needed
+        if (inverse) {
+            *inverse = *this;
+        }
+        return true;                     // also succeed when the caller passed NULL
+    }
+
     double det = this->determinant();
     if (dabs(det) < TOO_SMALL_FOR_DETERMINANT) {
         return false;
     }
+
+    // We know we will succeed, so return early if the caller doesn't actually
+    // want the computed inverse.
     if (NULL == inverse) {
         return true;
     }
@@ -380,7 +453,7 @@
             inverse->fMat[i][j] = SkDoubleToMScalar(tmp[i][j] * invDet);
         }
     }
-    inverse->fIdentity = false;
+    inverse->dirtyTypeMask();
     return true;
 }
 
@@ -393,12 +466,18 @@
     SkTSwap(fMat[1][2], fMat[2][1]);
     SkTSwap(fMat[1][3], fMat[3][1]);
     SkTSwap(fMat[2][3], fMat[3][2]);
+
+    if (!this->isTriviallyIdentity()) {
+        this->dirtyTypeMask();
+    }
 }
 
 ///////////////////////////////////////////////////////////////////////////////
 
 void SkMatrix44::mapScalars(const SkScalar src[4], SkScalar dst[4]) const {
-    SkScalar result[4];
+    SkScalar storage[4];
+    SkScalar* result = (src == dst) ? storage : dst;
+
     for (int i = 0; i < 4; i++) {
         SkMScalar value = 0;
         for (int j = 0; j < 4; j++) {
@@ -406,21 +485,31 @@
         }
         result[i] = SkMScalarToScalar(value);
     }
-    memcpy(dst, result, sizeof(result));
+    
+    if (storage == result) {
+        memcpy(dst, storage, sizeof(storage));
+    }
 }
 
 #ifdef SK_MSCALAR_IS_DOUBLE
+
 void SkMatrix44::mapMScalars(const SkMScalar src[4], SkMScalar dst[4]) const {
-    SkMScalar result[4];
+    SkMScalar storage[4];
+    SkMScalar* result = (src == dst) ? storage : dst;
+    
     for (int i = 0; i < 4; i++) {
         SkMScalar value = 0;
         for (int j = 0; j < 4; j++) {
             value += fMat[j][i] * src[j];
         }
-        result[i] = SkMScalarToScalar(value);
+        result[i] = value;
     }
-    memcpy(dst, result, sizeof(result));
+    
+    if (storage == result) {
+        memcpy(dst, storage, sizeof(storage));
+    }
 }
+
 #endif
 
 ///////////////////////////////////////////////////////////////////////////////
@@ -445,6 +534,8 @@
 
 ///////////////////////////////////////////////////////////////////////////////
 
+// TODO: make this support src's perspective elements
+//
 static void initFromMatrix(SkMScalar dst[4][4], const SkMatrix& src) {
     sk_bzero(dst, 16 * sizeof(SkMScalar));
     dst[0][0] = SkScalarToMScalar(src[SkMatrix::kMScaleX]);
@@ -462,10 +553,17 @@
 
 SkMatrix44& SkMatrix44::operator=(const SkMatrix& src) {
     initFromMatrix(fMat, src);
-    fIdentity = src.isIdentity();
+
+    if (src.isIdentity()) {
+        this->setTypeMask(kIdentity_Mask);
+    } else {
+        this->dirtyTypeMask();
+    }
     return *this;
 }
 
+// TODO: make this support our perspective elements
+//
 SkMatrix44::operator SkMatrix() const {
     SkMatrix dst;
     dst.reset();    // setup our perspective correctly for identity
diff --git a/tests/Matrix44Test.cpp b/tests/Matrix44Test.cpp
index ea6a56f..ed8770a 100644
--- a/tests/Matrix44Test.cpp
+++ b/tests/Matrix44Test.cpp
@@ -1,10 +1,10 @@
-
 /*
  * Copyright 2011 Google Inc.
  *
  * Use of this source code is governed by a BSD-style license that can be
  * found in the LICENSE file.
  */
+
 #include "Test.h"
 #include "SkMatrix44.h"
 
@@ -72,6 +72,30 @@
     return nearly_equal(m, identity);
 }
 
+static void test_gettype(skiatest::Reporter* reporter) {  // checks getType() after each kind of element is modified
+    SkMatrix44 matrix;
+    
+    REPORTER_ASSERT(reporter, matrix.isIdentity());
+    REPORTER_ASSERT(reporter, SkMatrix44::kIdentity_Mask == matrix.getType());
+    
+    int expectedMask;
+
+    matrix.set(1, 1, 0);    // scale-y becomes != 1
+    expectedMask = SkMatrix44::kScale_Mask;
+    REPORTER_ASSERT(reporter, matrix.getType() == expectedMask);
+
+    matrix.set(0, 3, 1);    // translate-x
+    expectedMask |= SkMatrix44::kTranslate_Mask;
+    REPORTER_ASSERT(reporter, matrix.getType() == expectedMask);
+
+    matrix.set(2, 0, 1);    // off-diagonal entry of the upper-left 3x3
+    expectedMask |= SkMatrix44::kAffine_Mask;
+    REPORTER_ASSERT(reporter, matrix.getType() == expectedMask);
+    
+    matrix.set(3, 2, 1);    // row 3 holds the perspective entries; other bits may also be set, so only test this one
+    REPORTER_ASSERT(reporter, matrix.getType() & SkMatrix44::kPerspective_Mask);
+}
+
 static void test_common_angles(skiatest::Reporter* reporter) {
     SkMatrix44 rot;
     // Test precision of rotation in common cases
@@ -125,25 +149,25 @@
 static void test_determinant(skiatest::Reporter* reporter) {
     SkMatrix44 a;
     REPORTER_ASSERT(reporter, nearly_equal_double(1, a.determinant()));
-    a.set(1, 1, SkFloatToMScalar(2));
+    a.set(1, 1, 2);
     REPORTER_ASSERT(reporter, nearly_equal_double(2, a.determinant()));
     SkMatrix44 b;
     REPORTER_ASSERT(reporter, a.invert(&b));
     REPORTER_ASSERT(reporter, nearly_equal_double(0.5, b.determinant()));
     SkMatrix44 c = b = a;
-    c.set(0, 1, SkFloatToMScalar(4));
-    b.set(1, 0, SkFloatToMScalar(4));
+    c.set(0, 1, 4);
+    b.set(1, 0, 4);
     REPORTER_ASSERT(reporter,
                     nearly_equal_double(a.determinant(),
                                         b.determinant()));
     SkMatrix44 d = a;
-    d.set(0, 0, SkFloatToMScalar(8));
+    d.set(0, 0, 8);
     REPORTER_ASSERT(reporter, nearly_equal_double(16, d.determinant()));
 
     SkMatrix44 e = a;
     e.postConcat(d);
     REPORTER_ASSERT(reporter, nearly_equal_double(32, e.determinant()));
-    e.set(0, 0, SkFloatToMScalar(0));
+    e.set(0, 0, 0);
     REPORTER_ASSERT(reporter, nearly_equal_double(0, e.determinant()));
 }
 
@@ -180,9 +204,12 @@
 
 static void test_set_row_col_major(skiatest::Reporter* reporter) {
     SkMatrix44 a, b, c, d;
-    for (int row = 0; row < 4; ++row)
-        for (int col = 0; col < 4; ++col)
+    for (int row = 0; row < 4; ++row) {
+        for (int col = 0; col < 4; ++col) {
             a.setDouble(row, col, row * 4 + col);
+        }
+    }
+            
     double bufferd[16];
     float bufferf[16];
     a.asColMajord(bufferd);
@@ -200,31 +227,26 @@
 }
 
 static void TestMatrix44(skiatest::Reporter* reporter) {
-#ifdef SK_SCALAR_IS_FLOAT
     SkMatrix44 mat, inverse, iden1, iden2, rot;
 
     mat.reset();
-    mat.setTranslate(SK_Scalar1, SK_Scalar1, SK_Scalar1);
+    mat.setTranslate(1, 1, 1);
     mat.invert(&inverse);
     iden1.setConcat(mat, inverse);
     REPORTER_ASSERT(reporter, is_identity(iden1));
 
-    mat.setScale(SkIntToScalar(2), SkIntToScalar(2), SkIntToScalar(2));
+    mat.setScale(2, 2, 2);
     mat.invert(&inverse);
     iden1.setConcat(mat, inverse);
     REPORTER_ASSERT(reporter, is_identity(iden1));
 
-    mat.setScale(SK_Scalar1/2, SK_Scalar1/2, SK_Scalar1/2);
+    mat.setScale(SK_MScalar1/2, SK_MScalar1/2, SK_MScalar1/2);
     mat.invert(&inverse);
     iden1.setConcat(mat, inverse);
     REPORTER_ASSERT(reporter, is_identity(iden1));
 
-    mat.setScale(SkIntToScalar(3), SkIntToScalar(5), SkIntToScalar(20));
-    rot.setRotateDegreesAbout(
-        SkIntToScalar(0),
-        SkIntToScalar(0),
-        SkIntToScalar(-1),
-        SkIntToScalar(90));
+    mat.setScale(3, 3, 3);
+    rot.setRotateDegreesAbout(0, 0, -1, 90);
     mat.postConcat(rot);
     REPORTER_ASSERT(reporter, mat.invert(NULL));
     mat.invert(&inverse);
@@ -268,10 +290,11 @@
         test_common_angles(reporter);
     }
 
+    test_gettype(reporter);
     test_determinant(reporter);
     test_transpose(reporter);
     test_get_set_double(reporter);
-#endif
+    test_set_row_col_major(reporter);
 }
 
 #include "TestClassDef.h"