Cherry-pick from 7241: speed up matrix44::setconcat when both inputs are
scale+translate by inlining the zero assignments rather than calling sk_bzero.

bench: matrix44_setconcat 2x faster



git-svn-id: http://skia.googlecode.com/svn/trunk@7251 2bbb7eff-a529-9590-31e7-b0007b416f81
diff --git a/src/utils/SkMatrix44.cpp b/src/utils/SkMatrix44.cpp
index 1906593..92c8715 100644
--- a/src/utils/SkMatrix44.cpp
+++ b/src/utils/SkMatrix44.cpp
@@ -360,11 +360,14 @@
     SkMScalar storage[16];
     SkMScalar* result = useStorage ? storage : &fMat[0][0];
 
+    // Both matrices are at most scale+translate
     if (bits_isonly(a_mask | b_mask, kScale_Mask | kTranslate_Mask)) {
-        sk_bzero(result, sizeof(storage));
         result[0] = a.fMat[0][0] * b.fMat[0][0];
+        result[1] = result[2] = result[3] = result[4] = 0;
         result[5] = a.fMat[1][1] * b.fMat[1][1];
+        result[6] = result[7] = result[8] = result[9] = 0;
         result[10] = a.fMat[2][2] * b.fMat[2][2];
+        result[11] = 0;
         result[12] = a.fMat[0][0] * b.fMat[3][0] + a.fMat[3][0];
         result[13] = a.fMat[1][1] * b.fMat[3][1] + a.fMat[3][1];
         result[14] = a.fMat[2][2] * b.fMat[3][2] + a.fMat[3][2];