Revert of Change mapRectScaleTranslate to pass args/ret by value (patchset #2 id:20001 of https://codereview.chromium.org/2138943002/ )

Reason for revert:
Build-Ubuntu-GCC-Arm7-Release-Android fails.

Original issue's description:
> Change mapRectScaleTranslate to pass args/ret by value
>
> This reverts commit 6092b6e0e57be20d2e1ad079c0af133d2f67bfd3.
>
> BUG=skia:
> GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2138943002
>
> Committed: https://skia.googlesource.com/skia/+/1bd13ca922d6448d595064faee486eaf3fa56e56

TBR=mtklein@google.com,msarett@google.com,reed@google.com
# Skipping CQ checks because original CL landed less than 1 day ago.
NOPRESUBMIT=true
NOTREECHECKS=true
NOTRY=true
BUG=skia:

Review-Url: https://codereview.chromium.org/2234843002
diff --git a/bench/MatrixBench.cpp b/bench/MatrixBench.cpp
index 185ea79..53e7296 100644
--- a/bench/MatrixBench.cpp
+++ b/bench/MatrixBench.cpp
@@ -320,7 +320,7 @@
         SkRect dst;
         if (fScaleTrans) {
             for (int i = 0; i < MEGA_LOOP; ++i) {
-                dst = fM.mapRectScaleTranslate(fR);
+                fM.mapRectScaleTranslate(&dst, fR);
             }
         } else {
             for (int i = 0; i < MEGA_LOOP; ++i) {
diff --git a/include/core/SkMatrix.h b/include/core/SkMatrix.h
index 8f70d4c..5b22a77 100644
--- a/include/core/SkMatrix.h
+++ b/include/core/SkMatrix.h
@@ -565,7 +565,7 @@
      *  Maps a rect to another rect, asserting (in debug mode) that the matrix only contains
      *  scale and translate elements. If it contains other elements, the results are undefined.
      */
-    SkRect mapRectScaleTranslate(SkRect src) const;
+    void mapRectScaleTranslate(SkRect* dst, const SkRect& src) const;
     
     /** Return the mean radius of a circle after it has been mapped by
         this matrix. NOTE: in perspective this value assumes the circle
diff --git a/src/core/SkCanvas.cpp b/src/core/SkCanvas.cpp
index 6361144..24d9506 100644
--- a/src/core/SkCanvas.cpp
+++ b/src/core/SkCanvas.cpp
@@ -77,11 +77,13 @@
     }
 
     if (rect) {
-        const SkMatrix& ctm = this->getTotalMatrix();
-        if (!ctm.isScaleTranslate()) {
+        if (!this->getTotalMatrix().isScaleTranslate()) {
             return false; // conservative
         }
-        if (!ctm.mapRectScaleTranslate(*rect).contains(bounds)) {
+
+        SkRect devRect;
+        this->getTotalMatrix().mapRectScaleTranslate(&devRect, *rect);
+        if (!devRect.contains(bounds)) {
             return false;
         }
     }
@@ -1542,7 +1544,8 @@
     // Check if we can quick-accept the clip call (and do nothing)
     //
     if (SkRegion::kIntersect_Op == op && !doAA && fMCRec->fMatrix.isScaleTranslate()) {
-        SkRect devR = fMCRec->fMatrix.mapRectScaleTranslate(rect);
+        SkRect devR;
+        fMCRec->fMatrix.mapRectScaleTranslate(&devR, rect);
         // NOTE: this check is CTM specific, since we might round differently with a different
         //       CTM. Thus this is only 100% reliable if there is not global CTM scale to be
         //       applied later (i.e. if this is going into a picture).
@@ -1582,7 +1585,7 @@
     const bool isScaleTrans = fMCRec->fMatrix.isScaleTranslate();
     SkRect devR;
     if (isScaleTrans) {
-        devR = fMCRec->fMatrix.mapRectScaleTranslate(rect);
+        fMCRec->fMatrix.mapRectScaleTranslate(&devR, rect);
     }
 
 #ifndef SK_SUPPORT_PRECHECK_CLIPRECT
diff --git a/src/core/SkMatrix.cpp b/src/core/SkMatrix.cpp
index f9c8c9d..0fd8020 100644
--- a/src/core/SkMatrix.cpp
+++ b/src/core/SkMatrix.cpp
@@ -1097,7 +1097,8 @@
     }
 }
 
-SkRect SkMatrix::mapRectScaleTranslate(SkRect src) const {
+void SkMatrix::mapRectScaleTranslate(SkRect* dst, const SkRect& src) const {
+    SkASSERT(dst);
     SkASSERT(this->isScaleTranslate());
     
     SkScalar sx = fMat[kMScaleX];
@@ -1106,25 +1107,22 @@
     SkScalar ty = fMat[kMTransY];
     Sk4f scale(sx, sy, sx, sy);
     Sk4f trans(tx, ty, tx, ty);
-    
+
     Sk4f ltrb = Sk4f::Load(&src.fLeft) * scale + trans;
     // need to sort so we're not inverted
     Sk4f rblt(ltrb[2], ltrb[3], ltrb[0], ltrb[1]);
     Sk4f min = Sk4f::Min(ltrb, rblt);
     Sk4f max = Sk4f::Max(ltrb, rblt);
-    // We can extract either pair [0,1] or [2,3] from min and max and be correct.
-    // However, the current ABI for returning multiple floats is to use only 2 slots in each
-    // vector register. Thus we take [0..1] from min and max, as that perfectly matches the ABI.
-    SkRect dst;
-    Sk4f(min[0], min[1], max[0], max[1]).store(&dst.fLeft);
-    return dst;
+    // We can extract either pair [0,1] or [2,3] from min and max and be correct, but on
+    // ARM this sequence generates the fastest code (a single instruction).
+    Sk4f(min[2], min[3], max[0], max[1]).store(&dst->fLeft);
 }
 
 bool SkMatrix::mapRect(SkRect* dst, const SkRect& src) const {
     SkASSERT(dst);
 
     if (this->isScaleTranslate()) {
-        *dst = this->mapRectScaleTranslate(src);
+        this->mapRectScaleTranslate(dst, src);
         return true;
     } else {
         SkPoint quad[4];
diff --git a/src/utils/SkDeferredCanvas.cpp b/src/utils/SkDeferredCanvas.cpp
index c8a37c3..1422020 100644
--- a/src/utils/SkDeferredCanvas.cpp
+++ b/src/utils/SkDeferredCanvas.cpp
@@ -200,7 +200,7 @@
                 if (canScale) {
                     SkMatrix m;
                     rec.getConcat(&m);
-                    *bounds = m.mapRectScaleTranslate(*bounds);
+                    m.mapRectScaleTranslate(bounds, *bounds);
                 } else {
                     goto STOP;
                 }
diff --git a/tests/MatrixTest.cpp b/tests/MatrixTest.cpp
index 6cba604..414aab2 100644
--- a/tests/MatrixTest.cpp
+++ b/tests/MatrixTest.cpp
@@ -984,7 +984,7 @@
         mat.mapPoints((SkPoint*)&dst[0].fLeft, (SkPoint*)&src.fLeft, 2);
         dst[0].sort();
         mat.mapRect(&dst[1], src);
-        dst[2] = mat.mapRectScaleTranslate(src);
+        mat.mapRectScaleTranslate(&dst[2], src);
 
         REPORTER_ASSERT(r, dst[0] == dst[1]);
         REPORTER_ASSERT(r, dst[0] == dst[2]);