Revert of Change mapRectScaleTranslate to pass args/ret by value (patchset #2 id:20001 of https://codereview.chromium.org/2138943002/ )

Reason for revert:
Build-Ubuntu-GCC-Arm7-Release-Android fails.

Original issue's description:
> Change mapRectScaleTranslate to pass args/ret by value
>
> This reverts commit 6092b6e0e57be20d2e1ad079c0af133d2f67bfd3.
>
> BUG=skia:
> GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2138943002
>
> Committed: https://skia.googlesource.com/skia/+/1bd13ca922d6448d595064faee486eaf3fa56e56

TBR=mtklein@google.com,msarett@google.com,reed@google.com
# Skipping CQ checks because original CL landed less than 1 day ago.
NOPRESUBMIT=true
NOTREECHECKS=true
NOTRY=true
BUG=skia:

Review-Url: https://codereview.chromium.org/2234843002
diff --git a/src/core/SkCanvas.cpp b/src/core/SkCanvas.cpp
index 6361144..24d9506 100644
--- a/src/core/SkCanvas.cpp
+++ b/src/core/SkCanvas.cpp
@@ -77,11 +77,13 @@
     }
 
     if (rect) {
-        const SkMatrix& ctm = this->getTotalMatrix();
-        if (!ctm.isScaleTranslate()) {
+        if (!this->getTotalMatrix().isScaleTranslate()) {
             return false; // conservative
         }
-        if (!ctm.mapRectScaleTranslate(*rect).contains(bounds)) {
+
+        SkRect devRect;
+        this->getTotalMatrix().mapRectScaleTranslate(&devRect, *rect);
+        if (!devRect.contains(bounds)) {
             return false;
         }
     }
@@ -1542,7 +1544,8 @@
     // Check if we can quick-accept the clip call (and do nothing)
     //
     if (SkRegion::kIntersect_Op == op && !doAA && fMCRec->fMatrix.isScaleTranslate()) {
-        SkRect devR = fMCRec->fMatrix.mapRectScaleTranslate(rect);
+        SkRect devR;
+        fMCRec->fMatrix.mapRectScaleTranslate(&devR, rect);
         // NOTE: this check is CTM specific, since we might round differently with a different
         //       CTM. Thus this is only 100% reliable if there is not global CTM scale to be
         //       applied later (i.e. if this is going into a picture).
@@ -1582,7 +1585,7 @@
     const bool isScaleTrans = fMCRec->fMatrix.isScaleTranslate();
     SkRect devR;
     if (isScaleTrans) {
-        devR = fMCRec->fMatrix.mapRectScaleTranslate(rect);
+        fMCRec->fMatrix.mapRectScaleTranslate(&devR, rect);
     }
 
 #ifndef SK_SUPPORT_PRECHECK_CLIPRECT
diff --git a/src/core/SkMatrix.cpp b/src/core/SkMatrix.cpp
index f9c8c9d..0fd8020 100644
--- a/src/core/SkMatrix.cpp
+++ b/src/core/SkMatrix.cpp
@@ -1097,7 +1097,8 @@
     }
 }
 
-SkRect SkMatrix::mapRectScaleTranslate(SkRect src) const {
+void SkMatrix::mapRectScaleTranslate(SkRect* dst, const SkRect& src) const {
+    SkASSERT(dst);
     SkASSERT(this->isScaleTranslate());
     
     SkScalar sx = fMat[kMScaleX];
@@ -1106,25 +1107,22 @@
     SkScalar ty = fMat[kMTransY];
     Sk4f scale(sx, sy, sx, sy);
     Sk4f trans(tx, ty, tx, ty);
-    
+
     Sk4f ltrb = Sk4f::Load(&src.fLeft) * scale + trans;
     // need to sort so we're not inverted
     Sk4f rblt(ltrb[2], ltrb[3], ltrb[0], ltrb[1]);
     Sk4f min = Sk4f::Min(ltrb, rblt);
     Sk4f max = Sk4f::Max(ltrb, rblt);
-    // We can extract either pair [0,1] or [2,3] from min and max and be correct.
-    // However, the current ABI for returning multiple floats is to use only 2 slots in each
-    // vector register. Thus we take [0..1] from min and max, as that perfectly matches the ABI.
-    SkRect dst;
-    Sk4f(min[0], min[1], max[0], max[1]).store(&dst.fLeft);
-    return dst;
+    // We can extract either pair [0,1] or [2,3] from min and max and be correct, but on
+    // ARM this sequence generates the fastest code (a single instruction).
+    Sk4f(min[2], min[3], max[0], max[1]).store(&dst->fLeft);
 }
 
 bool SkMatrix::mapRect(SkRect* dst, const SkRect& src) const {
     SkASSERT(dst);
 
     if (this->isScaleTranslate()) {
-        *dst = this->mapRectScaleTranslate(src);
+        this->mapRectScaleTranslate(dst, src);
         return true;
     } else {
         SkPoint quad[4];