Add SkTDivMod with a special case for ARM.

BUG=skia:1663
R=djsollen@google.com, tomhudson@google.com, reed@google.com

Author: mtklein@google.com

Review URL: https://chromiumcodereview.appspot.com/24159009

git-svn-id: http://skia.googlecode.com/svn/trunk@11482 2bbb7eff-a529-9590-31e7-b0007b416f81
diff --git a/bench/MathBench.cpp b/bench/MathBench.cpp
index c34be44..260159f 100644
--- a/bench/MathBench.cpp
+++ b/bench/MathBench.cpp
@@ -512,6 +512,42 @@
 
 ///////////////////////////////////////////////////////////////////////////////
 
+// Benchmarks SkTDivMod for integer type T by dividing consecutive loop counters (i+1 by i).
+// The benchmark reports itself as "divmod_<name>", where name is the constructor argument.
+template <typename T>
+class DivModBench : public SkBenchmark {
+    SkString fName;  // Owns the formatted name so onGetName() never returns a dangling pointer.
+public:
+    explicit DivModBench(const char* name) : fName(SkStringPrintf("divmod_%s", name)) {
+        fIsRendering = false;
+    }
+
+protected:
+    virtual const char* onGetName() { return fName.c_str(); }
+
+    virtual void onDraw(SkCanvas*) {
+        volatile T a = 0, b = 0;  // Presumably volatile so the results cannot be optimized away.
+        T div = 0, mod = 0;
+        for (int i = 0; i < this->getLoops(); i++) {
+            if ((T)i == 0) continue;  // Small T will wrap around; skip a zero denominator.
+            SkTDivMod((T)(i+1), (T)i, &div, &mod);
+            a ^= div;
+            b ^= mod;
+        }
+    }
+};
+DEF_BENCH(return new DivModBench<uint8_t>("uint8_t"))    // Unsigned types, 8 to 64 bits wide.
+DEF_BENCH(return new DivModBench<uint16_t>("uint16_t"))
+DEF_BENCH(return new DivModBench<uint32_t>("uint32_t"))
+DEF_BENCH(return new DivModBench<uint64_t>("uint64_t"))
+
+DEF_BENCH(return new DivModBench<int8_t>("int8_t"))      // Signed types, 8 to 64 bits wide.
+DEF_BENCH(return new DivModBench<int16_t>("int16_t"))
+DEF_BENCH(return new DivModBench<int32_t>("int32_t"))
+DEF_BENCH(return new DivModBench<int64_t>("int64_t"))
+
+///////////////////////////////////////////////////////////////////////////////
+
 DEF_BENCH( return new NoOpMathBench(); )
 DEF_BENCH( return new SlowISqrtMathBench(); )
 DEF_BENCH( return new FastISqrtMathBench(); )
diff --git a/include/core/SkMath.h b/include/core/SkMath.h
index 078c8fc..affcada 100644
--- a/include/core/SkMath.h
+++ b/include/core/SkMath.h
@@ -173,4 +173,26 @@
     return (prod + (prod >> 8)) >> 8;
 }
 
+/**
+ *  Computes the quotient and remainder of numer divided by denom in one call,
+ *  writing the quotient to *div and the remainder to *mod (each cast to Out).
+ */
+template <typename In, typename Out>
+inline void SkTDivMod(In numer, In denom, Out* div, Out* mod) {
+#if !defined(SK_CPU_ARM)
+    // On x86 this pair will just be a single idiv.
+    *div = static_cast<Out>(numer/denom);
+    *mod = static_cast<Out>(numer%denom);
+#else
+    // Spelled with / and %, GCC does not fuse the two operations into a single
+    // divmod call here; it emits a div call followed by a divmod call.  Silly!
+    // Deriving the remainder from the quotient is just as fast as calling
+    // __aeabi_[u]idivmod manually, with prettier code, and benches as around
+    // 2x faster than the / and % form above.
+    const In quotient = numer/denom;
+    *div = static_cast<Out>(quotient);
+    *mod = static_cast<Out>(numer - quotient*denom);
+#endif  // !SK_CPU_ARM
+}
+
 #endif
diff --git a/src/core/SkBitmap.cpp b/src/core/SkBitmap.cpp
index d3bbecd..128726c 100644
--- a/src/core/SkBitmap.cpp
+++ b/src/core/SkBitmap.cpp
@@ -911,9 +911,8 @@
         return true;
     }
     // Use integer division to find the correct y position.
-    *y = SkToS32(offset / rowBytes);
     // The remainder will be the x position, after we reverse get_sub_offset.
-    *x = SkToS32(offset % rowBytes);
+    SkTDivMod(offset, rowBytes, y, x);
     switch (config) {
         case SkBitmap::kA8_Config:
             // Fall through.
diff --git a/src/core/SkScaledImageCache.cpp b/src/core/SkScaledImageCache.cpp
index 75dac78..11a0ee4 100644
--- a/src/core/SkScaledImageCache.cpp
+++ b/src/core/SkScaledImageCache.cpp
@@ -48,10 +48,9 @@
             return false;
         }
 
-        size_t offset = bm.pixelRefOffset();
-        size_t rowBytes = bm.rowBytes();
-        int x = (offset % rowBytes) >> 2;
-        int y = offset / rowBytes;
+        size_t x, y;
+        SkTDivMod(bm.pixelRefOffset(), bm.rowBytes(), &y, &x);
+        x >>= 2;
 
         fGenID = pr->getGenerationID();
         fBounds.set(x, y, x + bm.width(), y + bm.height());
diff --git a/tests/MathTest.cpp b/tests/MathTest.cpp
index cb4d0b8..bc8e6a3 100644
--- a/tests/MathTest.cpp
+++ b/tests/MathTest.cpp
@@ -691,3 +691,74 @@
 }
 
 DEFINE_TESTCLASS("Endian", EndianTestClass, TestEndian)
+
+// Checks SkTDivMod<T, T> against built-in / and % on fixed edge cases and 10000 random pairs.
+// Expected values are cast back to T: for T narrower than int, / and % promote to int, so
+// e.g. int8_t -128 / -1 is 128 as an int but is stored by SkTDivMod as a T.
+template <typename T>
+static void test_divmod(skiatest::Reporter* r) {
+    const struct { T numer, denom; } kEdgeCases[] = {
+        {(T)17, (T)17},
+        {(T)17, (T)4},
+        {(T)0,  (T)17},
+        // For unsigned T these negatives are just some large numbers.  Doesn't hurt to test them.
+        {(T)-17, (T)-17},
+        {(T)-17, (T)4},
+        {(T)17,  (T)-4},
+        {(T)-17, (T)-4},
+    };
+
+    for (size_t i = 0; i < SK_ARRAY_COUNT(kEdgeCases); i++) {
+        const T numer = kEdgeCases[i].numer;
+        const T denom = kEdgeCases[i].denom;
+        T div, mod;
+        SkTDivMod(numer, denom, &div, &mod);
+        REPORTER_ASSERT(r, (T)(numer/denom) == div);
+        REPORTER_ASSERT(r, (T)(numer%denom) == mod);
+    }
+
+    SkRandom rand;
+    for (size_t i = 0; i < 10000; i++) {
+        const T numer = (T)rand.nextS();
+        T denom = 0;
+        while (0 == denom) {
+            denom = (T)rand.nextS();
+        }
+        T div, mod;
+        SkTDivMod(numer, denom, &div, &mod);
+        REPORTER_ASSERT(r, (T)(numer/denom) == div);
+        REPORTER_ASSERT(r, (T)(numer%denom) == mod);
+    }
+}
+
+DEF_TEST(divmod_u8, r) {
+    test_divmod<uint8_t>(r);  // Unsigned instantiations: 8, 16, 32 and 64 bits.
+}
+
+DEF_TEST(divmod_u16, r) {
+    test_divmod<uint16_t>(r);
+}
+
+DEF_TEST(divmod_u32, r) {
+    test_divmod<uint32_t>(r);
+}
+
+DEF_TEST(divmod_u64, r) {
+    test_divmod<uint64_t>(r);
+}
+
+DEF_TEST(divmod_s8, r) {
+    test_divmod<int8_t>(r);  // Signed instantiations: 8, 16, 32 and 64 bits.
+}
+
+DEF_TEST(divmod_s16, r) {
+    test_divmod<int16_t>(r);
+}
+
+DEF_TEST(divmod_s32, r) {
+    test_divmod<int32_t>(r);
+}
+
+DEF_TEST(divmod_s64, r) {
+    test_divmod<int64_t>(r);
+}