ARGB scale: 2x upsample with specialized row blends for the 25% and 75% fractions
BUG=none
TEST=none
Review URL: https://webrtc-codereview.appspot.com/938014

git-svn-id: http://libyuv.googlecode.com/svn/trunk@486 16f28f9a-4ce2-e073-06de-1de4eb20be90
diff --git a/source/scale_neon.cc b/source/scale_neon.cc
index 4af3c15..f521c63 100644
--- a/source/scale_neon.cc
+++ b/source/scale_neon.cc
@@ -477,14 +477,19 @@
                           int dst_width, int source_y_fraction) {
   asm volatile (
     "cmp          %4, #0                       \n"
-    "beq          2f                           \n"
+    "beq          100f                         \n"
     "add          %2, %1                       \n"
+    "cmp          %4, #64                      \n"
+    "beq          75f                          \n"
     "cmp          %4, #128                     \n"
-    "beq          3f                           \n"
+    "beq          50f                          \n"
+    "cmp          %4, #192                     \n"
+    "beq          25f                          \n"
 
     "vdup.8       d5, %4                       \n"
     "rsb          %4, #256                     \n"
     "vdup.8       d4, %4                       \n"
+    // General purpose row blend.
   "1:                                          \n"
     "vld1.u8      {q0}, [%1]!                  \n"
     "vld1.u8      {q1}, [%2]!                  \n"
@@ -497,23 +502,48 @@
     "vrshrn.u16   d1, q14, #8                  \n"
     "vst1.u8      {q0}, [%0]!                  \n"
     "bgt          1b                           \n"
-    "b            4f                           \n"
+    "b            99f                          \n"
 
-  "2:                                          \n"
+    // Blend 25 / 75.
+  "25:                                         \n"
     "vld1.u8      {q0}, [%1]!                  \n"
+    "vld1.u8      {q1}, [%2]!                  \n"
     "subs         %3, #16                      \n"
+    "vrhadd.u8    q0, q1                       \n"
+    "vrhadd.u8    q0, q1                       \n"
     "vst1.u8      {q0}, [%0]!                  \n"
-    "bgt          2b                           \n"
-    "b            4f                           \n"
+    "bgt          25b                          \n"
+    "b            99f                          \n"
 
-  "3:                                          \n"
+    // Blend 50 / 50.
+  "50:                                         \n"
     "vld1.u8      {q0}, [%1]!                  \n"
     "vld1.u8      {q1}, [%2]!                  \n"
     "subs         %3, #16                      \n"
     "vrhadd.u8    q0, q1                       \n"
     "vst1.u8      {q0}, [%0]!                  \n"
-    "bgt          3b                           \n"
-  "4:                                          \n"
+    "bgt          50b                          \n"
+    "b            99f                          \n"
+
+    // Blend 75 / 25.
+  "75:                                         \n"
+    "vld1.u8      {q1}, [%1]!                  \n"
+    "vld1.u8      {q0}, [%2]!                  \n"
+    "subs         %3, #16                      \n"
+    "vrhadd.u8    q0, q1                       \n"
+    "vrhadd.u8    q0, q1                       \n"
+    "vst1.u8      {q0}, [%0]!                  \n"
+    "bgt          75b                          \n"
+    "b            99f                          \n"
+
+    // Blend 100 / 0 - Copy row unchanged.
+  "100:                                        \n"
+    "vld1.u8      {q0}, [%1]!                  \n"
+    "subs         %3, #16                      \n"
+    "vst1.u8      {q0}, [%0]!                  \n"
+    "bgt          100b                         \n"
+
+  "99:                                         \n"
     "vst1.u8      {d1[7]}, [%0]                \n"
     : "+r"(dst_ptr),          // %0
       "+r"(src_ptr),          // %1