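ARGB scale 2x upsample: add specialized NEON row blends for the 25%/75% and 75%/25% source_y_fraction cases (64 and 192), alongside the existing copy (0) and 50/50 (128) paths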
BUG=none
TEST=none
Review URL: https://webrtc-codereview.appspot.com/938014
git-svn-id: http://libyuv.googlecode.com/svn/trunk@486 16f28f9a-4ce2-e073-06de-1de4eb20be90
diff --git a/source/scale_neon.cc b/source/scale_neon.cc
index 4af3c15..f521c63 100644
--- a/source/scale_neon.cc
+++ b/source/scale_neon.cc
@@ -477,14 +477,19 @@
int dst_width, int source_y_fraction) {
asm volatile (
"cmp %4, #0 \n"
- "beq 2f \n"
+ "beq 100f \n"
"add %2, %1 \n"
+ "cmp %4, #64 \n"
+ "beq 75f \n"
"cmp %4, #128 \n"
- "beq 3f \n"
+ "beq 50f \n"
+ "cmp %4, #192 \n"
+ "beq 25f \n"
"vdup.8 d5, %4 \n"
"rsb %4, #256 \n"
"vdup.8 d4, %4 \n"
+ // General purpose row blend.
"1: \n"
"vld1.u8 {q0}, [%1]! \n"
"vld1.u8 {q1}, [%2]! \n"
@@ -497,23 +502,48 @@
"vrshrn.u16 d1, q14, #8 \n"
"vst1.u8 {q0}, [%0]! \n"
"bgt 1b \n"
- "b 4f \n"
+ "b 99f \n"
- "2: \n"
+ // Blend 25 / 75.
+ "25: \n"
"vld1.u8 {q0}, [%1]! \n"
+ "vld1.u8 {q1}, [%2]! \n"
"subs %3, #16 \n"
+ "vrhadd.u8 q0, q1 \n"
+ "vrhadd.u8 q0, q1 \n"
"vst1.u8 {q0}, [%0]! \n"
- "bgt 2b \n"
- "b 4f \n"
+ "bgt 25b \n"
+ "b 99f \n"
- "3: \n"
+ // Blend 50 / 50.
+ "50: \n"
"vld1.u8 {q0}, [%1]! \n"
"vld1.u8 {q1}, [%2]! \n"
"subs %3, #16 \n"
"vrhadd.u8 q0, q1 \n"
"vst1.u8 {q0}, [%0]! \n"
- "bgt 3b \n"
- "4: \n"
+ "bgt 50b \n"
+ "b 99f \n"
+
+ // Blend 75 / 25.
+ "75: \n"
+ "vld1.u8 {q1}, [%1]! \n"
+ "vld1.u8 {q0}, [%2]! \n"
+ "subs %3, #16 \n"
+ "vrhadd.u8 q0, q1 \n"
+ "vrhadd.u8 q0, q1 \n"
+ "vst1.u8 {q0}, [%0]! \n"
+ "bgt 75b \n"
+ "b 99f \n"
+
+ // Blend 100 / 0 - Copy row unchanged.
+ "100: \n"
+ "vld1.u8 {q0}, [%1]! \n"
+ "subs %3, #16 \n"
+ "vst1.u8 {q0}, [%0]! \n"
+ "bgt 100b \n"
+
+ "99: \n"
"vst1.u8 {d1[7]}, [%0] \n"
: "+r"(dst_ptr), // %0
"+r"(src_ptr), // %1