Optimize resampling by 2 for ARMv7 in the signal processing library (SPL).
Review URL: http://webrtc-codereview.appspot.com/92015
git-svn-id: http://webrtc.googlecode.com/svn/trunk@327 4adac7df-926f-26a2-2b94-8c16560cd09d
diff --git a/android-webrtc.mk b/android-webrtc.mk
index 735c766..510fd0b 100644
--- a/android-webrtc.mk
+++ b/android-webrtc.mk
@@ -23,13 +23,16 @@
MY_WEBRTC_COMMON_DEFS += \
'-DWEBRTC_ARM_INLINE_CALLS' \
'-DWEBRTC_ARCH_ARM'
-# TODO: test if the code under next two MACROs works with generic GCC compilers
+
+# TODO(kma): test whether the code under the next two macros works with generic GCC compilers
ifeq ($(ARCH_ARM_HAVE_NEON),true)
MY_WEBRTC_COMMON_DEFS += \
- '-DWEBRTC_ANDROID_ARMV7A_NEON'
-else ifeq ($(ARCH_ARM_HAVE_ARMV7A),true)
+ '-DWEBRTC_ARCH_ARM_NEON'
+endif
+
+ifeq ($(ARCH_ARM_HAVE_ARMV7A),true)
MY_WEBRTC_COMMON_DEFS += \
- '-DWEBRTC_ANDROID_ARMV7A'
+ '-DWEBRTC_ARCH_ARM_V7A'
endif
else ifeq ($(TARGET_ARCH),x86)
MY_WEBRTC_COMMON_DEFS += \
diff --git a/src/common_audio/signal_processing_library/main/source/resample_by_2.c b/src/common_audio/signal_processing_library/main/source/resample_by_2.c
index 7ed4cfd..2083b75 100644
--- a/src/common_audio/signal_processing_library/main/source/resample_by_2.c
+++ b/src/common_audio/signal_processing_library/main/source/resample_by_2.c
@@ -36,43 +36,66 @@
outptr = out; // output array (of length len/2)
state = filtState; // filter state array; length = 8
+ register WebRtc_Word32 state0 = state[0];
+ register WebRtc_Word32 state1 = state[1];
+ register WebRtc_Word32 state2 = state[2];
+ register WebRtc_Word32 state3 = state[3];
+ register WebRtc_Word32 state4 = state[4];
+ register WebRtc_Word32 state5 = state[5];
+ register WebRtc_Word32 state6 = state[6];
+ register WebRtc_Word32 state7 = state[7];
+
for (i = (len >> 1); i > 0; i--)
{
// lower allpass filter
in32 = (WebRtc_Word32)(*inptr++) << 10;
- diff = in32 - state[1];
- tmp1 = WEBRTC_SPL_SCALEDIFF32( kResampleAllpass2[0], diff, state[0] );
- state[0] = in32;
- diff = tmp1 - state[2];
- tmp2 = WEBRTC_SPL_SCALEDIFF32( kResampleAllpass2[1], diff, state[1] );
- state[1] = tmp1;
- diff = tmp2 - state[3];
- state[3] = WEBRTC_SPL_SCALEDIFF32( kResampleAllpass2[2], diff, state[2] );
- state[2] = tmp2;
+ diff = in32 - state1;
+ tmp1 = WEBRTC_SPL_SCALEDIFF32( kResampleAllpass2[0], diff, state0 );
+ state0 = in32;
+ diff = tmp1 - state2;
+ tmp2 = WEBRTC_SPL_SCALEDIFF32( kResampleAllpass2[1], diff, state1 );
+ state1 = tmp1;
+ diff = tmp2 - state3;
+ state3 = WEBRTC_SPL_SCALEDIFF32( kResampleAllpass2[2], diff, state2 );
+ state2 = tmp2;
// upper allpass filter
in32 = (WebRtc_Word32)(*inptr++) << 10;
- diff = in32 - state[5];
- tmp1 = WEBRTC_SPL_SCALEDIFF32( kResampleAllpass1[0], diff, state[4] );
- state[4] = in32;
- diff = tmp1 - state[6];
- tmp2 = WEBRTC_SPL_SCALEDIFF32( kResampleAllpass1[1], diff, state[5] );
- state[5] = tmp1;
- diff = tmp2 - state[7];
- state[7] = WEBRTC_SPL_SCALEDIFF32( kResampleAllpass1[2], diff, state[6] );
- state[6] = tmp2;
+ diff = in32 - state5;
+ tmp1 = WEBRTC_SPL_SCALEDIFF32( kResampleAllpass1[0], diff, state4 );
+ state4 = in32;
+ diff = tmp1 - state6;
+ tmp2 = WEBRTC_SPL_SCALEDIFF32( kResampleAllpass1[1], diff, state5 );
+ state5 = tmp1;
+ diff = tmp2 - state7;
+ state7 = WEBRTC_SPL_SCALEDIFF32( kResampleAllpass1[2], diff, state6 );
+ state6 = tmp2;
// add two allpass outputs, divide by two and round
- out32 = (state[3] + state[7] + 1024) >> 11;
+ out32 = (state3 + state7 + 1024) >> 11;
// limit amplitude to prevent wrap-around, and write to output array
+#ifdef WEBRTC_ARCH_ARM_V7A
+ __asm__("ssat %r0, #16, %r1" : "=r"(*outptr) : "r"(out32));
+ outptr++;
+#else
if (out32 > 32767)
*outptr++ = 32767;
else if (out32 < -32768)
*outptr++ = -32768;
else
*outptr++ = (WebRtc_Word16)out32;
+#endif
}
+
+ state[0]=state0;
+ state[1]=state1;
+ state[2]=state2;
+ state[3]=state3;
+ state[4]=state4;
+ state[5]=state5;
+ state[6]=state6;
+ state[7]=state7;
}
void WebRtcSpl_UpsampleBy2(const WebRtc_Word16* in, WebRtc_Word16 len, WebRtc_Word16* out,
@@ -89,47 +112,75 @@
outptr = out; // output array (of length len*2)
state = filtState; // filter state array; length = 8
+ register WebRtc_Word32 state0 = state[0];
+ register WebRtc_Word32 state1 = state[1];
+ register WebRtc_Word32 state2 = state[2];
+ register WebRtc_Word32 state3 = state[3];
+ register WebRtc_Word32 state4 = state[4];
+ register WebRtc_Word32 state5 = state[5];
+ register WebRtc_Word32 state6 = state[6];
+ register WebRtc_Word32 state7 = state[7];
+
for (i = len; i > 0; i--)
{
// lower allpass filter
in32 = (WebRtc_Word32)(*inptr++) << 10;
- diff = in32 - state[1];
- tmp1 = WEBRTC_SPL_SCALEDIFF32( kResampleAllpass1[0], diff, state[0] );
- state[0] = in32;
- diff = tmp1 - state[2];
- tmp2 = WEBRTC_SPL_SCALEDIFF32( kResampleAllpass1[1], diff, state[1] );
- state[1] = tmp1;
- diff = tmp2 - state[3];
- state[3] = WEBRTC_SPL_SCALEDIFF32( kResampleAllpass1[2], diff, state[2] );
- state[2] = tmp2;
+ diff = in32 - state1;
+ tmp1 = WEBRTC_SPL_SCALEDIFF32( kResampleAllpass1[0], diff, state0 );
+ state0 = in32;
+ diff = tmp1 - state2;
+ tmp2 = WEBRTC_SPL_SCALEDIFF32( kResampleAllpass1[1], diff, state1 );
+ state1 = tmp1;
+ diff = tmp2 - state3;
+ state3 = WEBRTC_SPL_SCALEDIFF32( kResampleAllpass1[2], diff, state2 );
+ state2 = tmp2;
// round; limit amplitude to prevent wrap-around; write to output array
- out32 = (state[3] + 512) >> 10;
+ out32 = (state3 + 512) >> 10;
+#ifdef WEBRTC_ARCH_ARM_V7A
+ __asm__("ssat %r0, #16, %r1":"=r"(*outptr): "r"(out32));
+ outptr++;
+#else
if (out32 > 32767)
*outptr++ = 32767;
else if (out32 < -32768)
*outptr++ = -32768;
else
*outptr++ = (WebRtc_Word16)out32;
+#endif
// upper allpass filter
- diff = in32 - state[5];
- tmp1 = WEBRTC_SPL_SCALEDIFF32( kResampleAllpass2[0], diff, state[4] );
- state[4] = in32;
- diff = tmp1 - state[6];
- tmp2 = WEBRTC_SPL_SCALEDIFF32( kResampleAllpass2[1], diff, state[5] );
- state[5] = tmp1;
- diff = tmp2 - state[7];
- state[7] = WEBRTC_SPL_SCALEDIFF32( kResampleAllpass2[2], diff, state[6] );
- state[6] = tmp2;
+ diff = in32 - state5;
+ tmp1 = WEBRTC_SPL_SCALEDIFF32( kResampleAllpass2[0], diff, state4 );
+ state4 = in32;
+ diff = tmp1 - state6;
+ tmp2 = WEBRTC_SPL_SCALEDIFF32( kResampleAllpass2[1], diff, state5 );
+ state5 = tmp1;
+ diff = tmp2 - state7;
+ state7 = WEBRTC_SPL_SCALEDIFF32( kResampleAllpass2[2], diff, state6 );
+ state6 = tmp2;
// round; limit amplitude to prevent wrap-around; write to output array
- out32 = (state[7] + 512) >> 10;
+ out32 = (state7 + 512) >> 10;
+#ifdef WEBRTC_ARCH_ARM_V7A
+ __asm__("ssat %r0, #16, %r1":"=r"(*outptr): "r"(out32));
+ outptr++;
+#else
if (out32 > 32767)
*outptr++ = 32767;
else if (out32 < -32768)
*outptr++ = -32768;
else
*outptr++ = (WebRtc_Word16)out32;
+#endif
}
+ state[0]=state0;
+ state[1]=state1;
+ state[2]=state2;
+ state[3]=state3;
+ state[4]=state4;
+ state[5]=state5;
+ state[6]=state6;
+ state[7]=state7;
+
}