Align all loops with .p2align, copy stride to a local for scale, and copy the last byte in bilinear more efficiently
BUG=none
TEST=none
Review URL: https://webrtc-codereview.appspot.com/547007
git-svn-id: http://libyuv.googlecode.com/svn/trunk@255 16f28f9a-4ce2-e073-06de-1de4eb20be90
diff --git a/source/rotate_neon.cc b/source/rotate_neon.cc
index 70dd420..af790ae 100644
--- a/source/rotate_neon.cc
+++ b/source/rotate_neon.cc
@@ -32,6 +32,7 @@
"sub %4, #8 \n"
// handle 8x8 blocks. this should be the majority of the plane
+ ".p2align 4 \n"
"1: \n"
"mov r9, %0 \n"
@@ -198,6 +199,7 @@
"sub %6, #8 \n"
// handle 8x8 blocks. this should be the majority of the plane
+ ".p2align 4 \n"
"1: \n"
"mov r9, %0 \n"