Merge "Revert "Add VP9 inter-frame prediction intrinsic""
diff --git a/cpp/ScriptIntrinsics.cpp b/cpp/ScriptIntrinsics.cpp
index 34b2162..ba1e5f4 100644
--- a/cpp/ScriptIntrinsics.cpp
+++ b/cpp/ScriptIntrinsics.cpp
@@ -66,52 +66,6 @@
Script::setVar(0, lut);
}
-sp<ScriptIntrinsicVP9InterPred> ScriptIntrinsicVP9InterPred::create(sp<RS> rs, sp<const Element> e) {
- if (e->isCompatible(Element::U8(rs)) == false) {
- rs->throwError(RS_ERROR_INVALID_ELEMENT, "Element not supported for intrinsic");
- return NULL;
- }
- return new ScriptIntrinsicVP9InterPred(rs, e);
-}
-
-ScriptIntrinsicVP9InterPred::ScriptIntrinsicVP9InterPred(sp<RS> rs, sp<const Element> e)
- : ScriptIntrinsic(rs, RS_SCRIPT_INTRINSIC_ID_INTER_PRED, e) {
-}
-
-void ScriptIntrinsicVP9InterPred::forEach(sp<Allocation> asize) {
- if (asize->getType()->getElement()->isCompatible(mElement) == false) {
- mRS->throwError(RS_ERROR_INVALID_ELEMENT, "InterPred forEach element mismatch");
- return;
- }
- Script::forEach(0, asize, NULL, NULL, 0);
-}
-
-void ScriptIntrinsicVP9InterPred::setRef(sp<Allocation> ref) {
- sp<const Type> t = ref->getType();
- if (!t->getElement()->isCompatible(mElement)) {
- mRS->throwError(RS_ERROR_INVALID_ELEMENT, "setRef element does not match");
- return;
- }
- Script::setVar(0, ref);
-}
-
-void ScriptIntrinsicVP9InterPred::setParam(sp<Allocation> param) {
- sp<const Type> t = param->getType();
- if (!t->getElement()->isCompatible(mElement)) {
- mRS->throwError(RS_ERROR_INVALID_ELEMENT, "setFriParam element does not match");
- return;
- }
- Script::setVar(1, param);
-}
-
-void ScriptIntrinsicVP9InterPred::setParamCount(int fri, int sec, int offset) {
- FieldPacker fp(12);
- fp.add(fri);
- fp.add(sec);
- fp.add(offset);
- Script::setVar(2, fp.getData(), fp.getLength());
-}
-
sp<ScriptIntrinsicBlend> ScriptIntrinsicBlend::create(sp<RS> rs, sp<const Element> e) {
if (e->isCompatible(Element::U8_4(rs)) == false) {
rs->throwError(RS_ERROR_INVALID_ELEMENT, "Element not supported for intrinsic");
diff --git a/cpp/rsCppStructs.h b/cpp/rsCppStructs.h
index 8268b61..8b07bf0 100644
--- a/cpp/rsCppStructs.h
+++ b/cpp/rsCppStructs.h
@@ -1435,20 +1435,6 @@
*/
void setLUT(sp<Allocation> lut);
};
-/**
- * Intrinsic for VP9InterPrediction
- */
-class ScriptIntrinsicVP9InterPred : public ScriptIntrinsic {
- private:
- ScriptIntrinsicVP9InterPred(sp<RS> rs, sp<const Element> e);
- public:
- static sp<ScriptIntrinsicVP9InterPred> create(sp<RS> rs, sp<const Element> e);
-
- void forEach(sp<Allocation> asize);
- void setRef(sp<Allocation> ref);
- void setParamCount(int fri, int sec, int offset);
- void setParam(sp<Allocation> param);
-};
/**
* Intrinsic kernel for blending two Allocations.
diff --git a/cpu_ref/Android.mk b/cpu_ref/Android.mk
index f87ac6e..96d8b07 100644
--- a/cpu_ref/Android.mk
+++ b/cpu_ref/Android.mk
@@ -29,12 +29,10 @@
rsCpuIntrinsicConvolve3x3.cpp \
rsCpuIntrinsicConvolve5x5.cpp \
rsCpuIntrinsicHistogram.cpp \
- rsCpuIntrinsicInterPred.cpp \
rsCpuIntrinsicLoopFilter.cpp \
rsCpuIntrinsicYuvToRGB.cpp \
rsCpuIntrinsicResize.cpp \
- rsCpuIntrinsicLUT.cpp \
- convolve/convolve.c
+ rsCpuIntrinsicLUT.cpp
LOCAL_CFLAGS_arm64 += -DARCH_ARM_HAVE_NEON
LOCAL_CFLAGS_64 += -DFAKE_ARM64_BUILD
@@ -61,11 +59,6 @@
rsCpuIntrinsics_neon_Convolve.S \
rsCpuIntrinsics_neon_ColorMatrix.S \
rsCpuIntrinsics_neon_YuvToRGB.S \
- convolve/convolve_copy_neon.s \
- convolve/convolve_avg_neon.s \
- convolve/convolve8_neon.s \
- convolve/convolve8_avg_neon.s \
- convolve/convolve_neon.c\
vp9_loopfilter_16_neon.S \
vp9_loopfilter_neon.S \
vp9_mb_lpf_neon.S
diff --git a/cpu_ref/convolve/convolve.c b/cpu_ref/convolve/convolve.c
deleted file mode 100644
index c85db92..0000000
--- a/cpu_ref/convolve/convolve.c
+++ /dev/null
@@ -1,257 +0,0 @@
-/*
- * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- *
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "vp9_common.h"
-#include "vp9_filter.h"
-#include <string.h>
-#include <stdio.h>
-
-static void convolve_horiz(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const subpel_kernel *x_filters,
- int x0_q4, int x_step_q4, int w, int h) {
- int x, y;
- src -= SUBPEL_TAPS / 2 - 1;
- for (y = 0; y < h; ++y) {
- int x_q4 = x0_q4;
- for (x = 0; x < w; ++x) {
- const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
- const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
- int k, sum = 0;
- for (k = 0; k < SUBPEL_TAPS; ++k)
- sum += src_x[k] * x_filter[k];
- dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
- x_q4 += x_step_q4;
- }
- src += src_stride;
- dst += dst_stride;
- }
-}
-
-static void convolve_avg_horiz(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const subpel_kernel *x_filters,
- int x0_q4, int x_step_q4, int w, int h) {
- int x, y;
- src -= SUBPEL_TAPS / 2 - 1;
- for (y = 0; y < h; ++y) {
- int x_q4 = x0_q4;
- for (x = 0; x < w; ++x) {
- const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
- const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
- int k, sum = 0;
- for (k = 0; k < SUBPEL_TAPS; ++k)
- sum += src_x[k] * x_filter[k];
- dst[x] = ROUND_POWER_OF_TWO(dst[x] +
- clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);
- x_q4 += x_step_q4;
- }
- src += src_stride;
- dst += dst_stride;
- }
-}
-
-static void convolve_vert(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const subpel_kernel *y_filters,
- int y0_q4, int y_step_q4, int w, int h) {
- int x, y;
- src -= src_stride * (SUBPEL_TAPS / 2 - 1);
-
- for (x = 0; x < w; ++x) {
- int y_q4 = y0_q4;
- for (y = 0; y < h; ++y) {
- const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
- const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
- int k, sum = 0;
- for (k = 0; k < SUBPEL_TAPS; ++k)
- sum += src_y[k * src_stride] * y_filter[k];
- dst[y * dst_stride] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
- y_q4 += y_step_q4;
- }
- ++src;
- ++dst;
- }
-}
-
-static void convolve_avg_vert(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const subpel_kernel *y_filters,
- int y0_q4, int y_step_q4, int w, int h) {
- int x, y;
- src -= src_stride * (SUBPEL_TAPS / 2 - 1);
-
- for (x = 0; x < w; ++x) {
- int y_q4 = y0_q4;
- for (y = 0; y < h; ++y) {
- const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
- const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
- int k, sum = 0;
- for (k = 0; k < SUBPEL_TAPS; ++k)
- sum += src_y[k * src_stride] * y_filter[k];
- dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] +
- clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);
- y_q4 += y_step_q4;
- }
- ++src;
- ++dst;
- }
-}
-
-static void convolve(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const subpel_kernel *const x_filters,
- int x0_q4, int x_step_q4,
- const subpel_kernel *const y_filters,
- int y0_q4, int y_step_q4,
- int w, int h) {
- // Fixed size intermediate buffer places limits on parameters.
- // Maximum intermediate_height is 324, for y_step_q4 == 80,
- // h == 64, taps == 8.
- // y_step_q4 of 80 allows for 1/10 scale for 5 layer svc
- uint8_t temp[64 * 324];
- int intermediate_height = (((h - 1) * y_step_q4 + 15) >> 4) + SUBPEL_TAPS;
-
- if (intermediate_height < h)
- intermediate_height = h;
-
- convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, 64,
- x_filters, x0_q4, x_step_q4, w, intermediate_height);
- convolve_vert(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride,
- y_filters, y0_q4, y_step_q4, w, h);
-}
-
-static const subpel_kernel *get_filter_base(const int16_t *filter) {
- // NOTE: This assumes that the filter table is 256-byte aligned.
- // TODO(agrange) Modify to make independent of table alignment.
- return (const subpel_kernel *)(filter);
-}
-
-static int get_filter_offset(const int16_t *f, const subpel_kernel *base) {
- return 0;
-}
-
-void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h) {
- const subpel_kernel *const filters_x = get_filter_base(filter_x);
- const int x0_q4 = get_filter_offset(filter_x, filters_x);
-
- convolve_horiz(src, src_stride, dst, dst_stride, filters_x,
- x0_q4, x_step_q4, w, h);
-}
-
-void vp9_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h) {
- const subpel_kernel *const filters_x = get_filter_base(filter_x);
- const int x0_q4 = get_filter_offset(filter_x, filters_x);
-
- convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x,
- x0_q4, x_step_q4, w, h);
-}
-
-void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h) {
- const subpel_kernel *const filters_y = get_filter_base(filter_y);
- const int y0_q4 = get_filter_offset(filter_y, filters_y);
- convolve_vert(src, src_stride, dst, dst_stride, filters_y,
- y0_q4, y_step_q4, w, h);
-}
-
-void vp9_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h) {
- const subpel_kernel *const filters_y = get_filter_base(filter_y);
- const int y0_q4 = get_filter_offset(filter_y, filters_y);
- convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y,
- y0_q4, y_step_q4, w, h);
-}
-
-void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h) {
- const subpel_kernel *const filters_x = get_filter_base(filter_x);
- const int x0_q4 = get_filter_offset(filter_x, filters_x);
-
- const subpel_kernel *const filters_y = get_filter_base(filter_y);
- const int y0_q4 = get_filter_offset(filter_y, filters_y);
-
- convolve(src, src_stride, dst, dst_stride,
- filters_x, x0_q4, x_step_q4,
- filters_y, y0_q4, y_step_q4, w, h);
-}
-
-void vp9_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int filter_x_stride,
- const int16_t *filter_y, int filter_y_stride,
- int w, int h) {
- int x, y;
- for (y = 0; y < h; ++y) {
- for (x = 0; x < w; ++x)
- dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1);
-
- src += src_stride;
- dst += dst_stride;
- }
-}
-
-void vp9_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h) {
- /* Fixed size intermediate buffer places limits on parameters. */
- DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 64 * 64);
-
- vp9_convolve8_c(src, src_stride, temp, 64,
- filter_x, x_step_q4, filter_y, y_step_q4, w, h);
- vp9_convolve_avg_c(temp, 64, dst, dst_stride, NULL, 0, NULL, 0, w, h);
-}
-
-void vp9_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int filter_x_stride,
- const int16_t *filter_y, int filter_y_stride,
- int w, int h) {
- int r;
-
- for (r = h; r > 0; --r) {
- memcpy(dst, src, w);
- src += src_stride;
- dst += dst_stride;
- }
-}
diff --git a/cpu_ref/convolve/convolve8_avg_neon.s b/cpu_ref/convolve/convolve8_avg_neon.s
deleted file mode 100644
index 7821446..0000000
--- a/cpu_ref/convolve/convolve8_avg_neon.s
+++ /dev/null
@@ -1,323 +0,0 @@
-@ This file was created from a .asm file
-@ using the ads2gas.pl script.
- .equ DO1STROUNDING, 0
-@
-@ Copyright (c) 2013 The WebM project authors. All Rights Reserved.
-@
-@ Use of this source code is governed by a BSD-style license
-@ that can be found in the LICENSE file in the root of the source
-@ tree. An additional intellectual property rights grant can be found
-@ in the file PATENTS. All contributing project authors may
-@ be found in the AUTHORS file in the root of the source tree.
-@
-@ Copyright (c) 2014 The Android Open Source Project
-@
-@ Licensed under the Apache License, Version 2.0 (the "License");
-@ you may not use this file except in compliance with the License.
-@ You may obtain a copy of the License at
-@
-@ http://www.apache.org/licenses/LICENSE-2.0
-@
-@ Unless required by applicable law or agreed to in writing, software
-@ distributed under the License is distributed on an "AS IS" BASIS,
-@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-@ See the License for the specific language governing permissions and
-@ limitations under the License.
-
-
-
- @ These functions are only valid when:
- @ x_step_q4 == 16
- @ w%4 == 0
- @ h%4 == 0
- @ taps == 8
- @ VP9_FILTER_WEIGHT == 128
- @ VP9_FILTER_SHIFT == 7
-
- .global vp9_convolve8_avg_horiz_neon
- .type vp9_convolve8_avg_horiz_neon, function
- .global vp9_convolve8_avg_vert_neon
- .type vp9_convolve8_avg_vert_neon, function
- .global vp9_convolve8_avg_horiz_c
- .global vp9_convolve8_avg_vert_c
- .arm
- .eabi_attribute 24, 1 @Tag_ABI_align_needed
- .eabi_attribute 25, 1 @Tag_ABI_align_preserved
-
-.text
-.p2align 2
-
- @ Multiply and accumulate by q0
-.macro MULTIPLY_BY_Q0 dst, src0, src1, src2, src3, src4, src5, src6, src7
- vmull.s16 \dst, \src0, d0[0]
- vmlal.s16 \dst, \src1, d0[1]
- vmlal.s16 \dst, \src2, d0[2]
- vmlal.s16 \dst, \src3, d0[3]
- vmlal.s16 \dst, \src4, d1[0]
- vmlal.s16 \dst, \src5, d1[1]
- vmlal.s16 \dst, \src6, d1[2]
- vmlal.s16 \dst, \src7, d1[3]
- .endm
-
-@ r0 const uint8_t *src
-@ r1 int src_stride
-@ r2 uint8_t *dst
-@ r3 int dst_stride
-@ sp[]const int16_t *filter_x
-@ sp[]int x_step_q4
-@ sp[]const int16_t *filter_y ; unused
-@ sp[]int y_step_q4 ; unused
-@ sp[]int w
-@ sp[]int h
-
-_vp9_convolve8_avg_horiz_neon:
- vp9_convolve8_avg_horiz_neon: @ PROC
- ldr r12, [sp, #4] @ x_step_q4
- cmp r12, #16
- bne vp9_convolve8_avg_horiz_c
-
- push {r4-r10, lr}
-
- sub r0, r0, #3 @ adjust for taps
-
- ldr r5, [sp, #32] @ filter_x
- ldr r6, [sp, #48] @ w
- ldr r7, [sp, #52] @ h
-
- vld1.s16 {q0}, [r5] @ filter_x
-
- sub r8, r1, r1, lsl #2 @ -src_stride * 3
- add r8, r8, #4 @ -src_stride * 3 + 4
-
- sub r4, r3, r3, lsl #2 @ -dst_stride * 3
- add r4, r4, #4 @ -dst_stride * 3 + 4
-
- rsb r9, r6, r1, lsl #2 @ reset src for outer loop
- sub r9, r9, #7
- rsb r12, r6, r3, lsl #2 @ reset dst for outer loop
-
- mov r10, r6 @ w loop counter
-
-loop_horiz_v:
- vld1.8 {d24}, [r0], r1
- vld1.8 {d25}, [r0], r1
- vld1.8 {d26}, [r0], r1
- vld1.8 {d27}, [r0], r8
-
- vtrn.16 q12, q13
- vtrn.8 d24, d25
- vtrn.8 d26, d27
-
- pld [r0, r1, lsl #2]
-
- vmovl.u8 q8, d24
- vmovl.u8 q9, d25
- vmovl.u8 q10, d26
- vmovl.u8 q11, d27
-
- @ save a few instructions in the inner loop
- vswp d17, d18
- vmov d23, d21
-
- add r0, r0, #3
-
-loop_horiz:
- add r5, r0, #64
-
- vld1.32 {d28[]}, [r0], r1
- vld1.32 {d29[]}, [r0], r1
- vld1.32 {d31[]}, [r0], r1
- vld1.32 {d30[]}, [r0], r8
-
- pld [r5]
-
- vtrn.16 d28, d31
- vtrn.16 d29, d30
- vtrn.8 d28, d29
- vtrn.8 d31, d30
-
- pld [r5, r1]
-
- @ extract to s16
- vtrn.32 q14, q15
- vmovl.u8 q12, d28
- vmovl.u8 q13, d29
-
- pld [r5, r1, lsl #1]
-
- @ slightly out of order load to match the existing data
- vld1.u32 {d6[0]}, [r2], r3
- vld1.u32 {d7[0]}, [r2], r3
- vld1.u32 {d6[1]}, [r2], r3
- vld1.u32 {d7[1]}, [r2], r3
-
- sub r2, r2, r3, lsl #2 @ reset for store
-
- @ src[] * filter_x
- MULTIPLY_BY_Q0 q1, d16, d17, d20, d22, d18, d19, d23, d24
- MULTIPLY_BY_Q0 q2, d17, d20, d22, d18, d19, d23, d24, d26
- MULTIPLY_BY_Q0 q14, d20, d22, d18, d19, d23, d24, d26, d27
- MULTIPLY_BY_Q0 q15, d22, d18, d19, d23, d24, d26, d27, d25
-
- pld [r5, -r8]
-
- @ += 64 >> 7
- vqrshrun.s32 d2, q1, #7
- vqrshrun.s32 d3, q2, #7
- vqrshrun.s32 d4, q14, #7
- vqrshrun.s32 d5, q15, #7
-
- @ saturate
- vqmovn.u16 d2, q1
- vqmovn.u16 d3, q2
-
- @ transpose
- vtrn.16 d2, d3
- vtrn.32 d2, d3
- vtrn.8 d2, d3
-
- @ average the new value and the dst value
- vrhadd.u8 q1, q1, q3
-
- vst1.u32 {d2[0]}, [r2,:32], r3
- vst1.u32 {d3[0]}, [r2,:32], r3
- vst1.u32 {d2[1]}, [r2,:32], r3
- vst1.u32 {d3[1]}, [r2,:32], r4
-
- vmov q8, q9
- vmov d20, d23
- vmov q11, q12
- vmov q9, q13
-
- subs r6, r6, #4 @ w -= 4
- bgt loop_horiz
-
- @ outer loop
- mov r6, r10 @ restore w counter
- add r0, r0, r9 @ src += src_stride * 4 - w
- add r2, r2, r12 @ dst += dst_stride * 4 - w
- subs r7, r7, #4 @ h -= 4
- bgt loop_horiz_v
-
- pop {r4-r10, pc}
-
- .size vp9_convolve8_avg_horiz_neon, .-vp9_convolve8_avg_horiz_neon @ ENDP
-
-_vp9_convolve8_avg_vert_neon:
- vp9_convolve8_avg_vert_neon: @ PROC
- ldr r12, [sp, #12]
- cmp r12, #16
- bne vp9_convolve8_avg_vert_c
-
- push {r4-r8, lr}
-
- @ adjust for taps
- sub r0, r0, r1
- sub r0, r0, r1, lsl #1
-
- ldr r4, [sp, #32] @ filter_y
- ldr r6, [sp, #40] @ w
- ldr lr, [sp, #44] @ h
-
- vld1.s16 {q0}, [r4] @ filter_y
-
- lsl r1, r1, #1
- lsl r3, r3, #1
-
-loop_vert_h:
- mov r4, r0
- add r7, r0, r1, asr #1
- mov r5, r2
- add r8, r2, r3, asr #1
- mov r12, lr @ h loop counter
-
- vld1.u32 {d16[0]}, [r4], r1
- vld1.u32 {d16[1]}, [r7], r1
- vld1.u32 {d18[0]}, [r4], r1
- vld1.u32 {d18[1]}, [r7], r1
- vld1.u32 {d20[0]}, [r4], r1
- vld1.u32 {d20[1]}, [r7], r1
- vld1.u32 {d22[0]}, [r4], r1
-
- vmovl.u8 q8, d16
- vmovl.u8 q9, d18
- vmovl.u8 q10, d20
- vmovl.u8 q11, d22
-
-loop_vert:
- @ always process a 4x4 block at a time
- vld1.u32 {d24[0]}, [r7], r1
- vld1.u32 {d26[0]}, [r4], r1
- vld1.u32 {d26[1]}, [r7], r1
- vld1.u32 {d24[1]}, [r4], r1
-
- @ extract to s16
- vmovl.u8 q12, d24
- vmovl.u8 q13, d26
-
- vld1.u32 {d6[0]}, [r5,:32], r3
- vld1.u32 {d6[1]}, [r8,:32], r3
- vld1.u32 {d7[0]}, [r5,:32], r3
- vld1.u32 {d7[1]}, [r8,:32], r3
-
- pld [r7]
- pld [r4]
-
- @ src[] * filter_y
- MULTIPLY_BY_Q0 q1, d16, d17, d18, d19, d20, d21, d22, d24
-
- pld [r7, r1]
- pld [r4, r1]
-
- MULTIPLY_BY_Q0 q2, d17, d18, d19, d20, d21, d22, d24, d26
-
- pld [r5]
- pld [r8]
-
- MULTIPLY_BY_Q0 q14, d18, d19, d20, d21, d22, d24, d26, d27
-
- pld [r5, r3]
- pld [r8, r3]
-
- MULTIPLY_BY_Q0 q15, d19, d20, d21, d22, d24, d26, d27, d25
-
- @ += 64 >> 7
- vqrshrun.s32 d2, q1, #7
- vqrshrun.s32 d3, q2, #7
- vqrshrun.s32 d4, q14, #7
- vqrshrun.s32 d5, q15, #7
-
- @ saturate
- vqmovn.u16 d2, q1
- vqmovn.u16 d3, q2
-
- @ average the new value and the dst value
- vrhadd.u8 q1, q1, q3
-
- sub r5, r5, r3, lsl #1 @ reset for store
- sub r8, r8, r3, lsl #1
-
- vst1.u32 {d2[0]}, [r5,:32], r3
- vst1.u32 {d2[1]}, [r8,:32], r3
- vst1.u32 {d3[0]}, [r5,:32], r3
- vst1.u32 {d3[1]}, [r8,:32], r3
-
- vmov q8, q10
- vmov d18, d22
- vmov d19, d24
- vmov q10, q13
- vmov d22, d25
-
- subs r12, r12, #4 @ h -= 4
- bgt loop_vert
-
- @ outer loop
- add r0, r0, #4
- add r2, r2, #4
- subs r6, r6, #4 @ w -= 4
- bgt loop_vert_h
-
- pop {r4-r8, pc}
-
- .size vp9_convolve8_avg_vert_neon, .-vp9_convolve8_avg_vert_neon @ ENDP
- .section .note.GNU-stack,"",%progbits
diff --git a/cpu_ref/convolve/convolve8_neon.s b/cpu_ref/convolve/convolve8_neon.s
deleted file mode 100644
index 0bc15d9..0000000
--- a/cpu_ref/convolve/convolve8_neon.s
+++ /dev/null
@@ -1,300 +0,0 @@
-@ This file was created from a .asm file
-@ using the ads2gas.pl script.
- .equ DO1STROUNDING, 0
-@
-@ Copyright (c) 2013 The WebM project authors. All Rights Reserved.
-@
-@ Use of this source code is governed by a BSD-style license
-@ that can be found in the LICENSE file in the root of the source
-@ tree. An additional intellectual property rights grant can be found
-@ in the file PATENTS. All contributing project authors may
-@ be found in the AUTHORS file in the root of the source tree.
-@
-@ Copyright (c) 2014 The Android Open Source Project
-@
-@ Licensed under the Apache License, Version 2.0 (the "License");
-@ you may not use this file except in compliance with the License.
-@ You may obtain a copy of the License at
-@
-@ http://www.apache.org/licenses/LICENSE-2.0
-@
-@ Unless required by applicable law or agreed to in writing, software
-@ distributed under the License is distributed on an "AS IS" BASIS,
-@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-@ See the License for the specific language governing permissions and
-@ limitations under the License.
-
-
- @ These functions are only valid when:
- @ x_step_q4 == 16
- @ w%4 == 0
- @ h%4 == 0
- @ taps == 8
- @ VP9_FILTER_WEIGHT == 128
- @ VP9_FILTER_SHIFT == 7
-
- .global vp9_convolve8_horiz_neon
- .type vp9_convolve8_horiz_neon, function
- .global vp9_convolve8_vert_neon
- .type vp9_convolve8_vert_neon, function
- .global vp9_convolve8_horiz_c
- .global vp9_convolve8_vert_c
- .arm
- .eabi_attribute 24, 1 @Tag_ABI_align_needed
- .eabi_attribute 25, 1 @Tag_ABI_align_preserved
-
-.text
-.p2align 2
-
- @ Multiply and accumulate by q0
-.macro MULTIPLY_BY_Q0 dst, src0, src1, src2, src3, src4, src5, src6, src7
- vmull.s16 \dst, \src0, d0[0]
- vmlal.s16 \dst, \src1, d0[1]
- vmlal.s16 \dst, \src2, d0[2]
- vmlal.s16 \dst, \src3, d0[3]
- vmlal.s16 \dst, \src4, d1[0]
- vmlal.s16 \dst, \src5, d1[1]
- vmlal.s16 \dst, \src6, d1[2]
- vmlal.s16 \dst, \src7, d1[3]
- .endm
-
-@ r0 const uint8_t *src
-@ r1 int src_stride
-@ r2 uint8_t *dst
-@ r3 int dst_stride
-@ sp[]const int16_t *filter_x
-@ sp[]int x_step_q4
-@ sp[]const int16_t *filter_y ; unused
-@ sp[]int y_step_q4 ; unused
-@ sp[]int w
-@ sp[]int h
-
-_vp9_convolve8_horiz_neon:
- vp9_convolve8_horiz_neon: @ PROC
- ldr r12, [sp, #4] @ x_step_q4
- cmp r12, #16
- bne vp9_convolve8_horiz_c
-
- push {r4-r10, lr}
-
- sub r0, r0, #3 @ adjust for taps
-
- ldr r5, [sp, #32] @ filter_x
- ldr r6, [sp, #48] @ w
- ldr r7, [sp, #52] @ h
-
- vld1.s16 {q0}, [r5] @ filter_x
-
- sub r8, r1, r1, lsl #2 @ -src_stride * 3
- add r8, r8, #4 @ -src_stride * 3 + 4
-
- sub r4, r3, r3, lsl #2 @ -dst_stride * 3
- add r4, r4, #4 @ -dst_stride * 3 + 4
-
- rsb r9, r6, r1, lsl #2 @ reset src for outer loop
- sub r9, r9, #7
- rsb r12, r6, r3, lsl #2 @ reset dst for outer loop
-
- mov r10, r6 @ w loop counter
-
-loop_horiz_v:
- vld1.8 {d24}, [r0], r1
- vld1.8 {d25}, [r0], r1
- vld1.8 {d26}, [r0], r1
- vld1.8 {d27}, [r0], r8
-
- vtrn.16 q12, q13
- vtrn.8 d24, d25
- vtrn.8 d26, d27
-
- pld [r0, r1, lsl #2]
-
- vmovl.u8 q8, d24
- vmovl.u8 q9, d25
- vmovl.u8 q10, d26
- vmovl.u8 q11, d27
-
- @ save a few instructions in the inner loop
- vswp d17, d18
- vmov d23, d21
-
- add r0, r0, #3
-
-loop_horiz:
- add r5, r0, #64
-
- vld1.32 {d28[]}, [r0], r1
- vld1.32 {d29[]}, [r0], r1
- vld1.32 {d31[]}, [r0], r1
- vld1.32 {d30[]}, [r0], r8
-
- pld [r5]
-
- vtrn.16 d28, d31
- vtrn.16 d29, d30
- vtrn.8 d28, d29
- vtrn.8 d31, d30
-
- pld [r5, r1]
-
- @ extract to s16
- vtrn.32 q14, q15
- vmovl.u8 q12, d28
- vmovl.u8 q13, d29
-
- pld [r5, r1, lsl #1]
-
- @ src[] * filter_x
- MULTIPLY_BY_Q0 q1, d16, d17, d20, d22, d18, d19, d23, d24
- MULTIPLY_BY_Q0 q2, d17, d20, d22, d18, d19, d23, d24, d26
- MULTIPLY_BY_Q0 q14, d20, d22, d18, d19, d23, d24, d26, d27
- MULTIPLY_BY_Q0 q15, d22, d18, d19, d23, d24, d26, d27, d25
-
- pld [r5, -r8]
-
- @ += 64 >> 7
- vqrshrun.s32 d2, q1, #7
- vqrshrun.s32 d3, q2, #7
- vqrshrun.s32 d4, q14, #7
- vqrshrun.s32 d5, q15, #7
-
- @ saturate
- vqmovn.u16 d2, q1
- vqmovn.u16 d3, q2
-
- @ transpose
- vtrn.16 d2, d3
- vtrn.32 d2, d3
- vtrn.8 d2, d3
-
- vst1.u32 {d2[0]}, [r2,:32], r3
- vst1.u32 {d3[0]}, [r2,:32], r3
- vst1.u32 {d2[1]}, [r2,:32], r3
- vst1.u32 {d3[1]}, [r2,:32], r4
-
- vmov q8, q9
- vmov d20, d23
- vmov q11, q12
- vmov q9, q13
-
- subs r6, r6, #4 @ w -= 4
- bgt loop_horiz
-
- @ outer loop
- mov r6, r10 @ restore w counter
- add r0, r0, r9 @ src += src_stride * 4 - w
- add r2, r2, r12 @ dst += dst_stride * 4 - w
- subs r7, r7, #4 @ h -= 4
- bgt loop_horiz_v
-
- pop {r4-r10, pc}
-
- .size vp9_convolve8_horiz_neon, .-vp9_convolve8_horiz_neon @ ENDP
-
-_vp9_convolve8_vert_neon:
- vp9_convolve8_vert_neon: @ PROC
- ldr r12, [sp, #12]
- cmp r12, #16
- bne vp9_convolve8_vert_c
-
- push {r4-r8, lr}
-
- @ adjust for taps
- sub r0, r0, r1
- sub r0, r0, r1, lsl #1
-
- ldr r4, [sp, #32] @ filter_y
- ldr r6, [sp, #40] @ w
- ldr lr, [sp, #44] @ h
-
- vld1.s16 {q0}, [r4] @ filter_y
-
- lsl r1, r1, #1
- lsl r3, r3, #1
-
-loop_vert_h:
- mov r4, r0
- add r7, r0, r1, asr #1
- mov r5, r2
- add r8, r2, r3, asr #1
- mov r12, lr @ h loop counter
-
- vld1.u32 {d16[0]}, [r4], r1
- vld1.u32 {d16[1]}, [r7], r1
- vld1.u32 {d18[0]}, [r4], r1
- vld1.u32 {d18[1]}, [r7], r1
- vld1.u32 {d20[0]}, [r4], r1
- vld1.u32 {d20[1]}, [r7], r1
- vld1.u32 {d22[0]}, [r4], r1
-
- vmovl.u8 q8, d16
- vmovl.u8 q9, d18
- vmovl.u8 q10, d20
- vmovl.u8 q11, d22
-
-loop_vert:
- @ always process a 4x4 block at a time
- vld1.u32 {d24[0]}, [r7], r1
- vld1.u32 {d26[0]}, [r4], r1
- vld1.u32 {d26[1]}, [r7], r1
- vld1.u32 {d24[1]}, [r4], r1
-
- @ extract to s16
- vmovl.u8 q12, d24
- vmovl.u8 q13, d26
-
- pld [r5]
- pld [r8]
-
- @ src[] * filter_y
- MULTIPLY_BY_Q0 q1, d16, d17, d18, d19, d20, d21, d22, d24
-
- pld [r5, r3]
- pld [r8, r3]
-
- MULTIPLY_BY_Q0 q2, d17, d18, d19, d20, d21, d22, d24, d26
-
- pld [r7]
- pld [r4]
-
- MULTIPLY_BY_Q0 q14, d18, d19, d20, d21, d22, d24, d26, d27
-
- pld [r7, r1]
- pld [r4, r1]
-
- MULTIPLY_BY_Q0 q15, d19, d20, d21, d22, d24, d26, d27, d25
-
- @ += 64 >> 7
- vqrshrun.s32 d2, q1, #7
- vqrshrun.s32 d3, q2, #7
- vqrshrun.s32 d4, q14, #7
- vqrshrun.s32 d5, q15, #7
-
- @ saturate
- vqmovn.u16 d2, q1
- vqmovn.u16 d3, q2
-
- vst1.u32 {d2[0]}, [r5,:32], r3
- vst1.u32 {d2[1]}, [r8,:32], r3
- vst1.u32 {d3[0]}, [r5,:32], r3
- vst1.u32 {d3[1]}, [r8,:32], r3
-
- vmov q8, q10
- vmov d18, d22
- vmov d19, d24
- vmov q10, q13
- vmov d22, d25
-
- subs r12, r12, #4 @ h -= 4
- bgt loop_vert
-
- @ outer loop
- add r0, r0, #4
- add r2, r2, #4
- subs r6, r6, #4 @ w -= 4
- bgt loop_vert_h
-
- pop {r4-r8, pc}
-
- .size vp9_convolve8_vert_neon, .-vp9_convolve8_vert_neon @ ENDP
- .section .note.GNU-stack,"",%progbits
diff --git a/cpu_ref/convolve/convolve_avg_neon.s b/cpu_ref/convolve/convolve_avg_neon.s
deleted file mode 100644
index 41e79f1..0000000
--- a/cpu_ref/convolve/convolve_avg_neon.s
+++ /dev/null
@@ -1,135 +0,0 @@
-@ This file was created from a .asm file
-@ using the ads2gas.pl script.
- .equ DO1STROUNDING, 0
-@
-@ Copyright (c) 2013 The WebM project authors. All Rights Reserved.
-@
-@ Use of this source code is governed by a BSD-style license
-@ that can be found in the LICENSE file in the root of the source
-@ tree. An additional intellectual property rights grant can be found
-@ in the file PATENTS. All contributing project authors may
-@ be found in the AUTHORS file in the root of the source tree.
-@
-@ Copyright (c) 2014 The Android Open Source Project
-@
-@ Licensed under the Apache License, Version 2.0 (the "License");
-@ you may not use this file except in compliance with the License.
-@ You may obtain a copy of the License at
-@
-@ http://www.apache.org/licenses/LICENSE-2.0
-@
-@ Unless required by applicable law or agreed to in writing, software
-@ distributed under the License is distributed on an "AS IS" BASIS,
-@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-@ See the License for the specific language governing permissions and
-@ limitations under the License.
-
- .global vp9_convolve_avg_neon
- .type vp9_convolve_avg_neon, function
- .arm
- .eabi_attribute 24, 1 @Tag_ABI_align_needed
- .eabi_attribute 25, 1 @Tag_ABI_align_preserved
-
-.text
-.p2align 2
-
-_vp9_convolve_avg_neon:
- vp9_convolve_avg_neon: @ PROC
- push {r4-r6, lr}
- ldrd r4, r5, [sp, #32]
- mov r6, r2
-
- cmp r4, #32
- bgt avg64
- beq avg32
- cmp r4, #8
- bgt avg16
- beq avg8
- b avg4
-
-avg64:
- sub lr, r1, #32
- sub r4, r3, #32
-avg64_h:
- pld [r0, r1, lsl #1]
- vld1.8 {q0-q1}, [r0]!
- vld1.8 {q2-q3}, [r0], lr
- pld [r2, r3]
- vld1.8 {q8-q9}, [r6,:128]!
- vld1.8 {q10-q11}, [r6,:128], r4
- vrhadd.u8 q0, q0, q8
- vrhadd.u8 q1, q1, q9
- vrhadd.u8 q2, q2, q10
- vrhadd.u8 q3, q3, q11
- vst1.8 {q0-q1}, [r2,:128]!
- vst1.8 {q2-q3}, [r2,:128], r4
- subs r5, r5, #1
- bgt avg64_h
- pop {r4-r6, pc}
-
-avg32:
- vld1.8 {q0-q1}, [r0], r1
- vld1.8 {q2-q3}, [r0], r1
- vld1.8 {q8-q9}, [r6,:128], r3
- vld1.8 {q10-q11}, [r6,:128], r3
- pld [r0]
- vrhadd.u8 q0, q0, q8
- pld [r0, r1]
- vrhadd.u8 q1, q1, q9
- pld [r6]
- vrhadd.u8 q2, q2, q10
- pld [r6, r3]
- vrhadd.u8 q3, q3, q11
- vst1.8 {q0-q1}, [r2,:128], r3
- vst1.8 {q2-q3}, [r2,:128], r3
- subs r5, r5, #2
- bgt avg32
- pop {r4-r6, pc}
-
-avg16:
- vld1.8 {q0}, [r0], r1
- vld1.8 {q1}, [r0], r1
- vld1.8 {q2}, [r6,:128], r3
- vld1.8 {q3}, [r6,:128], r3
- pld [r0]
- pld [r0, r1]
- vrhadd.u8 q0, q0, q2
- pld [r6]
- pld [r6, r3]
- vrhadd.u8 q1, q1, q3
- vst1.8 {q0}, [r2,:128], r3
- vst1.8 {q1}, [r2,:128], r3
- subs r5, r5, #2
- bgt avg16
- pop {r4-r6, pc}
-
-avg8:
- vld1.8 {d0}, [r0], r1
- vld1.8 {d1}, [r0], r1
- vld1.8 {d2}, [r6,:64], r3
- vld1.8 {d3}, [r6,:64], r3
- pld [r0]
- pld [r0, r1]
- vrhadd.u8 q0, q0, q1
- pld [r6]
- pld [r6, r3]
- vst1.8 {d0}, [r2,:64], r3
- vst1.8 {d1}, [r2,:64], r3
- subs r5, r5, #2
- bgt avg8
- pop {r4-r6, pc}
-
-avg4:
- vld1.32 {d0[0]}, [r0], r1
- vld1.32 {d0[1]}, [r0], r1
- vld1.32 {d2[0]}, [r6,:32], r3
- vld1.32 {d2[1]}, [r6,:32], r3
- vrhadd.u8 d0, d0, d2
- vst1.32 {d0[0]}, [r2,:32], r3
- vst1.32 {d0[1]}, [r2,:32], r3
- subs r5, r5, #2
- bgt avg4
- pop {r4-r6, pc}
- .size vp9_convolve_avg_neon, .-vp9_convolve_avg_neon @ ENDP
-
- .section .note.GNU-stack,"",%progbits
diff --git a/cpu_ref/convolve/convolve_copy_neon.s b/cpu_ref/convolve/convolve_copy_neon.s
deleted file mode 100644
index 60ada14..0000000
--- a/cpu_ref/convolve/convolve_copy_neon.s
+++ /dev/null
Binary files differ
diff --git a/cpu_ref/convolve/convolve_neon.c b/cpu_ref/convolve/convolve_neon.c
deleted file mode 100644
index 3d4bf30..0000000
--- a/cpu_ref/convolve/convolve_neon.c
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- *
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "vp9_common.h"
-#include "vp9_filter.h"
-#include <string.h>
-#include <stdio.h>
-
-extern void vp9_convolve8_horiz_neon(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h);
-
-extern void vp9_convolve8_vert_neon(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h);
-
-extern void vp9_convolve8_avg_vert_neon(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h);
-extern void vp9_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h);
-
-extern void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h);
-
-void vp9_convolve8_neon(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h) {
- /* Given our constraints: w <= 64, h <= 64, taps == 8 we can reduce the
- * maximum buffer size to 64 * 64 + 7 (+ 1 to make it divisible by 4).
- */
- DECLARE_ALIGNED_ARRAY(8, uint8_t, temp, 64 * 72);
-
- // Account for the vertical phase needing 3 lines prior and 4 lines post
- int intermediate_height = h + 7;
-
- if (x_step_q4 != 16 || y_step_q4 != 16)
- return vp9_convolve8_c(src, src_stride,
- dst, dst_stride,
- filter_x, x_step_q4,
- filter_y, y_step_q4,
- w, h);
-
- /* Filter starting 3 lines back. The neon implementation will ignore the
- * given height and filter a multiple of 4 lines. Since this goes in to
- * the temp buffer which has lots of extra room and is subsequently discarded
- * this is safe if somewhat less than ideal.
- */
- vp9_convolve8_horiz_neon(src - src_stride * 3, src_stride,
- temp, 64, filter_x, x_step_q4,
- filter_y, y_step_q4,
- w, intermediate_height);
-
- /* Step into the temp buffer 3 lines to get the actual frame data */
- vp9_convolve8_vert_neon(temp + 64 * 3, 64, dst, dst_stride, filter_x,
- x_step_q4, filter_y, y_step_q4, w, h);
-}
-
-void vp9_convolve8_avg_neon(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h) {
- DECLARE_ALIGNED_ARRAY(8, uint8_t, temp, 64 * 72);
- int intermediate_height = h + 7;
-
- if (x_step_q4 != 16 || y_step_q4 != 16)
- return vp9_convolve8_avg_c(src, src_stride, dst, dst_stride,
- filter_x, x_step_q4, filter_y, y_step_q4,
- w, h);
-
- /* This implementation has the same issues as above. In addition, we only want
- * to average the values after both passes.
- */
- vp9_convolve8_horiz_neon(src - src_stride * 3, src_stride, temp, 64,
- filter_x, x_step_q4, filter_y, y_step_q4,
- w, intermediate_height);
- vp9_convolve8_avg_vert_neon(temp + 64 * 3, 64, dst, dst_stride,
- filter_x, x_step_q4, filter_y, y_step_q4,
- w, h);
-}
diff --git a/cpu_ref/convolve/vp9_common.h b/cpu_ref/convolve/vp9_common.h
deleted file mode 100644
index 73a1021..0000000
--- a/cpu_ref/convolve/vp9_common.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2011 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- *
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef VP9_COMMON_VP9_COMMON_H_
-#define VP9_COMMON_VP9_COMMON_H_
-#include <stdint.h>
-
-#define DECLARE_ALIGNED_ARRAY(a,typ,val,n)\
- typ val##_[(n)+(a)/sizeof(typ)+1];\
- typ *val = (typ*)((((intptr_t)val##_)+(a)-1)&((intptr_t)-(a)))
-/* Interface header for common constant data structures and lookup tables */
-
-#define MIN(x, y) (((x) < (y)) ? (x) : (y))
-#define MAX(x, y) (((x) > (y)) ? (x) : (y))
-
-#define ROUND_POWER_OF_TWO(value, n) \
- (((value) + (1 << ((n) - 1))) >> (n))
-
-#define ALIGN_POWER_OF_TWO(value, n) \
- (((value) + ((1 << (n)) - 1)) & ~((1 << (n)) - 1))
-
-// Only need this for fixed-size arrays, for structs just assign.
-#define vp9_copy(dest, src) { \
- memcpy(dest, src, sizeof(src)); \
- }
-
-// Use this for variably-sized arrays.
-#define vp9_copy_array(dest, src, n) { \
- memcpy(dest, src, n * sizeof(*src)); \
- }
-
-#define vp9_zero(dest) memset(&dest, 0, sizeof(dest))
-#define vp9_zero_array(dest, n) memset(dest, 0, n * sizeof(*dest))
-
-static inline uint8_t clip_pixel(int val) {
- return (val > 255) ? 255u : (val < 0) ? 0u : val;
-}
-
-static inline int clamp(int value, int low, int high) {
- return value < low ? low : (value > high ? high : value);
-}
-
-static inline double fclamp(double value, double low, double high) {
- return value < low ? low : (value > high ? high : value);
-}
-
-#define VP9_SYNC_CODE_0 0x49
-#define VP9_SYNC_CODE_1 0x83
-#define VP9_SYNC_CODE_2 0x42
-
-#define VP9_FRAME_MARKER 0x2
-
-
-#endif // VP9_COMMON_VP9_COMMON_H_
diff --git a/cpu_ref/convolve/vp9_filter.h b/cpu_ref/convolve/vp9_filter.h
deleted file mode 100644
index 754578d..0000000
--- a/cpu_ref/convolve/vp9_filter.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2011 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- *
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef VP9_COMMON_VP9_FILTER_H_
-#define VP9_COMMON_VP9_FILTER_H_
-
-#define FILTER_BITS 7
-
-#define SUBPEL_BITS 4
-#define SUBPEL_MASK ((1 << SUBPEL_BITS) - 1)
-#define SUBPEL_SHIFTS (1 << SUBPEL_BITS)
-#define SUBPEL_TAPS 8
-
-typedef enum {
- EIGHTTAP = 0,
- EIGHTTAP_SMOOTH = 1,
- EIGHTTAP_SHARP = 2,
- BILINEAR = 3,
- SWITCHABLE = 4 /* should be the last one */
-} INTERPOLATION_TYPE;
-
-typedef int16_t subpel_kernel[SUBPEL_TAPS];
-
-struct subpix_fn_table {
- const subpel_kernel *filter_x;
- const subpel_kernel *filter_y;
-};
-
-const subpel_kernel *vp9_get_filter_kernel(INTERPOLATION_TYPE type);
-extern const subpel_kernel vp9_bilinear_filters[SUBPEL_SHIFTS];
-extern const subpel_kernel vp9_sub_pel_filters_8[SUBPEL_SHIFTS];
-extern const subpel_kernel vp9_sub_pel_filters_8s[SUBPEL_SHIFTS];
-extern const subpel_kernel vp9_sub_pel_filters_8lp[SUBPEL_SHIFTS];
-
-// The VP9_BILINEAR_FILTERS_2TAP macro returns a pointer to the bilinear
-// filter kernel as a 2 tap filter.
-#define BILINEAR_FILTERS_2TAP(x) \
- (vp9_bilinear_filters[(x)] + SUBPEL_TAPS/2 - 1)
-
-#endif // VP9_COMMON_VP9_FILTER_H_
diff --git a/cpu_ref/rsCpuConvolve.h b/cpu_ref/rsCpuConvolve.h
deleted file mode 100644
index d7d2d16..0000000
--- a/cpu_ref/rsCpuConvolve.h
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef RSD_CPU_CONVOLVE_NEON_H
-#define RSD_CPU_CONVOLVE_NEON_H
-
-#include <stdint.h>
-
-extern "C" {
-#if defined(ARCH_ARM_HAVE_VFP)
-void vp9_convolve_copy_neon(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h);
-
-void vp9_convolve_avg_neon(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h);
-
-void vp9_convolve8_vert_neon(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h);
-
-void vp9_convolve8_avg_vert_neon(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h);
-
-void vp9_convolve8_horiz_neon(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h);
-
-void vp9_convolve8_avg_horiz_neon(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h);
-void vp9_convolve8_neon(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h);
-
-void vp9_convolve8_avg_neon(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h);
-
-#else
-void vp9_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h);
-
-void vp9_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h);
-
-void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h);
-
-void vp9_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h);
-
-void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h);
-
-void vp9_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h);
-
-void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h);
-
-void vp9_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h);
-#endif
-}
-#endif
diff --git a/cpu_ref/rsCpuCore.cpp b/cpu_ref/rsCpuCore.cpp
index 277836a..f2ce358 100644
--- a/cpu_ref/rsCpuCore.cpp
+++ b/cpu_ref/rsCpuCore.cpp
@@ -513,8 +513,6 @@
extern RsdCpuScriptImpl * rsdIntrinsic_3DLUT(RsdCpuReferenceImpl *ctx,
const Script *s, const Element *e);
-extern RsdCpuScriptImpl * rsdIntrinsic_InterPred(RsdCpuReferenceImpl *ctx,
- const Script *s, const Element *e);
extern RsdCpuScriptImpl * rsdIntrinsic_Convolve3x3(RsdCpuReferenceImpl *ctx,
const Script *s, const Element *e);
extern RsdCpuScriptImpl * rsdIntrinsic_ColorMatrix(RsdCpuReferenceImpl *ctx,
@@ -544,11 +542,6 @@
case RS_SCRIPT_INTRINSIC_ID_3DLUT:
i = rsdIntrinsic_3DLUT(this, s, e);
break;
-#ifndef RS_COMPATIBILITY_LIB
- case RS_SCRIPT_INTRINSIC_ID_INTER_PRED:
- i = rsdIntrinsic_InterPred(this, s, e);
- break;
-#endif
case RS_SCRIPT_INTRINSIC_ID_CONVOLVE_3x3:
i = rsdIntrinsic_Convolve3x3(this, s, e);
break;
diff --git a/cpu_ref/rsCpuIntrinsicInterPred.cpp b/cpu_ref/rsCpuIntrinsicInterPred.cpp
deleted file mode 100644
index 20e0f2e..0000000
--- a/cpu_ref/rsCpuIntrinsicInterPred.cpp
+++ /dev/null
@@ -1,173 +0,0 @@
-#include "rsCpuIntrinsicInterPred.h"
-
-void RsdCpuScriptIntrinsicInterPred::setGlobalObj(uint32_t slot,
- ObjectBase *data) {
- Allocation *alloc = static_cast<Allocation *>(data);
- if (slot == 0) mRef = (uint8_t *)alloc->mHal.state.userProvidedPtr;
- if (slot == 1) mParam = (uint8_t *)alloc->mHal.state.userProvidedPtr;
-}
-
-void RsdCpuScriptIntrinsicInterPred::setGlobalVar(uint32_t slot,
- const void *data,
- size_t dataLength) {
- mFriParamCount = ((int32_t *)data)[0];
- mSecParamCount = ((int32_t *)data)[1];
- mParamOffset = ((int32_t *)data)[2];
-}
-
-void RsdCpuScriptIntrinsicInterPred::kernel(const RsForEachStubParamStruct *p,
- uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep) {
- RsdCpuScriptIntrinsicInterPred *cp = (RsdCpuScriptIntrinsicInterPred *)p->usr;
- cp->mCount++;
- const int vp9_convolve_mode[2][2] = {{24, 16}, {8, 0}};
- uint8_t *ref_base = cp->mRef;
- INTER_PRED_PARAM *fri_param = (INTER_PRED_PARAM *)cp->mParam;
- INTER_PRED_PARAM *sec_param = (INTER_PRED_PARAM *)(cp->mParam + cp->mParamOffset);
- int32_t fri_count = cp->mFriParamCount;
- int32_t sec_count = cp->mSecParamCount;
- int mode_num;
- uint8_t *src;
- uint8_t *dst;
- const int16_t *filter_x;
- const int16_t *filter_y;
- for (int i = 0; i < fri_count; i++) {
-
- mode_num = vp9_convolve_mode[(fri_param[i].x_step_q4 == 16)]
- [(fri_param[i].y_step_q4 == 16)];
- src = ref_base + fri_param[i].src_mv;
- dst = ref_base + fri_param[i].dst_mv;
-
- filter_x = inter_pred_filters + fri_param[i].filter_x_mv;
- filter_y = inter_pred_filters + fri_param[i].filter_y_mv;
-
- cp->mSwitchConvolve[fri_param[i].pred_mode + mode_num](
- src, fri_param[i].src_stride,
- dst, fri_param[i].dst_stride,
- filter_x, fri_param[i].x_step_q4,
- filter_y, fri_param[i].y_step_q4,
- fri_param[i].w, fri_param[i].h
- );
- }
-
- for (int i = 0; i < sec_count; i++) {
- mode_num = vp9_convolve_mode[(sec_param[i].x_step_q4 == 16)]
- [(sec_param[i].y_step_q4 == 16)];
- src = ref_base + sec_param[i].src_mv;
- dst = ref_base + sec_param[i].dst_mv;
-
- filter_x = inter_pred_filters + sec_param[i].filter_x_mv;
- filter_y = inter_pred_filters + sec_param[i].filter_y_mv;
-
- cp->mSwitchConvolve[sec_param[i].pred_mode + mode_num + 1](
- src, sec_param[i].src_stride,
- dst, sec_param[i].dst_stride,
- filter_x, sec_param[i].x_step_q4,
- filter_y, sec_param[i].y_step_q4,
- sec_param[i].w, sec_param[i].h
- );
- }
-
-}
-
-RsdCpuScriptIntrinsicInterPred::RsdCpuScriptIntrinsicInterPred(RsdCpuReferenceImpl *ctx,
- const Script *s, const Element *e)
- : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_INTER_PRED) {
- mRootPtr = &kernel;
- mCount = 0;
- mParamOffset = 0;
- mFriParamCount = 0;
- mSecParamCount = 0;
- mRef = NULL;
- mParam = NULL;
-
-#if defined(ARCH_ARM_HAVE_VFP)
- mSwitchConvolve[0] = vp9_convolve_copy_neon;
- mSwitchConvolve[1] = vp9_convolve_avg_neon;
- mSwitchConvolve[2] = vp9_convolve8_vert_neon;
- mSwitchConvolve[3] = vp9_convolve8_avg_vert_neon;
- mSwitchConvolve[4] = vp9_convolve8_horiz_neon;
- mSwitchConvolve[5] = vp9_convolve8_avg_horiz_neon;
- mSwitchConvolve[6] = vp9_convolve8_neon;
- mSwitchConvolve[7] = vp9_convolve8_avg_neon;
-
- mSwitchConvolve[8] = vp9_convolve8_vert_neon;
- mSwitchConvolve[9] = vp9_convolve8_avg_vert_neon;
- mSwitchConvolve[10] = vp9_convolve8_vert_neon;
- mSwitchConvolve[11] = vp9_convolve8_avg_vert_neon;
- mSwitchConvolve[12] = vp9_convolve8_neon;
- mSwitchConvolve[13] = vp9_convolve8_avg_neon;
- mSwitchConvolve[14] = vp9_convolve8_neon;
- mSwitchConvolve[15] = vp9_convolve8_avg_neon;
-
- mSwitchConvolve[16] = vp9_convolve8_horiz_neon;
- mSwitchConvolve[17] = vp9_convolve8_avg_horiz_neon;
- mSwitchConvolve[18] = vp9_convolve8_neon;
- mSwitchConvolve[19] = vp9_convolve8_avg_neon;
- mSwitchConvolve[20] = vp9_convolve8_horiz_neon;
- mSwitchConvolve[21] = vp9_convolve8_avg_horiz_neon;
- mSwitchConvolve[22] = vp9_convolve8_neon;
- mSwitchConvolve[23] = vp9_convolve8_avg_neon;
-
- mSwitchConvolve[24] = vp9_convolve8_neon;
- mSwitchConvolve[25] = vp9_convolve8_avg_neon;
- mSwitchConvolve[26] = vp9_convolve8_neon;
- mSwitchConvolve[27] = vp9_convolve8_avg_neon;
- mSwitchConvolve[28] = vp9_convolve8_neon;
- mSwitchConvolve[29] = vp9_convolve8_avg_neon;
- mSwitchConvolve[30] = vp9_convolve8_neon;
- mSwitchConvolve[31] = vp9_convolve8_avg_neon;
-#else
- mSwitchConvolve[0] = vp9_convolve_copy_c;
- mSwitchConvolve[1] = vp9_convolve_avg_c;
- mSwitchConvolve[2] = vp9_convolve8_vert_c;
- mSwitchConvolve[3] = vp9_convolve8_avg_vert_c;
- mSwitchConvolve[4] = vp9_convolve8_horiz_c;
- mSwitchConvolve[5] = vp9_convolve8_avg_horiz_c;
- mSwitchConvolve[6] = vp9_convolve8_c;
- mSwitchConvolve[7] = vp9_convolve8_avg_c;
-
- mSwitchConvolve[8] = vp9_convolve8_vert_c;
- mSwitchConvolve[9] = vp9_convolve8_avg_vert_c;
- mSwitchConvolve[10] = vp9_convolve8_vert_c;
- mSwitchConvolve[11] = vp9_convolve8_avg_vert_c;
- mSwitchConvolve[12] = vp9_convolve8_c;
- mSwitchConvolve[13] = vp9_convolve8_avg_c;
- mSwitchConvolve[14] = vp9_convolve8_c;
- mSwitchConvolve[15] = vp9_convolve8_avg_c;
-
- mSwitchConvolve[16] = vp9_convolve8_horiz_c;
- mSwitchConvolve[17] = vp9_convolve8_avg_horiz_c;
- mSwitchConvolve[18] = vp9_convolve8_c;
- mSwitchConvolve[19] = vp9_convolve8_avg_c;
- mSwitchConvolve[20] = vp9_convolve8_horiz_c;
- mSwitchConvolve[21] = vp9_convolve8_avg_horiz_c;
- mSwitchConvolve[22] = vp9_convolve8_c;
- mSwitchConvolve[23] = vp9_convolve8_avg_c;
-
- mSwitchConvolve[24] = vp9_convolve8_c;
- mSwitchConvolve[25] = vp9_convolve8_avg_c;
- mSwitchConvolve[26] = vp9_convolve8_c;
- mSwitchConvolve[27] = vp9_convolve8_avg_c;
- mSwitchConvolve[28] = vp9_convolve8_c;
- mSwitchConvolve[29] = vp9_convolve8_avg_c;
- mSwitchConvolve[30] = vp9_convolve8_c;
- mSwitchConvolve[31] = vp9_convolve8_avg_c;
-#endif
-}
-
-RsdCpuScriptIntrinsicInterPred::~RsdCpuScriptIntrinsicInterPred() {
-}
-
-void RsdCpuScriptIntrinsicInterPred::populateScript(Script *s) {
- s->mHal.info.exportedVariableCount = 3;
-}
-
-void RsdCpuScriptIntrinsicInterPred::invokeFreeChildren() {
-}
-
-
-RsdCpuScriptImpl * rsdIntrinsic_InterPred(RsdCpuReferenceImpl *ctx,
- const Script *s, const Element *e) {
- return new RsdCpuScriptIntrinsicInterPred(ctx, s, e);
-}
diff --git a/cpu_ref/rsCpuIntrinsicInterPred.h b/cpu_ref/rsCpuIntrinsicInterPred.h
deleted file mode 100644
index 552f4eb..0000000
--- a/cpu_ref/rsCpuIntrinsicInterPred.h
+++ /dev/null
@@ -1,115 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef RSD_CPU_SCRIPT_INTRINSIC_INTER_PRED_H
-#define RSD_CPU_SCRIPT_INTRINSIC_INTER_PRED_H
-
-#include "rsCpuIntrinsic.h"
-#include "rsCpuIntrinsicInlines.h"
-#include "rsCpuConvolve.h"
-
-using namespace android;
-using namespace android::renderscript;
-
-namespace android {
-namespace renderscript {
-
-typedef struct inter_pred_param {
- int pred_mode;
-
- int src_mv;
- int src_stride;
- int dst_mv;
- int dst_stride;
-
- int filter_x_mv;
- int x_step_q4;
- int filter_y_mv;
- int y_step_q4;
-
- int w;
- int h;
-}INTER_PRED_PARAM;
-
-static const int16_t inter_pred_filters[512] = {
- 0, 0, 0, 128, 0, 0, 0, 0, 0, 1, -5, 126, 8, -3, 1, 0,
- -1, 3, -10, 122, 18, -6, 2, 0, -1, 4, -13, 118, 27, -9, 3, -1,
- -1, 4, -16, 112, 37, -11, 4, -1, -1, 5, -18, 105, 48, -14, 4, -1,
- -1, 5, -19, 97, 58, -16, 5, -1, -1, 6, -19, 88, 68, -18, 5, -1,
- -1, 6, -19, 78, 78, -19, 6, -1, -1, 5, -18, 68, 88, -19, 6, -1,
- -1, 5, -16, 58, 97, -19, 5, -1, -1, 4, -14, 48, 105, -18, 5, -1,
- -1, 4, -11, 37, 112, -16, 4, -1, -1, 3, -9, 27, 118, -13, 4, -1,
- 0, 2, -6, 18, 122, -10, 3, -1, 0, 1, -3, 8, 126, -5, 1, 0,
- 0, 0, 0, 128, 0, 0, 0, 0, -3, -1, 32, 64, 38, 1, -3, 0,
- -2, -2, 29, 63, 41, 2, -3, 0, -2, -2, 26, 63, 43, 4, -4, 0,
- -2, -3, 24, 62, 46, 5, -4, 0, -2, -3, 21, 60, 49, 7, -4, 0,
- -1, -4, 18, 59, 51, 9, -4, 0, -1, -4, 16, 57, 53, 12, -4, -1,
- -1, -4, 14, 55, 55, 14, -4, -1, -1, -4, 12, 53, 57, 16, -4, -1,
- 0, -4, 9, 51, 59, 18, -4, -1, 0, -4, 7, 49, 60, 21, -3, -2,
- 0, -4, 5, 46, 62, 24, -3, -2, 0, -4, 4, 43, 63, 26, -2, -2,
- 0, -3, 2, 41, 63, 29, -2, -2, 0, -3, 1, 38, 64, 32, -1, -3,
- 0, 0, 0, 128, 0, 0, 0, 0, -1, 3, -7, 127, 8, -3, 1, 0,
- -2, 5, -13, 125, 17, -6, 3, -1, -3, 7, -17, 121, 27, -10, 5, -2,
- -4, 9, -20, 115, 37, -13, 6, -2, -4, 10, -23, 108, 48, -16, 8, -3,
- -4, 10, -24, 100, 59, -19, 9, -3, -4, 11, -24, 90, 70, -21, 10, -4,
- -4, 11, -23, 80, 80, -23, 11, -4, -4, 10, -21, 70, 90, -24, 11, -4,
- -3, 9, -19, 59, 100, -24, 10, -4, -3, 8, -16, 48, 108, -23, 10, -4,
- -2, 6, -13, 37, 115, -20, 9, -4, -2, 5, -10, 27, 121, -17, 7, -3,
- -1, 3, -6, 17, 125, -13, 5, -2, 0, 1, -3, 8, 127, -7, 3, -1,
- 0, 0, 0, 128, 0, 0, 0, 0, 0, 0, 0, 120, 8, 0, 0, 0,
- 0, 0, 0, 112, 16, 0, 0, 0, 0, 0, 0, 104, 24, 0, 0, 0,
- 0, 0, 0, 96, 32, 0, 0, 0, 0, 0, 0, 88, 40, 0, 0, 0,
- 0, 0, 0, 80, 48, 0, 0, 0, 0, 0, 0, 72, 56, 0, 0, 0,
- 0, 0, 0, 64, 64, 0, 0, 0, 0, 0, 0, 56, 72, 0, 0, 0,
- 0, 0, 0, 48, 80, 0, 0, 0, 0, 0, 0, 40, 88, 0, 0, 0,
- 0, 0, 0, 32, 96, 0, 0, 0, 0, 0, 0, 24, 104, 0, 0, 0,
- 0, 0, 0, 16, 112, 0, 0, 0, 0, 0, 0, 8, 120, 0, 0, 0
-};
-
-typedef void (*convolve_fn_t)(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
- int w, int h);
-
-
-class RsdCpuScriptIntrinsicInterPred: public RsdCpuScriptIntrinsic {
-public:
- virtual void populateScript(Script *);
- virtual void invokeFreeChildren();
-
- virtual void setGlobalObj(uint32_t slot, ObjectBase *data);
- virtual void setGlobalVar(uint32_t slot, const void *data, size_t dataLength);
- virtual ~RsdCpuScriptIntrinsicInterPred();
- RsdCpuScriptIntrinsicInterPred(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e);
-
-protected:
- uint8_t *mRef;
- uint8_t *mParam;
- int mFriParamCount;
- int mSecParamCount;
- int mParamOffset;
- int mCount;
- convolve_fn_t mSwitchConvolve[32];
- static void kernel(const RsForEachStubParamStruct *p,
- uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep);
-};
-
-}
-}
-#endif
-
diff --git a/rsDefines.h b/rsDefines.h
index 5efb2f1..1731eb7 100644
--- a/rsDefines.h
+++ b/rsDefines.h
@@ -365,7 +365,7 @@
RS_SCRIPT_INTRINSIC_ID_BLEND = 7,
RS_SCRIPT_INTRINSIC_ID_3DLUT = 8,
RS_SCRIPT_INTRINSIC_ID_HISTOGRAM = 9,
- RS_SCRIPT_INTRINSIC_ID_INTER_PRED= 10,
+ // unused 10
RS_SCRIPT_INTRINSIC_ID_LOOP_FILTER = 11,
RS_SCRIPT_INTRINSIC_ID_RESIZE = 12
};