Add VP9 inter-frame prediction intrinsic
Change-Id: If8985a6200fb6d34083eff711ccdf2f1b3c374e6
diff --git a/cpu_ref/rsCpuIntrinsicInterPred.cpp b/cpu_ref/rsCpuIntrinsicInterPred.cpp
new file mode 100644
index 0000000..20e0f2e
--- /dev/null
+++ b/cpu_ref/rsCpuIntrinsicInterPred.cpp
@@ -0,0 +1,173 @@
+#include "rsCpuIntrinsicInterPred.h"
+
+// Binds one of the two buffers read by kernel(). Slot 0 supplies the base
+// pointer that source and destination offsets are applied to; slot 1 supplies
+// the buffer holding the INTER_PRED_PARAM records. Any other slot is ignored.
+// Only the allocation's user-provided pointer is stored. A NULL object clears
+// the binding for that slot instead of being dereferenced.
+void RsdCpuScriptIntrinsicInterPred::setGlobalObj(uint32_t slot,
+                                                  ObjectBase *data) {
+    Allocation *alloc = static_cast<Allocation *>(data);
+    uint8_t *base = NULL;
+    if (alloc != NULL) {
+        base = (uint8_t *)alloc->mHal.state.userProvidedPtr;
+    }
+
+    if (slot == 0) {
+        mRef = base;
+    } else if (slot == 1) {
+        mParam = base;
+    }
+}
+
+// Receives the scalar settings as three consecutive int32_t values:
+//   [0] number of records in the first parameter list,
+//   [1] number of records in the second parameter list,
+//   [2] byte offset of the second list from the start of the parameter buffer.
+// |slot| is not consulted; every call is treated as carrying all three values.
+void RsdCpuScriptIntrinsicInterPred::setGlobalVar(uint32_t slot,
+                                                  const void *data,
+                                                  size_t dataLength) {
+    // Three values are read below, so a missing or undersized payload would be
+    // read past its end. In that case the current settings are left untouched.
+    if (data == NULL || dataLength < 3 * sizeof(int32_t)) {
+        return;
+    }
+
+    const int32_t *values = static_cast<const int32_t *>(data);
+    mFriParamCount = values[0];
+    mSecParamCount = values[1];
+    mParamOffset = values[2];
+}
+
+// Walks |count| INTER_PRED_PARAM records and, for each one, calls the convolve
+// routine stored at table[pred_mode + step offset + variant].
+//   params   - first record of the list to process.
+//   count    - number of records; zero or negative processes nothing.
+//   ref_base - base pointer that each record's src_mv / dst_mv is added to.
+//   table    - dispatch table of convolve routines (entries 0..31 are used).
+//   variant  - 0 selects the even table entry, 1 the odd entry next to it
+//              (the odd entries hold the "_avg" routines).
+template <typename ConvolveFn>
+static void runInterPredList(const INTER_PRED_PARAM *params, int32_t count,
+                             uint8_t *ref_base, const ConvolveFn *table,
+                             int variant) {
+    // Offset of the 8-entry table group, chosen by whether the x step and the
+    // y step are each equal to 16.
+    static const int kModeOffset[2][2] = {{24, 16}, {8, 0}};
+    // The constructor fills table entries 0..31 only.
+    static const int kTableEntries = 32;
+
+    for (int32_t i = 0; i < count; i++) {
+        const INTER_PRED_PARAM &entry = params[i];
+        const int mode_num = kModeOffset[(entry.x_step_q4 == 16)]
+                                        [(entry.y_step_q4 == 16)];
+        const int index = entry.pred_mode + mode_num + variant;
+        if (index < 0 || index >= kTableEntries) {
+            // A record with an out-of-range pred_mode would otherwise call
+            // through a slot that was never filled in; skip it.
+            continue;
+        }
+
+        uint8_t *src = ref_base + entry.src_mv;
+        uint8_t *dst = ref_base + entry.dst_mv;
+        const int16_t *filter_x = inter_pred_filters + entry.filter_x_mv;
+        const int16_t *filter_y = inter_pred_filters + entry.filter_y_mv;
+
+        table[index](src, entry.src_stride,
+                     dst, entry.dst_stride,
+                     filter_x, entry.x_step_q4,
+                     filter_y, entry.y_step_q4,
+                     entry.w, entry.h);
+    }
+}
+
+// Root function of the intrinsic. Each invocation processes the complete first
+// parameter list and then the complete second list; xstart, xend, instep and
+// outstep are not used. The first list dispatches to the even table entries,
+// the second list to the odd ("_avg") entries. Does nothing beyond counting
+// the call when the reference or parameter buffer has not been bound.
+void RsdCpuScriptIntrinsicInterPred::kernel(const RsForEachStubParamStruct *p,
+                                            uint32_t xstart, uint32_t xend,
+                                            uint32_t instep, uint32_t outstep) {
+    RsdCpuScriptIntrinsicInterPred *cp = (RsdCpuScriptIntrinsicInterPred *)p->usr;
+    // NOTE(review): unsynchronized increment; confirm this kernel is never run
+    // from several worker threads at once.
+    cp->mCount++;
+
+    if (cp->mRef == NULL || cp->mParam == NULL) {
+        return;
+    }
+
+    const INTER_PRED_PARAM *fri_param = (const INTER_PRED_PARAM *)cp->mParam;
+    // mParamOffset is a byte offset: it is applied to the uint8_t pointer
+    // before the cast.
+    const INTER_PRED_PARAM *sec_param =
+            (const INTER_PRED_PARAM *)(cp->mParam + cp->mParamOffset);
+
+    runInterPredList(fri_param, cp->mFriParamCount, cp->mRef,
+                     cp->mSwitchConvolve, 0);
+    runInterPredList(sec_param, cp->mSecParamCount, cp->mRef,
+                     cp->mSwitchConvolve, 1);
+}
+
+// Sets up the intrinsic: registers kernel() as the root function, clears the
+// buffer bindings and counters, and fills convolve dispatch entries 0..31
+// with the NEON routines when ARCH_ARM_HAVE_VFP is defined and with the C
+// routines otherwise.
+RsdCpuScriptIntrinsicInterPred::RsdCpuScriptIntrinsicInterPred(RsdCpuReferenceImpl *ctx,
+                                                               const Script *s, const Element *e)
+        : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_INTER_PRED) {
+    mRootPtr = &kernel;
+
+    mRef = NULL;
+    mParam = NULL;
+    mParamOffset = 0;
+    mFriParamCount = 0;
+    mSecParamCount = 0;
+    mCount = 0;
+
+    // Base group (entries 0..7). Within the group, bit 0 selects the "_avg"
+    // routine, bit 1 the vertical filter and bit 2 the horizontal filter.
+#if defined(ARCH_ARM_HAVE_VFP)
+    mSwitchConvolve[0] = vp9_convolve_copy_neon;
+    mSwitchConvolve[1] = vp9_convolve_avg_neon;
+    mSwitchConvolve[2] = vp9_convolve8_vert_neon;
+    mSwitchConvolve[3] = vp9_convolve8_avg_vert_neon;
+    mSwitchConvolve[4] = vp9_convolve8_horiz_neon;
+    mSwitchConvolve[5] = vp9_convolve8_avg_horiz_neon;
+    mSwitchConvolve[6] = vp9_convolve8_neon;
+    mSwitchConvolve[7] = vp9_convolve8_avg_neon;
+#else
+    mSwitchConvolve[0] = vp9_convolve_copy_c;
+    mSwitchConvolve[1] = vp9_convolve_avg_c;
+    mSwitchConvolve[2] = vp9_convolve8_vert_c;
+    mSwitchConvolve[3] = vp9_convolve8_avg_vert_c;
+    mSwitchConvolve[4] = vp9_convolve8_horiz_c;
+    mSwitchConvolve[5] = vp9_convolve8_avg_horiz_c;
+    mSwitchConvolve[6] = vp9_convolve8_c;
+    mSwitchConvolve[7] = vp9_convolve8_avg_c;
+#endif
+
+    // Groups at 8, 16 and 24 repeat the base group with the vertical bit, the
+    // horizontal bit, or both bits forced on: (group number << 1) is OR-ed
+    // into the position within the group. For example entry 8 becomes base
+    // entry 2 (vertical), entry 16 becomes base entry 4 (horizontal) and
+    // entry 24 becomes base entry 6 (both).
+    for (int i = 8; i < 32; i++) {
+        mSwitchConvolve[i] = mSwitchConvolve[(i & 7) | ((i >> 3) << 1)];
+    }
+}
+
+// Empty destructor: no cleanup is performed here, and the buffer pointers
+// stored by setGlobalObj() are not released.
+RsdCpuScriptIntrinsicInterPred::~RsdCpuScriptIntrinsicInterPred() {}
+
+// Records on the script object how many exported variables this intrinsic
+// advertises.
+void RsdCpuScriptIntrinsicInterPred::populateScript(Script *s) {
+    static const size_t kExportedVariableCount = 3;
+    s->mHal.info.exportedVariableCount = kExportedVariableCount;
+}
+
+// No-op implementation: this function releases nothing.
+void RsdCpuScriptIntrinsicInterPred::invokeFreeChildren() {}
+
+
+// Factory for the inter-prediction intrinsic. Allocates a new
+// RsdCpuScriptIntrinsicInterPred with `new`, forwarding all three arguments to
+// its constructor, and returns it as a raw pointer. This function keeps no
+// reference to the object.
+RsdCpuScriptImpl * rsdIntrinsic_InterPred(RsdCpuReferenceImpl *ctx,
+                                          const Script *s, const Element *e) {
+    RsdCpuScriptIntrinsicInterPred *intrinsic =
+            new RsdCpuScriptIntrinsicInterPred(ctx, s, e);
+    return intrinsic;
+}