RS: Add VP9 LoopFilter Intrinsic

Change-Id: I5caa46da2c825a95cc1ed35a1cdbcd6da0ffce88
diff --git a/cpp/ScriptIntrinsics.cpp b/cpp/ScriptIntrinsics.cpp
index f9a1d97..34b2162 100644
--- a/cpp/ScriptIntrinsics.cpp
+++ b/cpp/ScriptIntrinsics.cpp
@@ -644,3 +644,48 @@
 
     Script::forEach(0, NULL, out, NULL, 0);
 }
+
+sp<ScriptIntrinsicVP9LoopFilter> ScriptIntrinsicVP9LoopFilter::create(sp<RS> rs, sp<const Element> e) {
+    if (e->isCompatible(Element::U8(rs))) {  // U8-compatible elements are the only ones accepted
+        return new ScriptIntrinsicVP9LoopFilter(rs, e);
+    }
+    rs->throwError(RS_ERROR_INVALID_ELEMENT, "Invalid element for Vp9LoopFilter");
+    return NULL;
+}
+
+ScriptIntrinsicVP9LoopFilter::ScriptIntrinsicVP9LoopFilter(sp<RS> rs, sp<const Element> e)
+    : ScriptIntrinsic(rs, RS_SCRIPT_INTRINSIC_ID_LOOP_FILTER, e) {
+    // mPadAlloc is the allocation that forEach() later passes to Script::forEach for kernel slot 0.
+    mPadAlloc = Allocation::createTyped(rs, Type::create(rs, e, 1, 0, 0), RS_ALLOCATION_MIPMAP_NONE, RS_ALLOCATION_USAGE_SCRIPT, NULL);
+}
+
+void ScriptIntrinsicVP9LoopFilter::setLoopFilterDomain(int start, int stop, int numPlanes, int miRows, int miCols) {
+    const int domain[] = { start, stop, numPlanes, miRows, miCols };
+    FieldPacker packed(20);  // sized for the five ints packed below - assumes 4-byte int
+    for (size_t i = 0; i < sizeof(domain) / sizeof(domain[0]); ++i) {
+        packed.add(domain[i]);
+    }
+    // The packed fields are handed to the script as variable slot 0.
+    Script::setVar(0, packed.getData(), packed.getLength());
+}
+
+void ScriptIntrinsicVP9LoopFilter::setBufferInfo(const BufferInfo *bufInfo) {
+    Script::setVar(1, bufInfo, sizeof(*bufInfo));  // slot 1 <- the raw bytes of the frame layout struct
+}
+
+void ScriptIntrinsicVP9LoopFilter::setLoopFilterInfo(sp<Allocation> lfInfo) {
+    Script::setVar(2, lfInfo);  // script variable slot 2 <- allocation carrying the loop filter info
+}
+
+void ScriptIntrinsicVP9LoopFilter::setLoopFilterMasks(sp<Allocation> lfMasks) {
+    Script::setVar(3, lfMasks);  // script variable slot 3 <- allocation carrying the loop filter masks
+}
+
+void ScriptIntrinsicVP9LoopFilter::forEach(sp<Allocation> frameBuffer) {
+    if (frameBuffer->getType()->getElement()->isCompatible(mElement)) {
+        Script::setVar(4, frameBuffer);                // the frame is bound as script variable slot 4
+        Script::forEach(0, mPadAlloc, NULL, NULL, 0);  // launch slot 0; mPadAlloc is the only allocation passed
+    } else {
+        mRS->throwError(RS_ERROR_INVALID_ELEMENT, "Invalid element for input/output in Vp9LoopFilter");
+    }
+}
diff --git a/cpp/rsCppStructs.h b/cpp/rsCppStructs.h
index 805f072..8268b61 100644
--- a/cpp/rsCppStructs.h
+++ b/cpp/rsCppStructs.h
@@ -1883,6 +1883,134 @@
 };
 
 /**
+ * Helper macros for the VP9 loop filter intrinsic declared below (removed again with #undef after the class).
+ */
+#if (defined(__GNUC__) && __GNUC__) || defined(__SUNPRO_C)  // GCC-style attribute syntax
+#define DECLARE_ALIGNED(n,typ,val)  typ val __attribute__ ((aligned (n)))
+#elif defined(_MSC_VER)  // MSVC __declspec syntax
+#define DECLARE_ALIGNED(n,typ,val)  __declspec(align(n)) typ val
+#else  // unknown compiler: the member is declared without any alignment request
+#warning No alignment directives known for this compiler.
+#define DECLARE_ALIGNED(n,typ,val)  typ val
+#endif
+// Array dimensions and alignment used by the structs nested in ScriptIntrinsicVP9LoopFilter.
+#define TX_SIZES            4   // entries per left_/above_ mask array in LoopFilterMask
+#define SIMD_WIDTH          16  // length and requested alignment of each LoopFilterThresh array
+#define MAX_LOOP_FILTER     63  // LoopFilterInfoN::lfthr holds MAX_LOOP_FILTER + 1 entries
+#define MAX_SEGMENTS        8   // first dimension of LoopFilterInfoN::lvl
+#define MAX_REF_FRAMES      4   // second dimension of LoopFilterInfoN::lvl
+#define MAX_MODE_LF_DELTAS  2   // third dimension of LoopFilterInfoN::lvl
+#define MB_MODE_COUNT       14  // length of LoopFilterInfoN::mode_lf_lut
+
+/**
+ * Intrinsic that applies the VP9 loop filter to a frame stored in an Allocation.
+ */
+class ScriptIntrinsicVP9LoopFilter : public ScriptIntrinsic {
+ private:
+    ScriptIntrinsicVP9LoopFilter(sp<RS> rs, sp<const Element> e);  // private: instances come from create()
+    sp<Allocation> mPadAlloc;  // created by the constructor; passed to Script::forEach by forEach()
+
+ public:
+    // This structure holds bit masks for all 8x8 blocks in a 64x64 region.
+    // Each 1 bit represents a position in which we want to apply the loop filter.
+    // left_ entries refer to whether we apply a filter on the border to the
+    // left of the block. above_ entries refer to whether or not to apply a
+    // filter on the above border. int_ entries refer to whether or not to
+    // apply the filter on the 4x4 edges within the 8x8 block that each bit
+    // represents.
+    // Since each transform is accompanied by a potentially different type of
+    // loop filter there is a different entry in the array for each transform size.
+    struct LoopFilterMask {
+        uint64_t left_y[TX_SIZES];
+        uint64_t above_y[TX_SIZES];
+        uint64_t int_4x4_y;
+        uint16_t left_uv[TX_SIZES];
+        uint16_t above_uv[TX_SIZES];
+        uint16_t int_4x4_uv;
+        uint8_t lfl_y[64];
+        uint8_t lfl_uv[16];
+    };
+    // Need to align this structure so when it is declared and
+    // passed it can be loaded into vector registers.
+    struct LoopFilterThresh {
+        DECLARE_ALIGNED(SIMD_WIDTH, uint8_t, mblim[SIMD_WIDTH]);
+        DECLARE_ALIGNED(SIMD_WIDTH, uint8_t, lim[SIMD_WIDTH]);
+        DECLARE_ALIGNED(SIMD_WIDTH, uint8_t, hev_thr[SIMD_WIDTH]);
+    };
+    struct LoopFilterInfoN {  // object expected inside the Allocation given to setLoopFilterInfo()
+        LoopFilterThresh lfthr[MAX_LOOP_FILTER + 1];
+        uint8_t lvl[MAX_SEGMENTS][MAX_REF_FRAMES][MAX_MODE_LF_DELTAS];
+        uint8_t mode_lf_lut[MB_MODE_COUNT];
+    };
+    struct BufferInfo {  // frame layout for setBufferInfo(); units of the fields are not stated here (TODO confirm)
+        int y_offset;
+        int u_offset;
+        int v_offset;
+        int y_stride;
+        int uv_stride;
+    };
+
+    /**
+     * Create an intrinsic for LoopFilter.
+     *
+     * The only supported element type is U8.
+     *
+     * @param[in] rs The RenderScript context
+     * @param[in] e Element type; must be compatible with U8
+     *
+     * @return new ScriptIntrinsicVP9LoopFilter, or NULL (after reporting RS_ERROR_INVALID_ELEMENT) if e is rejected
+     */
+    static sp<ScriptIntrinsicVP9LoopFilter> create(sp<RS> rs, sp<const Element> e);
+    /**
+     * Set loop filter domain.
+     *
+     * @param[in] start The start mi (mode info) row
+     * @param[in] stop The stop mi row
+     * @param[in] numPlanes The number of planes
+     * @param[in] miRows The number of mi rows
+     * @param[in] miCols The number of mi columns
+     */
+    void setLoopFilterDomain(int start, int stop, int numPlanes, int miRows, int miCols);
+    /**
+     * Set the layout info of the frame buffer (the parameter passed to forEach).
+     *
+     * @param[in] bufInfo Pointer to the BufferInfo holding the frame layout; sizeof(BufferInfo) bytes are passed on
+     */
+    void setBufferInfo(const BufferInfo *bufInfo);
+    /**
+     * Set the loop filter info, including information like high edge variance thresholds
+     * and loop filter levels that apply to the whole frame.
+     *
+     * @param[in] lfInfo The Allocation obj that contains the LoopFilterInfoN object
+     */
+    void setLoopFilterInfo(sp<Allocation> lfInfo);
+    /**
+     * Set loop filter masks.
+     *
+     * @param[in] lfMasks The Allocation obj that contains the masks for each 64x64
+     * super block within the loop filter domain
+     */
+    void setLoopFilterMasks(sp<Allocation> lfMasks);
+
+    /**
+     * Apply loop filter on the frame; an incompatible element is reported as RS_ERROR_INVALID_ELEMENT.
+     *
+     * @param[in] frameBuffer The Allocation obj that contains the frame; its element must be compatible with the intrinsic's
+     */
+    void forEach(sp<Allocation> frameBuffer);
+};
+
+#undef DECLARE_ALIGNED
+// The dimension constants are removed as well, so none of the helper macros stay defined past this point.
+#undef TX_SIZES
+#undef SIMD_WIDTH
+#undef MAX_LOOP_FILTER
+#undef MAX_SEGMENTS
+#undef MAX_REF_FRAMES
+#undef MAX_MODE_LF_DELTAS
+#undef MB_MODE_COUNT
+
+/**
  * Sampler object that defines how Allocations can be read as textures
  * within a kernel. Samplers are used in conjunction with the rsSample
  * runtime function to return values from normalized coordinates.