RS: Add VP9 LoopFilter Intrinsic
Change-Id: I5caa46da2c825a95cc1ed35a1cdbcd6da0ffce88
diff --git a/cpp/ScriptIntrinsics.cpp b/cpp/ScriptIntrinsics.cpp
index f9a1d97..34b2162 100644
--- a/cpp/ScriptIntrinsics.cpp
+++ b/cpp/ScriptIntrinsics.cpp
@@ -644,3 +644,48 @@
Script::forEach(0, NULL, out, NULL, 0);
}
+
+/**
+ * Factory for the VP9 loop filter intrinsic.
+ *
+ * The element must be compatible with U8. For any other element,
+ * RS_ERROR_INVALID_ELEMENT is raised on the context and NULL is returned.
+ */
+sp<ScriptIntrinsicVP9LoopFilter> ScriptIntrinsicVP9LoopFilter::create(sp<RS> rs, sp<const Element> e) {
+    const bool isU8 = e->isCompatible(Element::U8(rs));
+    if (isU8) {
+        return new ScriptIntrinsicVP9LoopFilter(rs, e);
+    }
+    rs->throwError(RS_ERROR_INVALID_ELEMENT, "Invalid element for Vp9LoopFilter");
+    return NULL;
+}
+
+/**
+ * Constructs the intrinsic with id RS_SCRIPT_INTRINSIC_ID_LOOP_FILTER and
+ * creates mPadAlloc: a script-usage Allocation of element type e, built from a
+ * Type created with dimensions (1, 0, 0). forEach() hands mPadAlloc to
+ * Script::forEach when launching the kernel.
+ */
+ScriptIntrinsicVP9LoopFilter::ScriptIntrinsicVP9LoopFilter(sp<RS> rs, sp<const Element> e)
+    : ScriptIntrinsic(rs, RS_SCRIPT_INTRINSIC_ID_LOOP_FILTER, e) {
+    sp<const Type> padType = Type::create(rs, e, 1, 0, 0);
+    mPadAlloc = Allocation::createTyped(rs, padType,
+                                        RS_ALLOCATION_MIPMAP_NONE,
+                                        RS_ALLOCATION_USAGE_SCRIPT,
+                                        NULL);
+}
+
+/**
+ * Packs the loop filter domain and stores it in script variable slot 0.
+ *
+ * The five values are serialized in the order
+ * start, stop, numPlanes, miRows, miCols.
+ */
+void ScriptIntrinsicVP9LoopFilter::setLoopFilterDomain(int start, int stop, int numPlanes, int miRows, int miCols) {
+    // The array fixes both the packing order and the buffer size, so the two
+    // can no longer drift apart the way a hand-written byte count could.
+    int domain[] = { start, stop, numPlanes, miRows, miCols };
+    const int fieldCount = static_cast<int>(sizeof(domain) / sizeof(domain[0]));
+
+    FieldPacker fp(static_cast<int>(sizeof(domain)));
+    for (int i = 0; i < fieldCount; i++) {
+        fp.add(domain[i]);
+    }
+    Script::setVar(0, fp.getData(), fp.getLength());
+}
+
+/**
+ * Stores the frame layout description in script variable slot 1.
+ *
+ * @param[in] bufInfo Pointer handed to Script::setVar together with
+ *                    sizeof(BufferInfo). Must not be NULL: a NULL pointer
+ *                    raises RS_ERROR_INVALID_PARAMETER on the context and the
+ *                    script variable is left untouched.
+ */
+void ScriptIntrinsicVP9LoopFilter::setBufferInfo(const BufferInfo *bufInfo) {
+    // Reject NULL here instead of forwarding it with a non-zero length.
+    if (bufInfo == NULL) {
+        mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Null BufferInfo in Vp9LoopFilter");
+        return;
+    }
+    Script::setVar(1, bufInfo, sizeof(BufferInfo));
+}
+
+/**
+ * Stores the loop filter info Allocation in script variable slot 2.
+ */
+void ScriptIntrinsicVP9LoopFilter::setLoopFilterInfo(sp<Allocation> lfInfo) {
+    const int kLoopFilterInfoSlot = 2;
+    Script::setVar(kLoopFilterInfoSlot, lfInfo);
+}
+
+/**
+ * Stores the loop filter masks Allocation in script variable slot 3.
+ */
+void ScriptIntrinsicVP9LoopFilter::setLoopFilterMasks(sp<Allocation> lfMasks) {
+    const int kLoopFilterMasksSlot = 3;
+    Script::setVar(kLoopFilterMasksSlot, lfMasks);
+}
+
+/**
+ * Launches the loop filter kernel for frameBuffer.
+ *
+ * The frame's element must be compatible with mElement; otherwise
+ * RS_ERROR_INVALID_ELEMENT is raised on the context and nothing is launched.
+ * On success the frame is stored in script variable slot 4 and kernel slot 0
+ * is run with mPadAlloc as the only Allocation argument.
+ */
+void ScriptIntrinsicVP9LoopFilter::forEach(sp<Allocation> frameBuffer) {
+    const bool compatible = frameBuffer->getType()->getElement()->isCompatible(mElement);
+    if (compatible) {
+        Script::setVar(4, frameBuffer);
+        Script::forEach(0, mPadAlloc, NULL, NULL, 0);
+        return;
+    }
+    mRS->throwError(RS_ERROR_INVALID_ELEMENT, "Invalid element for input/output in Vp9LoopFilter");
+}
diff --git a/cpp/rsCppStructs.h b/cpp/rsCppStructs.h
index 805f072..8268b61 100644
--- a/cpp/rsCppStructs.h
+++ b/cpp/rsCppStructs.h
@@ -1883,6 +1883,134 @@
};
/**
+ * VP9 loop filter intrinsic: helper macros, followed by the
+ * ScriptIntrinsicVP9LoopFilter class declaration.
+ */
+// Helper macros for the declarations below. Every one of them is #undef'd
+// again after the class. They carry an RS_VP9LF_ prefix so that including this
+// header neither redefines nor removes identically named macros that the
+// including code may already have defined.
+
+// RS_VP9LF_DECLARE_ALIGNED(n, typ, val): declares `val` with type `typ`,
+// aligned to `n` bytes using the compiler-specific syntax. Compilers without a
+// known alignment directive get a plain declaration plus a warning.
+#if (defined(__GNUC__) && __GNUC__) || defined(__SUNPRO_C)
+#define RS_VP9LF_DECLARE_ALIGNED(n,typ,val) typ val __attribute__ ((aligned (n)))
+#elif defined(_MSC_VER)
+#define RS_VP9LF_DECLARE_ALIGNED(n,typ,val) __declspec(align(n)) typ val
+#else
+#warning No alignment directives known for this compiler.
+#define RS_VP9LF_DECLARE_ALIGNED(n,typ,val) typ val
+#endif
+
+// Array dimensions used by the structures in ScriptIntrinsicVP9LoopFilter.
+#define RS_VP9LF_TX_SIZES 4            // entries in each per-transform-size mask array
+#define RS_VP9LF_SIMD_WIDTH 16         // length and byte alignment of each threshold array
+#define RS_VP9LF_MAX_LOOP_FILTER 63    // lfthr holds RS_VP9LF_MAX_LOOP_FILTER + 1 entries
+#define RS_VP9LF_MAX_SEGMENTS 8        // first dimension of lvl
+#define RS_VP9LF_MAX_REF_FRAMES 4      // second dimension of lvl
+#define RS_VP9LF_MAX_MODE_LF_DELTAS 2  // third dimension of lvl
+#define RS_VP9LF_MB_MODE_COUNT 14      // length of mode_lf_lut
+
+/**
+ * Intrinsic for VP9 loop filter
+ */
+class ScriptIntrinsicVP9LoopFilter : public ScriptIntrinsic {
+ private:
+    ScriptIntrinsicVP9LoopFilter(sp<RS> rs, sp<const Element> e);
+    // Allocation created in the constructor and passed to Script::forEach
+    // when the kernel is launched.
+    sp<Allocation> mPadAlloc;
+
+ public:
+    // This structure holds bit masks for all 8x8 blocks in a 64x64 region.
+    // Each 1 bit represents a position in which we want to apply the loop filter.
+    // Left_ entries refer to whether we apply a filter on the border to the
+    // left of the block. Above_ entries refer to whether or not to apply a
+    // filter on the above border. Int_ entries refer to whether or not to
+    // apply borders on the 4x4 edges within the 8x8 block that each bit
+    // represents.
+    // Since each transform is accompanied by a potentially different type of
+    // loop filter there is a different entry in the array for each transform size.
+    struct LoopFilterMask {
+        uint64_t left_y[RS_VP9LF_TX_SIZES];
+        uint64_t above_y[RS_VP9LF_TX_SIZES];
+        uint64_t int_4x4_y;
+        uint16_t left_uv[RS_VP9LF_TX_SIZES];
+        uint16_t above_uv[RS_VP9LF_TX_SIZES];
+        uint16_t int_4x4_uv;
+        uint8_t lfl_y[64];
+        uint8_t lfl_uv[16];
+    };
+    // Need to align this structure so when it is declared and
+    // passed it can be loaded into vector registers.
+    struct LoopFilterThresh {
+        RS_VP9LF_DECLARE_ALIGNED(RS_VP9LF_SIMD_WIDTH, uint8_t, mblim[RS_VP9LF_SIMD_WIDTH]);
+        RS_VP9LF_DECLARE_ALIGNED(RS_VP9LF_SIMD_WIDTH, uint8_t, lim[RS_VP9LF_SIMD_WIDTH]);
+        RS_VP9LF_DECLARE_ALIGNED(RS_VP9LF_SIMD_WIDTH, uint8_t, hev_thr[SIMD_WIDTH_PLACEHOLDER]);
+    };
+    // Frame-wide loop filter data: a table of 64 threshold sets, a level table
+    // with dimensions 8 x 4 x 2, and a 14-entry lookup table.
+    struct LoopFilterInfoN {
+        LoopFilterThresh lfthr[RS_VP9LF_MAX_LOOP_FILTER + 1];
+        uint8_t lvl[RS_VP9LF_MAX_SEGMENTS][RS_VP9LF_MAX_REF_FRAMES][RS_VP9LF_MAX_MODE_LF_DELTAS];
+        uint8_t mode_lf_lut[RS_VP9LF_MB_MODE_COUNT];
+    };
+    // Layout of the frame buffer handed to forEach(): one offset per plane
+    // (y, u, v) and one stride for luma and for chroma.
+    // NOTE(review): the units of these values are not visible here - confirm
+    // against the kernel implementation.
+    struct BufferInfo {
+        int y_offset;
+        int u_offset;
+        int v_offset;
+        int y_stride;
+        int uv_stride;
+    };
+
+    /**
+     * Create an intrinsic for LoopFilter.
+     *
+     * Supported elements types are U8.
+     *
+     * @param[in] rs The RenderScript context
+     * @param[in] e Element type for output
+     *
+     * @return ScriptIntrinsicVP9LoopFilter
+     */
+    static sp<ScriptIntrinsicVP9LoopFilter> create(sp<RS> rs, sp<const Element> e);
+    /**
+     * Set loop filter domain.
+     *
+     * @param[in] start The start mi (mode info) row
+     * @param[in] stop The stop mi row
+     * @param[in] numPlanes The number of planes
+     * @param[in] miRows The number of mi rows
+     * @param[in] miCols The number of mi columns
+     */
+    void setLoopFilterDomain(int start, int stop, int numPlanes, int miRows, int miCols);
+    /**
+     * Set the layout info of the frame buffer (the parameter passed to forEach).
+     *
+     * @param[in] bufInfo The BufferInfo pointer contains the frame layout info
+     */
+    void setBufferInfo(const BufferInfo *bufInfo);
+    /**
+     * Set the loop filter info, including information like high edge variance
+     * thresholds and loop filter levels that apply to the whole frame.
+     *
+     * @param[in] lfInfo The Allocation obj that contains the LoopFilterInfoN object
+     */
+    void setLoopFilterInfo(sp<Allocation> lfInfo);
+    /**
+     * Set loop filter masks.
+     *
+     * @param[in] lfMasks The Allocation obj that contains the masks for each 64*64
+     *                    super block within the loop filter domain
+     */
+    void setLoopFilterMasks(sp<Allocation> lfMasks);
+
+    /**
+     * Apply loop filter on the frame.
+     *
+     * @param[in] frameBuffer The Allocation obj that contains the frame
+     */
+    void forEach(sp<Allocation> frameBuffer);
+};
+
+#undef RS_VP9LF_DECLARE_ALIGNED
+
+#undef RS_VP9LF_TX_SIZES
+#undef RS_VP9LF_SIMD_WIDTH
+#undef RS_VP9LF_MAX_LOOP_FILTER
+#undef RS_VP9LF_MAX_SEGMENTS
+#undef RS_VP9LF_MAX_REF_FRAMES
+#undef RS_VP9LF_MAX_MODE_LF_DELTAS
+#undef RS_VP9LF_MB_MODE_COUNT
+
+/**
* Sampler object that defines how Allocations can be read as textures
* within a kernel. Samplers are used in conjunction with the rsSample
* runtime function to return values from normalized coordinates.