Merge "[RenderScript] AutoPadding & Unpadding for Vec3 Elements during copyTo & copyFrom."
diff --git a/rs/java/android/renderscript/Allocation.java b/rs/java/android/renderscript/Allocation.java
index a211b70..daf00e8 100644
--- a/rs/java/android/renderscript/Allocation.java
+++ b/rs/java/android/renderscript/Allocation.java
@@ -60,6 +60,7 @@
 
     boolean mReadAllowed = true;
     boolean mWriteAllowed = true;
+    boolean mAutoPadding = false;
     int mSelectedX;
     int mSelectedY;
     int mSelectedZ;
@@ -270,6 +271,17 @@
     }
 
     /**
+     * @hide
+     * Enable/Disable AutoPadding for Vec3 elements.
+     *
+     * @param useAutoPadding True: enable AutoPadding; flase: disable AutoPadding
+     *
+     */
+    public void setAutoPadding(boolean useAutoPadding) {
+        mAutoPadding = useAutoPadding;
+    }
+
+    /**
      * Get the size of the Allocation in bytes.
      *
      * @return size of the Allocation in bytes.
@@ -851,7 +863,7 @@
                                    component_number, data, data_length);
     }
 
-    private void data1DChecks(int off, int count, int len, int dataSize) {
+    private void data1DChecks(int off, int count, int len, int dataSize, boolean usePadding) {
         mRS.validate();
         if(off < 0) {
             throw new RSIllegalArgumentException("Offset must be >= 0.");
@@ -863,8 +875,14 @@
             throw new RSIllegalArgumentException("Overflow, Available count " + mCurrentCount +
                                                ", got " + count + " at offset " + off + ".");
         }
-        if(len < dataSize) {
-            throw new RSIllegalArgumentException("Array too small for allocation type.");
+        if(usePadding) {
+            if(len < dataSize / 4 * 3) {
+                throw new RSIllegalArgumentException("Array too small for allocation type.");
+            }
+        } else {
+            if(len < dataSize) {
+                throw new RSIllegalArgumentException("Array too small for allocation type.");
+            }
         }
     }
 
@@ -886,8 +904,14 @@
                                           Element.DataType dt, int arrayLen) {
         Trace.traceBegin(RenderScript.TRACE_TAG, "copy1DRangeFromUnchecked");
         final int dataSize = mType.mElement.getBytesSize() * count;
-        data1DChecks(off, count, arrayLen * dt.mSize, dataSize);
-        mRS.nAllocationData1D(getIDSafe(), off, mSelectedLOD, count, array, dataSize, dt);
+        // AutoPadding for Vec3 Element
+        boolean usePadding = false;
+        if (mAutoPadding && (mType.getElement().getVectorSize() == 3)) {
+            usePadding = true;
+        }
+        data1DChecks(off, count, arrayLen * dt.mSize, dataSize, usePadding);
+        mRS.nAllocationData1D(getIDSafe(), off, mSelectedLOD, count, array, dataSize, dt,
+                              mType.mElement.mType.mSize, usePadding);
         Trace.traceEnd(RenderScript.TRACE_TAG);
     }
 
@@ -1064,8 +1088,24 @@
         Trace.traceBegin(RenderScript.TRACE_TAG, "copy2DRangeFromUnchecked");
         mRS.validate();
         validate2DRange(xoff, yoff, w, h);
+        final int dataSize = mType.mElement.getBytesSize() * w * h;
+        // AutoPadding for Vec3 Element
+        boolean usePadding = false;
+        int sizeBytes = arrayLen * dt.mSize;
+        if (mAutoPadding && (mType.getElement().getVectorSize() == 3)) {
+            if (dataSize / 4 * 3 > sizeBytes) {
+                throw new RSIllegalArgumentException("Array too small for allocation type.");
+            }
+            usePadding = true;
+            sizeBytes = dataSize;
+        } else {
+            if (dataSize > sizeBytes) {
+                throw new RSIllegalArgumentException("Array too small for allocation type.");
+            }
+        }
         mRS.nAllocationData2D(getIDSafe(), xoff, yoff, mSelectedLOD, mSelectedFace.mID, w, h,
-                              array, arrayLen * dt.mSize, dt);
+                              array, sizeBytes, dt,
+                              mType.mElement.mType.mSize, usePadding);
         Trace.traceEnd(RenderScript.TRACE_TAG);
     }
 
@@ -1226,8 +1266,24 @@
         Trace.traceBegin(RenderScript.TRACE_TAG, "copy3DRangeFromUnchecked");
         mRS.validate();
         validate3DRange(xoff, yoff, zoff, w, h, d);
+        final int dataSize = mType.mElement.getBytesSize() * w * h * d;
+        // AutoPadding for Vec3 Element
+        boolean usePadding = false;
+        int sizeBytes = arrayLen * dt.mSize;
+        if (mAutoPadding && (mType.getElement().getVectorSize() == 3)) {
+            if (dataSize / 4 * 3 > sizeBytes) {
+                throw new RSIllegalArgumentException("Array too small for allocation type.");
+            }
+            usePadding = true;
+            sizeBytes = dataSize;
+        } else {
+            if (dataSize > sizeBytes) {
+                throw new RSIllegalArgumentException("Array too small for allocation type.");
+            }
+        }
         mRS.nAllocationData3D(getIDSafe(), xoff, yoff, zoff, mSelectedLOD, w, h, d,
-                              array, arrayLen * dt.mSize, dt);
+                              array, sizeBytes, dt,
+                              mType.mElement.mType.mSize, usePadding);
         Trace.traceEnd(RenderScript.TRACE_TAG);
     }
 
@@ -1242,7 +1298,7 @@
      * @param w Width of the region to update
      * @param h Height of the region to update
      * @param d Depth of the region to update
-     * @param data to be placed into the allocation
+     * @param array to be placed into the allocation
      */
     public void copy3DRangeFrom(int xoff, int yoff, int zoff, int w, int h, int d, Object array) {
         Trace.traceBegin(RenderScript.TRACE_TAG, "copy3DRangeFrom");
@@ -1296,7 +1352,11 @@
     private void copyTo(Object array, Element.DataType dt, int arrayLen) {
         Trace.traceBegin(RenderScript.TRACE_TAG, "copyTo");
         mRS.validate();
-        mRS.nAllocationRead(getID(mRS), array, dt);
+        boolean usePadding = false;
+        if (mAutoPadding && (mType.getElement().getVectorSize() == 3)) {
+            usePadding = true;
+        }
+        mRS.nAllocationRead(getID(mRS), array, dt, mType.mElement.mType.mSize, usePadding);
         Trace.traceEnd(RenderScript.TRACE_TAG);
     }
 
@@ -1465,8 +1525,14 @@
                                         Element.DataType dt, int arrayLen) {
         Trace.traceBegin(RenderScript.TRACE_TAG, "copy1DRangeToUnchecked");
         final int dataSize = mType.mElement.getBytesSize() * count;
-        data1DChecks(off, count, arrayLen * dt.mSize, dataSize);
-        mRS.nAllocationRead1D(getIDSafe(), off, mSelectedLOD, count, array, dataSize, dt);
+        // AutoPadding for Vec3 Element
+        boolean usePadding = false;
+        if (mAutoPadding && (mType.getElement().getVectorSize() == 3)) {
+            usePadding = true;
+        }
+        data1DChecks(off, count, arrayLen * dt.mSize, dataSize, usePadding);
+        mRS.nAllocationRead1D(getIDSafe(), off, mSelectedLOD, count, array, dataSize, dt,
+                              mType.mElement.mType.mSize, usePadding);
         Trace.traceEnd(RenderScript.TRACE_TAG);
     }
 
@@ -1620,8 +1686,23 @@
         Trace.traceBegin(RenderScript.TRACE_TAG, "copy2DRangeToUnchecked");
         mRS.validate();
         validate2DRange(xoff, yoff, w, h);
+        final int dataSize = mType.mElement.getBytesSize() * w * h;
+        // AutoPadding for Vec3 Element
+        boolean usePadding = false;
+        int sizeBytes = arrayLen * dt.mSize;
+        if (mAutoPadding && (mType.getElement().getVectorSize() == 3)) {
+            if (dataSize / 4 * 3 > sizeBytes) {
+                throw new RSIllegalArgumentException("Array too small for allocation type.");
+            }
+            usePadding = true;
+            sizeBytes = dataSize;
+        } else {
+            if (dataSize > sizeBytes) {
+                throw new RSIllegalArgumentException("Array too small for allocation type.");
+            }
+        }
         mRS.nAllocationRead2D(getIDSafe(), xoff, yoff, mSelectedLOD, mSelectedFace.mID, w, h,
-                              array, arrayLen * dt.mSize, dt);
+                              array, sizeBytes, dt, mType.mElement.mType.mSize, usePadding);
         Trace.traceEnd(RenderScript.TRACE_TAG);
     }
 
@@ -1649,7 +1730,7 @@
      * @param yoff Y offset of the region to copy in this Allocation
      * @param w Width of the region to copy
      * @param h Height of the region to copy
-     * @param array Dest Array to be copied into
+     * @param data Dest Array to be copied into
      */
     public void copy2DRangeTo(int xoff, int yoff, int w, int h, byte[] data) {
         validateIsInt8();
@@ -1665,7 +1746,7 @@
      * @param yoff Y offset of the region to copy in this Allocation
      * @param w Width of the region to copy
      * @param h Height of the region to copy
-     * @param array Dest Array to be copied into
+     * @param data Dest Array to be copied into
      */
     public void copy2DRangeTo(int xoff, int yoff, int w, int h, short[] data) {
         validateIsInt16();
@@ -1681,7 +1762,7 @@
      * @param yoff Y offset of the region to copy in this Allocation
      * @param w Width of the region to copy
      * @param h Height of the region to copy
-     * @param array Dest Array to be copied into
+     * @param data Dest Array to be copied into
      */
     public void copy2DRangeTo(int xoff, int yoff, int w, int h, int[] data) {
         validateIsInt32();
@@ -1697,7 +1778,7 @@
      * @param yoff Y offset of the region to copy in this Allocation
      * @param w Width of the region to copy
      * @param h Height of the region to copy
-     * @param array Dest Array to be copied into
+     * @param data Dest Array to be copied into
      */
     public void copy2DRangeTo(int xoff, int yoff, int w, int h, float[] data) {
         validateIsFloat32();
@@ -1715,8 +1796,23 @@
         Trace.traceBegin(RenderScript.TRACE_TAG, "copy3DRangeToUnchecked");
         mRS.validate();
         validate3DRange(xoff, yoff, zoff, w, h, d);
+        final int dataSize = mType.mElement.getBytesSize() * w * h * d;
+        // AutoPadding for Vec3 Element
+        boolean usePadding = false;
+        int sizeBytes = arrayLen * dt.mSize;
+        if (mAutoPadding && (mType.getElement().getVectorSize() == 3)) {
+            if (dataSize / 4 * 3 > sizeBytes) {
+                throw new RSIllegalArgumentException("Array too small for allocation type.");
+            }
+            usePadding = true;
+            sizeBytes = dataSize;
+        } else {
+            if (dataSize > sizeBytes) {
+                throw new RSIllegalArgumentException("Array too small for allocation type.");
+            }
+        }
         mRS.nAllocationRead3D(getIDSafe(), xoff, yoff, zoff, mSelectedLOD, w, h, d,
-                              array, arrayLen * dt.mSize, dt);
+                              array, sizeBytes, dt, mType.mElement.mType.mSize, usePadding);
         Trace.traceEnd(RenderScript.TRACE_TAG);
     }
 
diff --git a/rs/java/android/renderscript/RenderScript.java b/rs/java/android/renderscript/RenderScript.java
index 3b40431..29004c5 100644
--- a/rs/java/android/renderscript/RenderScript.java
+++ b/rs/java/android/renderscript/RenderScript.java
@@ -485,10 +485,12 @@
     }
 
 
-    native void rsnAllocationData1D(long con, long id, int off, int mip, int count, Object d, int sizeBytes, int dt);
-    synchronized void nAllocationData1D(long id, int off, int mip, int count, Object d, int sizeBytes, Element.DataType dt) {
+    native void rsnAllocationData1D(long con, long id, int off, int mip, int count, Object d, int sizeBytes, int dt,
+                                    int mSize, boolean usePadding);
+    synchronized void nAllocationData1D(long id, int off, int mip, int count, Object d, int sizeBytes, Element.DataType dt,
+                                        int mSize, boolean usePadding) {
         validate();
-        rsnAllocationData1D(mContext, id, off, mip, count, d, sizeBytes, dt.mID);
+        rsnAllocationData1D(mContext, id, off, mip, count, d, sizeBytes, dt.mID, mSize, usePadding);
     }
 
     native void rsnAllocationElementData(long con,long id, int xoff, int yoff, int zoff, int mip, int compIdx, byte[] d, int sizeBytes);
@@ -518,11 +520,13 @@
     }
 
     native void rsnAllocationData2D(long con, long id, int xoff, int yoff, int mip, int face,
-                                    int w, int h, Object d, int sizeBytes, int dt);
+                                    int w, int h, Object d, int sizeBytes, int dt,
+                                    int mSize, boolean usePadding);
     synchronized void nAllocationData2D(long id, int xoff, int yoff, int mip, int face,
-                                        int w, int h, Object d, int sizeBytes, Element.DataType dt) {
+                                        int w, int h, Object d, int sizeBytes, Element.DataType dt,
+                                        int mSize, boolean usePadding) {
         validate();
-        rsnAllocationData2D(mContext, id, xoff, yoff, mip, face, w, h, d, sizeBytes, dt.mID);
+        rsnAllocationData2D(mContext, id, xoff, yoff, mip, face, w, h, d, sizeBytes, dt.mID, mSize, usePadding);
     }
 
     native void rsnAllocationData2D(long con, long id, int xoff, int yoff, int mip, int face, Bitmap b);
@@ -550,25 +554,28 @@
     }
 
     native void rsnAllocationData3D(long con, long id, int xoff, int yoff, int zoff, int mip,
-                                    int w, int h, int depth, Object d, int sizeBytes, int dt);
+                                    int w, int h, int depth, Object d, int sizeBytes, int dt,
+                                    int mSize, boolean usePadding);
     synchronized void nAllocationData3D(long id, int xoff, int yoff, int zoff, int mip,
-                                        int w, int h, int depth, Object d, int sizeBytes, Element.DataType dt) {
+                                        int w, int h, int depth, Object d, int sizeBytes, Element.DataType dt,
+                                        int mSize, boolean usePadding) {
         validate();
-        rsnAllocationData3D(mContext, id, xoff, yoff, zoff, mip, w, h, depth, d, sizeBytes, dt.mID);
+        rsnAllocationData3D(mContext, id, xoff, yoff, zoff, mip, w, h, depth, d, sizeBytes,
+                            dt.mID, mSize, usePadding);
     }
 
-    native void rsnAllocationRead(long con, long id, Object d, int dt);
-    synchronized void nAllocationRead(long id, Object d, Element.DataType dt) {
+    native void rsnAllocationRead(long con, long id, Object d, int dt, int mSize, boolean usePadding);
+    synchronized void nAllocationRead(long id, Object d, Element.DataType dt, int mSize, boolean usePadding) {
         validate();
-        rsnAllocationRead(mContext, id, d, dt.mID);
+        rsnAllocationRead(mContext, id, d, dt.mID, mSize, usePadding);
     }
 
     native void rsnAllocationRead1D(long con, long id, int off, int mip, int count, Object d,
-                                    int sizeBytes, int dt);
+                                    int sizeBytes, int dt, int mSize, boolean usePadding);
     synchronized void nAllocationRead1D(long id, int off, int mip, int count, Object d,
-                                        int sizeBytes, Element.DataType dt) {
+                                        int sizeBytes, Element.DataType dt, int mSize, boolean usePadding) {
         validate();
-        rsnAllocationRead1D(mContext, id, off, mip, count, d, sizeBytes, dt.mID);
+        rsnAllocationRead1D(mContext, id, off, mip, count, d, sizeBytes, dt.mID, mSize, usePadding);
     }
 
     native void rsnAllocationElementRead(long con,long id, int xoff, int yoff, int zoff,
@@ -581,19 +588,23 @@
     }
 
     native void rsnAllocationRead2D(long con, long id, int xoff, int yoff, int mip, int face,
-                                    int w, int h, Object d, int sizeBytes, int dt);
+                                    int w, int h, Object d, int sizeBytes, int dt,
+                                    int mSize, boolean usePadding);
     synchronized void nAllocationRead2D(long id, int xoff, int yoff, int mip, int face,
-                                        int w, int h, Object d, int sizeBytes, Element.DataType dt) {
+                                        int w, int h, Object d, int sizeBytes, Element.DataType dt,
+                                        int mSize, boolean usePadding) {
         validate();
-        rsnAllocationRead2D(mContext, id, xoff, yoff, mip, face, w, h, d, sizeBytes, dt.mID);
+        rsnAllocationRead2D(mContext, id, xoff, yoff, mip, face, w, h, d, sizeBytes, dt.mID, mSize, usePadding);
     }
 
     native void rsnAllocationRead3D(long con, long id, int xoff, int yoff, int zoff, int mip,
-                                    int w, int h, int depth, Object d, int sizeBytes, int dt);
+                                    int w, int h, int depth, Object d, int sizeBytes, int dt,
+                                    int mSize, boolean usePadding);
     synchronized void nAllocationRead3D(long id, int xoff, int yoff, int zoff, int mip,
-                                        int w, int h, int depth, Object d, int sizeBytes, Element.DataType dt) {
+                                        int w, int h, int depth, Object d, int sizeBytes, Element.DataType dt,
+                                        int mSize, boolean usePadding) {
         validate();
-        rsnAllocationRead3D(mContext, id, xoff, yoff, zoff, mip, w, h, depth, d, sizeBytes, dt.mID);
+        rsnAllocationRead3D(mContext, id, xoff, yoff, zoff, mip, w, h, depth, d, sizeBytes, dt.mID, mSize, usePadding);
     }
 
     native long  rsnAllocationGetType(long con, long id);
diff --git a/rs/jni/android_renderscript_RenderScript.cpp b/rs/jni/android_renderscript_RenderScript.cpp
index a145166..0bad827 100644
--- a/rs/jni/android_renderscript_RenderScript.cpp
+++ b/rs/jni/android_renderscript_RenderScript.cpp
@@ -54,10 +54,12 @@
 #define PER_ARRAY_TYPE(flag, fnc, readonly, ...) {                                      \
     jint len = 0;                                                                       \
     void *ptr = nullptr;                                                                \
+    void *srcPtr = nullptr;                                                             \
     size_t typeBytes = 0;                                                               \
     jint relFlag = 0;                                                                   \
     if (readonly) {                                                                     \
         /* The on-release mode should only be JNI_ABORT for read-only accesses. */      \
+        /* readonly = true, also indicates we are copying to the allocation   . */      \
         relFlag = JNI_ABORT;                                                            \
     }                                                                                   \
     switch(dataType) {                                                                  \
@@ -65,14 +67,50 @@
         len = _env->GetArrayLength((jfloatArray)data);                                  \
         ptr = _env->GetFloatArrayElements((jfloatArray)data, flag);                     \
         typeBytes = 4;                                                                  \
-        fnc(__VA_ARGS__);                                                               \
+        if (usePadding) {                                                               \
+            srcPtr = ptr;                                                               \
+            len = len / 3 * 4;                                                          \
+            if (count == 0) {                                                           \
+                count = len / 4;                                                        \
+            }                                                                           \
+            ptr = malloc (len * typeBytes);                                             \
+            if (readonly) {                                                             \
+                copyWithPadding(ptr, srcPtr, mSize, count);                             \
+                fnc(__VA_ARGS__);                                                       \
+            } else {                                                                    \
+                fnc(__VA_ARGS__);                                                       \
+                copyWithUnPadding(srcPtr, ptr, mSize, count);                           \
+            }                                                                           \
+            free(ptr);                                                                  \
+            ptr = srcPtr;                                                               \
+        } else {                                                                        \
+            fnc(__VA_ARGS__);                                                           \
+        }                                                                               \
         _env->ReleaseFloatArrayElements((jfloatArray)data, (jfloat *)ptr, relFlag);     \
         return;                                                                         \
     case RS_TYPE_FLOAT_64:                                                              \
         len = _env->GetArrayLength((jdoubleArray)data);                                 \
         ptr = _env->GetDoubleArrayElements((jdoubleArray)data, flag);                   \
         typeBytes = 8;                                                                  \
-        fnc(__VA_ARGS__);                                                               \
+        if (usePadding) {                                                               \
+            srcPtr = ptr;                                                               \
+            len = len / 3 * 4;                                                          \
+            if (count == 0) {                                                           \
+                count = len / 4;                                                        \
+            }                                                                           \
+            ptr = malloc (len * typeBytes);                                             \
+            if (readonly) {                                                             \
+                copyWithPadding(ptr, srcPtr, mSize, count);                             \
+                fnc(__VA_ARGS__);                                                       \
+            } else {                                                                    \
+                fnc(__VA_ARGS__);                                                       \
+                copyWithUnPadding(srcPtr, ptr, mSize, count);                           \
+            }                                                                           \
+            free(ptr);                                                                  \
+            ptr = srcPtr;                                                               \
+        } else {                                                                        \
+            fnc(__VA_ARGS__);                                                           \
+        }                                                                               \
         _env->ReleaseDoubleArrayElements((jdoubleArray)data, (jdouble *)ptr, relFlag);  \
         return;                                                                         \
     case RS_TYPE_SIGNED_8:                                                              \
@@ -80,7 +118,25 @@
         len = _env->GetArrayLength((jbyteArray)data);                                   \
         ptr = _env->GetByteArrayElements((jbyteArray)data, flag);                       \
         typeBytes = 1;                                                                  \
-        fnc(__VA_ARGS__);                                                               \
+        if (usePadding) {                                                               \
+            srcPtr = ptr;                                                               \
+            len = len / 3 * 4;                                                          \
+            if (count == 0) {                                                           \
+                count = len / 4;                                                        \
+            }                                                                           \
+            ptr = malloc (len * typeBytes);                                             \
+            if (readonly) {                                                             \
+                copyWithPadding(ptr, srcPtr, mSize, count);                             \
+                fnc(__VA_ARGS__);                                                       \
+            } else {                                                                    \
+                fnc(__VA_ARGS__);                                                       \
+                copyWithUnPadding(srcPtr, ptr, mSize, count);                           \
+            }                                                                           \
+            free(ptr);                                                                  \
+            ptr = srcPtr;                                                               \
+        } else {                                                                        \
+            fnc(__VA_ARGS__);                                                           \
+        }                                                                               \
         _env->ReleaseByteArrayElements((jbyteArray)data, (jbyte*)ptr, relFlag);         \
         return;                                                                         \
     case RS_TYPE_SIGNED_16:                                                             \
@@ -88,7 +144,25 @@
         len = _env->GetArrayLength((jshortArray)data);                                  \
         ptr = _env->GetShortArrayElements((jshortArray)data, flag);                     \
         typeBytes = 2;                                                                  \
-        fnc(__VA_ARGS__);                                                               \
+        if (usePadding) {                                                               \
+            srcPtr = ptr;                                                               \
+            len = len / 3 * 4;                                                          \
+            if (count == 0) {                                                           \
+                count = len / 4;                                                        \
+            }                                                                           \
+            ptr = malloc (len * typeBytes);                                             \
+            if (readonly) {                                                             \
+                copyWithPadding(ptr, srcPtr, mSize, count);                             \
+                fnc(__VA_ARGS__);                                                       \
+            } else {                                                                    \
+                fnc(__VA_ARGS__);                                                       \
+                copyWithUnPadding(srcPtr, ptr, mSize, count);                           \
+            }                                                                           \
+            free(ptr);                                                                  \
+            ptr = srcPtr;                                                               \
+        } else {                                                                        \
+            fnc(__VA_ARGS__);                                                           \
+        }                                                                               \
         _env->ReleaseShortArrayElements((jshortArray)data, (jshort *)ptr, relFlag);     \
         return;                                                                         \
     case RS_TYPE_SIGNED_32:                                                             \
@@ -96,7 +170,25 @@
         len = _env->GetArrayLength((jintArray)data);                                    \
         ptr = _env->GetIntArrayElements((jintArray)data, flag);                         \
         typeBytes = 4;                                                                  \
-        fnc(__VA_ARGS__);                                                               \
+        if (usePadding) {                                                               \
+            srcPtr = ptr;                                                               \
+            len = len / 3 * 4;                                                          \
+            if (count == 0) {                                                           \
+                count = len / 4;                                                        \
+            }                                                                           \
+            ptr = malloc (len * typeBytes);                                             \
+            if (readonly) {                                                             \
+                copyWithPadding(ptr, srcPtr, mSize, count);                             \
+                fnc(__VA_ARGS__);                                                       \
+            } else {                                                                    \
+                fnc(__VA_ARGS__);                                                       \
+                copyWithUnPadding(srcPtr, ptr, mSize, count);                           \
+            }                                                                           \
+            free(ptr);                                                                  \
+            ptr = srcPtr;                                                               \
+        } else {                                                                        \
+            fnc(__VA_ARGS__);                                                           \
+        }                                                                               \
         _env->ReleaseIntArrayElements((jintArray)data, (jint *)ptr, relFlag);           \
         return;                                                                         \
     case RS_TYPE_SIGNED_64:                                                             \
@@ -104,13 +196,31 @@
         len = _env->GetArrayLength((jlongArray)data);                                   \
         ptr = _env->GetLongArrayElements((jlongArray)data, flag);                       \
         typeBytes = 8;                                                                  \
-        fnc(__VA_ARGS__);                                                               \
+        if (usePadding) {                                                               \
+            srcPtr = ptr;                                                               \
+            len = len / 3 * 4;                                                          \
+            if (count == 0) {                                                           \
+                count = len / 4;                                                        \
+            }                                                                           \
+            ptr = malloc (len * typeBytes);                                             \
+            if (readonly) {                                                             \
+                copyWithPadding(ptr, srcPtr, mSize, count);                             \
+                fnc(__VA_ARGS__);                                                       \
+            } else {                                                                    \
+                fnc(__VA_ARGS__);                                                       \
+                copyWithUnPadding(srcPtr, ptr, mSize, count);                           \
+            }                                                                           \
+            free(ptr);                                                                  \
+            ptr = srcPtr;                                                               \
+        } else {                                                                        \
+            fnc(__VA_ARGS__);                                                           \
+        }                                                                               \
         _env->ReleaseLongArrayElements((jlongArray)data, (jlong *)ptr, relFlag);        \
         return;                                                                         \
     default:                                                                            \
         break;                                                                          \
     }                                                                                   \
-    UNUSED(len, ptr, typeBytes, relFlag);                                               \
+    UNUSED(len, ptr, srcPtr, typeBytes, relFlag);                                       \
 }
 
 
@@ -184,6 +294,32 @@
 
 // ---------------------------------------------------------------------------
 
+static void copyWithPadding(void* ptr, void* srcPtr, int mSize, int count) {
+    int sizeBytesPad = mSize * 4;
+    int sizeBytes = mSize * 3;
+    uint8_t *dst = static_cast<uint8_t *>(ptr);
+    uint8_t *src = static_cast<uint8_t *>(srcPtr);
+    for (int i = 0; i < count; i++) {
+        memcpy(dst, src, sizeBytes);
+        dst += sizeBytesPad;
+        src += sizeBytes;
+    }
+}
+
+static void copyWithUnPadding(void* ptr, void* srcPtr, int mSize, int count) {
+    int sizeBytesPad = mSize * 4;
+    int sizeBytes = mSize * 3;
+    uint8_t *dst = static_cast<uint8_t *>(ptr);
+    uint8_t *src = static_cast<uint8_t *>(srcPtr);
+    for (int i = 0; i < count; i++) {
+        memcpy(dst, src, sizeBytes);
+        dst += sizeBytes;
+        src += sizeBytesPad;
+    }
+}
+
+
+// ---------------------------------------------------------------------------
 static void
 nContextFinish(JNIEnv *_env, jobject _this, jlong con)
 {
@@ -1014,7 +1150,8 @@
 // Copies from the Java object data into the Allocation pointed to by _alloc.
 static void
 nAllocationData1D(JNIEnv *_env, jobject _this, jlong con, jlong _alloc, jint offset, jint lod,
-                  jint count, jobject data, jint sizeBytes, jint dataType)
+                  jint count, jobject data, jint sizeBytes, jint dataType, jint mSize,
+                  jboolean usePadding)
 {
     RsAllocation *alloc = (RsAllocation *)_alloc;
     if (kLogApi) {
@@ -1022,8 +1159,8 @@
               "dataType(%i)", (RsContext)con, (RsAllocation)alloc, offset, count, sizeBytes,
               dataType);
     }
-    PER_ARRAY_TYPE(nullptr, rsAllocation1DData, true, (RsContext)con, alloc, offset, lod, count,
-                   ptr, sizeBytes);
+    PER_ARRAY_TYPE(nullptr, rsAllocation1DData, true,
+                   (RsContext)con, alloc, offset, lod, count, ptr, sizeBytes);
 }
 
 static void
@@ -1048,7 +1185,8 @@
 // Copies from the Java object data into the Allocation pointed to by _alloc.
 static void
 nAllocationData2D(JNIEnv *_env, jobject _this, jlong con, jlong _alloc, jint xoff, jint yoff, jint lod, jint _face,
-                  jint w, jint h, jobject data, jint sizeBytes, jint dataType)
+                  jint w, jint h, jobject data, jint sizeBytes, jint dataType, jint mSize,
+                  jboolean usePadding)
 {
     RsAllocation *alloc = (RsAllocation *)_alloc;
     RsAllocationCubemapFace face = (RsAllocationCubemapFace)_face;
@@ -1056,7 +1194,9 @@
         ALOGD("nAllocation2DData, con(%p), adapter(%p), xoff(%i), yoff(%i), w(%i), h(%i), len(%i) "
               "type(%i)", (RsContext)con, alloc, xoff, yoff, w, h, sizeBytes, dataType);
     }
-    PER_ARRAY_TYPE(nullptr, rsAllocation2DData, true, (RsContext)con, alloc, xoff, yoff, lod, face, w, h, ptr, sizeBytes, 0);
+    int count = w * h;
+    PER_ARRAY_TYPE(nullptr, rsAllocation2DData, true,
+                   (RsContext)con, alloc, xoff, yoff, lod, face, w, h, ptr, sizeBytes, 0);
 }
 
 // Copies from the Allocation pointed to by srcAlloc into the Allocation
@@ -1090,7 +1230,8 @@
 // Copies from the Java object data into the Allocation pointed to by _alloc.
 static void
 nAllocationData3D(JNIEnv *_env, jobject _this, jlong con, jlong _alloc, jint xoff, jint yoff, jint zoff, jint lod,
-                    jint w, jint h, jint d, jobject data, int sizeBytes, int dataType)
+                  jint w, jint h, jint d, jobject data, jint sizeBytes, jint dataType,
+                  jint mSize, jboolean usePadding)
 {
     RsAllocation *alloc = (RsAllocation *)_alloc;
     if (kLogApi) {
@@ -1098,7 +1239,9 @@
               " h(%i), d(%i), sizeBytes(%i)", (RsContext)con, (RsAllocation)alloc, xoff, yoff, zoff,
               lod, w, h, d, sizeBytes);
     }
-    PER_ARRAY_TYPE(nullptr, rsAllocation3DData, true, (RsContext)con, alloc, xoff, yoff, zoff, lod, w, h, d, ptr, sizeBytes, 0);
+    int count = w * h * d;
+    PER_ARRAY_TYPE(nullptr, rsAllocation3DData, true,
+                   (RsContext)con, alloc, xoff, yoff, zoff, lod, w, h, d, ptr, sizeBytes, 0);
 }
 
 // Copies from the Allocation pointed to by srcAlloc into the Allocation
@@ -1130,26 +1273,31 @@
 
 // Copies from the Allocation pointed to by _alloc into the Java object data.
 static void
-nAllocationRead(JNIEnv *_env, jobject _this, jlong con, jlong _alloc, jobject data, int dataType)
+nAllocationRead(JNIEnv *_env, jobject _this, jlong con, jlong _alloc, jobject data, jint dataType,
+                jint mSize, jboolean usePadding)
 {
     RsAllocation *alloc = (RsAllocation *)_alloc;
     if (kLogApi) {
         ALOGD("nAllocationRead, con(%p), alloc(%p)", (RsContext)con, (RsAllocation)alloc);
     }
-    PER_ARRAY_TYPE(0, rsAllocationRead, false, (RsContext)con, alloc, ptr, len * typeBytes);
+    int count = 0;
+    PER_ARRAY_TYPE(0, rsAllocationRead, false,
+                   (RsContext)con, alloc, ptr, len * typeBytes);
 }
 
 // Copies from the Allocation pointed to by _alloc into the Java object data.
 static void
 nAllocationRead1D(JNIEnv *_env, jobject _this, jlong con, jlong _alloc, jint offset, jint lod,
-                  jint count, jobject data, int sizeBytes, int dataType)
+                  jint count, jobject data, jint sizeBytes, jint dataType,
+                  jint mSize, jboolean usePadding)
 {
     RsAllocation *alloc = (RsAllocation *)_alloc;
     if (kLogApi) {
         ALOGD("nAllocation1DRead, con(%p), adapter(%p), offset(%i), count(%i), sizeBytes(%i), "
               "dataType(%i)", (RsContext)con, alloc, offset, count, sizeBytes, dataType);
     }
-    PER_ARRAY_TYPE(0, rsAllocation1DRead, false, (RsContext)con, alloc, offset, lod, count, ptr, sizeBytes);
+    PER_ARRAY_TYPE(0, rsAllocation1DRead, false,
+                   (RsContext)con, alloc, offset, lod, count, ptr, sizeBytes);
 }
 
 // Copies from the Element in the Allocation pointed to by _alloc into the Java array data.
@@ -1170,7 +1318,8 @@
 // Copies from the Allocation pointed to by _alloc into the Java object data.
 static void
 nAllocationRead2D(JNIEnv *_env, jobject _this, jlong con, jlong _alloc, jint xoff, jint yoff, jint lod, jint _face,
-                  jint w, jint h, jobject data, int sizeBytes, int dataType)
+                  jint w, jint h, jobject data, jint sizeBytes, jint dataType,
+                  jint mSize, jboolean usePadding)
 {
     RsAllocation *alloc = (RsAllocation *)_alloc;
     RsAllocationCubemapFace face = (RsAllocationCubemapFace)_face;
@@ -1178,13 +1327,16 @@
         ALOGD("nAllocation2DRead, con(%p), adapter(%p), xoff(%i), yoff(%i), w(%i), h(%i), len(%i) "
               "type(%i)", (RsContext)con, alloc, xoff, yoff, w, h, sizeBytes, dataType);
     }
-    PER_ARRAY_TYPE(0, rsAllocation2DRead, false, (RsContext)con, alloc, xoff, yoff, lod, face, w, h,
-                   ptr, sizeBytes, 0);
+    int count = w * h;
+    PER_ARRAY_TYPE(0, rsAllocation2DRead, false,
+                   (RsContext)con, alloc, xoff, yoff, lod, face, w, h, ptr, sizeBytes, 0);
 }
+
 // Copies from the Allocation pointed to by _alloc into the Java object data.
 static void
 nAllocationRead3D(JNIEnv *_env, jobject _this, jlong con, jlong _alloc, jint xoff, jint yoff, jint zoff, jint lod,
-                  jint w, jint h, jint d, jobject data, int sizeBytes, int dataType)
+                  jint w, jint h, jint d, jobject data, int sizeBytes, int dataType,
+                  jint mSize, jboolean usePadding)
 {
     RsAllocation *alloc = (RsAllocation *)_alloc;
     if (kLogApi) {
@@ -1192,8 +1344,9 @@
               " h(%i), d(%i), sizeBytes(%i)", (RsContext)con, (RsAllocation)alloc, xoff, yoff, zoff,
               lod, w, h, d, sizeBytes);
     }
-    PER_ARRAY_TYPE(0, rsAllocation3DRead, false, (RsContext)con, alloc, xoff, yoff, zoff, lod, w, h, d,
-                   ptr, sizeBytes, 0);
+    int count = w * h * d;
+    PER_ARRAY_TYPE(nullptr, rsAllocation3DRead, false,
+                   (RsContext)con, alloc, xoff, yoff, zoff, lod, w, h, d, ptr, sizeBytes, 0);
 }
 
 static jlong
@@ -2214,17 +2367,17 @@
 {"rsnAllocationSetSurface",          "(JJLandroid/view/Surface;)V",           (void*)nAllocationSetSurface },
 {"rsnAllocationIoSend",              "(JJ)V",                                 (void*)nAllocationIoSend },
 {"rsnAllocationIoReceive",           "(JJ)V",                                 (void*)nAllocationIoReceive },
-{"rsnAllocationData1D",              "(JJIIILjava/lang/Object;II)V",          (void*)nAllocationData1D },
+{"rsnAllocationData1D",              "(JJIIILjava/lang/Object;IIIZ)V",        (void*)nAllocationData1D },
 {"rsnAllocationElementData",         "(JJIIIII[BI)V",                         (void*)nAllocationElementData },
-{"rsnAllocationData2D",              "(JJIIIIIILjava/lang/Object;II)V",       (void*)nAllocationData2D },
+{"rsnAllocationData2D",              "(JJIIIIIILjava/lang/Object;IIIZ)V",     (void*)nAllocationData2D },
 {"rsnAllocationData2D",              "(JJIIIIIIJIIII)V",                      (void*)nAllocationData2D_alloc },
-{"rsnAllocationData3D",              "(JJIIIIIIILjava/lang/Object;II)V",      (void*)nAllocationData3D },
+{"rsnAllocationData3D",              "(JJIIIIIIILjava/lang/Object;IIIZ)V",    (void*)nAllocationData3D },
 {"rsnAllocationData3D",              "(JJIIIIIIIJIIII)V",                     (void*)nAllocationData3D_alloc },
-{"rsnAllocationRead",                "(JJLjava/lang/Object;I)V",              (void*)nAllocationRead },
-{"rsnAllocationRead1D",              "(JJIIILjava/lang/Object;II)V",          (void*)nAllocationRead1D },
+{"rsnAllocationRead",                "(JJLjava/lang/Object;IIZ)V",            (void*)nAllocationRead },
+{"rsnAllocationRead1D",              "(JJIIILjava/lang/Object;IIIZ)V",        (void*)nAllocationRead1D },
 {"rsnAllocationElementRead",         "(JJIIIIILjava/lang/Object;II)V",        (void*)nAllocationElementRead },
-{"rsnAllocationRead2D",              "(JJIIIIIILjava/lang/Object;II)V",       (void*)nAllocationRead2D },
-{"rsnAllocationRead3D",              "(JJIIIIIIILjava/lang/Object;II)V",      (void*)nAllocationRead3D },
+{"rsnAllocationRead2D",              "(JJIIIIIILjava/lang/Object;IIIZ)V",     (void*)nAllocationRead2D },
+{"rsnAllocationRead3D",              "(JJIIIIIIILjava/lang/Object;IIIZ)V",    (void*)nAllocationRead3D },
 {"rsnAllocationGetType",             "(JJ)J",                                 (void*)nAllocationGetType},
 {"rsnAllocationResize1D",            "(JJI)V",                                (void*)nAllocationResize1D },
 {"rsnAllocationGenerateMipmaps",     "(JJ)V",                                 (void*)nAllocationGenerateMipmaps },