Merge "Fixing asynchronous performance issues."
diff --git a/rsContext.cpp b/rsContext.cpp
index 8b6ca31..f65dd47 100644
--- a/rsContext.cpp
+++ b/rsContext.cpp
@@ -246,20 +246,32 @@
 
     rsc->mRunning = true;
     bool mDraw = true;
+    bool doWait = true;
+
+    uint64_t targetTime = rsc->getTime();
     while (!rsc->mExit) {
-        mDraw |= rsc->mIO.playCoreCommands(rsc, !mDraw);
+        uint64_t waitTime = 0;
+        uint64_t now = rsc->getTime();
+        if (now < targetTime) {
+            waitTime = targetTime - now;
+        } else {
+            doWait = false;
+        }
+
+        mDraw |= rsc->mIO.playCoreCommands(rsc, doWait, waitTime);
         mDraw &= (rsc->mRootScript.get() != NULL);
         mDraw &= rsc->mHasSurface;
 
-        uint32_t targetTime = 0;
         if (mDraw && rsc->mIsGraphicsContext) {
-            targetTime = rsc->runRootScript();
+            uint64_t delay = rsc->runRootScript() * 1000000;
+            targetTime = rsc->getTime() + delay;
+            doWait = delay != 0;
 
             if (rsc->props.mLogVisual) {
                 rsc->displayDebugStats();
             }
 
-            mDraw = targetTime && !rsc->mPaused;
+            mDraw = !rsc->mPaused;
             rsc->timerSet(RS_TIMER_CLEAR_SWAP);
             rsc->mHal.funcs.swap(rsc);
             rsc->timerFrame();
@@ -267,12 +279,6 @@
             rsc->timerPrint();
             rsc->timerReset();
         }
-        if (targetTime > 1) {
-            int32_t t = (targetTime - (int32_t)(rsc->mTimeMSLastScript + rsc->mTimeMSLastSwap)) * 1000;
-            if (t > 0) {
-                usleep(t);
-            }
-        }
     }
 
     LOGV("%p, RS Thread exiting", rsc);
diff --git a/rsFont.cpp b/rsFont.cpp
index 63a9ee6..7efed9d 100644
--- a/rsFont.cpp
+++ b/rsFont.cpp
@@ -733,7 +733,7 @@
     Font *currentFont = mRSC->getFont();
     if (!currentFont) {
         if (!mDefault.get()) {
-            String8 fontsDir("/fonts/DroidSans.ttf");
+            String8 fontsDir("/fonts/Roboto-Regular.ttf");
             String8 fullPath(getenv("ANDROID_ROOT"));
             fullPath += fontsDir;
 
diff --git a/rsLocklessFifo.cpp b/rsLocklessFifo.cpp
index 7023a1f..02a76ab 100644
--- a/rsLocklessFifo.cpp
+++ b/rsLocklessFifo.cpp
@@ -129,21 +129,23 @@
     //dumpState("flush 2");
 }
 
-void LocklessCommandFifo::wait() {
+bool LocklessCommandFifo::wait(uint64_t timeout) {
     while (isEmpty() && !mInShutdown) {
         mSignalToControl.set();
-        mSignalToWorker.wait();
+        return mSignalToWorker.wait(timeout);
     }
+    return true;
 }
 
-const void * LocklessCommandFifo::get(uint32_t *command, uint32_t *bytesData) {
+const void * LocklessCommandFifo::get(uint32_t *command, uint32_t *bytesData, uint64_t timeout) {
     while (1) {
         //dumpState("get");
-        wait();
-        if (mInShutdown) {
+        wait(timeout);
+
+        if (isEmpty() || mInShutdown) {
             *command = 0;
             *bytesData = 0;
-            return 0;
+            return NULL;
         }
 
         *command = reinterpret_cast<const uint16_t *>(mGet)[0];
diff --git a/rsLocklessFifo.h b/rsLocklessFifo.h
index eabdc3e..4962ef6 100644
--- a/rsLocklessFifo.h
+++ b/rsLocklessFifo.h
@@ -57,9 +57,9 @@
     void commitSync(uint32_t command, uint32_t bytes);
 
     void flush();
-    void wait();
+    bool wait(uint64_t timeout = 0);
 
-    const void * get(uint32_t *command, uint32_t *bytesData);
+    const void * get(uint32_t *command, uint32_t *bytesData, uint64_t timeout = 0);
     void next();
 
     void makeSpace(uint32_t bytes);
diff --git a/rsSignal.cpp b/rsSignal.cpp
index ccd20b9..413ac2b 100644
--- a/rsSignal.cpp
+++ b/rsSignal.cpp
@@ -68,26 +68,43 @@
     }
 }
 
-void Signal::wait() {
+bool Signal::wait(uint64_t timeout) {
     int status;
+    bool ret = false;
 
     status = pthread_mutex_lock(&mMutex);
     if (status) {
         LOGE("LocklessCommandFifo: error %i locking for condition.", status);
-        return;
+        return false;
     }
 
     if (!mSet) {
-        status = pthread_cond_wait(&mCondition, &mMutex);
-        if (status) {
-            LOGE("LocklessCommandFifo: error %i waiting on condition.", status);
+        if (!timeout) {
+            status = pthread_cond_wait(&mCondition, &mMutex);
+        } else {
+#if defined(HAVE_PTHREAD_COND_TIMEDWAIT_RELATIVE)
+            status = pthread_cond_timeout_np(&mCondition, &mMutex, timeout / 1000000);
+#else
+            // This is safe it will just make things less reponsive
+            status = pthread_cond_wait(&mCondition, &mMutex);
+#endif
         }
     }
-    mSet = false;
+
+    if (!status) {
+        mSet = false;
+        ret = true;
+    } else {
+        if (status != ETIMEDOUT) {
+            LOGE("LocklessCommandFifo: error %i waiting for condition.", status);
+        }
+    }
 
     status = pthread_mutex_unlock(&mMutex);
     if (status) {
         LOGE("LocklessCommandFifo: error %i unlocking for condition.", status);
     }
+
+    return ret;
 }
 
diff --git a/rsSignal.h b/rsSignal.h
index 2e760f1..fc31883 100644
--- a/rsSignal.h
+++ b/rsSignal.h
@@ -31,7 +31,10 @@
     bool init();
 
     void set();
-    void wait();
+
+    // returns true if the signal occured
+    // false for timeout
+    bool wait(uint64_t timeout = 0);
 
 protected:
     bool mSet;
diff --git a/rsThreadIO.cpp b/rsThreadIO.cpp
index 1c8b89c..fe2c52e 100644
--- a/rsThreadIO.cpp
+++ b/rsThreadIO.cpp
@@ -113,8 +113,10 @@
 }
 
 
-bool ThreadIO::playCoreCommands(Context *con, bool waitForCommand) {
+bool ThreadIO::playCoreCommands(Context *con, bool waitForCommand, uint64_t timeToWait) {
     bool ret = false;
+    uint64_t startTime = con->getTime();
+
     while (!mToCore.isEmpty() || waitForCommand) {
         uint32_t cmdID = 0;
         uint32_t cmdSize = 0;
@@ -122,9 +124,17 @@
         if (con->props.mLogTimes) {
             con->timerSet(Context::RS_TIMER_IDLE);
         }
-        const void * data = mToCore.get(&cmdID, &cmdSize);
+
+        uint64_t delay = 0;
+        if (waitForCommand) {
+            delay = timeToWait - (con->getTime() - startTime);
+            if (delay > timeToWait) {
+                delay = 0;
+            }
+        }
+        const void * data = mToCore.get(&cmdID, &cmdSize, delay);
         if (!cmdSize) {
-            // exception occured, probably shutdown.
+            // exception or timeout occurred.
             return false;
         }
         if (con->props.mLogTimes) {
diff --git a/rsThreadIO.h b/rsThreadIO.h
index cad7318..9036118 100644
--- a/rsThreadIO.h
+++ b/rsThreadIO.h
@@ -37,7 +37,7 @@
 
     // Plays back commands from the client.
     // Returns true if any commands were processed.
-    bool playCoreCommands(Context *con, bool waitForCommand);
+    bool playCoreCommands(Context *con, bool waitForCommand, uint64_t timeToWait);
 
     //LocklessCommandFifo mToCore;
 
diff --git a/scriptc/rs_allocation.rsh b/scriptc/rs_allocation.rsh
new file mode 100644
index 0000000..1e755cd
--- /dev/null
+++ b/scriptc/rs_allocation.rsh
@@ -0,0 +1,144 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** @file rs_allocation.rsh
+ *  \brief Allocation routines
+ *
+ *
+ */
+
+#ifndef __RS_ALLOCATION_RSH__
+#define __RS_ALLOCATION_RSH__
+
+/**
+ * Returns the Allocation for a given pointer.  The pointer should point within
+ * a valid allocation.  The results are undefined if the pointer is not from a
+ * valid allocation.
+ */
+extern rs_allocation __attribute__((overloadable))
+    rsGetAllocation(const void *);
+
+/**
+ * Query the dimension of an allocation.
+ *
+ * @return uint32_t The X dimension of the allocation.
+ */
+extern uint32_t __attribute__((overloadable))
+    rsAllocationGetDimX(rs_allocation);
+
+/**
+ * Query the dimension of an allocation.
+ *
+ * @return uint32_t The Y dimension of the allocation.
+ */
+extern uint32_t __attribute__((overloadable))
+    rsAllocationGetDimY(rs_allocation);
+
+/**
+ * Query the dimension of an allocation.
+ *
+ * @return uint32_t The Z dimension of the allocation.
+ */
+extern uint32_t __attribute__((overloadable))
+    rsAllocationGetDimZ(rs_allocation);
+
+/**
+ * Query an allocation for the presence of more than one LOD.
+ *
+ * @return uint32_t Returns 1 if more than one LOD is present, 0 otherwise.
+ */
+extern uint32_t __attribute__((overloadable))
+    rsAllocationGetDimLOD(rs_allocation);
+
+/**
+ * Query an allocation for the presence of more than one face.
+ *
+ * @return uint32_t Returns 1 if more than one face is present, 0 otherwise.
+ */
+extern uint32_t __attribute__((overloadable))
+    rsAllocationGetDimFaces(rs_allocation);
+
+/**
+ * Copy part of an allocation from another allocation.
+ *
+ * @param dstAlloc Allocation to copy data into.
+ * @param dstOff The offset of the first element to be copied in
+ *               the destination allocation.
+ * @param dstMip Mip level in the destination allocation.
+ * @param count The number of elements to be copied.
+ * @param srcAlloc The source data allocation.
+ * @param srcOff The offset of the first element in data to be
+ *               copied in the source allocation.
+ * @param srcMip Mip level in the source allocation.
+ */
+extern void __attribute__((overloadable))
+    rsAllocationCopy1DRange(rs_allocation dstAlloc,
+                            uint32_t dstOff, uint32_t dstMip,
+                            uint32_t count,
+                            rs_allocation srcAlloc,
+                            uint32_t srcOff, uint32_t srcMip);
+
+/**
+ * Copy a rectangular region into the allocation from another
+ * allocation.
+ *
+ * @param dstAlloc allocation to copy data into.
+ * @param dstXoff X offset of the region to update in the
+ *                destination allocation.
+ * @param dstYoff Y offset of the region to update in the
+ *                destination allocation.
+ * @param dstMip Mip level in the destination allocation.
+ * @param dstFace Cubemap face of the destination allocation,
+ *                ignored for allocations that aren't cubemaps.
+ * @param width Width of the incoming region to update.
+ * @param height Height of the incoming region to update.
+ * @param srcAlloc The source data allocation.
+ * @param srcXoff X offset in data of the source allocation.
+ * @param srcYoff Y offset in data of the source allocation.
+ * @param srcMip Mip level in the source allocation.
+ * @param srcFace Cubemap face of the source allocation,
+ *                ignored for allocations that aren't cubemaps.
+ */
+extern void __attribute__((overloadable))
+    rsAllocationCopy2DRange(rs_allocation dstAlloc,
+                            uint32_t dstXoff, uint32_t dstYoff,
+                            uint32_t dstMip,
+                            rs_allocation_cubemap_face dstFace,
+                            uint32_t width, uint32_t height,
+                            rs_allocation srcAlloc,
+                            uint32_t srcXoff, uint32_t srcYoff,
+                            uint32_t srcMip,
+                            rs_allocation_cubemap_face srcFace);
+
+
+/**
+ * Extract a single element from an allocation.
+ */
+extern const void * __attribute__((overloadable))
+    rsGetElementAt(rs_allocation, uint32_t x);
+/**
+ * \overload
+ */
+extern const void * __attribute__((overloadable))
+    rsGetElementAt(rs_allocation, uint32_t x, uint32_t y);
+/**
+ * \overload
+ */
+extern const void * __attribute__((overloadable))
+    rsGetElementAt(rs_allocation, uint32_t x, uint32_t y, uint32_t z);
+
+#endif
+
diff --git a/scriptc/rs_atomic.rsh b/scriptc/rs_atomic.rsh
new file mode 100644
index 0000000..95513ad
--- /dev/null
+++ b/scriptc/rs_atomic.rsh
@@ -0,0 +1,248 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** @file rs_atomic.rsh
+ *  \brief Atomic routines
+ *
+ *
+ */
+
+#ifndef __RS_ATOMIC_RSH__
+#define __RS_ATOMIC_RSH__
+
+
+/**
+ * Atomic add one to the value at addr.
+ * Equal to rsAtomicAdd(addr, 1)
+ *
+ * @param addr Address of value to increment
+ *
+ * @return old value
+ */
+extern int32_t __attribute__((overloadable))
+    rsAtomicInc(volatile int32_t* addr);
+/**
+ * Atomic add one to the value at addr.
+ * Equal to rsAtomicAdd(addr, 1)
+ *
+ * @param addr Address of value to increment
+ *
+ * @return old value
+ */
+extern uint32_t __attribute__((overloadable))
+    rsAtomicInc(volatile uint32_t* addr);
+
+/**
+ * Atomic subtract one from the value at addr. Equal to rsAtomicSub(addr, 1)
+ *
+ * @param addr Address of value to decrement
+ *
+ * @return old value
+ */
+extern int32_t __attribute__((overloadable))
+    rsAtomicDec(volatile int32_t* addr);
+/**
+ * Atomic subtract one from the value at addr. Equal to rsAtomicSub(addr, 1)
+ *
+ * @param addr Address of value to decrement
+ *
+ * @return old value
+ */
+extern uint32_t __attribute__((overloadable))
+    rsAtomicDec(volatile uint32_t* addr);
+
+/**
+ * Atomic add a value to the value at addr.  addr[0] += value
+ *
+ * @param addr Address of value to modify
+ * @param value Amount to add to the value at addr
+ *
+ * @return old value
+ */
+extern int32_t __attribute__((overloadable))
+    rsAtomicAdd(volatile int32_t* addr, int32_t value);
+/**
+ * Atomic add a value to the value at addr.  addr[0] += value
+ *
+ * @param addr Address of value to modify
+ * @param value Amount to add to the value at addr
+ *
+ * @return old value
+ */
+extern uint32_t __attribute__((overloadable))
+    rsAtomicAdd(volatile uint32_t* addr, uint32_t value);
+
+/**
+ * Atomic Subtract a value from the value at addr.  addr[0] -= value
+ *
+ * @param addr Address of value to modify
+ * @param value Amount to subtract from the value at addr
+ *
+ * @return old value
+ */
+extern int32_t __attribute__((overloadable))
+    rsAtomicSub(volatile int32_t* addr, int32_t value);
+/**
+ * Atomic Subtract a value from the value at addr.  addr[0] -= value
+ *
+ * @param addr Address of value to modify
+ * @param value Amount to subtract from the value at addr
+ *
+ * @return old value
+ */
+extern uint32_t __attribute__((overloadable))
+    rsAtomicSub(volatile uint32_t* addr, uint32_t value);
+
+/**
+ * Atomic Bitwise and a value from the value at addr.  addr[0] &= value
+ *
+ * @param addr Address of value to modify
+ * @param value Amount to and with the value at addr
+ *
+ * @return old value
+ */
+extern int32_t __attribute__((overloadable))
+    rsAtomicAnd(volatile int32_t* addr, int32_t value);
+/**
+ * Atomic Bitwise and a value from the value at addr.  addr[0] &= value
+ *
+ * @param addr Address of value to modify
+ * @param value Amount to and with the value at addr
+ *
+ * @return old value
+ */
+extern uint32_t __attribute__((overloadable))
+    rsAtomicAnd(volatile uint32_t* addr, uint32_t value);
+
+/**
+ * Atomic Bitwise or a value from the value at addr.  addr[0] |= value
+ *
+ * @param addr Address of value to modify
+ * @param value Amount to or with the value at addr
+ *
+ * @return old value
+ */
+extern int32_t __attribute__((overloadable))
+    rsAtomicOr(volatile int32_t* addr, int32_t value);
+/**
+ * Atomic Bitwise or a value from the value at addr.  addr[0] |= value
+ *
+ * @param addr Address of value to modify
+ * @param value Amount to or with the value at addr
+ *
+ * @return old value
+ */
+extern uint32_t __attribute__((overloadable))
+    rsAtomicOr(volatile uint32_t* addr, uint32_t value);
+
+/**
+ * Atomic Bitwise xor a value from the value at addr.  addr[0] ^= value
+ *
+ * @param addr Address of value to modify
+ * @param value Amount to xor with the value at addr
+ *
+ * @return old value
+ */
+extern uint32_t __attribute__((overloadable))
+    rsAtomicXor(volatile uint32_t* addr, uint32_t value);
+/**
+ * Atomic Bitwise xor a value from the value at addr.  addr[0] ^= value
+ *
+ * @param addr Address of value to modify
+ * @param value Amount to xor with the value at addr
+ *
+ * @return old value
+ */
+extern int32_t __attribute__((overloadable))
+    rsAtomicXor(volatile int32_t* addr, int32_t value);
+
+/**
+ * Atomic Set the value at addr to the min of addr and value
+ * addr[0] = rsMin(addr[0], value)
+ *
+ * @param addr Address of value to modify
+ * @param value comparison value
+ *
+ * @return old value
+ */
+extern uint32_t __attribute__((overloadable))
+    rsAtomicMin(volatile uint32_t* addr, uint32_t value);
+/**
+ * Atomic Set the value at addr to the min of addr and value
+ * addr[0] = rsMin(addr[0], value)
+ *
+ * @param addr Address of value to modify
+ * @param value comparison value
+ *
+ * @return old value
+ */
+extern int32_t __attribute__((overloadable))
+    rsAtomicMin(volatile int32_t* addr, int32_t value);
+
+/**
+ * Atomic Set the value at addr to the max of addr and value
+ * addr[0] = rsMax(addr[0], value)
+ *
+ * @param addr Address of value to modify
+ * @param value comparison value
+ *
+ * @return old value
+ */
+extern uint32_t __attribute__((overloadable))
+    rsAtomicMax(volatile uint32_t* addr, uint32_t value);
+/**
+ * Atomic Set the value at addr to the max of addr and value
+ * addr[0] = rsMin(addr[0], value)
+ *
+ * @param addr Address of value to modify
+ * @param value comparison value
+ *
+ * @return old value
+ */
+extern int32_t __attribute__((overloadable))
+    rsAtomicMax(volatile int32_t* addr, int32_t value);
+
+/**
+ * Compare-and-set operation with a full memory barrier.
+ *
+ * If the value at addr matches compareValue then newValue is written.
+ *
+ * @param addr The address to compare and replace if the compare passes.
+ * @param compareValue The value to test addr[0] against.
+ * @param newValue The value to write if the test passes.
+ *
+ * @return old value
+ */
+extern int32_t __attribute__((overloadable))
+    rsAtomicCas(volatile int32_t* addr, int32_t compareValue, int32_t newValue);
+
+/**
+ * Compare-and-set operation with a full memory barrier.
+ *
+ * If the value at addr matches compareValue then newValue is written.
+ *
+ * @param addr The address to compare and replace if the compare passes.
+ * @param compareValue The value to test addr[0] against.
+ * @param newValue The value to write if the test passes.
+ *
+ * @return old value
+ */
+extern uint32_t __attribute__((overloadable))
+    rsAtomicCas(volatile uint32_t* addr, int32_t compareValue, int32_t newValue);
+
+
+#endif
+
diff --git a/scriptc/rs_cl.rsh b/scriptc/rs_cl.rsh
index d78e62e..e402b86 100644
--- a/scriptc/rs_cl.rsh
+++ b/scriptc/rs_cl.rsh
@@ -1,8 +1,28 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** @file rs_cl.rsh
+ *  \brief Additional compute routines
+ *
+ *
+ */
+
 #ifndef __RS_CL_RSH__
 #define __RS_CL_RSH__
 
-#define _RS_RUNTIME  extern
-
 // Conversions
 #define CVT_FUNC_2(typeout, typein)                             \
 _RS_RUNTIME typeout##2 __attribute__((overloadable))             \
@@ -444,6 +464,5 @@
 #undef IN_FUNC_IN
 #undef XN_FUNC_XN_XN_BODY
 #undef IN_FUNC_IN_IN_BODY
-#undef _RS_RUNTIME
 
 #endif
diff --git a/scriptc/rs_core.rsh b/scriptc/rs_core.rsh
index 1583090..be900cb 100644
--- a/scriptc/rs_core.rsh
+++ b/scriptc/rs_core.rsh
@@ -1,889 +1,166 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 /** @file rs_core.rsh
  *  \brief todo-jsams
  *
  *  todo-jsams
  *
  */
+
 #ifndef __RS_CORE_RSH__
 #define __RS_CORE_RSH__
 
 #define _RS_RUNTIME extern
 
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, float);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, float, float);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, float, float, float);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, float, float, float, float);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, double);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, const rs_matrix4x4 *);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, const rs_matrix3x3 *);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, const rs_matrix2x2 *);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, int);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, uint);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, long);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, unsigned long);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, long long);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, unsigned long long);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, const void *);
-#define RS_DEBUG(a) rsDebug(#a, a)
-#define RS_DEBUG_MARKER rsDebug(__FILE__, __LINE__)
+#include "rs_types.rsh"
+#include "rs_allocation.rsh"
+#include "rs_atomic.rsh"
+#include "rs_cl.rsh"
+#include "rs_debug.rsh"
+#include "rs_math.rsh"
+#include "rs_matrix.rsh"
+#include "rs_object.rsh"
+#include "rs_quaternion.rsh"
+#include "rs_time.rsh"
+
 
 
 /**
- * Debug function.  Prints a string and value to the log.
+ * Send a message back to the client.  Will not block and returns true
+ * if the message was sendable and false if the fifo was full.
+ * A message ID is required.  Data payload is optional.
  */
-_RS_RUNTIME void __attribute__((overloadable)) rsDebug(const char *s, float2 v);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-_RS_RUNTIME void __attribute__((overloadable)) rsDebug(const char *s, float3 v);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-_RS_RUNTIME void __attribute__((overloadable)) rsDebug(const char *s, float4 v);
-
-
-/**
- * Pack floating point (0-1) RGB values into a uchar4.  The alpha component is
- * set to 255 (1.0).
- *
- * @param r
- * @param g
- * @param b
- *
- * @return uchar4
- */
-_RS_RUNTIME uchar4 __attribute__((overloadable)) rsPackColorTo8888(float r, float g, float b);
-
-/**
- * Pack floating point (0-1) RGBA values into a uchar4.
- *
- * @param r
- * @param g
- * @param b
- * @param a
- *
- * @return uchar4
- */
-_RS_RUNTIME uchar4 __attribute__((overloadable)) rsPackColorTo8888(float r, float g, float b, float a);
-
-/**
- * Pack floating point (0-1) RGB values into a uchar4.  The alpha component is
- * set to 255 (1.0).
- *
- * @param color
- *
- * @return uchar4
- */
-_RS_RUNTIME uchar4 __attribute__((overloadable)) rsPackColorTo8888(float3 color);
-
-/**
- * Pack floating point (0-1) RGBA values into a uchar4.
- *
- * @param color
- *
- * @return uchar4
- */
-_RS_RUNTIME uchar4 __attribute__((overloadable)) rsPackColorTo8888(float4 color);
-
-/**
- * Unpack a uchar4 color to float4.  The resulting float range will be (0-1).
- *
- * @param c
- *
- * @return float4
- */
-_RS_RUNTIME float4 rsUnpackColor8888(uchar4 c);
-
-
-/////////////////////////////////////////////////////
-// Matrix ops
-/////////////////////////////////////////////////////
-
-/**
- * Set one element of a matrix.
- *
- * @param m The matrix to be set
- * @param row
- * @param col
- * @param v
- *
- * @return void
- */
-_RS_RUNTIME void __attribute__((overloadable))
-rsMatrixSet(rs_matrix4x4 *m, uint32_t row, uint32_t col, float v);
+extern bool __attribute__((overloadable))
+    rsSendToClient(int cmdID);
 /**
  * \overload
  */
-_RS_RUNTIME void __attribute__((overloadable))
-rsMatrixSet(rs_matrix3x3 *m, uint32_t row, uint32_t col, float v);
+extern bool __attribute__((overloadable))
+    rsSendToClient(int cmdID, const void *data, uint len);
 /**
- * \overload
- */
-_RS_RUNTIME void __attribute__((overloadable))
-rsMatrixSet(rs_matrix2x2 *m, uint32_t row, uint32_t col, float v);
-
-/**
- * Get one element of a matrix.
- *
- * @param m The matrix to read from
- * @param row
- * @param col
- *
- * @return float
- */
-_RS_RUNTIME float __attribute__((overloadable))
-rsMatrixGet(const rs_matrix4x4 *m, uint32_t row, uint32_t col);
-/**
- * \overload
- */
-_RS_RUNTIME float __attribute__((overloadable))
-rsMatrixGet(const rs_matrix3x3 *m, uint32_t row, uint32_t col);
-/**
- * \overload
- */
-_RS_RUNTIME float __attribute__((overloadable))
-rsMatrixGet(const rs_matrix2x2 *m, uint32_t row, uint32_t col);
-
-/**
- * Set the elements of a matrix to the identity matrix.
- *
- * @param m
- */
-extern void __attribute__((overloadable)) rsMatrixLoadIdentity(rs_matrix4x4 *m);
-/**
- * \overload
- */
-extern void __attribute__((overloadable)) rsMatrixLoadIdentity(rs_matrix3x3 *m);
-/**
- * \overload
- */
-extern void __attribute__((overloadable)) rsMatrixLoadIdentity(rs_matrix2x2 *m);
-
-/**
- * Set the elements of a matrix from an array of floats.
- *
- * @param m
- */
-extern void __attribute__((overloadable)) rsMatrixLoad(rs_matrix4x4 *m, const float *v);
-/**
- * \overload
- */
-extern void __attribute__((overloadable)) rsMatrixLoad(rs_matrix3x3 *m, const float *v);
-/**
- * \overload
- */
-extern void __attribute__((overloadable)) rsMatrixLoad(rs_matrix2x2 *m, const float *v);
-/**
- * \overload
- */
-extern void __attribute__((overloadable)) rsMatrixLoad(rs_matrix4x4 *m, const rs_matrix4x4 *v);
-/**
- * \overload
- */
-extern void __attribute__((overloadable)) rsMatrixLoad(rs_matrix4x4 *m, const rs_matrix3x3 *v);
-
-/**
- * Set the elements of a matrix from another matrix.
- *
- * @param m
- */
-extern void __attribute__((overloadable)) rsMatrixLoad(rs_matrix4x4 *m, const rs_matrix2x2 *v);
-/**
- * \overload
- */
-extern void __attribute__((overloadable)) rsMatrixLoad(rs_matrix3x3 *m, const rs_matrix3x3 *v);
-/**
- * \overload
- */
-extern void __attribute__((overloadable)) rsMatrixLoad(rs_matrix2x2 *m, const rs_matrix2x2 *v);
-
-/**
- * Load a rotation matrix.
- *
- * @param m
- * @param rot
- * @param x
- * @param y
- * @param z
+ * Send a message back to the client, blocking until the message is queued.
+ * A message ID is required.  Data payload is optional.
  */
 extern void __attribute__((overloadable))
-rsMatrixLoadRotate(rs_matrix4x4 *m, float rot, float x, float y, float z);
-
-/**
- * Load a scale matrix.
- *
- * @param m
- * @param x
- * @param y
- * @param z
- */
-extern void __attribute__((overloadable))
-rsMatrixLoadScale(rs_matrix4x4 *m, float x, float y, float z);
-
-/**
- * Load a translation matrix.
- *
- * @param m
- * @param x
- * @param y
- * @param z
- */
-extern void __attribute__((overloadable))
-rsMatrixLoadTranslate(rs_matrix4x4 *m, float x, float y, float z);
-
-/**
- * Multiply two matrix (lhs, rhs) and place the result in m.
- *
- * @param m
- * @param lhs
- * @param rhs
- */
-extern void __attribute__((overloadable))
-rsMatrixLoadMultiply(rs_matrix4x4 *m, const rs_matrix4x4 *lhs, const rs_matrix4x4 *rhs);
+    rsSendToClientBlocking(int cmdID);
 /**
  * \overload
  */
 extern void __attribute__((overloadable))
-rsMatrixLoadMultiply(rs_matrix3x3 *m, const rs_matrix3x3 *lhs, const rs_matrix3x3 *rhs);
-/**
- * \overload
- */
-extern void __attribute__((overloadable))
-rsMatrixLoadMultiply(rs_matrix2x2 *m, const rs_matrix2x2 *lhs, const rs_matrix2x2 *rhs);
+    rsSendToClientBlocking(int cmdID, const void *data, uint len);
+
 
 /**
- * Multiply the matrix m by rhs and place the result back into m.
+ * Launch order hint for rsForEach calls.  This provides a hint to the system to
+ * determine in which order the root function of the target is called with each
+ * cell of the allocation.
  *
- * @param m (lhs)
- * @param rhs
+ * This is a hint and implementations may not obey the order.
  */
-extern void __attribute__((overloadable))
-rsMatrixMultiply(rs_matrix4x4 *m, const rs_matrix4x4 *rhs);
-/**
- * \overload
- */
-extern void __attribute__((overloadable))
-rsMatrixMultiply(rs_matrix3x3 *m, const rs_matrix3x3 *rhs);
-/**
- * \overload
- */
-extern void __attribute__((overloadable))
-rsMatrixMultiply(rs_matrix2x2 *m, const rs_matrix2x2 *rhs);
+enum rs_for_each_strategy {
+    RS_FOR_EACH_STRATEGY_SERIAL,
+    RS_FOR_EACH_STRATEGY_DONT_CARE,
+    RS_FOR_EACH_STRATEGY_DST_LINEAR,
+    RS_FOR_EACH_STRATEGY_TILE_SMALL,
+    RS_FOR_EACH_STRATEGY_TILE_MEDIUM,
+    RS_FOR_EACH_STRATEGY_TILE_LARGE
+};
+
 
 /**
- * Multiple matrix m with a rotation matrix
+ * Structure to provide extra information to a rsForEach call.  Primarly used to
+ * restrict the call to a subset of cells in the allocation.
+ */
+typedef struct rs_script_call {
+    enum rs_for_each_strategy strategy;
+    uint32_t xStart;
+    uint32_t xEnd;
+    uint32_t yStart;
+    uint32_t yEnd;
+    uint32_t zStart;
+    uint32_t zEnd;
+    uint32_t arrayStart;
+    uint32_t arrayEnd;
+} rs_script_call_t;
+
+/**
+ * Make a script to script call to launch work. One of the input or output is
+ * required to be a valid object. The input and output must be of the same
+ * dimensions.
+ * API 10-13
  *
- * @param m
- * @param rot
- * @param x
- * @param y
- * @param z
- */
-extern void __attribute__((overloadable))
-rsMatrixRotate(rs_matrix4x4 *m, float rot, float x, float y, float z);
-
-/**
- * Multiple matrix m with a scale matrix
+ * @param script The target script to call
+ * @param input The allocation to source data from
+ * @param output the allocation to write date into
+ * @param usrData The user definied params to pass to the root script.  May be
+ *                NULL.
+ * @param sc Extra control infomation used to select a sub-region of the
+ *           allocation to be processed or suggest a walking strategy.  May be
+ *           NULL.
  *
- * @param m
- * @param x
- * @param y
- * @param z
- */
-extern void __attribute__((overloadable))
-rsMatrixScale(rs_matrix4x4 *m, float x, float y, float z);
-
-/**
- * Multiple matrix m with a translation matrix
- *
- * @param m
- * @param x
- * @param y
- * @param z
- */
-extern void __attribute__((overloadable))
-rsMatrixTranslate(rs_matrix4x4 *m, float x, float y, float z);
-
-/**
- * Load an Ortho projection matrix constructed from the 6 planes
- *
- * @param m
- * @param left
- * @param right
- * @param bottom
- * @param top
- * @param near
- * @param far
- */
-extern void __attribute__((overloadable))
-rsMatrixLoadOrtho(rs_matrix4x4 *m, float left, float right, float bottom, float top, float near, float far);
-
-/**
- * Load an Frustum projection matrix constructed from the 6 planes
- *
- * @param m
- * @param left
- * @param right
- * @param bottom
- * @param top
- * @param near
- * @param far
- */
-extern void __attribute__((overloadable))
-rsMatrixLoadFrustum(rs_matrix4x4 *m, float left, float right, float bottom, float top, float near, float far);
-
-/**
- * Load an perspective projection matrix constructed from the 6 planes
- *
- * @param m
- * @param fovy Field of view, in degrees along the Y axis.
- * @param aspect Ratio of x / y.
- * @param near
- * @param far
- */
-extern void __attribute__((overloadable))
-rsMatrixLoadPerspective(rs_matrix4x4* m, float fovy, float aspect, float near, float far);
-
+ *  */
 #if !defined(RS_VERSION) || (RS_VERSION < 14)
-/**
- * Multiply a vector by a matrix and return the result vector.
- * API version 10-13
- */
-_RS_RUNTIME float4 __attribute__((overloadable))
-rsMatrixMultiply(rs_matrix4x4 *m, float4 in);
-
+extern void __attribute__((overloadable))
+    rsForEach(rs_script script, rs_allocation input,
+              rs_allocation output, const void * usrData,
+              const rs_script_call_t *sc);
 /**
  * \overload
  */
-_RS_RUNTIME float4 __attribute__((overloadable))
-rsMatrixMultiply(rs_matrix4x4 *m, float3 in);
-
-/**
- * \overload
- */
-_RS_RUNTIME float4 __attribute__((overloadable))
-rsMatrixMultiply(rs_matrix4x4 *m, float2 in);
-
-/**
- * \overload
- */
-_RS_RUNTIME float3 __attribute__((overloadable))
-rsMatrixMultiply(rs_matrix3x3 *m, float3 in);
-
-/**
- * \overload
- */
-_RS_RUNTIME float3 __attribute__((overloadable))
-rsMatrixMultiply(rs_matrix3x3 *m, float2 in);
-
-/**
- * \overload
- */
-_RS_RUNTIME float2 __attribute__((overloadable))
-rsMatrixMultiply(rs_matrix2x2 *m, float2 in);
+extern void __attribute__((overloadable))
+    rsForEach(rs_script script, rs_allocation input,
+              rs_allocation output, const void * usrData);
 #else
-/**
- * Multiply a vector by a matrix and return the result vector.
- * API version 10-13
- */
-_RS_RUNTIME float4 __attribute__((overloadable))
-rsMatrixMultiply(const rs_matrix4x4 *m, float4 in);
 
 /**
+ * Make a script to script call to launch work. One of the input or output is
+ * required to be a valid object. The input and output must be of the same
+ * dimensions.
+ * API 14+
+ *
+ * @param script The target script to call
+ * @param input The allocation to source data from
+ * @param output the allocation to write date into
+ * @param usrData The user definied params to pass to the root script.  May be
+ *                NULL.
+ * @param usrDataLen The size of the userData structure.  This will be used to
+ *                   perform a shallow copy of the data if necessary.
+ * @param sc Extra control infomation used to select a sub-region of the
+ *           allocation to be processed or suggest a walking strategy.  May be
+ *           NULL.
+ *
+ */
+extern void __attribute__((overloadable))
+    rsForEach(rs_script script, rs_allocation input, rs_allocation output,
+              const void * usrData, size_t usrDataLen, const rs_script_call_t *);
+/**
  * \overload
  */
-_RS_RUNTIME float4 __attribute__((overloadable))
-rsMatrixMultiply(const rs_matrix4x4 *m, float3 in);
-
+extern void __attribute__((overloadable))
+    rsForEach(rs_script script, rs_allocation input, rs_allocation output,
+              const void * usrData, size_t usrDataLen);
 /**
  * \overload
  */
-_RS_RUNTIME float4 __attribute__((overloadable))
-rsMatrixMultiply(const rs_matrix4x4 *m, float2 in);
-
-/**
- * \overload
- */
-_RS_RUNTIME float3 __attribute__((overloadable))
-rsMatrixMultiply(const rs_matrix3x3 *m, float3 in);
-
-/**
- * \overload
- */
-_RS_RUNTIME float3 __attribute__((overloadable))
-rsMatrixMultiply(const rs_matrix3x3 *m, float2 in);
-
-/**
- * \overload
- */
-_RS_RUNTIME float2 __attribute__((overloadable))
-rsMatrixMultiply(const rs_matrix2x2 *m, float2 in);
+extern void __attribute__((overloadable))
+    rsForEach(rs_script script, rs_allocation input, rs_allocation output);
 #endif
 
 
-/**
- * Returns true if the matrix was successfully inversed
- *
- * @param m
- */
-extern bool __attribute__((overloadable)) rsMatrixInverse(rs_matrix4x4 *m);
-
-/**
- * Returns true if the matrix was successfully inversed and transposed.
- *
- * @param m
- */
-extern bool __attribute__((overloadable)) rsMatrixInverseTranspose(rs_matrix4x4 *m);
-
-/**
- * Transpose the matrix m.
- *
- * @param m
- */
-extern void __attribute__((overloadable)) rsMatrixTranspose(rs_matrix4x4 *m);
-/**
- * \overload
- */
-extern void __attribute__((overloadable)) rsMatrixTranspose(rs_matrix3x3 *m);
-/**
- * \overload
- */
-extern void __attribute__((overloadable)) rsMatrixTranspose(rs_matrix2x2 *m);
-
-/////////////////////////////////////////////////////
-// quaternion ops
-/////////////////////////////////////////////////////
-
-/**
- * Set the quaternion components
- * @param w component
- * @param x component
- * @param y component
- * @param z component
- */
-static void __attribute__((overloadable))
-rsQuaternionSet(rs_quaternion *q, float w, float x, float y, float z) {
-    q->w = w;
-    q->x = x;
-    q->y = y;
-    q->z = z;
-}
-
-/**
- * Set the quaternion from another quaternion
- * @param q destination quaternion
- * @param rhs source quaternion
- */
-static void __attribute__((overloadable))
-rsQuaternionSet(rs_quaternion *q, const rs_quaternion *rhs) {
-    q->w = rhs->w;
-    q->x = rhs->x;
-    q->y = rhs->y;
-    q->z = rhs->z;
-}
-
-/**
- * Multiply quaternion by a scalar
- * @param q quaternion to multiply
- * @param s scalar
- */
-static void __attribute__((overloadable))
-rsQuaternionMultiply(rs_quaternion *q, float s) {
-    q->w *= s;
-    q->x *= s;
-    q->y *= s;
-    q->z *= s;
-}
-
-/**
- * Multiply quaternion by another quaternion
- * @param q destination quaternion
- * @param rhs right hand side quaternion to multiply by
- */
-static void __attribute__((overloadable))
-rsQuaternionMultiply(rs_quaternion *q, const rs_quaternion *rhs) {
-    q->w = -q->x*rhs->x - q->y*rhs->y - q->z*rhs->z + q->w*rhs->w;
-    q->x =  q->x*rhs->w + q->y*rhs->z - q->z*rhs->y + q->w*rhs->x;
-    q->y = -q->x*rhs->z + q->y*rhs->w + q->z*rhs->x + q->w*rhs->y;
-    q->z =  q->x*rhs->y - q->y*rhs->x + q->z*rhs->w + q->w*rhs->z;
-}
-
-/**
- * Add two quaternions
- * @param q destination quaternion to add to
- * @param rsh right hand side quaternion to add
- */
-static void
-rsQuaternionAdd(rs_quaternion *q, const rs_quaternion *rhs) {
-    q->w *= rhs->w;
-    q->x *= rhs->x;
-    q->y *= rhs->y;
-    q->z *= rhs->z;
-}
-
-/**
- * Loads a quaternion that represents a rotation about an arbitrary unit vector
- * @param q quaternion to set
- * @param rot angle to rotate by
- * @param x component of a vector
- * @param y component of a vector
- * @param x component of a vector
- */
-static void
-rsQuaternionLoadRotateUnit(rs_quaternion *q, float rot, float x, float y, float z) {
-    rot *= (float)(M_PI / 180.0f) * 0.5f;
-    float c = cos(rot);
-    float s = sin(rot);
-
-    q->w = c;
-    q->x = x * s;
-    q->y = y * s;
-    q->z = z * s;
-}
-
-/**
- * Loads a quaternion that represents a rotation about an arbitrary vector
- * (doesn't have to be unit)
- * @param q quaternion to set
- * @param rot angle to rotate by
- * @param x component of a vector
- * @param y component of a vector
- * @param x component of a vector
- */
-static void
-rsQuaternionLoadRotate(rs_quaternion *q, float rot, float x, float y, float z) {
-    const float len = x*x + y*y + z*z;
-    if (len != 1) {
-        const float recipLen = 1.f / sqrt(len);
-        x *= recipLen;
-        y *= recipLen;
-        z *= recipLen;
-    }
-    rsQuaternionLoadRotateUnit(q, rot, x, y, z);
-}
-
-/**
- * Conjugates the quaternion
- * @param q quaternion to conjugate 
- */
-static void
-rsQuaternionConjugate(rs_quaternion *q) {
-    q->x = -q->x;
-    q->y = -q->y;
-    q->z = -q->z;
-}
-
-/**
- * Dot product of two quaternions
- * @param q0 first quaternion
- * @param q1 second quaternion
- * @return dot product between q0 and q1
- */
-static float
-rsQuaternionDot(const rs_quaternion *q0, const rs_quaternion *q1) {
-    return q0->w*q1->w + q0->x*q1->x + q0->y*q1->y + q0->z*q1->z;
-}
-
-/**
- * Normalizes the quaternion
- * @param q quaternion to normalize
- */
-static void
-rsQuaternionNormalize(rs_quaternion *q) {
-    const float len = rsQuaternionDot(q, q);
-    if (len != 1) {
-        const float recipLen = 1.f / sqrt(len);
-        rsQuaternionMultiply(q, recipLen);
-    }
-}
-
-/**
- * Performs spherical linear interpolation between two quaternions
- * @param q result quaternion from interpolation
- * @param q0 first param
- * @param q1 second param
- * @param t how much to interpolate by
- */
-static void
-rsQuaternionSlerp(rs_quaternion *q, const rs_quaternion *q0, const rs_quaternion *q1, float t) {
-    if (t <= 0.0f) {
-        rsQuaternionSet(q, q0);
-        return;
-    }
-    if (t >= 1.0f) {
-        rsQuaternionSet(q, q1);
-        return;
-    }
-
-    rs_quaternion tempq0, tempq1;
-    rsQuaternionSet(&tempq0, q0);
-    rsQuaternionSet(&tempq1, q1);
-
-    float angle = rsQuaternionDot(q0, q1);
-    if (angle < 0) {
-        rsQuaternionMultiply(&tempq0, -1.0f);
-        angle *= -1.0f;
-    }
-
-    float scale, invScale;
-    if (angle + 1.0f > 0.05f) {
-        if (1.0f - angle >= 0.05f) {
-            float theta = acos(angle);
-            float invSinTheta = 1.0f / sin(theta);
-            scale = sin(theta * (1.0f - t)) * invSinTheta;
-            invScale = sin(theta * t) * invSinTheta;
-        } else {
-            scale = 1.0f - t;
-            invScale = t;
-        }
-    } else {
-        rsQuaternionSet(&tempq1, tempq0.z, -tempq0.y, tempq0.x, -tempq0.w);
-        scale = sin(M_PI * (0.5f - t));
-        invScale = sin(M_PI * t);
-    }
-
-    rsQuaternionSet(q, tempq0.w*scale + tempq1.w*invScale, tempq0.x*scale + tempq1.x*invScale,
-                        tempq0.y*scale + tempq1.y*invScale, tempq0.z*scale + tempq1.z*invScale);
-}
-
-/**
- * Computes rotation matrix from the normalized quaternion
- * @param m resulting matrix
- * @param p normalized quaternion
- */
-static void rsQuaternionGetMatrixUnit(rs_matrix4x4 *m, const rs_quaternion *q) {
-    float x2 = 2.0f * q->x * q->x;
-    float y2 = 2.0f * q->y * q->y;
-    float z2 = 2.0f * q->z * q->z;
-    float xy = 2.0f * q->x * q->y;
-    float wz = 2.0f * q->w * q->z;
-    float xz = 2.0f * q->x * q->z;
-    float wy = 2.0f * q->w * q->y;
-    float wx = 2.0f * q->w * q->x;
-    float yz = 2.0f * q->y * q->z;
-
-    m->m[0] = 1.0f - y2 - z2;
-    m->m[1] = xy - wz;
-    m->m[2] = xz + wy;
-    m->m[3] = 0.0f;
-
-    m->m[4] = xy + wz;
-    m->m[5] = 1.0f - x2 - z2;
-    m->m[6] = yz - wx;
-    m->m[7] = 0.0f;
-
-    m->m[8] = xz - wy;
-    m->m[9] = yz - wx;
-    m->m[10] = 1.0f - x2 - y2;
-    m->m[11] = 0.0f;
-
-    m->m[12] = 0.0f;
-    m->m[13] = 0.0f;
-    m->m[14] = 0.0f;
-    m->m[15] = 1.0f;
-}
-
-/////////////////////////////////////////////////////
-// utility funcs
-/////////////////////////////////////////////////////
-
-/**
- * Computes 6 frustum planes from the view projection matrix
- * @param viewProj matrix to extract planes from
- * @param left plane
- * @param right plane
- * @param top plane
- * @param bottom plane
- * @param near plane
- * @param far plane
- */
-__inline__ static void __attribute__((overloadable, always_inline))
-rsExtractFrustumPlanes(const rs_matrix4x4 *viewProj,
-                         float4 *left, float4 *right,
-                         float4 *top, float4 *bottom,
-                         float4 *near, float4 *far) {
-    // x y z w = a b c d in the plane equation
-    left->x = viewProj->m[3] + viewProj->m[0];
-    left->y = viewProj->m[7] + viewProj->m[4];
-    left->z = viewProj->m[11] + viewProj->m[8];
-    left->w = viewProj->m[15] + viewProj->m[12];
-
-    right->x = viewProj->m[3] - viewProj->m[0];
-    right->y = viewProj->m[7] - viewProj->m[4];
-    right->z = viewProj->m[11] - viewProj->m[8];
-    right->w = viewProj->m[15] - viewProj->m[12];
-
-    top->x = viewProj->m[3] - viewProj->m[1];
-    top->y = viewProj->m[7] - viewProj->m[5];
-    top->z = viewProj->m[11] - viewProj->m[9];
-    top->w = viewProj->m[15] - viewProj->m[13];
-
-    bottom->x = viewProj->m[3] + viewProj->m[1];
-    bottom->y = viewProj->m[7] + viewProj->m[5];
-    bottom->z = viewProj->m[11] + viewProj->m[9];
-    bottom->w = viewProj->m[15] + viewProj->m[13];
-
-    near->x = viewProj->m[3] + viewProj->m[2];
-    near->y = viewProj->m[7] + viewProj->m[6];
-    near->z = viewProj->m[11] + viewProj->m[10];
-    near->w = viewProj->m[15] + viewProj->m[14];
-
-    far->x = viewProj->m[3] - viewProj->m[2];
-    far->y = viewProj->m[7] - viewProj->m[6];
-    far->z = viewProj->m[11] - viewProj->m[10];
-    far->w = viewProj->m[15] - viewProj->m[14];
-
-    float len = length(left->xyz);
-    *left /= len;
-    len = length(right->xyz);
-    *right /= len;
-    len = length(top->xyz);
-    *top /= len;
-    len = length(bottom->xyz);
-    *bottom /= len;
-    len = length(near->xyz);
-    *near /= len;
-    len = length(far->xyz);
-    *far /= len;
-}
-
-/**
- * Checks if a sphere is withing the 6 frustum planes
- * @param sphere float4 representing the sphere
- * @param left plane
- * @param right plane
- * @param top plane
- * @param bottom plane
- * @param near plane
- * @param far plane
- */
-__inline__ static bool __attribute__((overloadable, always_inline))
-rsIsSphereInFrustum(float4 *sphere,
-                      float4 *left, float4 *right,
-                      float4 *top, float4 *bottom,
-                      float4 *near, float4 *far) {
-
-    float distToCenter = dot(left->xyz, sphere->xyz) + left->w;
-    if (distToCenter < -sphere->w) {
-        return false;
-    }
-    distToCenter = dot(right->xyz, sphere->xyz) + right->w;
-    if (distToCenter < -sphere->w) {
-        return false;
-    }
-    distToCenter = dot(top->xyz, sphere->xyz) + top->w;
-    if (distToCenter < -sphere->w) {
-        return false;
-    }
-    distToCenter = dot(bottom->xyz, sphere->xyz) + bottom->w;
-    if (distToCenter < -sphere->w) {
-        return false;
-    }
-    distToCenter = dot(near->xyz, sphere->xyz) + near->w;
-    if (distToCenter < -sphere->w) {
-        return false;
-    }
-    distToCenter = dot(far->xyz, sphere->xyz) + far->w;
-    if (distToCenter < -sphere->w) {
-        return false;
-    }
-    return true;
-}
-
-
-/////////////////////////////////////////////////////
-// int ops
-/////////////////////////////////////////////////////
-
-/**
- * Clamp the value amount between low and high.
- *
- * @param amount  The value to clamp
- * @param low
- * @param high
- */
-_RS_RUNTIME uint __attribute__((overloadable, always_inline)) rsClamp(uint amount, uint low, uint high);
-
-/**
- * \overload
- */
-_RS_RUNTIME int __attribute__((overloadable, always_inline)) rsClamp(int amount, int low, int high);
-/**
- * \overload
- */
-_RS_RUNTIME ushort __attribute__((overloadable, always_inline)) rsClamp(ushort amount, ushort low, ushort high);
-/**
- * \overload
- */
-_RS_RUNTIME short __attribute__((overloadable, always_inline)) rsClamp(short amount, short low, short high);
-/**
- * \overload
- */
-_RS_RUNTIME uchar __attribute__((overloadable, always_inline)) rsClamp(uchar amount, uchar low, uchar high);
-/**
- * \overload
- */
-_RS_RUNTIME char __attribute__((overloadable, always_inline)) rsClamp(char amount, char low, char high);
 
 #undef _RS_RUNTIME
 
diff --git a/scriptc/rs_debug.rsh b/scriptc/rs_debug.rsh
new file mode 100644
index 0000000..074c28f
--- /dev/null
+++ b/scriptc/rs_debug.rsh
@@ -0,0 +1,123 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** @file rs_debug.rsh
+ *  \brief Utility debugging routines
+ *
+ *  Routines intended to be used during application developement.  These should
+ *  not be used in shipping applications.  All print a string and value pair to
+ *  the standard log.
+ *
+ */
+
+#ifndef __RS_DEBUG_RSH__
+#define __RS_DEBUG_RSH__
+
+
+
+/**
+ * Debug function.  Prints a string and value to the log.
+ */
+extern void __attribute__((overloadable))
+    rsDebug(const char *, float);
+/**
+ * Debug function.  Prints a string and value to the log.
+ */
+extern void __attribute__((overloadable))
+    rsDebug(const char *, float, float);
+/**
+ * Debug function.  Prints a string and value to the log.
+ */
+extern void __attribute__((overloadable))
+    rsDebug(const char *, float, float, float);
+/**
+ * Debug function.  Prints a string and value to the log.
+ */
+extern void __attribute__((overloadable))
+    rsDebug(const char *, float, float, float, float);
+/**
+ * Debug function.  Prints a string and value to the log.
+ */
+extern void __attribute__((overloadable))
+    rsDebug(const char *, double);
+/**
+ * Debug function.  Prints a string and value to the log.
+ */
+extern void __attribute__((overloadable))
+    rsDebug(const char *, const rs_matrix4x4 *);
+/**
+ * Debug function.  Prints a string and value to the log.
+ */
+extern void __attribute__((overloadable))
+    rsDebug(const char *, const rs_matrix3x3 *);
+/**
+ * Debug function.  Prints a string and value to the log.
+ */
+extern void __attribute__((overloadable))
+    rsDebug(const char *, const rs_matrix2x2 *);
+/**
+ * Debug function.  Prints a string and value to the log.
+ */
+extern void __attribute__((overloadable))
+    rsDebug(const char *, int);
+/**
+ * Debug function.  Prints a string and value to the log.
+ */
+extern void __attribute__((overloadable))
+    rsDebug(const char *, uint);
+/**
+ * Debug function.  Prints a string and value to the log.
+ */
+extern void __attribute__((overloadable))
+    rsDebug(const char *, long);
+/**
+ * Debug function.  Prints a string and value to the log.
+ */
+extern void __attribute__((overloadable))
+    rsDebug(const char *, unsigned long);
+/**
+ * Debug function.  Prints a string and value to the log.
+ */
+extern void __attribute__((overloadable))
+    rsDebug(const char *, long long);
+/**
+ * Debug function.  Prints a string and value to the log.
+ */
+extern void __attribute__((overloadable))
+    rsDebug(const char *, unsigned long long);
+/**
+ * Debug function.  Prints a string and value to the log.
+ */
+extern void __attribute__((overloadable))
+    rsDebug(const char *, const void *);
+#define RS_DEBUG(a) rsDebug(#a, a)
+#define RS_DEBUG_MARKER rsDebug(__FILE__, __LINE__)
+
+
+/**
+ * Debug function.  Prints a string and value to the log.
+ */
+_RS_RUNTIME void __attribute__((overloadable)) rsDebug(const char *s, float2 v);
+/**
+ * Debug function.  Prints a string and value to the log.
+ */
+_RS_RUNTIME void __attribute__((overloadable)) rsDebug(const char *s, float3 v);
+/**
+ * Debug function.  Prints a string and value to the log.
+ */
+_RS_RUNTIME void __attribute__((overloadable)) rsDebug(const char *s, float4 v);
+
+#endif
diff --git a/scriptc/rs_graphics.rsh b/scriptc/rs_graphics.rsh
index 9a8a4e6..00fd1b1 100644
--- a/scriptc/rs_graphics.rsh
+++ b/scriptc/rs_graphics.rsh
@@ -1,3 +1,25 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** @file rs_graphics.rsh
+ *  \brief Renderscript graphics API
+ *
+ *  A set of graphics functions used by Renderscript.
+ *
+ */
 #ifndef __RS_GRAPHICS_RSH__
 #define __RS_GRAPHICS_RSH__
 
@@ -37,7 +59,7 @@
     rsgClearAllRenderTargets(void);
 
 /**
- * Force RenderScript to finish all rendering commands
+ * Force Renderscript to finish all rendering commands
  */
 extern uint __attribute__((overloadable))
     rsgFinish(void);
@@ -94,16 +116,38 @@
 extern void __attribute__((overloadable))
     rsgBindTexture(rs_program_fragment, uint slot, rs_allocation);
 
-
+/**
+ * Load the projection matrix for a currently bound fixed function
+ * vertex program. Calling this function with a custom vertex shader
+ * would result in an error.
+ * @param proj projection matrix
+ */
 extern void __attribute__((overloadable))
-    rsgProgramVertexLoadProjectionMatrix(const rs_matrix4x4 *);
+    rsgProgramVertexLoadProjectionMatrix(const rs_matrix4x4 *proj);
+/**
+ * Load the model matrix for a currently bound fixed function
+ * vertex program. Calling this function with a custom vertex shader
+ * would result in an error.
+ * @param model model matrix
+ */
 extern void __attribute__((overloadable))
-    rsgProgramVertexLoadModelMatrix(const rs_matrix4x4 *);
+    rsgProgramVertexLoadModelMatrix(const rs_matrix4x4 *model);
+/**
+ * Load the texture matrix for a currently bound fixed function
+ * vertex program. Calling this function with a custom vertex shader
+ * would result in an error.
+ * @param tex texture matrix
+ */
 extern void __attribute__((overloadable))
-    rsgProgramVertexLoadTextureMatrix(const rs_matrix4x4 *);
-
+    rsgProgramVertexLoadTextureMatrix(const rs_matrix4x4 *tex);
+/**
+ * Get the projection matrix for a currently bound fixed function
+ * vertex program. Calling this function with a custom vertex shader
+ * would result in an error.
+ * @param proj matrix to store the current projection matrix into
+ */
 extern void __attribute__((overloadable))
-    rsgProgramVertexGetProjectionMatrix(rs_matrix4x4 *);
+    rsgProgramVertexGetProjectionMatrix(rs_matrix4x4 *proj);
 
 /**
  * Set the constant color for a fixed function emulation program.
@@ -239,15 +283,29 @@
     rsgDrawSpriteScreenspace(float x, float y, float z, float w, float h);
 
 /**
- * Draw a mesh of geometry using the current context state.  The whole mesh is
+ * Draw a mesh using the current context state.  The whole mesh is
  * rendered.
  *
  * @param ism
  */
 extern void __attribute__((overloadable))
     rsgDrawMesh(rs_mesh ism);
+/**
+ * Draw part of a mesh using the current context state.
+ * @param ism mesh object to render
+ * @param primitiveIndex for meshes that contain multiple primitive groups
+ *        this parameter specifies the index of the group to draw.
+ */
 extern void __attribute__((overloadable))
     rsgDrawMesh(rs_mesh ism, uint primitiveIndex);
+/**
+ * Draw specified index range of part of a mesh using the current context state.
+ * @param ism mesh object to render
+ * @param primitiveIndex for meshes that contain multiple primitive groups
+ *        this parameter specifies the index of the group to draw.
+ * @param start starting index in the range
+ * @param len number of indices to draw
+ */
 extern void __attribute__((overloadable))
     rsgDrawMesh(rs_mesh ism, uint primitiveIndex, uint start, uint len);
 
@@ -264,29 +322,54 @@
 
 /**
  * Clears the depth suface to the specified value.
- *
  */
 extern void __attribute__((overloadable))
     rsgClearDepth(float value);
-
+/**
+ * Draws text given a string and location
+ */
 extern void __attribute__((overloadable))
     rsgDrawText(const char *, int x, int y);
+/**
+ * \overload
+ */
 extern void __attribute__((overloadable))
     rsgDrawText(rs_allocation, int x, int y);
+/**
+ * Binds the font object to be used for all subsequent font rendering calls
+ * @param font object to bind
+ */
 extern void __attribute__((overloadable))
-    rsgBindFont(rs_font);
+    rsgBindFont(rs_font font);
+/**
+ * Sets the font color for all subsequent rendering calls
+ * @param r red component
+ * @param g green component
+ * @param b blue component
+ * @param a alpha component
+ */
 extern void __attribute__((overloadable))
-    rsgFontColor(float, float, float, float);
-// Returns the bounding box of the text relative to (0, 0)
-// Any of left, right, top, bottom could be NULL
+    rsgFontColor(float r, float g, float b, float a);
+/**
+ * Returns the bounding box of the text relative to (0, 0)
+ * Any of left, right, top, bottom could be NULL
+ */
 extern void __attribute__((overloadable))
     rsgMeasureText(const char *, int *left, int *right, int *top, int *bottom);
+/**
+ * \overload
+ */
 extern void __attribute__((overloadable))
     rsgMeasureText(rs_allocation, int *left, int *right, int *top, int *bottom);
-
+/**
+ * Computes an axis aligned bounding box of a mesh object
+ */
 extern void __attribute__((overloadable))
     rsgMeshComputeBoundingBox(rs_mesh mesh, float *minX, float *minY, float *minZ,
                                                 float *maxX, float *maxY, float *maxZ);
+/**
+ * \overload
+ */
 __inline__ static void __attribute__((overloadable, always_inline))
 rsgMeshComputeBoundingBox(rs_mesh mesh, float3 *bBoxMin, float3 *bBoxMax) {
     float x1, y1, z1, x2, y2, z2;
diff --git a/scriptc/rs_math.rsh b/scriptc/rs_math.rsh
index e44c051..8117ca8 100644
--- a/scriptc/rs_math.rsh
+++ b/scriptc/rs_math.rsh
@@ -1,309 +1,30 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 /** @file rs_math.rsh
  *  \brief todo-jsams
  *
  *  todo-jsams
  *
  */
+
 #ifndef __RS_MATH_RSH__
 #define __RS_MATH_RSH__
 
 
-
-/**
- * \overload
- */
-extern void __attribute__((overloadable))
-    rsSetObject(rs_element *dst, rs_element src);
-/**
- * \overload
- */
-extern void __attribute__((overloadable))
-    rsSetObject(rs_type *dst, rs_type src);
-/**
- * \overload
- */
-extern void __attribute__((overloadable))
-    rsSetObject(rs_allocation *dst, rs_allocation src);
-/**
- * \overload
- */
-extern void __attribute__((overloadable))
-    rsSetObject(rs_sampler *dst, rs_sampler src);
-/**
- * \overload
- */
-extern void __attribute__((overloadable))
-    rsSetObject(rs_script *dst, rs_script src);
-/**
- * \overload
- */
-extern void __attribute__((overloadable))
-    rsSetObject(rs_mesh *dst, rs_mesh src);
-/**
- * Copy reference to the specified object.
- *
- * @param dst
- * @param src
- */
-extern void __attribute__((overloadable))
-    rsSetObject(rs_program_fragment *dst, rs_program_fragment src);
-/**
- * \overload
- */
-extern void __attribute__((overloadable))
-    rsSetObject(rs_program_vertex *dst, rs_program_vertex src);
-/**
- * \overload
- */
-extern void __attribute__((overloadable))
-    rsSetObject(rs_program_raster *dst, rs_program_raster src);
-/**
- * \overload
- */
-extern void __attribute__((overloadable))
-    rsSetObject(rs_program_store *dst, rs_program_store src);
-/**
- * \overload
- */
-extern void __attribute__((overloadable))
-    rsSetObject(rs_font *dst, rs_font src);
-
-/**
- * Sets the object to NULL.
- *
- * @return bool
- */
-extern void __attribute__((overloadable))
-    rsClearObject(rs_element *dst);
-/**
- * \overload
- */
-extern void __attribute__((overloadable))
-    rsClearObject(rs_type *dst);
-/**
- * \overload
- */
-extern void __attribute__((overloadable))
-    rsClearObject(rs_allocation *dst);
-/**
- * \overload
- */
-extern void __attribute__((overloadable))
-    rsClearObject(rs_sampler *dst);
-/**
- * \overload
- */
-extern void __attribute__((overloadable))
-    rsClearObject(rs_script *dst);
-/**
- * \overload
- */
-extern void __attribute__((overloadable))
-    rsClearObject(rs_mesh *dst);
-/**
- * \overload
- */
-extern void __attribute__((overloadable))
-    rsClearObject(rs_program_fragment *dst);
-/**
- * \overload
- */
-extern void __attribute__((overloadable))
-    rsClearObject(rs_program_vertex *dst);
-/**
- * \overload
- */
-extern void __attribute__((overloadable))
-    rsClearObject(rs_program_raster *dst);
-/**
- * \overload
- */
-extern void __attribute__((overloadable))
-    rsClearObject(rs_program_store *dst);
-/**
- * \overload
- */
-extern void __attribute__((overloadable))
-    rsClearObject(rs_font *dst);
-
-/**
- * \overload
- */
-extern bool __attribute__((overloadable))
-    rsIsObject(rs_element);
-/**
- * \overload
- */
-extern bool __attribute__((overloadable))
-    rsIsObject(rs_type);
-/**
- * \overload
- */
-extern bool __attribute__((overloadable))
-    rsIsObject(rs_allocation);
-/**
- * Tests if the object is valid.  Returns true if the object is valid, false if
- * it is NULL.
- *
- * @return bool
- */
-
-extern bool __attribute__((overloadable))
-    rsIsObject(rs_sampler);
-/**
- * \overload
- */
-extern bool __attribute__((overloadable))
-    rsIsObject(rs_script);
-/**
- * \overload
- */
-extern bool __attribute__((overloadable))
-    rsIsObject(rs_mesh);
-/**
- * \overload
- */
-extern bool __attribute__((overloadable))
-    rsIsObject(rs_program_fragment);
-/**
- * \overload
- */
-extern bool __attribute__((overloadable))
-    rsIsObject(rs_program_vertex);
-/**
- * \overload
- */
-extern bool __attribute__((overloadable))
-    rsIsObject(rs_program_raster);
-/**
- * \overload
- */
-extern bool __attribute__((overloadable))
-    rsIsObject(rs_program_store);
-/**
- * \overload
- */
-extern bool __attribute__((overloadable))
-    rsIsObject(rs_font);
-
-
-/**
- * Returns the Allocation for a given pointer.  The pointer should point within
- * a valid allocation.  The results are undefined if the pointer is not from a
- * valid allocation.
- */
-extern rs_allocation __attribute__((overloadable))
-    rsGetAllocation(const void *);
-
-/**
- * Query the dimension of an allocation.
- *
- * @return uint32_t The X dimension of the allocation.
- */
-extern uint32_t __attribute__((overloadable))
-    rsAllocationGetDimX(rs_allocation);
-
-/**
- * Query the dimension of an allocation.
- *
- * @return uint32_t The Y dimension of the allocation.
- */
-extern uint32_t __attribute__((overloadable))
-    rsAllocationGetDimY(rs_allocation);
-
-/**
- * Query the dimension of an allocation.
- *
- * @return uint32_t The Z dimension of the allocation.
- */
-extern uint32_t __attribute__((overloadable))
-    rsAllocationGetDimZ(rs_allocation);
-
-/**
- * Query an allocation for the presence of more than one LOD.
- *
- * @return uint32_t Returns 1 if more than one LOD is present, 0 otherwise.
- */
-extern uint32_t __attribute__((overloadable))
-    rsAllocationGetDimLOD(rs_allocation);
-
-/**
- * Query an allocation for the presence of more than one face.
- *
- * @return uint32_t Returns 1 if more than one face is present, 0 otherwise.
- */
-extern uint32_t __attribute__((overloadable))
-    rsAllocationGetDimFaces(rs_allocation);
-
-/**
- * Copy part of an allocation from another allocation.
- *
- * @param dstAlloc Allocation to copy data into.
- * @param dstOff The offset of the first element to be copied in
- *               the destination allocation.
- * @param dstMip Mip level in the destination allocation.
- * @param count The number of elements to be copied.
- * @param srcAlloc The source data allocation.
- * @param srcOff The offset of the first element in data to be
- *               copied in the source allocation.
- * @param srcMip Mip level in the source allocation.
- */
-extern void __attribute__((overloadable))
-    rsAllocationCopy1DRange(rs_allocation dstAlloc,
-                            uint32_t dstOff, uint32_t dstMip,
-                            uint32_t count,
-                            rs_allocation srcAlloc,
-                            uint32_t srcOff, uint32_t srcMip);
-
-/**
- * Copy a rectangular region into the allocation from another
- * allocation.
- *
- * @param dstAlloc allocation to copy data into.
- * @param dstXoff X offset of the region to update in the
- *                destination allocation.
- * @param dstYoff Y offset of the region to update in the
- *                destination allocation.
- * @param dstMip Mip level in the destination allocation.
- * @param dstFace Cubemap face of the destination allocation,
- *                ignored for allocations that aren't cubemaps.
- * @param width Width of the incoming region to update.
- * @param height Height of the incoming region to update.
- * @param srcAlloc The source data allocation.
- * @param srcXoff X offset in data of the source allocation.
- * @param srcYoff Y offset in data of the source allocation.
- * @param srcMip Mip level in the source allocation.
- * @param srcFace Cubemap face of the source allocation,
- *                ignored for allocations that aren't cubemaps.
- */
-extern void __attribute__((overloadable))
-    rsAllocationCopy2DRange(rs_allocation dstAlloc,
-                            uint32_t dstXoff, uint32_t dstYoff,
-                            uint32_t dstMip,
-                            rs_allocation_cubemap_face dstFace,
-                            uint32_t width, uint32_t height,
-                            rs_allocation srcAlloc,
-                            uint32_t srcXoff, uint32_t srcYoff,
-                            uint32_t srcMip,
-                            rs_allocation_cubemap_face srcFace);
-
-
-/**
- * Extract a single element from an allocation.
- */
-extern const void * __attribute__((overloadable))
-    rsGetElementAt(rs_allocation, uint32_t x);
-/**
- * \overload
- */
-extern const void * __attribute__((overloadable))
-    rsGetElementAt(rs_allocation, uint32_t x, uint32_t y);
-/**
- * \overload
- */
-extern const void * __attribute__((overloadable))
-    rsGetElementAt(rs_allocation, uint32_t x, uint32_t y, uint32_t z);
-
 /**
  * Return a random value between 0 (or min_value) and max_malue.
  */
@@ -331,346 +52,197 @@
 extern float __attribute__((overloadable))
     rsFrac(float);
 
+
+/////////////////////////////////////////////////////
+// int ops
+/////////////////////////////////////////////////////
+
 /**
- * Send a message back to the client.  Will not block and returns true
- * if the message was sendable and false if the fifo was full.
- * A message ID is required.  Data payload is optional.
+ * Clamp the value amount between low and high.
+ *
+ * @param amount  The value to clamp
+ * @param low
+ * @param high
  */
-extern bool __attribute__((overloadable))
-    rsSendToClient(int cmdID);
+_RS_RUNTIME uint __attribute__((overloadable, always_inline)) rsClamp(uint amount, uint low, uint high);
+
 /**
  * \overload
  */
-extern bool __attribute__((overloadable))
-    rsSendToClient(int cmdID, const void *data, uint len);
-/**
- * Send a message back to the client, blocking until the message is queued.
- * A message ID is required.  Data payload is optional.
- */
-extern void __attribute__((overloadable))
-    rsSendToClientBlocking(int cmdID);
+_RS_RUNTIME int __attribute__((overloadable, always_inline)) rsClamp(int amount, int low, int high);
 /**
  * \overload
  */
-extern void __attribute__((overloadable))
-    rsSendToClientBlocking(int cmdID, const void *data, uint len);
-
-
-/**
- * Launch order hint for rsForEach calls.  This provides a hint to the system to
- * determine in which order the root function of the target is called with each
- * cell of the allocation.
- *
- * This is a hint and implementations may not obey the order.
- */
-enum rs_for_each_strategy {
-    RS_FOR_EACH_STRATEGY_SERIAL,
-    RS_FOR_EACH_STRATEGY_DONT_CARE,
-    RS_FOR_EACH_STRATEGY_DST_LINEAR,
-    RS_FOR_EACH_STRATEGY_TILE_SMALL,
-    RS_FOR_EACH_STRATEGY_TILE_MEDIUM,
-    RS_FOR_EACH_STRATEGY_TILE_LARGE
-};
-
-
-/**
- * Structure to provide extra information to a rsForEach call.  Primarly used to
- * restrict the call to a subset of cells in the allocation.
- */
-typedef struct rs_script_call {
-    enum rs_for_each_strategy strategy;
-    uint32_t xStart;
-    uint32_t xEnd;
-    uint32_t yStart;
-    uint32_t yEnd;
-    uint32_t zStart;
-    uint32_t zEnd;
-    uint32_t arrayStart;
-    uint32_t arrayEnd;
-} rs_script_call_t;
-
-/**
- * Make a script to script call to launch work. One of the input or output is
- * required to be a valid object. The input and output must be of the same
- * dimensions.
- * API 10-13
- *
- * @param script The target script to call
- * @param input The allocation to source data from
- * @param output the allocation to write date into
- * @param usrData The user definied params to pass to the root script.  May be
- *                NULL.
- * @param sc Extra control infomation used to select a sub-region of the
- *           allocation to be processed or suggest a walking strategy.  May be
- *           NULL.
- *
- *  */
-#if !defined(RS_VERSION) || (RS_VERSION < 14)
-extern void __attribute__((overloadable))
-    rsForEach(rs_script script script, rs_allocation input,
-              rs_allocation output, const void * usrData,
-              const rs_script_call_t *sc);
+_RS_RUNTIME ushort __attribute__((overloadable, always_inline)) rsClamp(ushort amount, ushort low, ushort high);
 /**
  * \overload
  */
-extern void __attribute__((overloadable))
-    rsForEach(rs_script script, rs_allocation input,
-              rs_allocation output, const void * usrData);
-#else
-
-/**
- * Make a script to script call to launch work. One of the input or output is
- * required to be a valid object. The input and output must be of the same
- * dimensions.
- * API 14+
- *
- * @param script The target script to call
- * @param input The allocation to source data from
- * @param output the allocation to write date into
- * @param usrData The user definied params to pass to the root script.  May be
- *                NULL.
- * @param usrDataLen The size of the userData structure.  This will be used to
- *                   perform a shallow copy of the data if necessary.
- * @param sc Extra control infomation used to select a sub-region of the
- *           allocation to be processed or suggest a walking strategy.  May be
- *           NULL.
- *
- */
-extern void __attribute__((overloadable))
-    rsForEach(rs_script script, rs_allocation input, rs_allocation output,
-              const void * usrData, size_t usrDataLen, const rs_script_call_t *);
+_RS_RUNTIME short __attribute__((overloadable, always_inline)) rsClamp(short amount, short low, short high);
 /**
  * \overload
  */
-extern void __attribute__((overloadable))
-    rsForEach(rs_script script, rs_allocation input, rs_allocation output,
-              const void * usrData, size_t usrDataLen);
+_RS_RUNTIME uchar __attribute__((overloadable, always_inline)) rsClamp(uchar amount, uchar low, uchar high);
 /**
  * \overload
  */
-extern void __attribute__((overloadable))
-    rsForEach(rs_script script, rs_allocation input, rs_allocation output);
-#endif
+_RS_RUNTIME char __attribute__((overloadable, always_inline)) rsClamp(char amount, char low, char high);
 
 
 /**
- * Atomic add one to the value at addr.
- * Equal to rsAtomicAdd(addr, 1)
- *
- * @param addr Address of value to increment
- *
- * @return old value
+ * Computes 6 frustum planes from the view projection matrix
+ * @param viewProj matrix to extract planes from
+ * @param left plane
+ * @param right plane
+ * @param top plane
+ * @param bottom plane
+ * @param near plane
+ * @param far plane
  */
-extern int32_t __attribute__((overloadable))
-    rsAtomicInc(volatile int32_t* addr);
-/**
- * Atomic add one to the value at addr.
- * Equal to rsAtomicAdd(addr, 1)
- *
- * @param addr Address of value to increment
- *
- * @return old value
- */
-extern uint32_t __attribute__((overloadable))
-    rsAtomicInc(volatile uint32_t* addr);
+__inline__ static void __attribute__((overloadable, always_inline))
+rsExtractFrustumPlanes(const rs_matrix4x4 *viewProj,
+                         float4 *left, float4 *right,
+                         float4 *top, float4 *bottom,
+                         float4 *near, float4 *far) {
+    // x y z w = a b c d in the plane equation
+    left->x = viewProj->m[3] + viewProj->m[0];
+    left->y = viewProj->m[7] + viewProj->m[4];
+    left->z = viewProj->m[11] + viewProj->m[8];
+    left->w = viewProj->m[15] + viewProj->m[12];
+
+    right->x = viewProj->m[3] - viewProj->m[0];
+    right->y = viewProj->m[7] - viewProj->m[4];
+    right->z = viewProj->m[11] - viewProj->m[8];
+    right->w = viewProj->m[15] - viewProj->m[12];
+
+    top->x = viewProj->m[3] - viewProj->m[1];
+    top->y = viewProj->m[7] - viewProj->m[5];
+    top->z = viewProj->m[11] - viewProj->m[9];
+    top->w = viewProj->m[15] - viewProj->m[13];
+
+    bottom->x = viewProj->m[3] + viewProj->m[1];
+    bottom->y = viewProj->m[7] + viewProj->m[5];
+    bottom->z = viewProj->m[11] + viewProj->m[9];
+    bottom->w = viewProj->m[15] + viewProj->m[13];
+
+    near->x = viewProj->m[3] + viewProj->m[2];
+    near->y = viewProj->m[7] + viewProj->m[6];
+    near->z = viewProj->m[11] + viewProj->m[10];
+    near->w = viewProj->m[15] + viewProj->m[14];
+
+    far->x = viewProj->m[3] - viewProj->m[2];
+    far->y = viewProj->m[7] - viewProj->m[6];
+    far->z = viewProj->m[11] - viewProj->m[10];
+    far->w = viewProj->m[15] - viewProj->m[14];
+
+    float len = length(left->xyz);
+    *left /= len;
+    len = length(right->xyz);
+    *right /= len;
+    len = length(top->xyz);
+    *top /= len;
+    len = length(bottom->xyz);
+    *bottom /= len;
+    len = length(near->xyz);
+    *near /= len;
+    len = length(far->xyz);
+    *far /= len;
+}
 
 /**
- * Atomic subtract one from the value at addr. Equal to rsAtomicSub(addr, 1)
- *
- * @param addr Address of value to decrement
- *
- * @return old value
+ * Checks if a sphere is withing the 6 frustum planes
+ * @param sphere float4 representing the sphere
+ * @param left plane
+ * @param right plane
+ * @param top plane
+ * @param bottom plane
+ * @param near plane
+ * @param far plane
  */
-extern int32_t __attribute__((overloadable))
-    rsAtomicDec(volatile int32_t* addr);
-/**
- * Atomic subtract one from the value at addr. Equal to rsAtomicSub(addr, 1)
- *
- * @param addr Address of value to decrement
- *
- * @return old value
- */
-extern uint32_t __attribute__((overloadable))
-    rsAtomicDec(volatile uint32_t* addr);
+__inline__ static bool __attribute__((overloadable, always_inline))
+rsIsSphereInFrustum(float4 *sphere,
+                      float4 *left, float4 *right,
+                      float4 *top, float4 *bottom,
+                      float4 *near, float4 *far) {
+
+    float distToCenter = dot(left->xyz, sphere->xyz) + left->w;
+    if (distToCenter < -sphere->w) {
+        return false;
+    }
+    distToCenter = dot(right->xyz, sphere->xyz) + right->w;
+    if (distToCenter < -sphere->w) {
+        return false;
+    }
+    distToCenter = dot(top->xyz, sphere->xyz) + top->w;
+    if (distToCenter < -sphere->w) {
+        return false;
+    }
+    distToCenter = dot(bottom->xyz, sphere->xyz) + bottom->w;
+    if (distToCenter < -sphere->w) {
+        return false;
+    }
+    distToCenter = dot(near->xyz, sphere->xyz) + near->w;
+    if (distToCenter < -sphere->w) {
+        return false;
+    }
+    distToCenter = dot(far->xyz, sphere->xyz) + far->w;
+    if (distToCenter < -sphere->w) {
+        return false;
+    }
+    return true;
+}
+
 
 /**
- * Atomic add a value to the value at addr.  addr[0] += value
+ * Pack floating point (0-1) RGB values into a uchar4.  The alpha component is
+ * set to 255 (1.0).
  *
- * @param addr Address of value to modify
- * @param value Amount to add to the value at addr
+ * @param r
+ * @param g
+ * @param b
  *
- * @return old value
+ * @return uchar4
  */
-extern int32_t __attribute__((overloadable))
-    rsAtomicAdd(volatile int32_t* addr, int32_t value);
-/**
- * Atomic add a value to the value at addr.  addr[0] += value
- *
- * @param addr Address of value to modify
- * @param value Amount to add to the value at addr
- *
- * @return old value
- */
-extern uint32_t __attribute__((overloadable))
-    rsAtomicAdd(volatile uint32_t* addr, uint32_t value);
+_RS_RUNTIME uchar4 __attribute__((overloadable)) rsPackColorTo8888(float r, float g, float b);
 
 /**
- * Atomic Subtract a value from the value at addr.  addr[0] -= value
+ * Pack floating point (0-1) RGBA values into a uchar4.
  *
- * @param addr Address of value to modify
- * @param value Amount to subtract from the value at addr
+ * @param r
+ * @param g
+ * @param b
+ * @param a
  *
- * @return old value
+ * @return uchar4
  */
-extern int32_t __attribute__((overloadable))
-    rsAtomicSub(volatile int32_t* addr, int32_t value);
-/**
- * Atomic Subtract a value from the value at addr.  addr[0] -= value
- *
- * @param addr Address of value to modify
- * @param value Amount to subtract from the value at addr
- *
- * @return old value
- */
-extern uint32_t __attribute__((overloadable))
-    rsAtomicSub(volatile uint32_t* addr, uint32_t value);
+_RS_RUNTIME uchar4 __attribute__((overloadable)) rsPackColorTo8888(float r, float g, float b, float a);
 
 /**
- * Atomic Bitwise and a value from the value at addr.  addr[0] &= value
+ * Pack floating point (0-1) RGB values into a uchar4.  The alpha component is
+ * set to 255 (1.0).
  *
- * @param addr Address of value to modify
- * @param value Amount to and with the value at addr
+ * @param color
  *
- * @return old value
+ * @return uchar4
  */
-extern int32_t __attribute__((overloadable))
-    rsAtomicAnd(volatile int32_t* addr, int32_t value);
-/**
- * Atomic Bitwise and a value from the value at addr.  addr[0] &= value
- *
- * @param addr Address of value to modify
- * @param value Amount to and with the value at addr
- *
- * @return old value
- */
-extern uint32_t __attribute__((overloadable))
-    rsAtomicAnd(volatile uint32_t* addr, uint32_t value);
+_RS_RUNTIME uchar4 __attribute__((overloadable)) rsPackColorTo8888(float3 color);
 
 /**
- * Atomic Bitwise or a value from the value at addr.  addr[0] |= value
+ * Pack floating point (0-1) RGBA values into a uchar4.
  *
- * @param addr Address of value to modify
- * @param value Amount to or with the value at addr
+ * @param color
  *
- * @return old value
+ * @return uchar4
  */
-extern int32_t __attribute__((overloadable))
-    rsAtomicOr(volatile int32_t* addr, int32_t value);
-/**
- * Atomic Bitwise or a value from the value at addr.  addr[0] |= value
- *
- * @param addr Address of value to modify
- * @param value Amount to or with the value at addr
- *
- * @return old value
- */
-extern uint32_t __attribute__((overloadable))
-    rsAtomicOr(volatile uint32_t* addr, uint32_t value);
+_RS_RUNTIME uchar4 __attribute__((overloadable)) rsPackColorTo8888(float4 color);
 
 /**
- * Atomic Bitwise xor a value from the value at addr.  addr[0] ^= value
+ * Unpack a uchar4 color to float4.  The resulting float range will be (0-1).
  *
- * @param addr Address of value to modify
- * @param value Amount to xor with the value at addr
+ * @param c
  *
- * @return old value
+ * @return float4
  */
-extern uint32_t __attribute__((overloadable))
-    rsAtomicXor(volatile uint32_t* addr, uint32_t value);
-/**
- * Atomic Bitwise xor a value from the value at addr.  addr[0] ^= value
- *
- * @param addr Address of value to modify
- * @param value Amount to xor with the value at addr
- *
- * @return old value
- */
-extern int32_t __attribute__((overloadable))
-    rsAtomicXor(volatile int32_t* addr, int32_t value);
-
-/**
- * Atomic Set the value at addr to the min of addr and value
- * addr[0] = rsMin(addr[0], value)
- *
- * @param addr Address of value to modify
- * @param value comparison value
- *
- * @return old value
- */
-extern uint32_t __attribute__((overloadable))
-    rsAtomicMin(volatile uint32_t* addr, uint32_t value);
-/**
- * Atomic Set the value at addr to the min of addr and value
- * addr[0] = rsMin(addr[0], value)
- *
- * @param addr Address of value to modify
- * @param value comparison value
- *
- * @return old value
- */
-extern int32_t __attribute__((overloadable))
-    rsAtomicMin(volatile int32_t* addr, int32_t value);
-
-/**
- * Atomic Set the value at addr to the max of addr and value
- * addr[0] = rsMax(addr[0], value)
- *
- * @param addr Address of value to modify
- * @param value comparison value
- *
- * @return old value
- */
-extern uint32_t __attribute__((overloadable))
-    rsAtomicMax(volatile uint32_t* addr, uint32_t value);
-/**
- * Atomic Set the value at addr to the max of addr and value
- * addr[0] = rsMin(addr[0], value)
- *
- * @param addr Address of value to modify
- * @param value comparison value
- *
- * @return old value
- */
-extern int32_t __attribute__((overloadable))
-    rsAtomicMax(volatile int32_t* addr, int32_t value);
-
-/**
- * Compare-and-set operation with a full memory barrier.
- *
- * If the value at addr matches compareValue then newValue is written.
- *
- * @param addr The address to compare and replace if the compare passes.
- * @param compareValue The value to test addr[0] against.
- * @param newValue The value to write if the test passes.
- *
- * @return old value
- */
-extern int32_t __attribute__((overloadable))
-    rsAtomicCas(volatile int32_t* addr, int32_t compareValue, int32_t newValue);
-
-/**
- * Compare-and-set operation with a full memory barrier.
- *
- * If the value at addr matches compareValue then newValue is written.
- *
- * @param addr The address to compare and replace if the compare passes.
- * @param compareValue The value to test addr[0] against.
- * @param newValue The value to write if the test passes.
- *
- * @return old value
- */
-extern uint32_t __attribute__((overloadable))
-    rsAtomicCas(volatile uint32_t* addr, int32_t compareValue, int32_t newValue);
+_RS_RUNTIME float4 rsUnpackColor8888(uchar4 c);
 
 
 #endif
diff --git a/scriptc/rs_matrix.rsh b/scriptc/rs_matrix.rsh
new file mode 100644
index 0000000..ab3cd3b
--- /dev/null
+++ b/scriptc/rs_matrix.rsh
@@ -0,0 +1,378 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** @file rs_matrix.rsh
+ *  \brief Matrix routines
+ *
+ *
+ */
+
+#ifndef __RS_MATRIX_RSH__
+#define __RS_MATRIX_RSH__
+
+/**
+ * Set one element of a matrix.
+ *
+ * @param m The matrix to be set
+ * @param row
+ * @param col
+ * @param v
+ *
+ * @return void
+ */
+_RS_RUNTIME void __attribute__((overloadable))
+rsMatrixSet(rs_matrix4x4 *m, uint32_t row, uint32_t col, float v);
+/**
+ * \overload
+ */
+_RS_RUNTIME void __attribute__((overloadable))
+rsMatrixSet(rs_matrix3x3 *m, uint32_t row, uint32_t col, float v);
+/**
+ * \overload
+ */
+_RS_RUNTIME void __attribute__((overloadable))
+rsMatrixSet(rs_matrix2x2 *m, uint32_t row, uint32_t col, float v);
+
+/**
+ * Get one element of a matrix.
+ *
+ * @param m The matrix to read from
+ * @param row
+ * @param col
+ *
+ * @return float
+ */
+_RS_RUNTIME float __attribute__((overloadable))
+rsMatrixGet(const rs_matrix4x4 *m, uint32_t row, uint32_t col);
+/**
+ * \overload
+ */
+_RS_RUNTIME float __attribute__((overloadable))
+rsMatrixGet(const rs_matrix3x3 *m, uint32_t row, uint32_t col);
+/**
+ * \overload
+ */
+_RS_RUNTIME float __attribute__((overloadable))
+rsMatrixGet(const rs_matrix2x2 *m, uint32_t row, uint32_t col);
+
+/**
+ * Set the elements of a matrix to the identity matrix.
+ *
+ * @param m
+ */
+extern void __attribute__((overloadable)) rsMatrixLoadIdentity(rs_matrix4x4 *m);
+/**
+ * \overload
+ */
+extern void __attribute__((overloadable)) rsMatrixLoadIdentity(rs_matrix3x3 *m);
+/**
+ * \overload
+ */
+extern void __attribute__((overloadable)) rsMatrixLoadIdentity(rs_matrix2x2 *m);
+
+/**
+ * Set the elements of a matrix from an array of floats.
+ *
+ * @param m
+ */
+extern void __attribute__((overloadable)) rsMatrixLoad(rs_matrix4x4 *m, const float *v);
+/**
+ * \overload
+ */
+extern void __attribute__((overloadable)) rsMatrixLoad(rs_matrix3x3 *m, const float *v);
+/**
+ * \overload
+ */
+extern void __attribute__((overloadable)) rsMatrixLoad(rs_matrix2x2 *m, const float *v);
+/**
+ * \overload
+ */
+extern void __attribute__((overloadable)) rsMatrixLoad(rs_matrix4x4 *m, const rs_matrix4x4 *v);
+/**
+ * \overload
+ */
+extern void __attribute__((overloadable)) rsMatrixLoad(rs_matrix4x4 *m, const rs_matrix3x3 *v);
+
+/**
+ * Set the elements of a matrix from another matrix.
+ *
+ * @param m
+ */
+extern void __attribute__((overloadable)) rsMatrixLoad(rs_matrix4x4 *m, const rs_matrix2x2 *v);
+/**
+ * \overload
+ */
+extern void __attribute__((overloadable)) rsMatrixLoad(rs_matrix3x3 *m, const rs_matrix3x3 *v);
+/**
+ * \overload
+ */
+extern void __attribute__((overloadable)) rsMatrixLoad(rs_matrix2x2 *m, const rs_matrix2x2 *v);
+
+/**
+ * Load a rotation matrix.
+ *
+ * @param m
+ * @param rot
+ * @param x
+ * @param y
+ * @param z
+ */
+extern void __attribute__((overloadable))
+rsMatrixLoadRotate(rs_matrix4x4 *m, float rot, float x, float y, float z);
+
+/**
+ * Load a scale matrix.
+ *
+ * @param m
+ * @param x
+ * @param y
+ * @param z
+ */
+extern void __attribute__((overloadable))
+rsMatrixLoadScale(rs_matrix4x4 *m, float x, float y, float z);
+
+/**
+ * Load a translation matrix.
+ *
+ * @param m
+ * @param x
+ * @param y
+ * @param z
+ */
+extern void __attribute__((overloadable))
+rsMatrixLoadTranslate(rs_matrix4x4 *m, float x, float y, float z);
+
+/**
+ * Multiply two matrix (lhs, rhs) and place the result in m.
+ *
+ * @param m
+ * @param lhs
+ * @param rhs
+ */
+extern void __attribute__((overloadable))
+rsMatrixLoadMultiply(rs_matrix4x4 *m, const rs_matrix4x4 *lhs, const rs_matrix4x4 *rhs);
+/**
+ * \overload
+ */
+extern void __attribute__((overloadable))
+rsMatrixLoadMultiply(rs_matrix3x3 *m, const rs_matrix3x3 *lhs, const rs_matrix3x3 *rhs);
+/**
+ * \overload
+ */
+extern void __attribute__((overloadable))
+rsMatrixLoadMultiply(rs_matrix2x2 *m, const rs_matrix2x2 *lhs, const rs_matrix2x2 *rhs);
+
+/**
+ * Multiply the matrix m by rhs and place the result back into m.
+ *
+ * @param m (lhs)
+ * @param rhs
+ */
+extern void __attribute__((overloadable))
+rsMatrixMultiply(rs_matrix4x4 *m, const rs_matrix4x4 *rhs);
+/**
+ * \overload
+ */
+extern void __attribute__((overloadable))
+rsMatrixMultiply(rs_matrix3x3 *m, const rs_matrix3x3 *rhs);
+/**
+ * \overload
+ */
+extern void __attribute__((overloadable))
+rsMatrixMultiply(rs_matrix2x2 *m, const rs_matrix2x2 *rhs);
+
+/**
+ * Multiple matrix m with a rotation matrix
+ *
+ * @param m
+ * @param rot
+ * @param x
+ * @param y
+ * @param z
+ */
+extern void __attribute__((overloadable))
+rsMatrixRotate(rs_matrix4x4 *m, float rot, float x, float y, float z);
+
+/**
+ * Multiple matrix m with a scale matrix
+ *
+ * @param m
+ * @param x
+ * @param y
+ * @param z
+ */
+extern void __attribute__((overloadable))
+rsMatrixScale(rs_matrix4x4 *m, float x, float y, float z);
+
+/**
+ * Multiple matrix m with a translation matrix
+ *
+ * @param m
+ * @param x
+ * @param y
+ * @param z
+ */
+extern void __attribute__((overloadable))
+rsMatrixTranslate(rs_matrix4x4 *m, float x, float y, float z);
+
+/**
+ * Load an Ortho projection matrix constructed from the 6 planes
+ *
+ * @param m
+ * @param left
+ * @param right
+ * @param bottom
+ * @param top
+ * @param near
+ * @param far
+ */
+extern void __attribute__((overloadable))
+rsMatrixLoadOrtho(rs_matrix4x4 *m, float left, float right, float bottom, float top, float near, float far);
+
+/**
+ * Load an Frustum projection matrix constructed from the 6 planes
+ *
+ * @param m
+ * @param left
+ * @param right
+ * @param bottom
+ * @param top
+ * @param near
+ * @param far
+ */
+extern void __attribute__((overloadable))
+rsMatrixLoadFrustum(rs_matrix4x4 *m, float left, float right, float bottom, float top, float near, float far);
+
+/**
+ * Load an perspective projection matrix constructed from the 6 planes
+ *
+ * @param m
+ * @param fovy Field of view, in degrees along the Y axis.
+ * @param aspect Ratio of x / y.
+ * @param near
+ * @param far
+ */
+extern void __attribute__((overloadable))
+rsMatrixLoadPerspective(rs_matrix4x4* m, float fovy, float aspect, float near, float far);
+
+#if !defined(RS_VERSION) || (RS_VERSION < 14)
+/**
+ * Multiply a vector by a matrix and return the result vector.
+ * API version 10-13
+ */
+_RS_RUNTIME float4 __attribute__((overloadable))
+rsMatrixMultiply(rs_matrix4x4 *m, float4 in);
+
+/**
+ * \overload
+ */
+_RS_RUNTIME float4 __attribute__((overloadable))
+rsMatrixMultiply(rs_matrix4x4 *m, float3 in);
+
+/**
+ * \overload
+ */
+_RS_RUNTIME float4 __attribute__((overloadable))
+rsMatrixMultiply(rs_matrix4x4 *m, float2 in);
+
+/**
+ * \overload
+ */
+_RS_RUNTIME float3 __attribute__((overloadable))
+rsMatrixMultiply(rs_matrix3x3 *m, float3 in);
+
+/**
+ * \overload
+ */
+_RS_RUNTIME float3 __attribute__((overloadable))
+rsMatrixMultiply(rs_matrix3x3 *m, float2 in);
+
+/**
+ * \overload
+ */
+_RS_RUNTIME float2 __attribute__((overloadable))
+rsMatrixMultiply(rs_matrix2x2 *m, float2 in);
+#else
+/**
+ * Multiply a vector by a matrix and return the result vector.
+ * API version 10-13
+ */
+_RS_RUNTIME float4 __attribute__((overloadable))
+rsMatrixMultiply(const rs_matrix4x4 *m, float4 in);
+
+/**
+ * \overload
+ */
+_RS_RUNTIME float4 __attribute__((overloadable))
+rsMatrixMultiply(const rs_matrix4x4 *m, float3 in);
+
+/**
+ * \overload
+ */
+_RS_RUNTIME float4 __attribute__((overloadable))
+rsMatrixMultiply(const rs_matrix4x4 *m, float2 in);
+
+/**
+ * \overload
+ */
+_RS_RUNTIME float3 __attribute__((overloadable))
+rsMatrixMultiply(const rs_matrix3x3 *m, float3 in);
+
+/**
+ * \overload
+ */
+_RS_RUNTIME float3 __attribute__((overloadable))
+rsMatrixMultiply(const rs_matrix3x3 *m, float2 in);
+
+/**
+ * \overload
+ */
+_RS_RUNTIME float2 __attribute__((overloadable))
+rsMatrixMultiply(const rs_matrix2x2 *m, float2 in);
+#endif
+
+
+/**
+ * Returns true if the matrix was successfully inversed
+ *
+ * @param m
+ */
+extern bool __attribute__((overloadable)) rsMatrixInverse(rs_matrix4x4 *m);
+
+/**
+ * Returns true if the matrix was successfully inversed and transposed.
+ *
+ * @param m
+ */
+extern bool __attribute__((overloadable)) rsMatrixInverseTranspose(rs_matrix4x4 *m);
+
+/**
+ * Transpose the matrix m.
+ *
+ * @param m
+ */
+extern void __attribute__((overloadable)) rsMatrixTranspose(rs_matrix4x4 *m);
+/**
+ * \overload
+ */
+extern void __attribute__((overloadable)) rsMatrixTranspose(rs_matrix3x3 *m);
+/**
+ * \overload
+ */
+extern void __attribute__((overloadable)) rsMatrixTranspose(rs_matrix2x2 *m);
+
+
+#endif
diff --git a/scriptc/rs_object.rsh b/scriptc/rs_object.rsh
new file mode 100644
index 0000000..a431219
--- /dev/null
+++ b/scriptc/rs_object.rsh
@@ -0,0 +1,205 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** @file rs_object.rsh
+ *  \brief Object routines
+ *
+ *
+ */
+
+#ifndef __RS_OBJECT_RSH__
+#define __RS_OBJECT_RSH__
+
+
+/**
+ * Copy reference to the specified object.
+ *
+ * @param dst
+ * @param src
+ */
+extern void __attribute__((overloadable))
+    rsSetObject(rs_element *dst, rs_element src);
+/**
+ * \overload
+ */
+extern void __attribute__((overloadable))
+    rsSetObject(rs_type *dst, rs_type src);
+/**
+ * \overload
+ */
+extern void __attribute__((overloadable))
+    rsSetObject(rs_allocation *dst, rs_allocation src);
+/**
+ * \overload
+ */
+extern void __attribute__((overloadable))
+    rsSetObject(rs_sampler *dst, rs_sampler src);
+/**
+ * \overload
+ */
+extern void __attribute__((overloadable))
+    rsSetObject(rs_script *dst, rs_script src);
+/**
+ * \overload
+ */
+extern void __attribute__((overloadable))
+    rsSetObject(rs_mesh *dst, rs_mesh src);
+/**
+ * \overload
+ */
+extern void __attribute__((overloadable))
+    rsSetObject(rs_program_fragment *dst, rs_program_fragment src);
+/**
+ * \overload
+ */
+extern void __attribute__((overloadable))
+    rsSetObject(rs_program_vertex *dst, rs_program_vertex src);
+/**
+ * \overload
+ */
+extern void __attribute__((overloadable))
+    rsSetObject(rs_program_raster *dst, rs_program_raster src);
+/**
+ * \overload
+ */
+extern void __attribute__((overloadable))
+    rsSetObject(rs_program_store *dst, rs_program_store src);
+/**
+ * \overload
+ */
+extern void __attribute__((overloadable))
+    rsSetObject(rs_font *dst, rs_font src);
+
+/**
+ * Sets the object to NULL.
+ *
+ * @return bool
+ */
+extern void __attribute__((overloadable))
+    rsClearObject(rs_element *dst);
+/**
+ * \overload
+ */
+extern void __attribute__((overloadable))
+    rsClearObject(rs_type *dst);
+/**
+ * \overload
+ */
+extern void __attribute__((overloadable))
+    rsClearObject(rs_allocation *dst);
+/**
+ * \overload
+ */
+extern void __attribute__((overloadable))
+    rsClearObject(rs_sampler *dst);
+/**
+ * \overload
+ */
+extern void __attribute__((overloadable))
+    rsClearObject(rs_script *dst);
+/**
+ * \overload
+ */
+extern void __attribute__((overloadable))
+    rsClearObject(rs_mesh *dst);
+/**
+ * \overload
+ */
+extern void __attribute__((overloadable))
+    rsClearObject(rs_program_fragment *dst);
+/**
+ * \overload
+ */
+extern void __attribute__((overloadable))
+    rsClearObject(rs_program_vertex *dst);
+/**
+ * \overload
+ */
+extern void __attribute__((overloadable))
+    rsClearObject(rs_program_raster *dst);
+/**
+ * \overload
+ */
+extern void __attribute__((overloadable))
+    rsClearObject(rs_program_store *dst);
+/**
+ * \overload
+ */
+extern void __attribute__((overloadable))
+    rsClearObject(rs_font *dst);
+
+
+
+/**
+ * Tests if the object is valid.  Returns true if the object is valid, false if
+ * it is NULL.
+ *
+ * @return bool
+ */
+extern bool __attribute__((overloadable))
+    rsIsObject(rs_element);
+/**
+ * \overload
+ */
+extern bool __attribute__((overloadable))
+    rsIsObject(rs_type);
+/**
+ * \overload
+ */
+extern bool __attribute__((overloadable))
+    rsIsObject(rs_allocation);
+/**
+ * \overload
+ */
+extern bool __attribute__((overloadable))
+    rsIsObject(rs_sampler);
+/**
+ * \overload
+ */
+extern bool __attribute__((overloadable))
+    rsIsObject(rs_script);
+/**
+ * \overload
+ */
+extern bool __attribute__((overloadable))
+    rsIsObject(rs_mesh);
+/**
+ * \overload
+ */
+extern bool __attribute__((overloadable))
+    rsIsObject(rs_program_fragment);
+/**
+ * \overload
+ */
+extern bool __attribute__((overloadable))
+    rsIsObject(rs_program_vertex);
+/**
+ * \overload
+ */
+extern bool __attribute__((overloadable))
+    rsIsObject(rs_program_raster);
+/**
+ * \overload
+ */
+extern bool __attribute__((overloadable))
+    rsIsObject(rs_program_store);
+/**
+ * \overload
+ */
+extern bool __attribute__((overloadable))
+    rsIsObject(rs_font);
+
+#endif
diff --git a/scriptc/rs_quaternion.rsh b/scriptc/rs_quaternion.rsh
new file mode 100644
index 0000000..36e6736
--- /dev/null
+++ b/scriptc/rs_quaternion.rsh
@@ -0,0 +1,257 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** @file rs_matrix.rsh
+ *  \brief Quaternion routines
+ *
+ *
+ */
+
+#ifndef __RS_QUATERNION_RSH__
+#define __RS_QUATERNION_RSH__
+
+
+/**
+ * Set the quaternion components
+ * @param w component
+ * @param x component
+ * @param y component
+ * @param z component
+ */
+static void __attribute__((overloadable))
+rsQuaternionSet(rs_quaternion *q, float w, float x, float y, float z) {
+    q->w = w;
+    q->x = x;
+    q->y = y;
+    q->z = z;
+}
+
+/**
+ * Set the quaternion from another quaternion
+ * @param q destination quaternion
+ * @param rhs source quaternion
+ */
+static void __attribute__((overloadable))
+rsQuaternionSet(rs_quaternion *q, const rs_quaternion *rhs) {
+    q->w = rhs->w;
+    q->x = rhs->x;
+    q->y = rhs->y;
+    q->z = rhs->z;
+}
+
+/**
+ * Multiply quaternion by a scalar
+ * @param q quaternion to multiply
+ * @param s scalar
+ */
+static void __attribute__((overloadable))
+rsQuaternionMultiply(rs_quaternion *q, float s) {
+    q->w *= s;
+    q->x *= s;
+    q->y *= s;
+    q->z *= s;
+}
+
+/**
+ * Multiply quaternion by another quaternion
+ * @param q destination quaternion
+ * @param rhs right hand side quaternion to multiply by
+ */
+static void __attribute__((overloadable))
+rsQuaternionMultiply(rs_quaternion *q, const rs_quaternion *rhs) {
+    q->w = -q->x*rhs->x - q->y*rhs->y - q->z*rhs->z + q->w*rhs->w;
+    q->x =  q->x*rhs->w + q->y*rhs->z - q->z*rhs->y + q->w*rhs->x;
+    q->y = -q->x*rhs->z + q->y*rhs->w + q->z*rhs->x + q->w*rhs->y;
+    q->z =  q->x*rhs->y - q->y*rhs->x + q->z*rhs->w + q->w*rhs->z;
+}
+
+/**
+ * Add two quaternions
+ * @param q destination quaternion to add to
+ * @param rsh right hand side quaternion to add
+ */
+static void
+rsQuaternionAdd(rs_quaternion *q, const rs_quaternion *rhs) {
+    q->w *= rhs->w;
+    q->x *= rhs->x;
+    q->y *= rhs->y;
+    q->z *= rhs->z;
+}
+
+/**
+ * Loads a quaternion that represents a rotation about an arbitrary unit vector
+ * @param q quaternion to set
+ * @param rot angle to rotate by
+ * @param x component of a vector
+ * @param y component of a vector
+ * @param x component of a vector
+ */
+static void
+rsQuaternionLoadRotateUnit(rs_quaternion *q, float rot, float x, float y, float z) {
+    rot *= (float)(M_PI / 180.0f) * 0.5f;
+    float c = cos(rot);
+    float s = sin(rot);
+
+    q->w = c;
+    q->x = x * s;
+    q->y = y * s;
+    q->z = z * s;
+}
+
+/**
+ * Loads a quaternion that represents a rotation about an arbitrary vector
+ * (doesn't have to be unit)
+ * @param q quaternion to set
+ * @param rot angle to rotate by
+ * @param x component of a vector
+ * @param y component of a vector
+ * @param x component of a vector
+ */
+static void
+rsQuaternionLoadRotate(rs_quaternion *q, float rot, float x, float y, float z) {
+    const float len = x*x + y*y + z*z;
+    if (len != 1) {
+        const float recipLen = 1.f / sqrt(len);
+        x *= recipLen;
+        y *= recipLen;
+        z *= recipLen;
+    }
+    rsQuaternionLoadRotateUnit(q, rot, x, y, z);
+}
+
+/**
+ * Conjugates the quaternion
+ * @param q quaternion to conjugate
+ */
+static void
+rsQuaternionConjugate(rs_quaternion *q) {
+    q->x = -q->x;
+    q->y = -q->y;
+    q->z = -q->z;
+}
+
+/**
+ * Dot product of two quaternions
+ * @param q0 first quaternion
+ * @param q1 second quaternion
+ * @return dot product between q0 and q1
+ */
+static float
+rsQuaternionDot(const rs_quaternion *q0, const rs_quaternion *q1) {
+    return q0->w*q1->w + q0->x*q1->x + q0->y*q1->y + q0->z*q1->z;
+}
+
+/**
+ * Normalizes the quaternion
+ * @param q quaternion to normalize
+ */
+static void
+rsQuaternionNormalize(rs_quaternion *q) {
+    const float len = rsQuaternionDot(q, q);
+    if (len != 1) {
+        const float recipLen = 1.f / sqrt(len);
+        rsQuaternionMultiply(q, recipLen);
+    }
+}
+
+/**
+ * Performs spherical linear interpolation between two quaternions
+ * @param q result quaternion from interpolation
+ * @param q0 first param
+ * @param q1 second param
+ * @param t how much to interpolate by
+ */
+static void
+rsQuaternionSlerp(rs_quaternion *q, const rs_quaternion *q0, const rs_quaternion *q1, float t) {
+    if (t <= 0.0f) {
+        rsQuaternionSet(q, q0);
+        return;
+    }
+    if (t >= 1.0f) {
+        rsQuaternionSet(q, q1);
+        return;
+    }
+
+    rs_quaternion tempq0, tempq1;
+    rsQuaternionSet(&tempq0, q0);
+    rsQuaternionSet(&tempq1, q1);
+
+    float angle = rsQuaternionDot(q0, q1);
+    if (angle < 0) {
+        rsQuaternionMultiply(&tempq0, -1.0f);
+        angle *= -1.0f;
+    }
+
+    float scale, invScale;
+    if (angle + 1.0f > 0.05f) {
+        if (1.0f - angle >= 0.05f) {
+            float theta = acos(angle);
+            float invSinTheta = 1.0f / sin(theta);
+            scale = sin(theta * (1.0f - t)) * invSinTheta;
+            invScale = sin(theta * t) * invSinTheta;
+        } else {
+            scale = 1.0f - t;
+            invScale = t;
+        }
+    } else {
+        rsQuaternionSet(&tempq1, tempq0.z, -tempq0.y, tempq0.x, -tempq0.w);
+        scale = sin(M_PI * (0.5f - t));
+        invScale = sin(M_PI * t);
+    }
+
+    rsQuaternionSet(q, tempq0.w*scale + tempq1.w*invScale, tempq0.x*scale + tempq1.x*invScale,
+                        tempq0.y*scale + tempq1.y*invScale, tempq0.z*scale + tempq1.z*invScale);
+}
+
+/**
+ * Computes rotation matrix from the normalized quaternion
+ * @param m resulting matrix
+ * @param p normalized quaternion
+ */
+static void rsQuaternionGetMatrixUnit(rs_matrix4x4 *m, const rs_quaternion *q) {
+    float x2 = 2.0f * q->x * q->x;
+    float y2 = 2.0f * q->y * q->y;
+    float z2 = 2.0f * q->z * q->z;
+    float xy = 2.0f * q->x * q->y;
+    float wz = 2.0f * q->w * q->z;
+    float xz = 2.0f * q->x * q->z;
+    float wy = 2.0f * q->w * q->y;
+    float wx = 2.0f * q->w * q->x;
+    float yz = 2.0f * q->y * q->z;
+
+    m->m[0] = 1.0f - y2 - z2;
+    m->m[1] = xy - wz;
+    m->m[2] = xz + wy;
+    m->m[3] = 0.0f;
+
+    m->m[4] = xy + wz;
+    m->m[5] = 1.0f - x2 - z2;
+    m->m[6] = yz - wx;
+    m->m[7] = 0.0f;
+
+    m->m[8] = xz - wy;
+    m->m[9] = yz - wx;
+    m->m[10] = 1.0f - x2 - y2;
+    m->m[11] = 0.0f;
+
+    m->m[12] = 0.0f;
+    m->m[13] = 0.0f;
+    m->m[14] = 0.0f;
+    m->m[15] = 1.0f;
+}
+
+#endif
+
diff --git a/scriptc/rs_time.rsh b/scriptc/rs_time.rsh
index f1abed6..f8f297d 100644
--- a/scriptc/rs_time.rsh
+++ b/scriptc/rs_time.rsh
@@ -1,3 +1,25 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** @file rs_time.rsh
+ *  \brief Time routines
+ *
+ *
+ */
+
 #ifndef __RS_TIME_RSH__
 #define __RS_TIME_RSH__
 
diff --git a/scriptc/rs_types.rsh b/scriptc/rs_types.rsh
index 9a79f5e..875beb9 100644
--- a/scriptc/rs_types.rsh
+++ b/scriptc/rs_types.rsh
@@ -1,4 +1,20 @@
-/** @file rs_time.rsh
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** @file rs_types.rsh
  *
  *  Define the standard Renderscript types
  *