Merge Filament's math library

This math library was derived from Android's and is API compatible.
It adds new useful types (quat and half) as well as many missing
functions and optimizations.

The half type (fp16) is going to be used for HDR/color management.

Test: mat_test, quat_test, half_test and vec_test

Change-Id: I4c61efb085d6aa2cf5b43cdd194719b3e855aa9b
diff --git a/include/ui/TMatHelpers.h b/include/ui/TMatHelpers.h
index a6aadca..9daca08 100644
--- a/include/ui/TMatHelpers.h
+++ b/include/ui/TMatHelpers.h
@@ -14,25 +14,35 @@
  * limitations under the License.
  */
 
-#ifndef TMAT_IMPLEMENTATION
-#error "Don't include TMatHelpers.h directly. use ui/mat*.h instead"
-#else
-#undef TMAT_IMPLEMENTATION
-#endif
+#ifndef UI_TMATHELPERS_H_
+#define UI_TMATHELPERS_H_
 
-
-#ifndef UI_TMAT_HELPERS_H
-#define UI_TMAT_HELPERS_H
-
+#include <math.h>
 #include <stdint.h>
 #include <sys/types.h>
-#include <math.h>
-#include <utils/Debug.h>
-#include <utils/String8.h>
+
+#include <cmath>
+#include <exception>
+#include <iomanip>
+#include <stdexcept>
+
+#include <ui/quat.h>
+#include <ui/TVecHelpers.h>
+
+#include  <utils/String8.h>
+
+#ifdef __cplusplus
+#   define LIKELY( exp )    (__builtin_expect( !!(exp), true ))
+#   define UNLIKELY( exp )  (__builtin_expect( !!(exp), false ))
+#else
+#   define LIKELY( exp )    (__builtin_expect( !!(exp), 1 ))
+#   define UNLIKELY( exp )  (__builtin_expect( !!(exp), 0 ))
+#endif
 
 #define PURE __attribute__((pure))
 
 namespace android {
+namespace details {
 // -------------------------------------------------------------------------------------
 
 /*
@@ -48,63 +58,177 @@
 
 namespace matrix {
 
-inline int     PURE transpose(int v)    { return v; }
-inline float   PURE transpose(float v)  { return v; }
-inline double  PURE transpose(double v) { return v; }
+inline constexpr int     transpose(int v)    { return v; }
+inline constexpr float   transpose(float v)  { return v; }
+inline constexpr double  transpose(double v) { return v; }
 
-inline int     PURE trace(int v)    { return v; }
-inline float   PURE trace(float v)  { return v; }
-inline double  PURE trace(double v) { return v; }
+inline constexpr int     trace(int v)    { return v; }
+inline constexpr float   trace(float v)  { return v; }
+inline constexpr double  trace(double v) { return v; }
 
+/*
+ * Matrix inversion
+ */
 template<typename MATRIX>
-MATRIX PURE inverse(const MATRIX& src) {
-
-    COMPILE_TIME_ASSERT_FUNCTION_SCOPE( MATRIX::COL_SIZE == MATRIX::ROW_SIZE );
-
-    typename MATRIX::value_type t;
-    const size_t N = MATRIX::col_size();
-    size_t swap;
+MATRIX PURE gaussJordanInverse(const MATRIX& src) {
+    typedef typename MATRIX::value_type T;
+    static constexpr unsigned int N = MATRIX::NUM_ROWS;
     MATRIX tmp(src);
-    MATRIX inverse(1);
+    MATRIX inverted(1);
 
-    for (size_t i=0 ; i<N ; i++) {
-        // look for largest element in column
-        swap = i;
-        for (size_t j=i+1 ; j<N ; j++) {
-            if (fabs(tmp[j][i]) > fabs(tmp[i][i])) {
+    for (size_t i = 0; i < N; ++i) {
+        // look for largest element in i'th column
+        size_t swap = i;
+        T t = std::abs(tmp[i][i]);
+        for (size_t j = i + 1; j < N; ++j) {
+            const T t2 = std::abs(tmp[j][i]);
+            if (t2 > t) {
                 swap = j;
+                t = t2;
             }
         }
 
         if (swap != i) {
-            /* swap rows. */
-            for (size_t k=0 ; k<N ; k++) {
-                t = tmp[i][k];
-                tmp[i][k] = tmp[swap][k];
-                tmp[swap][k] = t;
-
-                t = inverse[i][k];
-                inverse[i][k] = inverse[swap][k];
-                inverse[swap][k] = t;
-            }
+            // swap columns.
+            std::swap(tmp[i], tmp[swap]);
+            std::swap(inverted[i], inverted[swap]);
         }
 
-        t = 1 / tmp[i][i];
-        for (size_t k=0 ; k<N ; k++) {
-            tmp[i][k] *= t;
-            inverse[i][k] *= t;
+        const T denom(tmp[i][i]);
+        for (size_t k = 0; k < N; ++k) {
+            tmp[i][k] /= denom;
+            inverted[i][k] /= denom;
         }
-        for (size_t j=0 ; j<N ; j++) {
+
+        // Factor out the lower triangle
+        for (size_t j = 0; j < N; ++j) {
             if (j != i) {
-                t = tmp[j][i];
-                for (size_t k=0 ; k<N ; k++) {
+                const T t = tmp[j][i];
+                for (size_t k = 0; k < N; ++k) {
                     tmp[j][k] -= tmp[i][k] * t;
-                    inverse[j][k] -= inverse[i][k] * t;
+                    inverted[j][k] -= inverted[i][k] * t;
                 }
             }
         }
     }
-    return inverse;
+
+    return inverted;
+}
+
+
+//------------------------------------------------------------------------------
+// 2x2 matrix inverse is easy.
+template <typename MATRIX>
+MATRIX PURE fastInverse2(const MATRIX& x) {
+    typedef typename MATRIX::value_type T;
+
+    // Assuming the input matrix is:
+    // | a b |
+    // | c d |
+    //
+    // The analytic inverse is
+    // | d -b |
+    // | -c a | / (a d - b c)
+    //
+    // Importantly, our matrices are column-major!
+
+    MATRIX inverted(MATRIX::NO_INIT);
+
+    const T a = x[0][0];
+    const T c = x[0][1];
+    const T b = x[1][0];
+    const T d = x[1][1];
+
+    const T det((a * d) - (b * c));
+    inverted[0][0] =  d / det;
+    inverted[0][1] = -c / det;
+    inverted[1][0] = -b / det;
+    inverted[1][1] =  a / det;
+    return inverted;
+}
+
+
+//------------------------------------------------------------------------------
+// From the Wikipedia article on matrix inversion's section on fast 3x3
+// matrix inversion:
+// http://en.wikipedia.org/wiki/Invertible_matrix#Inversion_of_3.C3.973_matrices
+template <typename MATRIX>
+MATRIX PURE fastInverse3(const MATRIX& x) {
+    typedef typename MATRIX::value_type T;
+
+    // Assuming the input matrix is:
+    // | a b c |
+    // | d e f |
+    // | g h i |
+    //
+    // The analytic inverse is
+    // | A B C |^T
+    // | D E F |
+    // | G H I | / determinant
+    //
+    // Which is
+    // | A D G |
+    // | B E H |
+    // | C F I | / determinant
+    //
+    // Where:
+    // A = (ei - fh), B = (fg - di), C = (dh - eg)
+    // D = (ch - bi), E = (ai - cg), F = (bg - ah)
+    // G = (bf - ce), H = (cd - af), I = (ae - bd)
+    //
+    // and the determinant is a*A + b*B + c*C (The rule of Sarrus)
+    //
+    // Importantly, our matrices are column-major!
+
+    MATRIX inverted(MATRIX::NO_INIT);
+
+    const T a = x[0][0];
+    const T b = x[1][0];
+    const T c = x[2][0];
+    const T d = x[0][1];
+    const T e = x[1][1];
+    const T f = x[2][1];
+    const T g = x[0][2];
+    const T h = x[1][2];
+    const T i = x[2][2];
+
+    // Do the full analytic inverse
+    const T A = e * i - f * h;
+    const T B = f * g - d * i;
+    const T C = d * h - e * g;
+    inverted[0][0] = A;                 // A
+    inverted[0][1] = B;                 // B
+    inverted[0][2] = C;                 // C
+    inverted[1][0] = c * h - b * i;     // D
+    inverted[1][1] = a * i - c * g;     // E
+    inverted[1][2] = b * g - a * h;     // F
+    inverted[2][0] = b * f - c * e;     // G
+    inverted[2][1] = c * d - a * f;     // H
+    inverted[2][2] = a * e - b * d;     // I
+
+    const T det(a * A + b * B + c * C);
+    for (size_t col = 0; col < 3; ++col) {
+        for (size_t row = 0; row < 3; ++row) {
+            inverted[col][row] /= det;
+        }
+    }
+
+    return inverted;
+}
+
+
+/**
+ * Inversion function which switches on the matrix size.
+ * @warning This function assumes the matrix is invertible. The result is
+ * undefined if it is not. It is the responsibility of the caller to
+ * make sure the matrix is not singular.
+ */
+template <typename MATRIX>
+inline constexpr MATRIX PURE inverse(const MATRIX& matrix) {
+    static_assert(MATRIX::NUM_ROWS == MATRIX::NUM_COLS, "only square matrices can be inverted");
+    return (MATRIX::NUM_ROWS == 2) ? fastInverse2<MATRIX>(matrix) :
+          ((MATRIX::NUM_ROWS == 3) ? fastInverse3<MATRIX>(matrix) :
+                    gaussJordanInverse<MATRIX>(matrix));
 }
 
 template<typename MATRIX_R, typename MATRIX_A, typename MATRIX_B>
@@ -114,13 +238,16 @@
     //  rhs : C columns, D rows
     //  res : C columns, R rows
 
-    COMPILE_TIME_ASSERT_FUNCTION_SCOPE( MATRIX_A::ROW_SIZE == MATRIX_B::COL_SIZE );
-    COMPILE_TIME_ASSERT_FUNCTION_SCOPE( MATRIX_R::ROW_SIZE == MATRIX_B::ROW_SIZE );
-    COMPILE_TIME_ASSERT_FUNCTION_SCOPE( MATRIX_R::COL_SIZE == MATRIX_A::COL_SIZE );
+    static_assert(MATRIX_A::NUM_COLS == MATRIX_B::NUM_ROWS,
+            "matrices can't be multiplied. invalid dimensions.");
+    static_assert(MATRIX_R::NUM_COLS == MATRIX_B::NUM_COLS,
+            "invalid dimension of matrix multiply result.");
+    static_assert(MATRIX_R::NUM_ROWS == MATRIX_A::NUM_ROWS,
+            "invalid dimension of matrix multiply result.");
 
     MATRIX_R res(MATRIX_R::NO_INIT);
-    for (size_t r=0 ; r<MATRIX_R::row_size() ; r++) {
-        res[r] = lhs * rhs[r];
+    for (size_t col = 0; col < MATRIX_R::NUM_COLS; ++col) {
+        res[col] = lhs * rhs[col];
     }
     return res;
 }
@@ -129,40 +256,88 @@
 template <typename MATRIX>
 MATRIX PURE transpose(const MATRIX& m) {
     // for now we only handle square matrix transpose
-    COMPILE_TIME_ASSERT_FUNCTION_SCOPE( MATRIX::ROW_SIZE == MATRIX::COL_SIZE );
+    static_assert(MATRIX::NUM_COLS == MATRIX::NUM_ROWS, "transpose only supports square matrices");
     MATRIX result(MATRIX::NO_INIT);
-    for (size_t r=0 ; r<MATRIX::row_size() ; r++)
-        for (size_t c=0 ; c<MATRIX::col_size() ; c++)
-            result[c][r] = transpose(m[r][c]);
+    for (size_t col = 0; col < MATRIX::NUM_COLS; ++col) {
+        for (size_t row = 0; row < MATRIX::NUM_ROWS; ++row) {
+            result[col][row] = transpose(m[row][col]);
+        }
+    }
     return result;
 }
 
 // trace. this handles matrices of matrices
 template <typename MATRIX>
 typename MATRIX::value_type PURE trace(const MATRIX& m) {
-    COMPILE_TIME_ASSERT_FUNCTION_SCOPE( MATRIX::ROW_SIZE == MATRIX::COL_SIZE );
+    static_assert(MATRIX::NUM_COLS == MATRIX::NUM_ROWS, "trace only defined for square matrices");
     typename MATRIX::value_type result(0);
-    for (size_t r=0 ; r<MATRIX::row_size() ; r++)
-        result += trace(m[r][r]);
+    for (size_t col = 0; col < MATRIX::NUM_COLS; ++col) {
+        result += trace(m[col][col]);
+    }
     return result;
 }
 
-// trace. this handles matrices of matrices
+// diag. this handles matrices of matrices
 template <typename MATRIX>
 typename MATRIX::col_type PURE diag(const MATRIX& m) {
-    COMPILE_TIME_ASSERT_FUNCTION_SCOPE( MATRIX::ROW_SIZE == MATRIX::COL_SIZE );
+    static_assert(MATRIX::NUM_COLS == MATRIX::NUM_ROWS, "diag only defined for square matrices");
     typename MATRIX::col_type result(MATRIX::col_type::NO_INIT);
-    for (size_t r=0 ; r<MATRIX::row_size() ; r++)
-        result[r] = m[r][r];
+    for (size_t col = 0; col < MATRIX::NUM_COLS; ++col) {
+        result[col] = m[col][col];
+    }
     return result;
 }
 
+//------------------------------------------------------------------------------
+// This is taken from the Imath MatrixAlgo code, and is identical to Eigen.
+template <typename MATRIX>
+TQuaternion<typename MATRIX::value_type> extractQuat(const MATRIX& mat) {
+    typedef typename MATRIX::value_type T;
+
+    TQuaternion<T> quat(TQuaternion<T>::NO_INIT);
+
+    // Compute the trace to see if it is positive or not.
+    const T trace = mat[0][0] + mat[1][1] + mat[2][2];
+
+    // check the sign of the trace
+    if (LIKELY(trace > 0)) {
+        // trace is positive
+        T s = std::sqrt(trace + 1);
+        quat.w = T(0.5) * s;
+        s = T(0.5) / s;
+        quat.x = (mat[1][2] - mat[2][1]) * s;
+        quat.y = (mat[2][0] - mat[0][2]) * s;
+        quat.z = (mat[0][1] - mat[1][0]) * s;
+    } else {
+        // trace is negative
+
+        // Find the index of the greatest diagonal
+        size_t i = 0;
+        if (mat[1][1] > mat[0][0]) { i = 1; }
+        if (mat[2][2] > mat[i][i]) { i = 2; }
+
+        // Get the next indices: (n+1)%3
+        static constexpr size_t next_ijk[3] = { 1, 2, 0 };
+        size_t j = next_ijk[i];
+        size_t k = next_ijk[j];
+        T s = std::sqrt((mat[i][i] - (mat[j][j] + mat[k][k])) + 1);
+        quat[i] = T(0.5) * s;
+        if (s != 0) {
+            s = T(0.5) / s;
+        }
+        quat.w  = (mat[j][k] - mat[k][j]) * s;
+        quat[j] = (mat[i][j] + mat[j][i]) * s;
+        quat[k] = (mat[i][k] + mat[k][i]) * s;
+    }
+    return quat;
+}
+
 template <typename MATRIX>
 String8 asString(const MATRIX& m) {
     String8 s;
-    for (size_t c=0 ; c<MATRIX::col_size() ; c++) {
+    for (size_t c = 0; c < MATRIX::col_size(); c++) {
         s.append("|  ");
-        for (size_t r=0 ; r<MATRIX::row_size() ; r++) {
+        for (size_t r = 0; r < MATRIX::row_size(); r++) {
             s.appendFormat("%7.2f  ", m[r][c]);
         }
         s.append("|\n");
@@ -170,7 +345,7 @@
     return s;
 }
 
-}; // namespace matrix
+}  // namespace matrix
 
 // -------------------------------------------------------------------------------------
 
@@ -189,17 +364,25 @@
     // multiply by a scalar
     BASE<T>& operator *= (T v) {
         BASE<T>& lhs(static_cast< BASE<T>& >(*this));
-        for (size_t r=0 ; r<lhs.row_size() ; r++) {
-            lhs[r] *= v;
+        for (size_t col = 0; col < BASE<T>::NUM_COLS; ++col) {
+            lhs[col] *= v;
         }
         return lhs;
     }
 
+    //  matrix *= matrix
+    template<typename U>
+    const BASE<T>& operator *= (const BASE<U>& rhs) {
+        BASE<T>& lhs(static_cast< BASE<T>& >(*this));
+        lhs = matrix::multiply<BASE<T> >(lhs, rhs);
+        return lhs;
+    }
+
     // divide by a scalar
     BASE<T>& operator /= (T v) {
         BASE<T>& lhs(static_cast< BASE<T>& >(*this));
-        for (size_t r=0 ; r<lhs.row_size() ; r++) {
-            lhs[r] /= v;
+        for (size_t col = 0; col < BASE<T>::NUM_COLS; ++col) {
+            lhs[col] /= v;
         }
         return lhs;
     }
@@ -211,7 +394,6 @@
     }
 };
 
-
 /*
  * TMatSquareFunctions implements functions on a matrix of type BASE<T>.
  *
@@ -229,6 +411,7 @@
 template<template<typename U> class BASE, typename T>
 class TMatSquareFunctions {
 public:
+
     /*
      * NOTE: the functions below ARE NOT member methods. They are friend functions
      * with they definition inlined with their declaration. This makes these
@@ -236,22 +419,216 @@
      * is instantiated, at which point they're only templated on the 2nd parameter
      * (the first one, BASE<T> being known).
      */
-    friend BASE<T> PURE inverse(const BASE<T>& m)   { return matrix::inverse(m); }
-    friend BASE<T> PURE transpose(const BASE<T>& m) { return matrix::transpose(m); }
-    friend T       PURE trace(const BASE<T>& m)     { return matrix::trace(m); }
+    friend inline BASE<T> PURE inverse(const BASE<T>& matrix) {
+        return matrix::inverse(matrix);
+    }
+    friend inline constexpr BASE<T> PURE transpose(const BASE<T>& m) {
+        return matrix::transpose(m);
+    }
+    friend inline constexpr T PURE trace(const BASE<T>& m) {
+        return matrix::trace(m);
+    }
 };
 
+template<template<typename U> class BASE, typename T>
+class TMatHelpers {
+public:
+    constexpr inline size_t getColumnSize() const   { return BASE<T>::COL_SIZE; }
+    constexpr inline size_t getRowSize() const      { return BASE<T>::ROW_SIZE; }
+    constexpr inline size_t getColumnCount() const  { return BASE<T>::NUM_COLS; }
+    constexpr inline size_t getRowCount() const     { return BASE<T>::NUM_ROWS; }
+    constexpr inline size_t size()  const           { return BASE<T>::ROW_SIZE; }  // for TVec*<>
+
+    // array access
+    constexpr T const* asArray() const {
+        return &static_cast<BASE<T> const &>(*this)[0][0];
+    }
+
+    // element access
+    inline constexpr T const& operator()(size_t row, size_t col) const {
+        return static_cast<BASE<T> const &>(*this)[col][row];
+    }
+
+    inline T& operator()(size_t row, size_t col) {
+        return static_cast<BASE<T>&>(*this)[col][row];
+    }
+
+    template <typename VEC>
+    static BASE<T> translate(const VEC& t) {
+        BASE<T> r;
+        r[BASE<T>::NUM_COLS-1] = t;
+        return r;
+    }
+
+    template <typename VEC>
+    static constexpr BASE<T> scale(const VEC& s) {
+        return BASE<T>(s);
+    }
+
+    friend inline BASE<T> PURE abs(BASE<T> m) {
+        for (size_t col = 0; col < BASE<T>::NUM_COLS; ++col) {
+            m[col] = abs(m[col]);
+        }
+        return m;
+    }
+};
+
+// functions for 3x3 and 4x4 matrices
+template<template<typename U> class BASE, typename T>
+class TMatTransform {
+public:
+    inline constexpr TMatTransform() {
+        static_assert(BASE<T>::NUM_ROWS == 3 || BASE<T>::NUM_ROWS == 4, "3x3 or 4x4 matrices only");
+    }
+
+    template <typename A, typename VEC>
+    static BASE<T> rotate(A radian, const VEC& about) {
+        BASE<T> r;
+        T c = std::cos(radian);
+        T s = std::sin(radian);
+        if (about.x == 1 && about.y == 0 && about.z == 0) {
+            r[1][1] = c;   r[2][2] = c;
+            r[1][2] = s;   r[2][1] = -s;
+        } else if (about.x == 0 && about.y == 1 && about.z == 0) {
+            r[0][0] = c;   r[2][2] = c;
+            r[2][0] = s;   r[0][2] = -s;
+        } else if (about.x == 0 && about.y == 0 && about.z == 1) {
+            r[0][0] = c;   r[1][1] = c;
+            r[0][1] = s;   r[1][0] = -s;
+        } else {
+            VEC nabout = normalize(about);
+            typename VEC::value_type x = nabout.x;
+            typename VEC::value_type y = nabout.y;
+            typename VEC::value_type z = nabout.z;
+            T nc = 1 - c;
+            T xy = x * y;
+            T yz = y * z;
+            T zx = z * x;
+            T xs = x * s;
+            T ys = y * s;
+            T zs = z * s;
+            r[0][0] = x*x*nc +  c;    r[1][0] =  xy*nc - zs;    r[2][0] =  zx*nc + ys;
+            r[0][1] =  xy*nc + zs;    r[1][1] = y*y*nc +  c;    r[2][1] =  yz*nc - xs;
+            r[0][2] =  zx*nc - ys;    r[1][2] =  yz*nc + xs;    r[2][2] = z*z*nc +  c;
+
+            // Clamp results to -1, 1.
+            for (size_t col = 0; col < 3; ++col) {
+                for (size_t row = 0; row < 3; ++row) {
+                    r[col][row] = std::min(std::max(r[col][row], T(-1)), T(1));
+                }
+            }
+        }
+        return r;
+    }
+
+    /**
+     * Create a matrix from euler angles using YPR around YXZ respectively
+     * @param yaw about Y axis
+     * @param pitch about X axis
+     * @param roll about Z axis
+     */
+    template <
+        typename Y, typename P, typename R,
+        typename = typename std::enable_if<std::is_arithmetic<Y>::value >::type,
+        typename = typename std::enable_if<std::is_arithmetic<P>::value >::type,
+        typename = typename std::enable_if<std::is_arithmetic<R>::value >::type
+    >
+    static BASE<T> eulerYXZ(Y yaw, P pitch, R roll) {
+        return eulerZYX(roll, pitch, yaw);
+    }
+
+    /**
+     * Create a matrix from euler angles using YPR around ZYX respectively
+     * @param roll about X axis
+     * @param pitch about Y axis
+     * @param yaw about Z axis
+     *
+     * The euler angles are applied in ZYX order. i.e: a vector is first rotated
+     * about X (roll) then Y (pitch) and then Z (yaw).
+     */
+    template <
+    typename Y, typename P, typename R,
+    typename = typename std::enable_if<std::is_arithmetic<Y>::value >::type,
+    typename = typename std::enable_if<std::is_arithmetic<P>::value >::type,
+    typename = typename std::enable_if<std::is_arithmetic<R>::value >::type
+    >
+    static BASE<T> eulerZYX(Y yaw, P pitch, R roll) {
+        BASE<T> r;
+        T cy = std::cos(yaw);
+        T sy = std::sin(yaw);
+        T cp = std::cos(pitch);
+        T sp = std::sin(pitch);
+        T cr = std::cos(roll);
+        T sr = std::sin(roll);
+        T cc = cr * cy;
+        T cs = cr * sy;
+        T sc = sr * cy;
+        T ss = sr * sy;
+        r[0][0] = cp * cy;
+        r[0][1] = cp * sy;
+        r[0][2] = -sp;
+        r[1][0] = sp * sc - cs;
+        r[1][1] = sp * ss + cc;
+        r[1][2] = cp * sr;
+        r[2][0] = sp * cc + ss;
+        r[2][1] = sp * cs - sc;
+        r[2][2] = cp * cr;
+
+        // Clamp results to -1, 1.
+        for (size_t col = 0; col < 3; ++col) {
+            for (size_t row = 0; row < 3; ++row) {
+                r[col][row] = std::min(std::max(r[col][row], T(-1)), T(1));
+            }
+        }
+        return r;
+    }
+
+    TQuaternion<T> toQuaternion() const {
+        return matrix::extractQuat(static_cast<const BASE<T>&>(*this));
+    }
+};
+
+
 template <template<typename T> class BASE, typename T>
 class TMatDebug {
 public:
+    friend std::ostream& operator<<(std::ostream& stream, const BASE<T>& m) {
+        for (size_t row = 0; row < BASE<T>::NUM_ROWS; ++row) {
+            if (row != 0) {
+                stream << std::endl;
+            }
+            if (row == 0) {
+                stream << "/ ";
+            } else if (row == BASE<T>::NUM_ROWS-1) {
+                stream << "\\ ";
+            } else {
+                stream << "| ";
+            }
+            for (size_t col = 0; col < BASE<T>::NUM_COLS; ++col) {
+                stream << std::setw(10) << std::to_string(m[col][row]);
+            }
+            if (row == 0) {
+                stream << " \\";
+            } else if (row == BASE<T>::NUM_ROWS-1) {
+                stream << " /";
+            } else {
+                stream << " |";
+            }
+        }
+        return stream;
+    }
+
     String8 asString() const {
-        return matrix::asString( static_cast< const BASE<T>& >(*this) );
+        return matrix::asString(static_cast<const BASE<T>&>(*this));
     }
 };
 
 // -------------------------------------------------------------------------------------
-}; // namespace android
+}  // namespace details
+}  // namespace android
 
+#undef LIKELY
+#undef UNLIKELY
 #undef PURE
 
-#endif /* UI_TMAT_HELPERS_H */
+#endif  // UI_TMATHELPERS_H_
diff --git a/include/ui/TQuatHelpers.h b/include/ui/TQuatHelpers.h
new file mode 100644
index 0000000..2f0f70f
--- /dev/null
+++ b/include/ui/TQuatHelpers.h
@@ -0,0 +1,304 @@
+/*
+ * Copyright 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#ifndef UI_TQUATHELPERS_H_
+#define UI_TQUATHELPERS_H_
+
+#include <math.h>
+#include <stdint.h>
+#include <sys/types.h>
+
+#include <iostream>
+
+#include <ui/vec3.h>
+
+#define PURE __attribute__((pure))
+
+namespace android {
+namespace details {
+// -------------------------------------------------------------------------------------
+
+/*
+ * No user serviceable parts here.
+ *
+ * Don't use this file directly, instead include ui/quat.h
+ */
+
+
+/*
+ * TQuatProductOperators implements basic arithmetic and basic compound assignment
+ * operators on a quaternion of type BASE<T>.
+ *
+ * BASE only needs to implement operator[] and size().
+ * By simply inheriting from TQuatProductOperators<BASE, T> BASE will automatically
+ * get all the functionality here.
+ */
+
+template <template<typename T> class QUATERNION, typename T>
+class TQuatProductOperators {
+public:
+    /* compound assignment from a another quaternion of the same size but different
+     * element type.
+     */
+    template <typename OTHER>
+    QUATERNION<T>& operator *= (const QUATERNION<OTHER>& r) {
+        QUATERNION<T>& q = static_cast<QUATERNION<T>&>(*this);
+        q = q * r;
+        return q;
+    }
+
+    /* compound assignment products by a scalar
+     */
+    QUATERNION<T>& operator *= (T v) {
+        QUATERNION<T>& lhs = static_cast<QUATERNION<T>&>(*this);
+        for (size_t i = 0; i < QUATERNION<T>::size(); i++) {
+            lhs[i] *= v;
+        }
+        return lhs;
+    }
+    QUATERNION<T>& operator /= (T v) {
+        QUATERNION<T>& lhs = static_cast<QUATERNION<T>&>(*this);
+        for (size_t i = 0; i < QUATERNION<T>::size(); i++) {
+            lhs[i] /= v;
+        }
+        return lhs;
+    }
+
+    /*
+     * NOTE: the functions below ARE NOT member methods. They are friend functions
+     * with they definition inlined with their declaration. This makes these
+     * template functions available to the compiler when (and only when) this class
+     * is instantiated, at which point they're only templated on the 2nd parameter
+     * (the first one, BASE<T> being known).
+     */
+
+    /* The operators below handle operation between quaternion of the same size
+     * but of a different element type.
+     */
+    template<typename RT>
+    friend inline
+    constexpr QUATERNION<T> PURE operator *(const QUATERNION<T>& q, const QUATERNION<RT>& r) {
+        // could be written as:
+        //  return QUATERNION<T>(
+        //            q.w*r.w - dot(q.xyz, r.xyz),
+        //            q.w*r.xyz + r.w*q.xyz + cross(q.xyz, r.xyz));
+
+        return QUATERNION<T>(
+                q.w*r.w - q.x*r.x - q.y*r.y - q.z*r.z,
+                q.w*r.x + q.x*r.w + q.y*r.z - q.z*r.y,
+                q.w*r.y - q.x*r.z + q.y*r.w + q.z*r.x,
+                q.w*r.z + q.x*r.y - q.y*r.x + q.z*r.w);
+    }
+
+    template<typename RT>
+    friend inline
+    constexpr TVec3<T> PURE operator *(const QUATERNION<T>& q, const TVec3<RT>& v) {
+        // note: if q is known to be a unit quaternion, then this simplifies to:
+        //  TVec3<T> t = 2 * cross(q.xyz, v)
+        //  return v + (q.w * t) + cross(q.xyz, t)
+        return imaginary(q * QUATERNION<T>(v, 0) * inverse(q));
+    }
+
+
+    /* For quaternions, we use explicit "by a scalar" products because it's much faster
+     * than going (implicitly) through the quaternion multiplication.
+     * For reference: we could use the code below instead, but it would be a lot slower.
+     *  friend inline
+     *  constexpr BASE<T> PURE operator *(const BASE<T>& q, const BASE<T>& r) {
+     *      return BASE<T>(
+     *              q.w*r.w - q.x*r.x - q.y*r.y - q.z*r.z,
+     *              q.w*r.x + q.x*r.w + q.y*r.z - q.z*r.y,
+     *              q.w*r.y - q.x*r.z + q.y*r.w + q.z*r.x,
+     *              q.w*r.z + q.x*r.y - q.y*r.x + q.z*r.w);
+     *
+     */
+    friend inline
+    constexpr QUATERNION<T> PURE operator *(QUATERNION<T> q, T scalar) {
+        // don't pass q by reference because we need a copy anyways
+        return q *= scalar;
+    }
+    friend inline
+    constexpr QUATERNION<T> PURE operator *(T scalar, QUATERNION<T> q) {
+        // don't pass q by reference because we need a copy anyways
+        return q *= scalar;
+    }
+
+    friend inline
+    constexpr QUATERNION<T> PURE operator /(QUATERNION<T> q, T scalar) {
+        // don't pass q by reference because we need a copy anyways
+        return q /= scalar;
+    }
+};
+
+
+/*
+ * TQuatFunctions implements functions on a quaternion of type BASE<T>.
+ *
+ * BASE only needs to implement operator[] and size().
+ * By simply inheriting from TQuatFunctions<BASE, T> BASE will automatically
+ * get all the functionality here.
+ */
+template <template<typename T> class QUATERNION, typename T>
+class TQuatFunctions {
+public:
+    /*
+     * NOTE: the functions below ARE NOT member methods. They are friend functions
+     * with they definition inlined with their declaration. This makes these
+     * template functions available to the compiler when (and only when) this class
+     * is instantiated, at which point they're only templated on the 2nd parameter
+     * (the first one, BASE<T> being known).
+     */
+
+    template<typename RT>
+    friend inline
+    constexpr T PURE dot(const QUATERNION<T>& p, const QUATERNION<RT>& q) {
+        return p.x * q.x +
+               p.y * q.y +
+               p.z * q.z +
+               p.w * q.w;
+    }
+
+    friend inline
+    constexpr T PURE norm(const QUATERNION<T>& q) {
+        return std::sqrt( dot(q, q) );
+    }
+
+    friend inline
+    constexpr T PURE length(const QUATERNION<T>& q) {
+        return norm(q);
+    }
+
+    friend inline
+    constexpr T PURE length2(const QUATERNION<T>& q) {
+        return dot(q, q);
+    }
+
+    friend inline
+    constexpr QUATERNION<T> PURE normalize(const QUATERNION<T>& q) {
+        return length(q) ? q / length(q) : QUATERNION<T>(1);
+    }
+
+    friend inline
+    constexpr QUATERNION<T> PURE conj(const QUATERNION<T>& q) {
+        return QUATERNION<T>(q.w, -q.x, -q.y, -q.z);
+    }
+
+    friend inline
+    constexpr QUATERNION<T> PURE inverse(const QUATERNION<T>& q) {
+        return conj(q) * (1 / dot(q, q));
+    }
+
+    friend inline
+    constexpr T PURE real(const QUATERNION<T>& q) {
+        return q.w;
+    }
+
+    friend inline
+    constexpr TVec3<T> PURE imaginary(const QUATERNION<T>& q) {
+        return q.xyz;
+    }
+
+    friend inline
+    constexpr QUATERNION<T> PURE unreal(const QUATERNION<T>& q) {
+        return QUATERNION<T>(q.xyz, 0);
+    }
+
+    friend inline
+    constexpr QUATERNION<T> PURE cross(const QUATERNION<T>& p, const QUATERNION<T>& q) {
+        return unreal(p*q);
+    }
+
+    friend inline
+    QUATERNION<T> PURE exp(const QUATERNION<T>& q) {
+        const T nq(norm(q.xyz));
+        return std::exp(q.w)*QUATERNION<T>((sin(nq)/nq)*q.xyz, cos(nq));
+    }
+
+    friend inline
+    QUATERNION<T> PURE log(const QUATERNION<T>& q) {
+        const T nq(norm(q));
+        return QUATERNION<T>((std::acos(q.w/nq)/norm(q.xyz))*q.xyz, log(nq));
+    }
+
+    friend inline
+    QUATERNION<T> PURE pow(const QUATERNION<T>& q, T a) {
+        // could also be computed as: exp(a*log(q));
+        const T nq(norm(q));
+        const T theta(a*std::acos(q.w / nq));
+        return std::pow(nq, a) * QUATERNION<T>(normalize(q.xyz) * std::sin(theta), std::cos(theta));
+    }
+
+    friend inline
+    QUATERNION<T> PURE slerp(const QUATERNION<T>& p, const QUATERNION<T>& q, T t) {
+        // could also be computed as: pow(q * inverse(p), t) * p;
+        const T d = dot(p, q);
+        const T npq = sqrt(dot(p, p) * dot(q, q));  // ||p|| * ||q||
+        const T a = std::acos(std::abs(d) / npq);
+        const T a0 = a * (1 - t);
+        const T a1 = a * t;
+        const T isina = 1 / sin(a);
+        const T s0 = std::sin(a0) * isina;
+        const T s1 = std::sin(a1) * isina;
+        // ensure we're taking the "short" side
+        return normalize(s0 * p + ((d < 0) ? (-s1) : (s1)) * q);
+    }
+
+    friend inline
+    constexpr QUATERNION<T> PURE lerp(const QUATERNION<T>& p, const QUATERNION<T>& q, T t) {
+        return ((1 - t) * p) + (t * q);
+    }
+
+    friend inline
+    constexpr QUATERNION<T> PURE nlerp(const QUATERNION<T>& p, const QUATERNION<T>& q, T t) {
+        return normalize(lerp(p, q, t));
+    }
+
+    friend inline
+    constexpr QUATERNION<T> PURE positive(const QUATERNION<T>& q) {
+        return q.w < 0 ? -q : q;
+    }
+};
+
+/*
+ * TQuatDebug implements functions on a vector of type BASE<T>.
+ *
+ * BASE only needs to implement operator[] and size().
+ * By simply inheriting from TQuatDebug<BASE, T> BASE will automatically
+ * get all the functionality here.
+ */
+template <template<typename T> class QUATERNION, typename T>
+class TQuatDebug {
+public:
+    /*
+     * NOTE: the functions below ARE NOT member methods. They are friend functions
+     * with they definition inlined with their declaration. This makes these
+     * template functions available to the compiler when (and only when) this class
+     * is instantiated, at which point they're only templated on the 2nd parameter
+     * (the first one, BASE<T> being known).
+     */
+    friend std::ostream& operator<< (std::ostream& stream, const QUATERNION<T>& q) {
+        return stream << "< " << q.w << " + " << q.x << "i + " << q.y << "j + " << q.z << "k >";
+    }
+};
+#undef PURE
+
+// -------------------------------------------------------------------------------------
+}  // namespace details
+}  // namespace android
+
+
+#endif  // UI_TQUATHELPERS_H_
diff --git a/include/ui/TVecHelpers.h b/include/ui/TVecHelpers.h
index bb7dbfc..1eaa6e6 100644
--- a/include/ui/TVecHelpers.h
+++ b/include/ui/TVecHelpers.h
@@ -14,22 +14,22 @@
  * limitations under the License.
  */
 
-#ifndef TVEC_IMPLEMENTATION
-#error "Don't include TVecHelpers.h directly. use ui/vec*.h instead"
-#else
-#undef TVEC_IMPLEMENTATION
-#endif
 
+#ifndef UI_TVECHELPERS_H_
+#define UI_TVECHELPERS_H_
 
-#ifndef UI_TVEC_HELPERS_H
-#define UI_TVEC_HELPERS_H
-
+#include <math.h>
 #include <stdint.h>
 #include <sys/types.h>
 
+#include <cmath>
+#include <limits>
+#include <iostream>
+
 #define PURE __attribute__((pure))
 
 namespace android {
+namespace details {
 // -------------------------------------------------------------------------------------
 
 /*
@@ -39,24 +39,6 @@
  */
 
 /*
- * This class casts itself into anything and assign itself from anything!
- * Use with caution!
- */
-template <typename TYPE>
-struct Impersonator {
-    Impersonator& operator = (const TYPE& rhs) {
-        reinterpret_cast<TYPE&>(*this) = rhs;
-        return *this;
-    }
-    operator TYPE& () {
-        return reinterpret_cast<TYPE&>(*this);
-    }
-    operator TYPE const& () const {
-        return reinterpret_cast<TYPE const&>(*this);
-    }
-};
-
-/*
  * TVec{Add|Product}Operators implements basic arithmetic and basic compound assignments
  * operators on a vector of type BASE<T>.
  *
@@ -65,27 +47,27 @@
  * get all the functionality here.
  */
 
-template <template<typename T> class BASE, typename T>
+template <template<typename T> class VECTOR, typename T>
 class TVecAddOperators {
 public:
     /* compound assignment from a another vector of the same size but different
      * element type.
      */
-    template <typename OTHER>
-    BASE<T>& operator += (const BASE<OTHER>& v) {
-        BASE<T>& rhs = static_cast<BASE<T>&>(*this);
-        for (size_t i=0 ; i<BASE<T>::size() ; i++) {
-            rhs[i] += v[i];
+    template<typename OTHER>
+    VECTOR<T>& operator +=(const VECTOR<OTHER>& v) {
+        VECTOR<T>& lhs = static_cast<VECTOR<T>&>(*this);
+        for (size_t i = 0; i < lhs.size(); i++) {
+            lhs[i] += v[i];
         }
-        return rhs;
+        return lhs;
     }
-    template <typename OTHER>
-    BASE<T>& operator -= (const BASE<OTHER>& v) {
-        BASE<T>& rhs = static_cast<BASE<T>&>(*this);
-        for (size_t i=0 ; i<BASE<T>::size() ; i++) {
-            rhs[i] -= v[i];
+    template<typename OTHER>
+    VECTOR<T>& operator -=(const VECTOR<OTHER>& v) {
+        VECTOR<T>& lhs = static_cast<VECTOR<T>&>(*this);
+        for (size_t i = 0; i < lhs.size(); i++) {
+            lhs[i] -= v[i];
         }
-        return rhs;
+        return lhs;
     }
 
     /* compound assignment from a another vector of the same type.
@@ -93,19 +75,19 @@
      * like "vector *= scalar" by letting the compiler implicitly convert a scalar
      * to a vector (assuming the BASE<T> allows it).
      */
-    BASE<T>& operator += (const BASE<T>& v) {
-        BASE<T>& rhs = static_cast<BASE<T>&>(*this);
-        for (size_t i=0 ; i<BASE<T>::size() ; i++) {
-            rhs[i] += v[i];
+    VECTOR<T>& operator +=(const VECTOR<T>& v) {
+        VECTOR<T>& lhs = static_cast<VECTOR<T>&>(*this);
+        for (size_t i = 0; i < lhs.size(); i++) {
+            lhs[i] += v[i];
         }
-        return rhs;
+        return lhs;
     }
-    BASE<T>& operator -= (const BASE<T>& v) {
-        BASE<T>& rhs = static_cast<BASE<T>&>(*this);
-        for (size_t i=0 ; i<BASE<T>::size() ; i++) {
-            rhs[i] -= v[i];
+    VECTOR<T>& operator -=(const VECTOR<T>& v) {
+        VECTOR<T>& lhs = static_cast<VECTOR<T>&>(*this);
+        for (size_t i = 0; i < lhs.size(); i++) {
+            lhs[i] -= v[i];
         }
-        return rhs;
+        return lhs;
     }
 
     /*
@@ -116,57 +98,57 @@
      * (the first one, BASE<T> being known).
      */
 
-    /* The operators below handle operation between vectors of the same side
+    /* The operators below handle operation between vectors of the same size
      * but of a different element type.
      */
     template<typename RT>
-    friend inline
-    BASE<T> PURE operator +(const BASE<T>& lv, const BASE<RT>& rv) {
-        return BASE<T>(lv) += rv;
+    friend inline constexpr VECTOR<T> PURE operator +(VECTOR<T> lv, const VECTOR<RT>& rv) {
+        // don't pass lv by reference because we need a copy anyways
+        return lv += rv;
     }
     template<typename RT>
-    friend inline
-    BASE<T> PURE operator -(const BASE<T>& lv, const BASE<RT>& rv) {
-        return BASE<T>(lv) -= rv;
+    friend inline constexpr VECTOR<T> PURE operator -(VECTOR<T> lv, const VECTOR<RT>& rv) {
+        // don't pass lv by reference because we need a copy anyways
+        return lv -= rv;
     }
 
     /* The operators below (which are not templates once this class is instanced,
      * i.e.: BASE<T> is known) can be used for implicit conversion on both sides.
-     * These handle operations like "vector * scalar" and "scalar * vector" by
+     * These handle operations like "vector + scalar" and "scalar + vector" by
      * letting the compiler implicitly convert a scalar to a vector (assuming
      * the BASE<T> allows it).
      */
-    friend inline
-    BASE<T> PURE operator +(const BASE<T>& lv, const BASE<T>& rv) {
-        return BASE<T>(lv) += rv;
+    friend inline constexpr VECTOR<T> PURE operator +(VECTOR<T> lv, const VECTOR<T>& rv) {
+        // don't pass lv by reference because we need a copy anyways
+        return lv += rv;
     }
-    friend inline
-    BASE<T> PURE operator -(const BASE<T>& lv, const BASE<T>& rv) {
-        return BASE<T>(lv) -= rv;
+    friend inline constexpr VECTOR<T> PURE operator -(VECTOR<T> lv, const VECTOR<T>& rv) {
+        // don't pass lv by reference because we need a copy anyways
+        return lv -= rv;
     }
 };
 
-template <template<typename T> class BASE, typename T>
+template<template<typename T> class VECTOR, typename T>
 class TVecProductOperators {
 public:
     /* compound assignment from a another vector of the same size but different
      * element type.
      */
-    template <typename OTHER>
-    BASE<T>& operator *= (const BASE<OTHER>& v) {
-        BASE<T>& rhs = static_cast<BASE<T>&>(*this);
-        for (size_t i=0 ; i<BASE<T>::size() ; i++) {
-            rhs[i] *= v[i];
+    template<typename OTHER>
+    VECTOR<T>& operator *=(const VECTOR<OTHER>& v) {
+        VECTOR<T>& lhs = static_cast<VECTOR<T>&>(*this);
+        for (size_t i = 0; i < lhs.size(); i++) {
+            lhs[i] *= v[i];
         }
-        return rhs;
+        return lhs;
     }
-    template <typename OTHER>
-    BASE<T>& operator /= (const BASE<OTHER>& v) {
-        BASE<T>& rhs = static_cast<BASE<T>&>(*this);
-        for (size_t i=0 ; i<BASE<T>::size() ; i++) {
-            rhs[i] /= v[i];
+    template<typename OTHER>
+    VECTOR<T>& operator /=(const VECTOR<OTHER>& v) {
+        VECTOR<T>& lhs = static_cast<VECTOR<T>&>(*this);
+        for (size_t i = 0; i < lhs.size(); i++) {
+            lhs[i] /= v[i];
         }
-        return rhs;
+        return lhs;
     }
 
     /* compound assignment from a another vector of the same type.
@@ -174,19 +156,19 @@
      * like "vector *= scalar" by letting the compiler implicitly convert a scalar
      * to a vector (assuming the BASE<T> allows it).
      */
-    BASE<T>& operator *= (const BASE<T>& v) {
-        BASE<T>& rhs = static_cast<BASE<T>&>(*this);
-        for (size_t i=0 ; i<BASE<T>::size() ; i++) {
-            rhs[i] *= v[i];
+    VECTOR<T>& operator *=(const VECTOR<T>& v) {
+        VECTOR<T>& lhs = static_cast<VECTOR<T>&>(*this);
+        for (size_t i = 0; i < lhs.size(); i++) {
+            lhs[i] *= v[i];
         }
-        return rhs;
+        return lhs;
     }
-    BASE<T>& operator /= (const BASE<T>& v) {
-        BASE<T>& rhs = static_cast<BASE<T>&>(*this);
-        for (size_t i=0 ; i<BASE<T>::size() ; i++) {
-            rhs[i] /= v[i];
+    VECTOR<T>& operator /=(const VECTOR<T>& v) {
+        VECTOR<T>& lhs = static_cast<VECTOR<T>&>(*this);
+        for (size_t i = 0; i < lhs.size(); i++) {
+            lhs[i] /= v[i];
         }
-        return rhs;
+        return lhs;
     }
 
     /*
@@ -197,18 +179,18 @@
      * (the first one, BASE<T> being known).
      */
 
-    /* The operators below handle operation between vectors of the same side
+    /* The operators below handle operation between vectors of the same size
      * but of a different element type.
      */
     template<typename RT>
-    friend inline
-    BASE<T> PURE operator *(const BASE<T>& lv, const BASE<RT>& rv) {
-        return BASE<T>(lv) *= rv;
+    friend inline constexpr VECTOR<T> PURE operator *(VECTOR<T> lv, const VECTOR<RT>& rv) {
+        // don't pass lv by reference because we need a copy anyways
+        return lv *= rv;
     }
     template<typename RT>
-    friend inline
-    BASE<T> PURE operator /(const BASE<T>& lv, const BASE<RT>& rv) {
-        return BASE<T>(lv) /= rv;
+    friend inline constexpr VECTOR<T> PURE operator /(VECTOR<T> lv, const VECTOR<RT>& rv) {
+        // don't pass lv by reference because we need a copy anyways
+        return lv /= rv;
     }
 
     /* The operators below (which are not templates once this class is instanced,
@@ -217,13 +199,13 @@
      * letting the compiler implicitly convert a scalar to a vector (assuming
      * the BASE<T> allows it).
      */
-    friend inline
-    BASE<T> PURE operator *(const BASE<T>& lv, const BASE<T>& rv) {
-        return BASE<T>(lv) *= rv;
+    friend inline constexpr VECTOR<T> PURE operator *(VECTOR<T> lv, const VECTOR<T>& rv) {
+        // don't pass lv by reference because we need a copy anyways
+        return lv *= rv;
     }
-    friend inline
-    BASE<T> PURE operator /(const BASE<T>& lv, const BASE<T>& rv) {
-        return BASE<T>(lv) /= rv;
+    friend inline constexpr VECTOR<T> PURE operator /(VECTOR<T> lv, const VECTOR<T>& rv) {
+        // don't pass lv by reference because we need a copy anyways
+        return lv /= rv;
     }
 };
 
@@ -236,34 +218,33 @@
  *
  * These operators are implemented as friend functions of TVecUnaryOperators<BASE, T>
  */
-template <template<typename T> class BASE, typename T>
+template<template<typename T> class VECTOR, typename T>
 class TVecUnaryOperators {
 public:
-    BASE<T>& operator ++ () {
-        BASE<T>& rhs = static_cast<BASE<T>&>(*this);
-        for (size_t i=0 ; i<BASE<T>::size() ; i++) {
+    VECTOR<T>& operator ++() {
+        VECTOR<T>& rhs = static_cast<VECTOR<T>&>(*this);
+        for (size_t i = 0; i < rhs.size(); i++) {
             ++rhs[i];
         }
         return rhs;
     }
-    BASE<T>& operator -- () {
-        BASE<T>& rhs = static_cast<BASE<T>&>(*this);
-        for (size_t i=0 ; i<BASE<T>::size() ; i++) {
+    VECTOR<T>& operator --() {
+        VECTOR<T>& rhs = static_cast<VECTOR<T>&>(*this);
+        for (size_t i = 0; i < rhs.size(); i++) {
             --rhs[i];
         }
         return rhs;
     }
-    BASE<T> operator - () const {
-        BASE<T> r(BASE<T>::NO_INIT);
-        BASE<T> const& rv(static_cast<BASE<T> const&>(*this));
-        for (size_t i=0 ; i<BASE<T>::size() ; i++) {
+    VECTOR<T> operator -() const {
+        VECTOR<T> r(VECTOR<T>::NO_INIT);
+        VECTOR<T> const& rv(static_cast<VECTOR<T> const&>(*this));
+        for (size_t i = 0; i < r.size(); i++) {
             r[i] = -rv[i];
         }
         return r;
     }
 };
 
-
 /*
  * TVecComparisonOperators implements relational/comparison operators
  * on a vector of type BASE<T>.
@@ -272,7 +253,7 @@
  * By simply inheriting from TVecComparisonOperators<BASE, T> BASE will automatically
  * get all the functionality here.
  */
-template <template<typename T> class BASE, typename T>
+template<template<typename T> class VECTOR, typename T>
 class TVecComparisonOperators {
 public:
     /*
@@ -284,8 +265,8 @@
      */
     template<typename RT>
     friend inline
-    bool PURE operator ==(const BASE<T>& lv, const BASE<RT>& rv) {
-        for (size_t i = 0; i < BASE<T>::size(); i++)
+    bool PURE operator ==(const VECTOR<T>& lv, const VECTOR<RT>& rv) {
+        for (size_t i = 0; i < lv.size(); i++)
             if (lv[i] != rv[i])
                 return false;
         return true;
@@ -293,42 +274,47 @@
 
     template<typename RT>
     friend inline
-    bool PURE operator !=(const BASE<T>& lv, const BASE<RT>& rv) {
+    bool PURE operator !=(const VECTOR<T>& lv, const VECTOR<RT>& rv) {
         return !operator ==(lv, rv);
     }
 
     template<typename RT>
     friend inline
-    bool PURE operator >(const BASE<T>& lv, const BASE<RT>& rv) {
-        for (size_t i = 0; i < BASE<T>::size(); i++)
-            if (lv[i] <= rv[i])
-                return false;
-        return true;
+    bool PURE operator >(const VECTOR<T>& lv, const VECTOR<RT>& rv) {
+        for (size_t i = 0; i < lv.size(); i++) {
+            if (lv[i] == rv[i]) {
+                continue;
+            }
+            return lv[i] > rv[i];
+        }
+        return false;
     }
 
     template<typename RT>
     friend inline
-    bool PURE operator <=(const BASE<T>& lv, const BASE<RT>& rv) {
+    constexpr bool PURE operator <=(const VECTOR<T>& lv, const VECTOR<RT>& rv) {
         return !(lv > rv);
     }
 
     template<typename RT>
     friend inline
-    bool PURE operator <(const BASE<T>& lv, const BASE<RT>& rv) {
-        for (size_t i = 0; i < BASE<T>::size(); i++)
-            if (lv[i] >= rv[i])
-                return false;
-        return true;
+    bool PURE operator <(const VECTOR<T>& lv, const VECTOR<RT>& rv) {
+        for (size_t i = 0; i < lv.size(); i++) {
+            if (lv[i] == rv[i]) {
+                continue;
+            }
+            return lv[i] < rv[i];
+        }
+        return false;
     }
 
     template<typename RT>
     friend inline
-    bool PURE operator >=(const BASE<T>& lv, const BASE<RT>& rv) {
+    constexpr bool PURE operator >=(const VECTOR<T>& lv, const VECTOR<RT>& rv) {
         return !(lv < rv);
     }
 };
 
-
 /*
  * TVecFunctions implements functions on a vector of type BASE<T>.
  *
@@ -336,7 +322,7 @@
  * By simply inheriting from TVecFunctions<BASE, T> BASE will automatically
  * get all the functionality here.
  */
-template <template<typename T> class BASE, typename T>
+template<template<typename T> class VECTOR, typename T>
 class TVecFunctions {
 public:
     /*
@@ -347,35 +333,180 @@
      * (the first one, BASE<T> being known).
      */
     template<typename RT>
-    friend inline
-    T PURE dot(const BASE<T>& lv, const BASE<RT>& rv) {
+    friend inline T PURE dot(const VECTOR<T>& lv, const VECTOR<RT>& rv) {
         T r(0);
-        for (size_t i = 0; i < BASE<T>::size(); i++)
-            r += lv[i]*rv[i];
+        for (size_t i = 0; i < lv.size(); i++) {
+            //r = std::fma(lv[i], rv[i], r);
+            r += lv[i] * rv[i];
+        }
         return r;
     }
 
-    friend inline
-    T PURE length(const BASE<T>& lv) {
-        return sqrt( dot(lv, lv) );
+    friend inline constexpr T PURE norm(const VECTOR<T>& lv) {
+        return std::sqrt(dot(lv, lv));
+    }
+
+    friend inline constexpr T PURE length(const VECTOR<T>& lv) {
+        return norm(lv);
+    }
+
+    friend inline constexpr T PURE norm2(const VECTOR<T>& lv) {
+        return dot(lv, lv);
+    }
+
+    friend inline constexpr T PURE length2(const VECTOR<T>& lv) {
+        return norm2(lv);
     }
 
     template<typename RT>
-    friend inline
-    T PURE distance(const BASE<T>& lv, const BASE<RT>& rv) {
+    friend inline constexpr T PURE distance(const VECTOR<T>& lv, const VECTOR<RT>& rv) {
         return length(rv - lv);
     }
 
-    friend inline
-    BASE<T> PURE normalize(const BASE<T>& lv) {
-        return lv * (1 / length(lv));
+    template<typename RT>
+    friend inline constexpr T PURE distance2(const VECTOR<T>& lv, const VECTOR<RT>& rv) {
+        return length2(rv - lv);
+    }
+
+    friend inline constexpr VECTOR<T> PURE normalize(const VECTOR<T>& lv) {
+        return lv * (T(1) / length(lv));
+    }
+
+    friend inline VECTOR<T> PURE rcp(VECTOR<T> v) {
+        return T(1) / v;
+    }
+
+    friend inline VECTOR<T> PURE abs(VECTOR<T> v) {
+        for (size_t i=0 ; i<v.size() ; i++) {
+            v[i] = std::abs(v[i]);
+        }
+        return v;
+    }
+
+    friend inline VECTOR<T> PURE floor(VECTOR<T> v) {
+        for (size_t i=0 ; i<v.size() ; i++) {
+            v[i] = std::floor(v[i]);
+        }
+        return v;
+    }
+
+    friend inline VECTOR<T> PURE ceil(VECTOR<T> v) {
+        for (size_t i=0 ; i<v.size() ; i++) {
+            v[i] = std::ceil(v[i]);
+        }
+        return v;
+    }
+
+    friend inline VECTOR<T> PURE round(VECTOR<T> v) {
+        for (size_t i=0 ; i<v.size() ; i++) {
+            v[i] = std::round(v[i]);
+        }
+        return v;
+    }
+
+    friend inline VECTOR<T> PURE inversesqrt(VECTOR<T> v) {
+        for (size_t i=0 ; i<v.size() ; i++) {
+            v[i] = T(1) / std::sqrt(v[i]);
+        }
+        return v;
+    }
+
+    friend inline VECTOR<T> PURE sqrt(VECTOR<T> v) {
+        for (size_t i=0 ; i<v.size() ; i++) {
+            v[i] = std::sqrt(v[i]);
+        }
+        return v;
+    }
+
+    friend inline VECTOR<T> PURE pow(VECTOR<T> v, T p) {
+        for (size_t i=0 ; i<v.size() ; i++) {
+            v[i] = std::pow(v[i], p);
+        }
+        return v;
+    }
+
+    friend inline VECTOR<T> PURE saturate(const VECTOR<T>& lv) {
+        return clamp(lv, T(0), T(1));
+    }
+
+    friend inline VECTOR<T> PURE clamp(VECTOR<T> v, T min, T max) {
+        for (size_t i=0 ; i< v.size() ; i++) {
+            v[i] = std::min(max, std::max(min, v[i]));
+        }
+        return v;
+    }
+
+    friend inline VECTOR<T> PURE fma(const VECTOR<T>& lv, const VECTOR<T>& rv, VECTOR<T> a) {
+        for (size_t i=0 ; i<lv.size() ; i++) {
+            //a[i] = std::fma(lv[i], rv[i], a[i]);
+            a[i] += (lv[i] * rv[i]);
+        }
+        return a;
+    }
+
+    friend inline VECTOR<T> PURE min(const VECTOR<T>& u, VECTOR<T> v) {
+        for (size_t i=0 ; i<v.size() ; i++) {
+            v[i] = std::min(u[i], v[i]);
+        }
+        return v;
+    }
+
+    friend inline VECTOR<T> PURE max(const VECTOR<T>& u, VECTOR<T> v) {
+        for (size_t i=0 ; i<v.size() ; i++) {
+            v[i] = std::max(u[i], v[i]);
+        }
+        return v;
+    }
+
+    friend inline T PURE max(const VECTOR<T>& v) {
+        T r(std::numeric_limits<T>::lowest());
+        for (size_t i=0 ; i<v.size() ; i++) {
+            r = std::max(r, v[i]);
+        }
+        return r;
+    }
+
+    friend inline T PURE min(const VECTOR<T>& v) {
+        T r(std::numeric_limits<T>::max());
+        for (size_t i=0 ; i<v.size() ; i++) {
+            r = std::min(r, v[i]);
+        }
+        return r;
+    }
+};
+
+/*
+ * TVecDebug implements functions on a vector of type BASE<T>.
+ *
+ * BASE only needs to implement operator[] and size().
+ * By simply inheriting from TVecDebug<BASE, T> BASE will automatically
+ * get all the functionality here.
+ */
+template<template<typename T> class VECTOR, typename T>
+class TVecDebug {
+public:
+    /*
+     * NOTE: the functions below ARE NOT member methods. They are friend functions
+     * with they definition inlined with their declaration. This makes these
+     * template functions available to the compiler when (and only when) this class
+     * is instantiated, at which point they're only templated on the 2nd parameter
+     * (the first one, BASE<T> being known).
+     */
+    friend std::ostream& operator<<(std::ostream& stream, const VECTOR<T>& v) {
+        stream << "< ";
+        for (size_t i = 0; i < v.size() - 1; i++) {
+            stream << T(v[i]) << ", ";
+        }
+        stream << T(v[v.size() - 1]) << " >";
+        return stream;
     }
 };
 
 #undef PURE
 
 // -------------------------------------------------------------------------------------
-}; // namespace android
+}  // namespace details
+}  // namespace android
 
 
-#endif /* UI_TVEC_HELPERS_H */
+#endif  // UI_TVECHELPERS_H_
diff --git a/include/ui/half.h b/include/ui/half.h
new file mode 100644
index 0000000..cbc1ef0
--- /dev/null
+++ b/include/ui/half.h
@@ -0,0 +1,201 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef UI_HALF_H
+#define UI_HALF_H
+
+#include <stdint.h>
+#include <iosfwd>
+#include <limits>
+#include <type_traits>
+
+#ifdef __cplusplus
+#   define LIKELY( exp )    (__builtin_expect( !!(exp), true ))
+#   define UNLIKELY( exp )  (__builtin_expect( !!(exp), false ))
+#else
+#   define LIKELY( exp )    (__builtin_expect( !!(exp), 1 ))
+#   define UNLIKELY( exp )  (__builtin_expect( !!(exp), 0 ))
+#endif
+
+namespace android {
+
+/*
+ * half-float
+ *
+ *  1   5       10
+ * +-+------+------------+
+ * |s|eee.ee|mm.mmmm.mmmm|
+ * +-+------+------------+
+ *
+ * minimum (denormal) value: 2^-24 = 5.96e-8
+ * minimum (normal) value:   2^-14 = 6.10e-5
+ * maximum value:            2-2^-10 = 65504
+ *
+ * Integers between 0 and 2048 can be represented exactly
+ */
+class half {
+    struct fp16 {
+        uint16_t bits = 0;
+        fp16() noexcept = default;
+        explicit constexpr fp16(uint16_t bits) noexcept : bits(bits) { }
+        void setS(unsigned int s) noexcept { bits = uint16_t((bits & 0x7FFF) | (s<<15)); }
+        void setE(unsigned int s) noexcept { bits = uint16_t((bits & 0xE3FF) | (s<<10)); }
+        void setM(unsigned int s) noexcept { bits = uint16_t((bits & 0xFC00) | (s<< 0)); }
+        unsigned int getS() const noexcept { return  bits >> 15u; }
+        unsigned int getE() const noexcept { return (bits >> 10u) & 0x1Fu; }
+        unsigned int getM() const noexcept { return  bits         & 0x3FFu; }
+    };
+    struct fp32 {
+        union {
+            uint32_t bits = 0;
+            float fp;
+        };
+        fp32() noexcept = default;
+        explicit constexpr fp32(float f) : fp(f) { }
+        void setS(unsigned int s) noexcept { bits = uint32_t((bits & 0x7FFFFFFF) | (s<<31)); }
+        void setE(unsigned int s) noexcept { bits = uint32_t((bits & 0x807FFFFF) | (s<<23)); }
+        void setM(unsigned int s) noexcept { bits = uint32_t((bits & 0xFF800000) | (s<< 0)); }
+        unsigned int getS() const noexcept { return  bits >> 31u; }
+        unsigned int getE() const noexcept { return (bits >> 23u) & 0xFFu; }
+        unsigned int getM() const noexcept { return  bits         & 0x7FFFFFu; }
+    };
+
+public:
+    half(float v) noexcept : mBits(ftoh(v)) { }
+    operator float() const noexcept { return htof(mBits); }
+
+    uint16_t getBits() const noexcept { return mBits.bits; }
+    unsigned int getExponent() const noexcept { return mBits.getE(); }
+    unsigned int getMantissa() const noexcept { return mBits.getM(); }
+
+private:
+    friend class std::numeric_limits<half>;
+    friend half operator"" _hf(long double v);
+
+    enum Binary { binary };
+    explicit constexpr half(Binary, uint16_t bits) noexcept : mBits(bits) { }
+    static fp16 ftoh(float v) noexcept;
+    static float htof(fp16 v) noexcept;
+    fp16 mBits;
+};
+
+inline /* constexpr */ half::fp16 half::ftoh(float v) noexcept {
+    fp16 out;
+    fp32 in(v);
+    if (UNLIKELY(in.getE() == 0xFF)) { // inf or nan
+        out.setE(0x1F);
+        out.setM(in.getM() ? 0x200 : 0);
+    } else {
+        int e = in.getE() - 127 + 15;
+        if (e >= 0x1F) {
+            // overflow
+            out.setE(0x31); // +/- inf
+        } else if (e <= 0) {
+            // underflow
+            // flush to +/- 0
+        } else {
+            unsigned int m = in.getM();
+            out.setE(uint16_t(e));
+            out.setM(m >> 13);
+            if (m & 0x1000) {
+                // rounding
+                out.bits++;
+            }
+        }
+    }
+    out.setS(in.getS());
+    return out;
+}
+
+inline float half::htof(half::fp16 in) noexcept {
+    fp32 out;
+    if (UNLIKELY(in.getE() == 0x1F)) { // inf or nan
+        out.setE(0xFF);
+        out.setM(in.getM() ? 0x400000 : 0);
+    } else {
+        if (in.getE() == 0) {
+            if (in.getM()) {
+                // TODO: denormal half float, treat as zero for now
+                // (it's stupid because they can be represented as regular float)
+            }
+        } else {
+            int e = in.getE() - 15 + 127;
+            unsigned int m = in.getM();
+            out.setE(uint32_t(e));
+            out.setM(m << 13);
+        }
+    }
+    out.setS(in.getS());
+    return out.fp;
+}
+
+inline /* constexpr */ android::half operator"" _hf(long double v) {
+    return android::half(android::half::binary, android::half::ftoh(v).bits);
+}
+
+} // namespace android
+
+namespace std {
+
+template<> struct is_floating_point<android::half> : public std::true_type {};
+
+template<>
+class numeric_limits<android::half> {
+public:
+    typedef android::half type;
+
+    static constexpr const bool is_specialized = true;
+    static constexpr const bool is_signed = true;
+    static constexpr const bool is_integer = false;
+    static constexpr const bool is_exact = false;
+    static constexpr const bool has_infinity = true;
+    static constexpr const bool has_quiet_NaN = true;
+    static constexpr const bool has_signaling_NaN = false;
+    static constexpr const float_denorm_style has_denorm = denorm_absent;
+    static constexpr const bool has_denorm_loss = true;
+    static constexpr const bool is_iec559 = false;
+    static constexpr const bool is_bounded = true;
+    static constexpr const bool is_modulo = false;
+    static constexpr const bool traps = false;
+    static constexpr const bool tinyness_before = false;
+    static constexpr const float_round_style round_style = round_indeterminate;
+
+    static constexpr const int digits = 11;
+    static constexpr const int digits10 = 3;
+    static constexpr const int max_digits10 = 5;
+    static constexpr const int radix = 2;
+    static constexpr const int min_exponent = -13;
+    static constexpr const int min_exponent10 = -4;
+    static constexpr const int max_exponent = 16;
+    static constexpr const int max_exponent10 = 4;
+
+    inline static constexpr type round_error() noexcept { return android::half(android::half::binary, 0x3800); }
+    inline static constexpr type min() noexcept { return android::half(android::half::binary, 0x0400); }
+    inline static constexpr type max() noexcept { return android::half(android::half::binary, 0x7bff); }
+    inline static constexpr type lowest() noexcept { return android::half(android::half::binary, 0xfbff); }
+    inline static constexpr type epsilon() noexcept { return android::half(android::half::binary, 0x1400); }
+    inline static constexpr type infinity() noexcept { return android::half(android::half::binary, 0x7c00); }
+    inline static constexpr type quiet_NaN() noexcept { return android::half(android::half::binary, 0x7fff); }
+    inline static constexpr type denorm_min() noexcept { return android::half(android::half::binary, 0x0001); }
+    inline static constexpr type signaling_NaN() noexcept { return android::half(android::half::binary, 0x7dff); }
+};
+
+} // namespace std
+
+#undef LIKELY
+#undef UNLIKELY
+
+#endif // UI_HALF_H
diff --git a/include/ui/mat2.h b/include/ui/mat2.h
new file mode 100644
index 0000000..5ae73dc
--- /dev/null
+++ b/include/ui/mat2.h
@@ -0,0 +1,376 @@
+/*
+ * Copyright 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef UI_MAT2_H_
+#define UI_MAT2_H_
+
+#include <ui/TMatHelpers.h>
+#include <ui/vec2.h>
+#include <stdint.h>
+#include <sys/types.h>
+
+#define PURE __attribute__((pure))
+
+namespace android {
+// -------------------------------------------------------------------------------------
+namespace details {
+
+/**
+ * A 2x2 column-major matrix class.
+ *
+ * Conceptually a 2x2 matrix is a an array of 2 column vec2:
+ *
+ * mat2 m =
+ *      \f$
+ *      \left(
+ *      \begin{array}{cc}
+ *      m[0] & m[1] \\
+ *      \end{array}
+ *      \right)
+ *      \f$
+ *      =
+ *      \f$
+ *      \left(
+ *      \begin{array}{cc}
+ *      m[0][0] & m[1][0] \\
+ *      m[0][1] & m[1][1] \\
+ *      \end{array}
+ *      \right)
+ *      \f$
+ *      =
+ *      \f$
+ *      \left(
+ *      \begin{array}{cc}
+ *      m(0,0) & m(0,1) \\
+ *      m(1,0) & m(1,1) \\
+ *      \end{array}
+ *      \right)
+ *      \f$
+ *
+ * m[n] is the \f$ n^{th} \f$ column of the matrix and is a vec2.
+ *
+ */
+template <typename T>
+class TMat22 :  public TVecUnaryOperators<TMat22, T>,
+                public TVecComparisonOperators<TMat22, T>,
+                public TVecAddOperators<TMat22, T>,
+                public TMatProductOperators<TMat22, T>,
+                public TMatSquareFunctions<TMat22, T>,
+                public TMatHelpers<TMat22, T>,
+                public TMatDebug<TMat22, T> {
+public:
+    enum no_init { NO_INIT };
+    typedef T value_type;
+    typedef T& reference;
+    typedef T const& const_reference;
+    typedef size_t size_type;
+    typedef TVec2<T> col_type;
+    typedef TVec2<T> row_type;
+
+    static constexpr size_t COL_SIZE = col_type::SIZE;  // size of a column (i.e.: number of rows)
+    static constexpr size_t ROW_SIZE = row_type::SIZE;  // size of a row (i.e.: number of columns)
+    static constexpr size_t NUM_ROWS = COL_SIZE;
+    static constexpr size_t NUM_COLS = ROW_SIZE;
+
+private:
+    /*
+     *  <--  N columns  -->
+     *
+     *  a[0][0] a[1][0] a[2][0] ... a[N][0]    ^
+     *  a[0][1] a[1][1] a[2][1] ... a[N][1]    |
+     *  a[0][2] a[1][2] a[2][2] ... a[N][2]  M rows
+     *  ...                                    |
+     *  a[0][M] a[1][M] a[2][M] ... a[N][M]    v
+     *
+     *  COL_SIZE = M
+     *  ROW_SIZE = N
+     *  m[0] = [ a[0][0] a[0][1] a[0][2] ... a[0][M] ]
+     */
+
+    col_type m_value[NUM_COLS];
+
+public:
+    // array access
+    inline constexpr col_type const& operator[](size_t column) const {
+#if __cplusplus >= 201402L
+        // only possible in C++0x14 with constexpr
+        assert(column < NUM_COLS);
+#endif
+        return m_value[column];
+    }
+
+    inline col_type& operator[](size_t column) {
+        assert(column < NUM_COLS);
+        return m_value[column];
+    }
+
+    // -----------------------------------------------------------------------
+    // we want the compiler generated versions for these...
+    TMat22(const TMat22&) = default;
+    ~TMat22() = default;
+    TMat22& operator = (const TMat22&) = default;
+
+    /**
+     *  constructors
+     */
+
+    /**
+     * leaves object uninitialized. use with caution.
+     */
+    explicit
+    constexpr TMat22(no_init)
+            : m_value{ col_type(col_type::NO_INIT),
+                       col_type(col_type::NO_INIT) } {}
+
+
+    /**
+     * initialize to identity.
+     *
+     *      \f$
+     *      \left(
+     *      \begin{array}{cc}
+     *      1 & 0 \\
+     *      0 & 1 \\
+     *      \end{array}
+     *      \right)
+     *      \f$
+     */
+    TMat22();
+
+    /**
+     * initialize to Identity*scalar.
+     *
+     *      \f$
+     *      \left(
+     *      \begin{array}{cc}
+     *      v & 0 \\
+     *      0 & v \\
+     *      \end{array}
+     *      \right)
+     *      \f$
+     */
+    template<typename U>
+    explicit TMat22(U v);
+
+    /**
+     * sets the diagonal to a vector.
+     *
+     *      \f$
+     *      \left(
+     *      \begin{array}{cc}
+     *      v[0] & 0 \\
+     *      0 & v[1] \\
+     *      \end{array}
+     *      \right)
+     *      \f$
+     */
+    template <typename U>
+    explicit TMat22(const TVec2<U>& v);
+
+    /**
+     * construct from another matrix of the same size
+     */
+    template <typename U>
+    explicit TMat22(const TMat22<U>& rhs);
+
+    /**
+     * construct from 2 column vectors.
+     *
+     *      \f$
+     *      \left(
+     *      \begin{array}{cc}
+     *      v0 & v1 \\
+     *      \end{array}
+     *      \right)
+     *      \f$
+     */
+    template <typename A, typename B>
+    TMat22(const TVec2<A>& v0, const TVec2<B>& v1);
+
+    /** construct from 4 elements in column-major form.
+     *
+     *      \f$
+     *      \left(
+     *      \begin{array}{cc}
+     *      m[0][0] & m[1][0] \\
+     *      m[0][1] & m[1][1] \\
+     *      \end{array}
+     *      \right)
+     *      \f$
+     */
+    template <
+        typename A, typename B,
+        typename C, typename D>
+    TMat22(A m00, B m01,
+           C m10, D m11);
+
+    /**
+     * construct from a C array in column major form.
+     */
+    template <typename U>
+    explicit TMat22(U const* rawArray);
+
+    /**
+     * Rotate by radians in the 2D plane
+     */
+    static TMat22<T> rotate(T radian) {
+        TMat22<T> r(TMat22<T>::NO_INIT);
+        T c = std::cos(radian);
+        T s = std::sin(radian);
+        r[0][0] = c;   r[1][1] = c;
+        r[0][1] = s;   r[1][0] = -s;
+        return r;
+    }
+};
+
+// ----------------------------------------------------------------------------------------
+// Constructors
+// ----------------------------------------------------------------------------------------
+
+// Since the matrix code could become pretty big quickly, we don't inline most
+// operations.
+
+template <typename T>
+TMat22<T>::TMat22() {
+    m_value[0] = col_type(1, 0);
+    m_value[1] = col_type(0, 1);
+}
+
+template <typename T>
+template <typename U>
+TMat22<T>::TMat22(U v) {
+    m_value[0] = col_type(v, 0);
+    m_value[1] = col_type(0, v);
+}
+
+template<typename T>
+template<typename U>
+TMat22<T>::TMat22(const TVec2<U>& v) {
+    m_value[0] = col_type(v.x, 0);
+    m_value[1] = col_type(0, v.y);
+}
+
+// construct from 4 scalars. Note that the arrangement
+// of values in the constructor is the transpose of the matrix
+// notation.
+template<typename T>
+template <
+    typename A, typename B,
+    typename C, typename D>
+TMat22<T>::TMat22(A m00, B m01,
+                  C m10, D m11) {
+    m_value[0] = col_type(m00, m01);
+    m_value[1] = col_type(m10, m11);
+}
+
+template <typename T>
+template <typename U>
+TMat22<T>::TMat22(const TMat22<U>& rhs) {
+    for (size_t col = 0; col < NUM_COLS; ++col) {
+        m_value[col] = col_type(rhs[col]);
+    }
+}
+
+// Construct from 2 column vectors.
+template <typename T>
+template <typename A, typename B>
+TMat22<T>::TMat22(const TVec2<A>& v0, const TVec2<B>& v1) {
+    m_value[0] = v0;
+    m_value[1] = v1;
+}
+
+// Construct from raw array, in column-major form.
+template <typename T>
+template <typename U>
+TMat22<T>::TMat22(U const* rawArray) {
+    for (size_t col = 0; col < NUM_COLS; ++col) {
+        for (size_t row = 0; row < NUM_ROWS; ++row) {
+            m_value[col][row] = *rawArray++;
+        }
+    }
+}
+
+// ----------------------------------------------------------------------------------------
+// Arithmetic operators outside of class
+// ----------------------------------------------------------------------------------------
+
+/* We use non-friend functions here to prevent the compiler from using
+ * implicit conversions, for instance of a scalar to a vector. The result would
+ * not be what the caller expects.
+ *
+ * Also note that the order of the arguments in the inner loop is important since
+ * it determines the output type (only relevant when T != U).
+ */
+
+// matrix * column-vector, result is a vector of the same type than the input vector
+template <typename T, typename U>
+typename TMat22<U>::col_type PURE operator *(const TMat22<T>& lhs, const TVec2<U>& rhs) {
+    // Result is initialized to zero.
+    typename TMat22<U>::col_type result;
+    for (size_t col = 0; col < TMat22<T>::NUM_COLS; ++col) {
+        result += lhs[col] * rhs[col];
+    }
+    return result;
+}
+
+// row-vector * matrix, result is a vector of the same type than the input vector
+template <typename T, typename U>
+typename TMat22<U>::row_type PURE operator *(const TVec2<U>& lhs, const TMat22<T>& rhs) {
+    typename TMat22<U>::row_type result(TMat22<U>::row_type::NO_INIT);
+    for (size_t col = 0; col < TMat22<T>::NUM_COLS; ++col) {
+        result[col] = dot(lhs, rhs[col]);
+    }
+    return result;
+}
+
+// matrix * scalar, result is a matrix of the same type than the input matrix
+template<typename T, typename U>
+constexpr typename std::enable_if<std::is_arithmetic<U>::value, TMat22<T>>::type PURE
+operator*(TMat22<T> lhs, U rhs) {
+    return lhs *= rhs;
+}
+
+// scalar * matrix, result is a matrix of the same type than the input matrix
+template<typename T, typename U>
+constexpr typename std::enable_if<std::is_arithmetic<U>::value, TMat22<T>>::type PURE
+operator*(U lhs, const TMat22<T>& rhs) {
+    return rhs * lhs;
+}
+
+// ----------------------------------------------------------------------------------------
+
+/* FIXME: this should go into TMatSquareFunctions<> but for some reason
+ * BASE<T>::col_type is not accessible from there (???)
+ */
+template<typename T>
+typename TMat22<T>::col_type PURE diag(const TMat22<T>& m) {
+    return matrix::diag(m);
+}
+
+}  // namespace details
+
+// ----------------------------------------------------------------------------------------
+
+typedef details::TMat22<double> mat2d;
+typedef details::TMat22<float> mat2;
+typedef details::TMat22<float> mat2f;
+
+// ----------------------------------------------------------------------------------------
+}  // namespace android
+
+#undef PURE
+
+#endif  // UI_MAT2_H_
diff --git a/include/ui/mat3.h b/include/ui/mat3.h
new file mode 100644
index 0000000..6a071c1
--- /dev/null
+++ b/include/ui/mat3.h
@@ -0,0 +1,435 @@
+/*
+ * Copyright 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef UI_MAT3_H_
+#define UI_MAT3_H_
+
+#include <ui/quat.h>
+#include <ui/TMatHelpers.h>
+#include <ui/vec3.h>
+#include <stdint.h>
+#include <sys/types.h>
+
+#define PURE __attribute__((pure))
+
+namespace android {
+// -------------------------------------------------------------------------------------
+namespace details {
+
+template<typename T>
+class TQuaternion;
+
+/**
+ * A 3x3 column-major matrix class.
+ *
+ * Conceptually a 3x3 matrix is a an array of 3 column vec3:
+ *
+ * mat3 m =
+ *      \f$
+ *      \left(
+ *      \begin{array}{ccc}
+ *      m[0] & m[1] & m[2] \\
+ *      \end{array}
+ *      \right)
+ *      \f$
+ *      =
+ *      \f$
+ *      \left(
+ *      \begin{array}{ccc}
+ *      m[0][0] & m[1][0] & m[2][0] \\
+ *      m[0][1] & m[1][1] & m[2][1] \\
+ *      m[0][2] & m[1][2] & m[2][2] \\
+ *      \end{array}
+ *      \right)
+ *      \f$
+ *      =
+ *      \f$
+ *      \left(
+ *      \begin{array}{ccc}
+ *      m(0,0) & m(0,1) & m(0,2) \\
+ *      m(1,0) & m(1,1) & m(1,2) \\
+ *      m(2,0) & m(2,1) & m(2,2) \\
+ *      \end{array}
+ *      \right)
+ *      \f$
+ *
+ * m[n] is the \f$ n^{th} \f$ column of the matrix and is a vec3.
+ *
+ */
+template <typename T>
+class TMat33 :  public TVecUnaryOperators<TMat33, T>,
+                public TVecComparisonOperators<TMat33, T>,
+                public TVecAddOperators<TMat33, T>,
+                public TMatProductOperators<TMat33, T>,
+                public TMatSquareFunctions<TMat33, T>,
+                public TMatTransform<TMat33, T>,
+                public TMatHelpers<TMat33, T>,
+                public TMatDebug<TMat33, T> {
+public:
+    enum no_init { NO_INIT };
+    typedef T value_type;
+    typedef T& reference;
+    typedef T const& const_reference;
+    typedef size_t size_type;
+    typedef TVec3<T> col_type;
+    typedef TVec3<T> row_type;
+
+    static constexpr size_t COL_SIZE = col_type::SIZE;  // size of a column (i.e.: number of rows)
+    static constexpr size_t ROW_SIZE = row_type::SIZE;  // size of a row (i.e.: number of columns)
+    static constexpr size_t NUM_ROWS = COL_SIZE;
+    static constexpr size_t NUM_COLS = ROW_SIZE;
+
+private:
+    /*
+     *  <--  N columns  -->
+     *
+     *  a[0][0] a[1][0] a[2][0] ... a[N][0]    ^
+     *  a[0][1] a[1][1] a[2][1] ... a[N][1]    |
+     *  a[0][2] a[1][2] a[2][2] ... a[N][2]  M rows
+     *  ...                                    |
+     *  a[0][M] a[1][M] a[2][M] ... a[N][M]    v
+     *
+     *  COL_SIZE = M
+     *  ROW_SIZE = N
+     *  m[0] = [ a[0][0] a[0][1] a[0][2] ... a[0][M] ]
+     */
+
+    col_type m_value[NUM_COLS];
+
+public:
+    // array access
+    inline constexpr col_type const& operator[](size_t column) const {
+#if __cplusplus >= 201402L
+        // only possible in C++0x14 with constexpr
+        assert(column < NUM_COLS);
+#endif
+        return m_value[column];
+    }
+
+    inline col_type& operator[](size_t column) {
+        assert(column < NUM_COLS);
+        return m_value[column];
+    }
+
+    // -----------------------------------------------------------------------
+    // we want the compiler generated versions for these...
+    TMat33(const TMat33&) = default;
+    ~TMat33() = default;
+    TMat33& operator = (const TMat33&) = default;
+
+    /**
+     *  constructors
+     */
+
+    /**
+     * leaves object uninitialized. use with caution.
+     */
+    explicit
+    constexpr TMat33(no_init)
+            : m_value{ col_type(col_type::NO_INIT),
+                       col_type(col_type::NO_INIT),
+                       col_type(col_type::NO_INIT) } {}
+
+
+    /**
+     * initialize to identity.
+     *
+     *      \f$
+     *      \left(
+     *      \begin{array}{ccc}
+     *      1 & 0 & 0 \\
+     *      0 & 1 & 0 \\
+     *      0 & 0 & 1 \\
+     *      \end{array}
+     *      \right)
+     *      \f$
+     */
+    TMat33();
+
+    /**
+     * initialize to Identity*scalar.
+     *
+     *      \f$
+     *      \left(
+     *      \begin{array}{ccc}
+     *      v & 0 & 0 \\
+     *      0 & v & 0 \\
+     *      0 & 0 & v \\
+     *      \end{array}
+     *      \right)
+     *      \f$
+     */
+    template<typename U>
+    explicit TMat33(U v);
+
+    /**
+     * sets the diagonal to a vector.
+     *
+     *      \f$
+     *      \left(
+     *      \begin{array}{ccc}
+     *      v[0] & 0 & 0 \\
+     *      0 & v[1] & 0 \\
+     *      0 & 0 & v[2] \\
+     *      \end{array}
+     *      \right)
+     *      \f$
+     */
+    template <typename U>
+    explicit TMat33(const TVec3<U>& v);
+
+    /**
+     * construct from another matrix of the same size
+     */
+    template <typename U>
+    explicit TMat33(const TMat33<U>& rhs);
+
+    /**
+     * construct from 3 column vectors.
+     *
+     *      \f$
+     *      \left(
+     *      \begin{array}{ccc}
+     *      v0 & v1 & v2 \\
+     *      \end{array}
+     *      \right)
+     *      \f$
+     */
+    template <typename A, typename B, typename C>
+    TMat33(const TVec3<A>& v0, const TVec3<B>& v1, const TVec3<C>& v2);
+
+    /** construct from 9 elements in column-major form.
+     *
+     *      \f$
+     *      \left(
+     *      \begin{array}{ccc}
+     *      m[0][0] & m[1][0] & m[2][0] \\
+     *      m[0][1] & m[1][1] & m[2][1] \\
+     *      m[0][2] & m[1][2] & m[2][2] \\
+     *      \end{array}
+     *      \right)
+     *      \f$
+     */
+    template <
+        typename A, typename B, typename C,
+        typename D, typename E, typename F,
+        typename G, typename H, typename I>
+    TMat33(A m00, B m01, C m02,
+           D m10, E m11, F m12,
+           G m20, H m21, I m22);
+
+    /**
+     * construct from a quaternion
+     */
+    template <typename U>
+    explicit TMat33(const TQuaternion<U>& q);
+
+    /**
+     * construct from a C array in column major form.
+     */
+    template <typename U>
+    explicit TMat33(U const* rawArray);
+
+    /**
+     * orthogonalize only works on matrices of size 3x3
+     */
+    friend inline
+    TMat33 orthogonalize(const TMat33& m) {
+        TMat33 ret(TMat33::NO_INIT);
+        ret[0] = normalize(m[0]);
+        ret[2] = normalize(cross(ret[0], m[1]));
+        ret[1] = normalize(cross(ret[2], ret[0]));
+        return ret;
+    }
+};
+
+// ----------------------------------------------------------------------------------------
+// Constructors
+// ----------------------------------------------------------------------------------------
+
+// Since the matrix code could become pretty big quickly, we don't inline most
+// operations.
+
+template <typename T>
+TMat33<T>::TMat33() {
+    m_value[0] = col_type(1, 0, 0);
+    m_value[1] = col_type(0, 1, 0);
+    m_value[2] = col_type(0, 0, 1);
+}
+
+template <typename T>
+template <typename U>
+TMat33<T>::TMat33(U v) {
+    m_value[0] = col_type(v, 0, 0);
+    m_value[1] = col_type(0, v, 0);
+    m_value[2] = col_type(0, 0, v);
+}
+
+template<typename T>
+template<typename U>
+TMat33<T>::TMat33(const TVec3<U>& v) {
+    m_value[0] = col_type(v.x, 0, 0);
+    m_value[1] = col_type(0, v.y, 0);
+    m_value[2] = col_type(0, 0, v.z);
+}
+
+// construct from 16 scalars. Note that the arrangement
+// of values in the constructor is the transpose of the matrix
+// notation.
+template<typename T>
+template <
+    typename A, typename B, typename C,
+    typename D, typename E, typename F,
+    typename G, typename H, typename I>
+TMat33<T>::TMat33(A m00, B m01, C m02,
+                  D m10, E m11, F m12,
+                  G m20, H m21, I m22) {
+    m_value[0] = col_type(m00, m01, m02);
+    m_value[1] = col_type(m10, m11, m12);
+    m_value[2] = col_type(m20, m21, m22);
+}
+
+template <typename T>
+template <typename U>
+TMat33<T>::TMat33(const TMat33<U>& rhs) {
+    for (size_t col = 0; col < NUM_COLS; ++col) {
+        m_value[col] = col_type(rhs[col]);
+    }
+}
+
+// Construct from 3 column vectors.
+template <typename T>
+template <typename A, typename B, typename C>
+TMat33<T>::TMat33(const TVec3<A>& v0, const TVec3<B>& v1, const TVec3<C>& v2) {
+    m_value[0] = v0;
+    m_value[1] = v1;
+    m_value[2] = v2;
+}
+
+// Construct from raw array, in column-major form.
+template <typename T>
+template <typename U>
+TMat33<T>::TMat33(U const* rawArray) {
+    for (size_t col = 0; col < NUM_COLS; ++col) {
+        for (size_t row = 0; row < NUM_ROWS; ++row) {
+            m_value[col][row] = *rawArray++;
+        }
+    }
+}
+
+template <typename T>
+template <typename U>
+TMat33<T>::TMat33(const TQuaternion<U>& q) {
+    const U n = q.x*q.x + q.y*q.y + q.z*q.z + q.w*q.w;
+    const U s = n > 0 ? 2/n : 0;
+    const U x = s*q.x;
+    const U y = s*q.y;
+    const U z = s*q.z;
+    const U xx = x*q.x;
+    const U xy = x*q.y;
+    const U xz = x*q.z;
+    const U xw = x*q.w;
+    const U yy = y*q.y;
+    const U yz = y*q.z;
+    const U yw = y*q.w;
+    const U zz = z*q.z;
+    const U zw = z*q.w;
+    m_value[0] = col_type(1-yy-zz,    xy+zw,    xz-yw);  // NOLINT
+    m_value[1] = col_type(  xy-zw,  1-xx-zz,    yz+xw);  // NOLINT
+    m_value[2] = col_type(  xz+yw,    yz-xw,  1-xx-yy);  // NOLINT
+}
+
+// ----------------------------------------------------------------------------------------
+// Arithmetic operators outside of class
+// ----------------------------------------------------------------------------------------
+
+/* We use non-friend functions here to prevent the compiler from using
+ * implicit conversions, for instance of a scalar to a vector. The result would
+ * not be what the caller expects.
+ *
+ * Also note that the order of the arguments in the inner loop is important since
+ * it determines the output type (only relevant when T != U).
+ */
+
+// matrix * column-vector, result is a vector of the same type than the input vector
+template <typename T, typename U>
+typename TMat33<U>::col_type PURE operator *(const TMat33<T>& lhs, const TVec3<U>& rhs) {
+    // Result is initialized to zero.
+    typename TMat33<U>::col_type result;
+    for (size_t col = 0; col < TMat33<T>::NUM_COLS; ++col) {
+        result += lhs[col] * rhs[col];
+    }
+    return result;
+}
+
+// row-vector * matrix, result is a vector of the same type than the input vector
+template <typename T, typename U>
+typename TMat33<U>::row_type PURE operator *(const TVec3<U>& lhs, const TMat33<T>& rhs) {
+    typename TMat33<U>::row_type result(TMat33<U>::row_type::NO_INIT);
+    for (size_t col = 0; col < TMat33<T>::NUM_COLS; ++col) {
+        result[col] = dot(lhs, rhs[col]);
+    }
+    return result;
+}
+
+// matrix * scalar, result is a matrix of the same type than the input matrix
+template<typename T, typename U>
+constexpr typename std::enable_if<std::is_arithmetic<U>::value, TMat33<T>>::type PURE
+operator*(TMat33<T> lhs, U rhs) {
+    return lhs *= rhs;
+}
+
+// scalar * matrix, result is a matrix of the same type than the input matrix
+template<typename T, typename U>
+constexpr typename std::enable_if<std::is_arithmetic<U>::value, TMat33<T>>::type PURE
+operator*(U lhs, const TMat33<T>& rhs) {
+    return rhs * lhs;
+}
+
+//------------------------------------------------------------------------------
+template <typename T>
+TMat33<T> orthogonalize(const TMat33<T>& m) {
+    TMat33<T> ret(TMat33<T>::NO_INIT);
+    ret[0] = normalize(m[0]);
+    ret[2] = normalize(cross(ret[0], m[1]));
+    ret[1] = normalize(cross(ret[2], ret[0]));
+    return ret;
+}
+
+// ----------------------------------------------------------------------------------------
+
+/* FIXME: this should go into TMatSquareFunctions<> but for some reason
+ * BASE<T>::col_type is not accessible from there (???)
+ */
+template<typename T>
+typename TMat33<T>::col_type PURE diag(const TMat33<T>& m) {
+    return matrix::diag(m);
+}
+
+}  // namespace details
+
+// ----------------------------------------------------------------------------------------
+
+typedef details::TMat33<double> mat3d;
+typedef details::TMat33<float> mat3;
+typedef details::TMat33<float> mat3f;
+
+// ----------------------------------------------------------------------------------------
+}  // namespace android
+
+#undef PURE
+
+#endif  // UI_MAT3_H_
diff --git a/include/ui/mat4.h b/include/ui/mat4.h
index 4fd1eff..a607023 100644
--- a/include/ui/mat4.h
+++ b/include/ui/mat4.h
@@ -14,173 +14,325 @@
  * limitations under the License.
  */
 
-#ifndef UI_MAT4_H
-#define UI_MAT4_H
+#ifndef UI_MAT4_H_
+#define UI_MAT4_H_
+
+#include <ui/mat3.h>
+#include <ui/quat.h>
+#include <ui/TMatHelpers.h>
+#include <ui/vec3.h>
+#include <ui/vec4.h>
 
 #include <stdint.h>
 #include <sys/types.h>
-
-#include <ui/vec4.h>
-#include <utils/String8.h>
-
-#define TMAT_IMPLEMENTATION
-#include <ui/TMatHelpers.h>
+#include <limits>
 
 #define PURE __attribute__((pure))
 
 namespace android {
 // -------------------------------------------------------------------------------------
+namespace details {
 
+template<typename T>
+class TQuaternion;
+
+/**
+ * A 4x4 column-major matrix class.
+ *
+ * Conceptually a 4x4 matrix is a an array of 4 column double4:
+ *
+ * mat4 m =
+ *      \f$
+ *      \left(
+ *      \begin{array}{cccc}
+ *      m[0] & m[1] & m[2] & m[3] \\
+ *      \end{array}
+ *      \right)
+ *      \f$
+ *      =
+ *      \f$
+ *      \left(
+ *      \begin{array}{cccc}
+ *      m[0][0] & m[1][0] & m[2][0] & m[3][0] \\
+ *      m[0][1] & m[1][1] & m[2][1] & m[3][1] \\
+ *      m[0][2] & m[1][2] & m[2][2] & m[3][2] \\
+ *      m[0][3] & m[1][3] & m[2][3] & m[3][3] \\
+ *      \end{array}
+ *      \right)
+ *      \f$
+ *      =
+ *      \f$
+ *      \left(
+ *      \begin{array}{cccc}
+ *      m(0,0) & m(0,1) & m(0,2) & m(0,3) \\
+ *      m(1,0) & m(1,1) & m(1,2) & m(1,3) \\
+ *      m(2,0) & m(2,1) & m(2,2) & m(2,3) \\
+ *      m(3,0) & m(3,1) & m(3,2) & m(3,3) \\
+ *      \end{array}
+ *      \right)
+ *      \f$
+ *
+ * m[n] is the \f$ n^{th} \f$ column of the matrix and is a double4.
+ *
+ */
 template <typename T>
-class tmat44 :  public TVecUnaryOperators<tmat44, T>,
-                public TVecComparisonOperators<tmat44, T>,
-                public TVecAddOperators<tmat44, T>,
-                public TMatProductOperators<tmat44, T>,
-                public TMatSquareFunctions<tmat44, T>,
-                public TMatDebug<tmat44, T>
-{
+class TMat44 :  public TVecUnaryOperators<TMat44, T>,
+                public TVecComparisonOperators<TMat44, T>,
+                public TVecAddOperators<TMat44, T>,
+                public TMatProductOperators<TMat44, T>,
+                public TMatSquareFunctions<TMat44, T>,
+                public TMatTransform<TMat44, T>,
+                public TMatHelpers<TMat44, T>,
+                public TMatDebug<TMat44, T> {
 public:
     enum no_init { NO_INIT };
     typedef T value_type;
     typedef T& reference;
     typedef T const& const_reference;
     typedef size_t size_type;
-    typedef tvec4<T> col_type;
-    typedef tvec4<T> row_type;
+    typedef TVec4<T> col_type;
+    typedef TVec4<T> row_type;
 
-    // size of a column (i.e.: number of rows)
-    enum { COL_SIZE = col_type::SIZE };
-    static inline size_t col_size() { return COL_SIZE; }
-
-    // size of a row (i.e.: number of columns)
-    enum { ROW_SIZE = row_type::SIZE };
-    static inline size_t row_size() { return ROW_SIZE; }
-    static inline size_t size()     { return row_size(); }  // for TVec*<>
+    static constexpr size_t COL_SIZE = col_type::SIZE;  // size of a column (i.e.: number of rows)
+    static constexpr size_t ROW_SIZE = row_type::SIZE;  // size of a row (i.e.: number of columns)
+    static constexpr size_t NUM_ROWS = COL_SIZE;
+    static constexpr size_t NUM_COLS = ROW_SIZE;
 
 private:
-
     /*
      *  <--  N columns  -->
      *
-     *  a00 a10 a20 ... aN0    ^
-     *  a01 a11 a21 ... aN1    |
-     *  a02 a12 a22 ... aN2  M rows
-     *  ...                    |
-     *  a0M a1M a2M ... aNM    v
+     *  a[0][0] a[1][0] a[2][0] ... a[N][0]    ^
+     *  a[0][1] a[1][1] a[2][1] ... a[N][1]    |
+     *  a[0][2] a[1][2] a[2][2] ... a[N][2]  M rows
+     *  ...                                    |
+     *  a[0][M] a[1][M] a[2][M] ... a[N][M]    v
      *
      *  COL_SIZE = M
      *  ROW_SIZE = N
-     *  m[0] = [a00 a01 a02 ... a01M]
+     *  m[0] = [ a[0][0] a[0][1] a[0][2] ... a[0][M] ]
      */
 
-    col_type mValue[ROW_SIZE];
+    col_type m_value[NUM_COLS];
 
 public:
     // array access
-    inline col_type const& operator [] (size_t i) const { return mValue[i]; }
-    inline col_type&       operator [] (size_t i)       { return mValue[i]; }
+    inline constexpr col_type const& operator[](size_t column) const {
+#if __cplusplus >= 201402L
+        // only possible in C++0x14 with constexpr
+        assert(column < NUM_COLS);
+#endif
+        return m_value[column];
+    }
 
-    T const* asArray() const { return &mValue[0][0]; }
+    inline col_type& operator[](size_t column) {
+        assert(column < NUM_COLS);
+        return m_value[column];
+    }
 
     // -----------------------------------------------------------------------
-    // we don't provide copy-ctor and operator= on purpose
-    // because we want the compiler generated versions
+    // we want the compiler generated versions for these...
+    TMat44(const TMat44&) = default;
+    ~TMat44() = default;
+    TMat44& operator = (const TMat44&) = default;
 
     /*
      *  constructors
      */
 
     // leaves object uninitialized. use with caution.
-    explicit tmat44(no_init) { }
+    explicit
+    constexpr TMat44(no_init)
+            : m_value{ col_type(col_type::NO_INIT),
+                       col_type(col_type::NO_INIT),
+                       col_type(col_type::NO_INIT),
+                       col_type(col_type::NO_INIT) } {}
 
-    // initialize to identity
-    tmat44();
+    /** initialize to identity.
+     *
+     *      \f$
+     *      \left(
+     *      \begin{array}{cccc}
+     *      1 & 0 & 0 & 0 \\
+     *      0 & 1 & 0 & 0 \\
+     *      0 & 0 & 1 & 0 \\
+     *      0 & 0 & 0 & 1 \\
+     *      \end{array}
+     *      \right)
+     *      \f$
+     */
+    TMat44();
 
-    // initialize to Identity*scalar.
+    /** initialize to Identity*scalar.
+     *
+     *      \f$
+     *      \left(
+     *      \begin{array}{cccc}
+     *      v & 0 & 0 & 0 \\
+     *      0 & v & 0 & 0 \\
+     *      0 & 0 & v & 0 \\
+     *      0 & 0 & 0 & v \\
+     *      \end{array}
+     *      \right)
+     *      \f$
+     */
     template<typename U>
-    explicit tmat44(U v);
+    explicit TMat44(U v);
 
-    // sets the diagonal to the passed vector
+    /** sets the diagonal to a vector.
+     *
+     *      \f$
+     *      \left(
+     *      \begin{array}{cccc}
+     *      v[0] & 0 & 0 & 0 \\
+     *      0 & v[1] & 0 & 0 \\
+     *      0 & 0 & v[2] & 0 \\
+     *      0 & 0 & 0 & v[3] \\
+     *      \end{array}
+     *      \right)
+     *      \f$
+     */
     template <typename U>
-    explicit tmat44(const tvec4<U>& rhs);
+    explicit TMat44(const TVec4<U>& v);
 
     // construct from another matrix of the same size
     template <typename U>
-    explicit tmat44(const tmat44<U>& rhs);
+    explicit TMat44(const TMat44<U>& rhs);
 
-    // construct from 4 column vectors
+    /** construct from 4 column vectors.
+     *
+     *      \f$
+     *      \left(
+     *      \begin{array}{cccc}
+     *      v0 & v1 & v2 & v3 \\
+     *      \end{array}
+     *      \right)
+     *      \f$
+     */
     template <typename A, typename B, typename C, typename D>
-    tmat44(const tvec4<A>& v0, const tvec4<B>& v1, const tvec4<C>& v2, const tvec4<D>& v3);
+    TMat44(const TVec4<A>& v0, const TVec4<B>& v1, const TVec4<C>& v2, const TVec4<D>& v3);
 
-    // construct from 16 scalars
+    /** construct from 16 elements in column-major form.
+     *
+     *      \f$
+     *      \left(
+     *      \begin{array}{cccc}
+     *      m[0][0] & m[1][0] & m[2][0] & m[3][0] \\
+     *      m[0][1] & m[1][1] & m[2][1] & m[3][1] \\
+     *      m[0][2] & m[1][2] & m[2][2] & m[3][2] \\
+     *      m[0][3] & m[1][3] & m[2][3] & m[3][3] \\
+     *      \end{array}
+     *      \right)
+     *      \f$
+     */
     template <
         typename A, typename B, typename C, typename D,
         typename E, typename F, typename G, typename H,
         typename I, typename J, typename K, typename L,
         typename M, typename N, typename O, typename P>
-    tmat44( A m00, B m01, C m02, D m03,
-            E m10, F m11, G m12, H m13,
-            I m20, J m21, K m22, L m23,
-            M m30, N m31, O m32, P m33);
+    TMat44(A m00, B m01, C m02, D m03,
+           E m10, F m11, G m12, H m13,
+           I m20, J m21, K m22, L m23,
+           M m30, N m31, O m32, P m33);
 
-    // construct from a C array
+    /**
+     * construct from a quaternion
+     */
     template <typename U>
-    explicit tmat44(U const* rawArray);
+    explicit TMat44(const TQuaternion<U>& q);
+
+    /**
+     * construct from a C array in column major form.
+     */
+    template <typename U>
+    explicit TMat44(U const* rawArray);
+
+    /**
+     * construct from a 3x3 matrix
+     */
+    template <typename U>
+    explicit TMat44(const TMat33<U>& matrix);
+
+    /**
+     * construct from a 3x3 matrix and 3d translation
+     */
+    template <typename U, typename V>
+    TMat44(const TMat33<U>& matrix, const TVec3<V>& translation);
+
+    /**
+     * construct from a 3x3 matrix and 4d last column.
+     */
+    template <typename U, typename V>
+    TMat44(const TMat33<U>& matrix, const TVec4<V>& column3);
 
     /*
      *  helpers
      */
 
-    static tmat44 ortho(T left, T right, T bottom, T top, T near, T far);
+    static TMat44 ortho(T left, T right, T bottom, T top, T near, T far);
 
-    static tmat44 frustum(T left, T right, T bottom, T top, T near, T far);
+    static TMat44 frustum(T left, T right, T bottom, T top, T near, T far);
+
+    enum class Fov {
+        HORIZONTAL,
+        VERTICAL
+    };
+    static TMat44 perspective(T fov, T aspect, T near, T far, Fov direction = Fov::VERTICAL);
 
     template <typename A, typename B, typename C>
-    static tmat44 lookAt(const tvec3<A>& eye, const tvec3<B>& center, const tvec3<C>& up);
+    static TMat44 lookAt(const TVec3<A>& eye, const TVec3<B>& center, const TVec3<C>& up);
 
     template <typename A>
-    static tmat44 translate(const tvec4<A>& t);
+    static TVec3<A> project(const TMat44& projectionMatrix, TVec3<A> vertice) {
+        TVec4<A> r = projectionMatrix * TVec4<A>{ vertice, 1 };
+        return r.xyz / r.w;
+    }
 
     template <typename A>
-    static tmat44 scale(const tvec4<A>& s);
+    static TVec4<A> project(const TMat44& projectionMatrix, TVec4<A> vertice) {
+        vertice = projectionMatrix * vertice;
+        return { vertice.xyz / vertice.w, 1 };
+    }
 
-    template <typename A, typename B>
-    static tmat44 rotate(A radian, const tvec3<B>& about);
+    /**
+     * Constructs a 3x3 matrix from the upper-left corner of this 4x4 matrix
+     */
+    inline constexpr TMat33<T> upperLeft() const {
+        return TMat33<T>(m_value[0].xyz, m_value[1].xyz, m_value[2].xyz);
+    }
 };
 
 // ----------------------------------------------------------------------------------------
 // Constructors
 // ----------------------------------------------------------------------------------------
 
-/*
- * Since the matrix code could become pretty big quickly, we don't inline most
- * operations.
- */
+// Since the matrix code could become pretty big quickly, we don't inline most
+// operations.
 
 template <typename T>
-tmat44<T>::tmat44() {
-    mValue[0] = col_type(1,0,0,0);
-    mValue[1] = col_type(0,1,0,0);
-    mValue[2] = col_type(0,0,1,0);
-    mValue[3] = col_type(0,0,0,1);
+TMat44<T>::TMat44() {
+    m_value[0] = col_type(1, 0, 0, 0);
+    m_value[1] = col_type(0, 1, 0, 0);
+    m_value[2] = col_type(0, 0, 1, 0);
+    m_value[3] = col_type(0, 0, 0, 1);
 }
 
 template <typename T>
 template <typename U>
-tmat44<T>::tmat44(U v) {
-    mValue[0] = col_type(v,0,0,0);
-    mValue[1] = col_type(0,v,0,0);
-    mValue[2] = col_type(0,0,v,0);
-    mValue[3] = col_type(0,0,0,v);
+TMat44<T>::TMat44(U v) {
+    m_value[0] = col_type(v, 0, 0, 0);
+    m_value[1] = col_type(0, v, 0, 0);
+    m_value[2] = col_type(0, 0, v, 0);
+    m_value[3] = col_type(0, 0, 0, v);
 }
 
 template<typename T>
 template<typename U>
-tmat44<T>::tmat44(const tvec4<U>& v) {
-    mValue[0] = col_type(v.x,0,0,0);
-    mValue[1] = col_type(0,v.y,0,0);
-    mValue[2] = col_type(0,0,v.z,0);
-    mValue[3] = col_type(0,0,0,v.w);
+TMat44<T>::TMat44(const TVec4<U>& v) {
+    m_value[0] = col_type(v.x, 0, 0, 0);
+    m_value[1] = col_type(0, v.y, 0, 0);
+    m_value[2] = col_type(0, 0, v.z, 0);
+    m_value[3] = col_type(0, 0, 0, v.w);
 }
 
 // construct from 16 scalars
@@ -190,38 +342,93 @@
     typename E, typename F, typename G, typename H,
     typename I, typename J, typename K, typename L,
     typename M, typename N, typename O, typename P>
-tmat44<T>::tmat44(  A m00, B m01, C m02, D m03,
-                    E m10, F m11, G m12, H m13,
-                    I m20, J m21, K m22, L m23,
-                    M m30, N m31, O m32, P m33) {
-    mValue[0] = col_type(m00, m01, m02, m03);
-    mValue[1] = col_type(m10, m11, m12, m13);
-    mValue[2] = col_type(m20, m21, m22, m23);
-    mValue[3] = col_type(m30, m31, m32, m33);
+TMat44<T>::TMat44(A m00, B m01, C m02, D m03,
+                  E m10, F m11, G m12, H m13,
+                  I m20, J m21, K m22, L m23,
+                  M m30, N m31, O m32, P m33) {
+    m_value[0] = col_type(m00, m01, m02, m03);
+    m_value[1] = col_type(m10, m11, m12, m13);
+    m_value[2] = col_type(m20, m21, m22, m23);
+    m_value[3] = col_type(m30, m31, m32, m33);
 }
 
 template <typename T>
 template <typename U>
-tmat44<T>::tmat44(const tmat44<U>& rhs) {
-    for (size_t r=0 ; r<row_size() ; r++)
-        mValue[r] = rhs[r];
+TMat44<T>::TMat44(const TMat44<U>& rhs) {
+    for (size_t col = 0; col < NUM_COLS; ++col) {
+        m_value[col] = col_type(rhs[col]);
+    }
 }
 
+// Construct from 4 column vectors.
 template <typename T>
 template <typename A, typename B, typename C, typename D>
-tmat44<T>::tmat44(const tvec4<A>& v0, const tvec4<B>& v1, const tvec4<C>& v2, const tvec4<D>& v3) {
-    mValue[0] = v0;
-    mValue[1] = v1;
-    mValue[2] = v2;
-    mValue[3] = v3;
+TMat44<T>::TMat44(const TVec4<A>& v0, const TVec4<B>& v1, const TVec4<C>& v2, const TVec4<D>& v3) {
+    m_value[0] = col_type(v0);
+    m_value[1] = col_type(v1);
+    m_value[2] = col_type(v2);
+    m_value[3] = col_type(v3);
+}
+
+// Construct from raw array, in column-major form.
+template <typename T>
+template <typename U>
+TMat44<T>::TMat44(U const* rawArray) {
+    for (size_t col = 0; col < NUM_COLS; ++col) {
+        for (size_t row = 0; row < NUM_ROWS; ++row) {
+            m_value[col][row] = *rawArray++;
+        }
+    }
 }
 
 template <typename T>
 template <typename U>
-tmat44<T>::tmat44(U const* rawArray) {
-    for (size_t r=0 ; r<row_size() ; r++)
-        for (size_t c=0 ; c<col_size() ; c++)
-            mValue[r][c] = *rawArray++;
+TMat44<T>::TMat44(const TQuaternion<U>& q) {
+    const U n = q.x*q.x + q.y*q.y + q.z*q.z + q.w*q.w;
+    const U s = n > 0 ? 2/n : 0;
+    const U x = s*q.x;
+    const U y = s*q.y;
+    const U z = s*q.z;
+    const U xx = x*q.x;
+    const U xy = x*q.y;
+    const U xz = x*q.z;
+    const U xw = x*q.w;
+    const U yy = y*q.y;
+    const U yz = y*q.z;
+    const U yw = y*q.w;
+    const U zz = z*q.z;
+    const U zw = z*q.w;
+    m_value[0] = col_type(1-yy-zz,    xy+zw,    xz-yw,   0);
+    m_value[1] = col_type(  xy-zw,  1-xx-zz,    yz+xw,   0);  // NOLINT
+    m_value[2] = col_type(  xz+yw,    yz-xw,  1-xx-yy,   0);  // NOLINT
+    m_value[3] = col_type(      0,        0,        0,   1);  // NOLINT
+}
+
+template <typename T>
+template <typename U>
+TMat44<T>::TMat44(const TMat33<U>& m) {
+    m_value[0] = col_type(m[0][0], m[0][1], m[0][2], 0);
+    m_value[1] = col_type(m[1][0], m[1][1], m[1][2], 0);
+    m_value[2] = col_type(m[2][0], m[2][1], m[2][2], 0);
+    m_value[3] = col_type(      0,       0,       0, 1);  // NOLINT
+}
+
+template <typename T>
+template <typename U, typename V>
+TMat44<T>::TMat44(const TMat33<U>& m, const TVec3<V>& v) {
+    m_value[0] = col_type(m[0][0], m[0][1], m[0][2], 0);
+    m_value[1] = col_type(m[1][0], m[1][1], m[1][2], 0);
+    m_value[2] = col_type(m[2][0], m[2][1], m[2][2], 0);
+    m_value[3] = col_type(   v[0],    v[1],    v[2], 1);  // NOLINT
+}
+
+template <typename T>
+template <typename U, typename V>
+TMat44<T>::TMat44(const TMat33<U>& m, const TVec4<V>& v) {
+    m_value[0] = col_type(m[0][0], m[0][1], m[0][2],    0);  // NOLINT
+    m_value[1] = col_type(m[1][0], m[1][1], m[1][2],    0);  // NOLINT
+    m_value[2] = col_type(m[2][0], m[2][1], m[2][2],    0);  // NOLINT
+    m_value[3] = col_type(   v[0],    v[1],    v[2], v[3]);  // NOLINT
 }
 
 // ----------------------------------------------------------------------------------------
@@ -229,8 +436,8 @@
 // ----------------------------------------------------------------------------------------
 
 template <typename T>
-tmat44<T> tmat44<T>::ortho(T left, T right, T bottom, T top, T near, T far) {
-    tmat44<T> m;
+TMat44<T> TMat44<T>::ortho(T left, T right, T bottom, T top, T near, T far) {
+    TMat44<T> m;
     m[0][0] =  2 / (right - left);
     m[1][1] =  2 / (top   - bottom);
     m[2][2] = -2 / (far   - near);
@@ -241,88 +448,55 @@
 }
 
 template <typename T>
-tmat44<T> tmat44<T>::frustum(T left, T right, T bottom, T top, T near, T far) {
-    tmat44<T> m;
-    T A = (right + left)   / (right - left);
-    T B = (top   + bottom) / (top   - bottom);
-    T C = (far   + near)   / (far   - near);
-    T D = (2 * far * near) / (far   - near);
-    m[0][0] = (2 * near) / (right - left);
-    m[1][1] = (2 * near) / (top   - bottom);
-    m[2][0] = A;
-    m[2][1] = B;
-    m[2][2] = C;
-    m[2][3] =-1;
-    m[3][2] = D;
-    m[3][3] = 0;
+TMat44<T> TMat44<T>::frustum(T left, T right, T bottom, T top, T near, T far) {
+    TMat44<T> m;
+    m[0][0] =  (2 * near) / (right - left);
+    m[1][1] =  (2 * near) / (top   - bottom);
+    m[2][0] =  (right + left)   / (right - left);
+    m[2][1] =  (top   + bottom) / (top   - bottom);
+    m[2][2] = -(far   + near)   / (far   - near);
+    m[2][3] = -1;
+    m[3][2] = -(2 * far * near) / (far   - near);
+    m[3][3] =  0;
     return m;
 }
 
 template <typename T>
-template <typename A, typename B, typename C>
-tmat44<T> tmat44<T>::lookAt(const tvec3<A>& eye, const tvec3<B>& center, const tvec3<C>& up) {
-    tvec3<T> L(normalize(center - eye));
-    tvec3<T> S(normalize( cross(L, up) ));
-    tvec3<T> U(cross(S, L));
-    return tmat44<T>(
-            tvec4<T>( S, 0),
-            tvec4<T>( U, 0),
-            tvec4<T>(-L, 0),
-            tvec4<T>(-eye, 1));
-}
+TMat44<T> TMat44<T>::perspective(T fov, T aspect, T near, T far, TMat44::Fov direction) {
+    T h;
+    T w;
 
-template <typename T>
-template <typename A>
-tmat44<T> tmat44<T>::translate(const tvec4<A>& t) {
-    tmat44<T> r;
-    r[3] = t;
-    return r;
-}
-
-template <typename T>
-template <typename A>
-tmat44<T> tmat44<T>::scale(const tvec4<A>& s) {
-    tmat44<T> r;
-    r[0][0] = s[0];
-    r[1][1] = s[1];
-    r[2][2] = s[2];
-    r[3][3] = s[3];
-    return r;
-}
-
-template <typename T>
-template <typename A, typename B>
-tmat44<T> tmat44<T>::rotate(A radian, const tvec3<B>& about) {
-    tmat44<T> rotation;
-    T* r = const_cast<T*>(rotation.asArray());
-    T c = cos(radian);
-    T s = sin(radian);
-    if (about.x==1 && about.y==0 && about.z==0) {
-        r[5] = c;   r[10]= c;
-        r[6] = s;   r[9] = -s;
-    } else if (about.x==0 && about.y==1 && about.z==0) {
-        r[0] = c;   r[10]= c;
-        r[8] = s;   r[2] = -s;
-    } else if (about.x==0 && about.y==0 && about.z==1) {
-        r[0] = c;   r[5] = c;
-        r[1] = s;   r[4] = -s;
+    if (direction == TMat44::Fov::VERTICAL) {
+        h = std::tan(fov * M_PI / 360.0f) * near;
+        w = h * aspect;
     } else {
-        tvec3<B> nabout = normalize(about);
-        B x = nabout.x;
-        B y = nabout.y;
-        B z = nabout.z;
-        T nc = 1 - c;
-        T xy = x * y;
-        T yz = y * z;
-        T zx = z * x;
-        T xs = x * s;
-        T ys = y * s;
-        T zs = z * s;
-        r[ 0] = x*x*nc +  c;    r[ 4] =  xy*nc - zs;    r[ 8] =  zx*nc + ys;
-        r[ 1] =  xy*nc + zs;    r[ 5] = y*y*nc +  c;    r[ 9] =  yz*nc - xs;
-        r[ 2] =  zx*nc - ys;    r[ 6] =  yz*nc + xs;    r[10] = z*z*nc +  c;
+        w = std::tan(fov * M_PI / 360.0f) * near;
+        h = w / aspect;
     }
-    return rotation;
+    return frustum(-w, w, -h, h, near, far);
+}
+
+/*
+ * Returns a matrix representing the pose of a virtual camera looking towards -Z in its
+ * local Y-up coordinate system. "eye" is where the camera is located, "center" is the points its
+ * looking at and "up" defines where the Y axis of the camera's local coordinate system is.
+ */
+template <typename T>
+template <typename A, typename B, typename C>
+TMat44<T> TMat44<T>::lookAt(const TVec3<A>& eye, const TVec3<B>& center, const TVec3<C>& up) {
+    TVec3<T> z_axis(normalize(center - eye));
+    TVec3<T> norm_up(normalize(up));
+    if (std::abs(dot(z_axis, norm_up)) > 0.999) {
+        // Fix up vector if we're degenerate (looking straight up, basically)
+        norm_up = { norm_up.z, norm_up.x, norm_up.y };
+    }
+    TVec3<T> x_axis(normalize(cross(z_axis, norm_up)));
+    TVec3<T> y_axis(cross(x_axis, z_axis));
+    return TMat44<T>(
+            TVec4<T>(x_axis, 0),
+            TVec4<T>(y_axis, 0),
+            TVec4<T>(-z_axis, 0),
+            TVec4<T>(eye, 1));
 }
 
 // ----------------------------------------------------------------------------------------
@@ -337,40 +511,46 @@
  * it determines the output type (only relevant when T != U).
  */
 
-// matrix * vector, result is a vector of the same type than the input vector
+// matrix * column-vector, result is a vector of the same type than the input vector
 template <typename T, typename U>
-typename tmat44<U>::col_type PURE operator *(const tmat44<T>& lv, const tvec4<U>& rv) {
-    typename tmat44<U>::col_type result;
-    for (size_t r=0 ; r<tmat44<T>::row_size() ; r++)
-        result += rv[r]*lv[r];
+typename TMat44<T>::col_type PURE operator *(const TMat44<T>& lhs, const TVec4<U>& rhs) {
+    // Result is initialized to zero.
+    typename TMat44<T>::col_type result;
+    for (size_t col = 0; col < TMat44<T>::NUM_COLS; ++col) {
+        result += lhs[col] * rhs[col];
+    }
     return result;
 }
 
-// vector * matrix, result is a vector of the same type than the input vector
+// mat44 * vec3, result is vec3( mat44 * {vec3, 1} )
 template <typename T, typename U>
-typename tmat44<U>::row_type PURE operator *(const tvec4<U>& rv, const tmat44<T>& lv) {
-    typename tmat44<U>::row_type result(tmat44<U>::row_type::NO_INIT);
-    for (size_t r=0 ; r<tmat44<T>::row_size() ; r++)
-        result[r] = dot(rv, lv[r]);
+typename TMat44<T>::col_type PURE operator *(const TMat44<T>& lhs, const TVec3<U>& rhs) {
+    return lhs * TVec4<U>{ rhs, 1 };
+}
+
+
+// row-vector * matrix, result is a vector of the same type than the input vector
+template <typename T, typename U>
+typename TMat44<U>::row_type PURE operator *(const TVec4<U>& lhs, const TMat44<T>& rhs) {
+    typename TMat44<U>::row_type result(TMat44<U>::row_type::NO_INIT);
+    for (size_t col = 0; col < TMat44<T>::NUM_COLS; ++col) {
+        result[col] = dot(lhs, rhs[col]);
+    }
     return result;
 }
 
 // matrix * scalar, result is a matrix of the same type than the input matrix
 template <typename T, typename U>
-tmat44<T> PURE operator *(const tmat44<T>& lv, U rv) {
-    tmat44<T> result(tmat44<T>::NO_INIT);
-    for (size_t r=0 ; r<tmat44<T>::row_size() ; r++)
-        result[r] = lv[r]*rv;
-    return result;
+constexpr typename std::enable_if<std::is_arithmetic<U>::value, TMat44<T>>::type PURE
+operator *(TMat44<T> lhs, U rhs) {
+    return lhs *= rhs;
 }
 
 // scalar * matrix, result is a matrix of the same type than the input matrix
 template <typename T, typename U>
-tmat44<T> PURE operator *(U rv, const tmat44<T>& lv) {
-    tmat44<T> result(tmat44<T>::NO_INIT);
-    for (size_t r=0 ; r<tmat44<T>::row_size() ; r++)
-        result[r] = lv[r]*rv;
-    return result;
+constexpr typename std::enable_if<std::is_arithmetic<U>::value, TMat44<T>>::type PURE
+operator *(U lhs, const TMat44<T>& rhs) {
+    return rhs * lhs;
 }
 
 // ----------------------------------------------------------------------------------------
@@ -379,17 +559,21 @@
  * BASE<T>::col_type is not accessible from there (???)
  */
 template<typename T>
-typename tmat44<T>::col_type PURE diag(const tmat44<T>& m) {
+typename TMat44<T>::col_type PURE diag(const TMat44<T>& m) {
     return matrix::diag(m);
 }
 
-// ----------------------------------------------------------------------------------------
-
-typedef tmat44<float> mat4;
+} // namespace details
 
 // ----------------------------------------------------------------------------------------
-}; // namespace android
+
+typedef details::TMat44<double> mat4d;
+typedef details::TMat44<float> mat4;
+typedef details::TMat44<float> mat4f;
+
+// ----------------------------------------------------------------------------------------
+}  // namespace android
 
 #undef PURE
 
-#endif /* UI_MAT4_H */
+#endif  // UI_MAT4_H_
diff --git a/include/ui/quat.h b/include/ui/quat.h
new file mode 100644
index 0000000..8c89cd7
--- /dev/null
+++ b/include/ui/quat.h
@@ -0,0 +1,189 @@
+/*
+ * Copyright 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef UI_QUAT_H_
+#define UI_QUAT_H_
+
+#include <ui/half.h>
+#include <ui/TQuatHelpers.h>
+#include <ui/vec3.h>
+#include <ui/vec4.h>
+
+#include <stdint.h>
+#include <sys/types.h>
+
+#ifndef PURE
+#define PURE __attribute__((pure))
+#endif
+
+namespace android {
+// -------------------------------------------------------------------------------------
+
+namespace details {
+
+template <typename T>
+class TQuaternion : public TVecAddOperators<TQuaternion, T>,
+                    public TVecUnaryOperators<TQuaternion, T>,
+                    public TVecComparisonOperators<TQuaternion, T>,
+                    public TQuatProductOperators<TQuaternion, T>,
+                    public TQuatFunctions<TQuaternion, T>,
+                    public TQuatDebug<TQuaternion, T> {
+public:
+    enum no_init { NO_INIT };
+    typedef T value_type;
+    typedef T& reference;
+    typedef T const& const_reference;
+    typedef size_t size_type;
+
+    /*
+     * quaternion internals stored as:
+     *
+     * q = w + xi + yj + zk
+     *
+     *  q[0] = x;
+     *  q[1] = y;
+     *  q[2] = z;
+     *  q[3] = w;
+     *
+     */
+    union {
+        struct { T x, y, z, w; };
+        TVec4<T> xyzw;
+        TVec3<T> xyz;
+        TVec2<T> xy;
+    };
+
+    enum { SIZE = 4 };
+    inline constexpr static size_type size() { return SIZE; }
+
+    // array access
+    inline constexpr T const& operator[](size_t i) const {
+#if __cplusplus >= 201402L
+        // only possible in C++0x14 with constexpr
+        assert(i < SIZE);
+#endif
+        return (&x)[i];
+    }
+
+    inline T& operator[](size_t i) {
+        assert(i < SIZE);
+        return (&x)[i];
+    }
+
+    // -----------------------------------------------------------------------
+    // we want the compiler generated versions for these...
+    TQuaternion(const TQuaternion&) = default;
+    ~TQuaternion() = default;
+    TQuaternion& operator = (const TQuaternion&) = default;
+
+    // constructors
+
+    // leaves object uninitialized. use with caution.
+    explicit
+    constexpr TQuaternion(no_init) : xyzw(TVec4<T>::NO_INIT) {}
+
+    // default constructor. sets all values to zero.
+    constexpr TQuaternion() : x(0), y(0), z(0), w(0) { }
+
+    // handles implicit conversion to a tvec4. must not be explicit.
+    template<typename A>
+    constexpr TQuaternion(A w) : x(0), y(0), z(0), w(w) {
+        static_assert(std::is_arithmetic<A>::value, "requires arithmetic type");
+    }
+
+    // initialize from 4 values to w + xi + yj + zk
+    template<typename A, typename B, typename C, typename D>
+    constexpr TQuaternion(A w, B x, C y, D z) : x(x), y(y), z(z), w(w) { }
+
+    // initialize from a vec3 + a value to : v.xi + v.yj + v.zk + w
+    template<typename A, typename B>
+    constexpr TQuaternion(const TVec3<A>& v, B w) : x(v.x), y(v.y), z(v.z), w(w) { }
+
+    // initialize from a double4
+    template<typename A>
+    constexpr explicit TQuaternion(const TVec4<A>& v) : x(v.x), y(v.y), z(v.z), w(v.w) { }
+
+    // initialize from a quaternion of a different type
+    template<typename A>
+    constexpr explicit TQuaternion(const TQuaternion<A>& v) : x(v.x), y(v.y), z(v.z), w(v.w) { }
+
+    // conjugate operator
+    constexpr TQuaternion operator~() const {
+        return conj(*this);
+    }
+
+    // constructs a quaternion from an axis and angle
+    template <typename A, typename B>
+    constexpr static TQuaternion PURE fromAxisAngle(const TVec3<A>& axis, B angle) {
+        return TQuaternion(std::sin(angle*0.5) * normalize(axis), std::cos(angle*0.5));
+    }
+};
+
+}  // namespace details
+
+// ----------------------------------------------------------------------------------------
+
+typedef details::TQuaternion<double> quatd;
+typedef details::TQuaternion<float> quat;
+typedef details::TQuaternion<float> quatf;
+typedef details::TQuaternion<half> quath;
+
+constexpr inline quat operator"" _i(long double v) {
+    return quat(0, v, 0, 0);
+}
+constexpr inline quat operator"" _j(long double v) {
+    return quat(0, 0, v, 0);
+}
+constexpr inline quat operator"" _k(long double v) {
+    return quat(0, 0, 0, v);
+}
+
+constexpr inline quat operator"" _i(unsigned long long v) {  // NOLINT
+    return quat(0, v, 0, 0);
+}
+constexpr inline quat operator"" _j(unsigned long long v) {  // NOLINT
+    return quat(0, 0, v, 0);
+}
+constexpr inline quat operator"" _k(unsigned long long v) {  // NOLINT
+    return quat(0, 0, 0, v);
+}
+
+constexpr inline quatd operator"" _id(long double v) {
+    return quatd(0, v, 0, 0);
+}
+constexpr inline quatd operator"" _jd(long double v) {
+    return quatd(0, 0, v, 0);
+}
+constexpr inline quatd operator"" _kd(long double v) {
+    return quatd(0, 0, 0, v);
+}
+
+constexpr inline quatd operator"" _id(unsigned long long v) {  // NOLINT
+    return quatd(0, v, 0, 0);
+}
+constexpr inline quatd operator"" _jd(unsigned long long v) {  // NOLINT
+    return quatd(0, 0, v, 0);
+}
+constexpr inline quatd operator"" _kd(unsigned long long v) {  // NOLINT
+    return quatd(0, 0, 0, v);
+}
+
+// ----------------------------------------------------------------------------------------
+}  // namespace android
+
+#undef PURE
+
+#endif  // UI_QUAT_H_
diff --git a/include/ui/scalar.h b/include/ui/scalar.h
new file mode 100644
index 0000000..c938d5c
--- /dev/null
+++ b/include/ui/scalar.h
@@ -0,0 +1,167 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef UI_SCALAR_H
+#define UI_SCALAR_H
+
+#include <algorithm>
+#include <cmath>
+
+namespace android {
+
+template<typename T>
+static constexpr T saturate(T v) noexcept {
+    return T(std::min(T(1), std::max(T(0), v)));
+}
+
+template<typename T>
+static constexpr T clamp(T v, T min, T max) noexcept {
+    return T(std::min(max, std::max(min, v)));
+}
+
+template<typename T>
+static constexpr T mix(T x, T y, T a) noexcept {
+    return x * (T(1) - a) + y * a;
+}
+
+template<typename T>
+static constexpr T lerp(T x, T y, T a) noexcept {
+    return mix(x, y, a);
+}
+
+namespace details {
+    static int asInt(float x) {
+        return *reinterpret_cast<int*>(&x);
+    }
+
+    static float asFloat(int x) {
+        return *reinterpret_cast<float*>(&x);
+    }
+
+    static constexpr float inversesqrtNewtonRaphson(float x, float inverseSqrtX) {
+        return inverseSqrtX * (-x * 0.5f * (inverseSqrtX * inverseSqrtX) + 1.5f);
+    }
+
+    static constexpr float rcpNewtonRaphson(float x, float rcpX) {
+        return rcpX * (-rcpX * x + 2.0f);
+    }
+
+    static const float inverseSqrtFast(float f, int c) {
+        int v = details::asInt(f);
+        v = c - (v >> 1);
+        return details::asFloat(v);
+    }
+
+    static const float rcpFast(float f, int c) {
+        int v = details::asInt(f);
+        v = c - v;
+        return details::asFloat(v);
+    }
+} // namespace details
+
+/**
+ * Approximates an inverse square root using a specified
+ * number of Newton-Raphson iterations. The number of iterations
+ * can be:
+ *
+ * - 0, with a precision of ~3.4% over the full range
+ * - 1, with a precision of ~0.2% over the full range
+ * - 2, with a precision of ~4e-4% over the full range
+ */
+template<int>
+static float inversesqrtFast(float f) noexcept;
+
+template<>
+float inversesqrtFast<0>(float f) noexcept {
+    return details::inverseSqrtFast(f, 0x5f3759df);
+}
+
+template<>
+float inversesqrtFast<1>(float f) noexcept {
+    float x = details::inverseSqrtFast(f, 0x5f375a86);
+    return details::inversesqrtNewtonRaphson(f, x);
+}
+
+template<>
+float inversesqrtFast<2>(float f) noexcept {
+    float x = details::inverseSqrtFast(f, 0x5f375a86);
+    x = details::inversesqrtNewtonRaphson(f, x);
+    x = details::inversesqrtNewtonRaphson(f, x);
+    return x;
+}
+
+/**
+ * Approximates a square root using a specified number of
+ * Newton-Raphson iterations. The number of iterations can be:
+ *
+ * - 0, with a precision of ~0.7% over the full range
+ * - 1, with a precision of ~0.2% over the full range
+ * - 2, with a precision of ~4e-4% over the full range
+ */
+template<int>
+static float sqrtFast(float f) noexcept;
+
+template<>
+float sqrtFast<0>(float f) noexcept {
+    int v = details::asInt(f);
+    v = 0x1fbd1df5 + (v >> 1);
+    return details::asFloat(v);
+}
+
+template<>
+float sqrtFast<1>(float f) noexcept {
+    return f * inversesqrtFast<1>(f);
+}
+
+template<>
+float sqrtFast<2>(float f) noexcept {
+    return f * inversesqrtFast<2>(f);
+}
+
+/**
+ * Approximates a reciprocal using a specified number
+ * of Newton-Raphson iterations. The number of iterations
+ * can be:
+ *
+ * - 0, with a precision of ~0.4% over the full range
+ * - 1, with a precision of ~0.02% over the full range
+ * - 2, with a precision of ~5e-5% over the full range
+ */
+template<int>
+static float rcpFast(float f) noexcept;
+
+template<>
+float rcpFast<0>(float f) noexcept {
+    return details::rcpFast(f, 0x7ef311c2);
+}
+
+template<>
+float rcpFast<1>(float f) noexcept {
+    float x = details::rcpFast(f, 0x7ef311c3);
+    return details::rcpNewtonRaphson(f, x);
+}
+
+template<>
+float rcpFast<2>(float f) noexcept {
+    float x = details::rcpFast(f, 0x7ef312ac);
+    x = details::rcpNewtonRaphson(f, x);
+    x = details::rcpNewtonRaphson(f, x);
+    return x;
+}
+
+} // namespace std
+
+#endif // UI_SCALAR_H
diff --git a/include/ui/vec2.h b/include/ui/vec2.h
index c31d0e4..a88d026 100644
--- a/include/ui/vec2.h
+++ b/include/ui/vec2.h
@@ -14,25 +14,29 @@
  * limitations under the License.
  */
 
-#ifndef UI_VEC2_H
-#define UI_VEC2_H
+#ifndef UI_VEC2_H_
+#define UI_VEC2_H_
 
+#include <ui/TVecHelpers.h>
+#include <ui/half.h>
+#include <assert.h>
 #include <stdint.h>
 #include <sys/types.h>
+#include <type_traits>
 
-#define TVEC_IMPLEMENTATION
-#include <ui/TVecHelpers.h>
 
 namespace android {
 // -------------------------------------------------------------------------------------
 
+namespace details {
+
 template <typename T>
-class tvec2 :   public TVecProductOperators<tvec2, T>,
-                public TVecAddOperators<tvec2, T>,
-                public TVecUnaryOperators<tvec2, T>,
-                public TVecComparisonOperators<tvec2, T>,
-                public TVecFunctions<tvec2, T>
-{
+class TVec2 :   public TVecProductOperators<TVec2, T>,
+                public TVecAddOperators<TVec2, T>,
+                public TVecUnaryOperators<TVec2, T>,
+                public TVecComparisonOperators<TVec2, T>,
+                public TVecFunctions<TVec2, T>,
+                public TVecDebug<TVec2, T> {
 public:
     enum no_init { NO_INIT };
     typedef T value_type;
@@ -46,46 +50,73 @@
         struct { T r, g; };
     };
 
-    enum { SIZE = 2 };
-    inline static size_type size() { return SIZE; }
+    static constexpr size_t SIZE = 2;
+    inline constexpr size_type size() const { return SIZE; }
 
     // array access
-    inline T const& operator [] (size_t i) const { return (&x)[i]; }
-    inline T&       operator [] (size_t i)       { return (&x)[i]; }
+    inline constexpr T const& operator[](size_t i) const {
+#if __cplusplus >= 201402L
+        // only possible in C++0x14 with constexpr
+        assert(i < SIZE);
+#endif
+        return (&x)[i];
+    }
+
+    inline T& operator[](size_t i) {
+        assert(i < SIZE);
+        return (&x)[i];
+    }
 
     // -----------------------------------------------------------------------
-    // we don't provide copy-ctor and operator= on purpose
-    // because we want the compiler generated versions
+    // we want the compiler generated versions for these...
+    TVec2(const TVec2&) = default;
+    ~TVec2() = default;
+    TVec2& operator = (const TVec2&) = default;
 
     // constructors
 
     // leaves object uninitialized. use with caution.
-    explicit tvec2(no_init) { }
+    explicit
+    constexpr TVec2(no_init) { }
 
     // default constructor
-    tvec2() : x(0), y(0) { }
+    constexpr TVec2() : x(0), y(0) { }
 
     // handles implicit conversion to a tvec4. must not be explicit.
-    template<typename A>
-    tvec2(A v) : x(v), y(v) { }
+    template<typename A, typename = typename std::enable_if<std::is_arithmetic<A>::value >::type>
+    constexpr TVec2(A v) : x(v), y(v) { }
 
     template<typename A, typename B>
-    tvec2(A x, B y) : x(x), y(y) { }
+    constexpr TVec2(A x, B y) : x(x), y(y) { }
 
     template<typename A>
-    explicit tvec2(const tvec2<A>& v) : x(v.x), y(v.y) { }
+    explicit
+    constexpr TVec2(const TVec2<A>& v) : x(v.x), y(v.y) { }
 
-    template<typename A>
-    tvec2(const Impersonator< tvec2<A> >& v)
-        : x(((const tvec2<A>&)v).x),
-          y(((const tvec2<A>&)v).y) { }
+    // cross product works only on vectors of size 2 or 3
+    template <typename RT>
+    friend inline
+    constexpr value_type cross(const TVec2& u, const TVec2<RT>& v) {
+        return value_type(u.x*v.y - u.y*v.x);
+    }
 };
 
-// ----------------------------------------------------------------------------------------
-
-typedef tvec2<float> vec2;
+}  // namespace details
 
 // ----------------------------------------------------------------------------------------
-}; // namespace android
 
-#endif /* UI_VEC4_H */
+typedef details::TVec2<double> double2;
+typedef details::TVec2<float> float2;
+typedef details::TVec2<float> vec2;
+typedef details::TVec2<half> half2;
+typedef details::TVec2<int32_t> int2;
+typedef details::TVec2<uint32_t> uint2;
+typedef details::TVec2<int16_t> short2;
+typedef details::TVec2<uint16_t> ushort2;
+typedef details::TVec2<int8_t> byte2;
+typedef details::TVec2<uint8_t> ubyte2;
+
+// ----------------------------------------------------------------------------------------
+}  // namespace android
+
+#endif  // UI_VEC2_H_
diff --git a/include/ui/vec3.h b/include/ui/vec3.h
index dde59a9..0254f5a 100644
--- a/include/ui/vec3.h
+++ b/include/ui/vec3.h
@@ -14,24 +14,27 @@
  * limitations under the License.
  */
 
-#ifndef UI_VEC3_H
-#define UI_VEC3_H
+#ifndef UI_VEC3_H_
+#define UI_VEC3_H_
 
+#include <ui/vec2.h>
+#include <ui/half.h>
 #include <stdint.h>
 #include <sys/types.h>
 
-#include <ui/vec2.h>
 
 namespace android {
 // -------------------------------------------------------------------------------------
 
+namespace details {
+
 template <typename T>
-class tvec3 :   public TVecProductOperators<tvec3, T>,
-                public TVecAddOperators<tvec3, T>,
-                public TVecUnaryOperators<tvec3, T>,
-                public TVecComparisonOperators<tvec3, T>,
-                public TVecFunctions<tvec3, T>
-{
+class TVec3 :   public TVecProductOperators<TVec3, T>,
+                public TVecAddOperators<TVec3, T>,
+                public TVecUnaryOperators<TVec3, T>,
+                public TVecComparisonOperators<TVec3, T>,
+                public TVecFunctions<TVec3, T>,
+                public TVecDebug<TVec3, T> {
 public:
     enum no_init { NO_INIT };
     typedef T value_type;
@@ -43,71 +46,83 @@
         struct { T x, y, z; };
         struct { T s, t, p; };
         struct { T r, g, b; };
-        Impersonator< tvec2<T> > xy;
-        Impersonator< tvec2<T> > st;
-        Impersonator< tvec2<T> > rg;
+        TVec2<T> xy;
+        TVec2<T> st;
+        TVec2<T> rg;
     };
 
-    enum { SIZE = 3 };
-    inline static size_type size() { return SIZE; }
+    static constexpr size_t SIZE = 3;
+    inline constexpr size_type size() const { return SIZE; }
 
     // array access
-    inline T const& operator [] (size_t i) const { return (&x)[i]; }
-    inline T&       operator [] (size_t i)       { return (&x)[i]; }
+    inline constexpr T const& operator[](size_t i) const {
+#if __cplusplus >= 201402L
+        // only possible in C++0x14 with constexpr
+        assert(i < SIZE);
+#endif
+        return (&x)[i];
+    }
+
+    inline T& operator[](size_t i) {
+        assert(i < SIZE);
+        return (&x)[i];
+    }
 
     // -----------------------------------------------------------------------
-    // we don't provide copy-ctor and operator= on purpose
-    // because we want the compiler generated versions
+    // we want the compiler generated versions for these...
+    TVec3(const TVec3&) = default;
+    ~TVec3() = default;
+    TVec3& operator = (const TVec3&) = default;
 
     // constructors
     // leaves object uninitialized. use with caution.
-    explicit tvec3(no_init) { }
+    explicit
+    constexpr TVec3(no_init) { }
 
     // default constructor
-    tvec3() : x(0), y(0), z(0) { }
+    constexpr TVec3() : x(0), y(0), z(0) { }
 
     // handles implicit conversion to a tvec4. must not be explicit.
-    template<typename A>
-    tvec3(A v) : x(v), y(v), z(v) { }
+    template<typename A, typename = typename std::enable_if<std::is_arithmetic<A>::value >::type>
+    constexpr TVec3(A v) : x(v), y(v), z(v) { }
 
     template<typename A, typename B, typename C>
-    tvec3(A x, B y, C z) : x(x), y(y), z(z) { }
+    constexpr TVec3(A x, B y, C z) : x(x), y(y), z(z) { }
 
     template<typename A, typename B>
-    tvec3(const tvec2<A>& v, B z) : x(v.x), y(v.y), z(z) { }
+    constexpr TVec3(const TVec2<A>& v, B z) : x(v.x), y(v.y), z(z) { }
 
     template<typename A>
-    explicit tvec3(const tvec3<A>& v) : x(v.x), y(v.y), z(v.z) { }
-
-    template<typename A>
-    tvec3(const Impersonator< tvec3<A> >& v)
-        : x(((const tvec3<A>&)v).x),
-          y(((const tvec3<A>&)v).y),
-          z(((const tvec3<A>&)v).z) { }
-
-    template<typename A, typename B>
-    tvec3(const Impersonator< tvec2<A> >& v, B z)
-        : x(((const tvec2<A>&)v).x),
-          y(((const tvec2<A>&)v).y),
-          z(z) { }
+    explicit
+    constexpr TVec3(const TVec3<A>& v) : x(v.x), y(v.y), z(v.z) { }
 
     // cross product works only on vectors of size 3
     template <typename RT>
     friend inline
-    tvec3 __attribute__((pure)) cross(const tvec3& u, const tvec3<RT>& v) {
-        return tvec3(
+    constexpr TVec3 cross(const TVec3& u, const TVec3<RT>& v) {
+        return TVec3(
                 u.y*v.z - u.z*v.y,
                 u.z*v.x - u.x*v.z,
                 u.x*v.y - u.y*v.x);
     }
 };
 
+}  // namespace details
 
 // ----------------------------------------------------------------------------------------
 
-typedef tvec3<float> vec3;
+typedef details::TVec3<double> double3;
+typedef details::TVec3<float> float3;
+typedef details::TVec3<float> vec3;
+typedef details::TVec3<half> half3;
+typedef details::TVec3<int32_t> int3;
+typedef details::TVec3<uint32_t> uint3;
+typedef details::TVec3<int16_t> short3;
+typedef details::TVec3<uint16_t> ushort3;
+typedef details::TVec3<int8_t> byte3;
+typedef details::TVec3<uint8_t> ubyte3;
 
 // ----------------------------------------------------------------------------------------
-}; // namespace android
+}  // namespace android
 
-#endif /* UI_VEC4_H */
+#endif  // UI_VEC3_H_
diff --git a/include/ui/vec4.h b/include/ui/vec4.h
index e03d331..1281aa4 100644
--- a/include/ui/vec4.h
+++ b/include/ui/vec4.h
@@ -14,24 +14,27 @@
  * limitations under the License.
  */
 
-#ifndef UI_VEC4_H
-#define UI_VEC4_H
+#ifndef UI_VEC4_H_
+#define UI_VEC4_H_
 
+#include <ui/vec3.h>
+#include <ui/half.h>
 #include <stdint.h>
 #include <sys/types.h>
 
-#include <ui/vec3.h>
 
 namespace android {
 // -------------------------------------------------------------------------------------
 
+namespace details {
+
 template <typename T>
-class tvec4 :   public TVecProductOperators<tvec4, T>,
-                public TVecAddOperators<tvec4, T>,
-                public TVecUnaryOperators<tvec4, T>,
-                public TVecComparisonOperators<tvec4, T>,
-                public TVecFunctions<tvec4, T>
-{
+class  TVec4 :  public TVecProductOperators<TVec4, T>,
+                public TVecAddOperators<TVec4, T>,
+                public TVecUnaryOperators<TVec4, T>,
+                public TVecComparisonOperators<TVec4, T>,
+                public TVecFunctions<TVec4, T>,
+                public TVecDebug<TVec4, T> {
 public:
     enum no_init { NO_INIT };
     typedef T value_type;
@@ -43,76 +46,80 @@
         struct { T x, y, z, w; };
         struct { T s, t, p, q; };
         struct { T r, g, b, a; };
-        Impersonator< tvec2<T> > xy;
-        Impersonator< tvec2<T> > st;
-        Impersonator< tvec2<T> > rg;
-        Impersonator< tvec3<T> > xyz;
-        Impersonator< tvec3<T> > stp;
-        Impersonator< tvec3<T> > rgb;
+        TVec2<T> xy;
+        TVec2<T> st;
+        TVec2<T> rg;
+        TVec3<T> xyz;
+        TVec3<T> stp;
+        TVec3<T> rgb;
     };
 
-    enum { SIZE = 4 };
-    inline static size_type size() { return SIZE; }
+    static constexpr size_t SIZE = 4;
+    inline constexpr size_type size() const { return SIZE; }
 
     // array access
-    inline T const& operator [] (size_t i) const { return (&x)[i]; }
-    inline T&       operator [] (size_t i)       { return (&x)[i]; }
+    inline constexpr T const& operator[](size_t i) const {
+#if __cplusplus >= 201402L
+        // only possible in C++0x14 with constexpr
+        assert(i < SIZE);
+#endif
+        return (&x)[i];
+    }
+
+    inline T& operator[](size_t i) {
+        assert(i < SIZE);
+        return (&x)[i];
+    }
 
     // -----------------------------------------------------------------------
-    // we don't provide copy-ctor and operator= on purpose
-    // because we want the compiler generated versions
+    // we want the compiler generated versions for these...
+    TVec4(const TVec4&) = default;
+    ~TVec4() = default;
+    TVec4& operator = (const TVec4&) = default;
 
     // constructors
 
     // leaves object uninitialized. use with caution.
-    explicit tvec4(no_init) { }
+    explicit
+    constexpr TVec4(no_init) { }
 
     // default constructor
-    tvec4() : x(0), y(0), z(0), w(0) { }
+    constexpr TVec4() : x(0), y(0), z(0), w(0) { }
 
     // handles implicit conversion to a tvec4. must not be explicit.
-    template<typename A>
-    tvec4(A v) : x(v), y(v), z(v), w(v) { }
+    template<typename A, typename = typename std::enable_if<std::is_arithmetic<A>::value >::type>
+    constexpr TVec4(A v) : x(v), y(v), z(v), w(v) { }
 
     template<typename A, typename B, typename C, typename D>
-    tvec4(A x, B y, C z, D w) : x(x), y(y), z(z), w(w) { }
+    constexpr TVec4(A x, B y, C z, D w) : x(x), y(y), z(z), w(w) { }
 
     template<typename A, typename B, typename C>
-    tvec4(const tvec2<A>& v, B z, C w) : x(v.x), y(v.y), z(z), w(w) { }
+    constexpr TVec4(const TVec2<A>& v, B z, C w) : x(v.x), y(v.y), z(z), w(w) { }
 
     template<typename A, typename B>
-    tvec4(const tvec3<A>& v, B w) : x(v.x), y(v.y), z(v.z), w(w) { }
+    constexpr TVec4(const TVec3<A>& v, B w) : x(v.x), y(v.y), z(v.z), w(w) { }
 
     template<typename A>
-    explicit tvec4(const tvec4<A>& v) : x(v.x), y(v.y), z(v.z), w(v.w) { }
-
-    template<typename A>
-    tvec4(const Impersonator< tvec4<A> >& v)
-        : x(((const tvec4<A>&)v).x),
-          y(((const tvec4<A>&)v).y),
-          z(((const tvec4<A>&)v).z),
-          w(((const tvec4<A>&)v).w) { }
-
-    template<typename A, typename B>
-    tvec4(const Impersonator< tvec3<A> >& v, B w)
-        : x(((const tvec3<A>&)v).x),
-          y(((const tvec3<A>&)v).y),
-          z(((const tvec3<A>&)v).z),
-          w(w) { }
-
-    template<typename A, typename B, typename C>
-    tvec4(const Impersonator< tvec2<A> >& v, B z, C w)
-        : x(((const tvec2<A>&)v).x),
-          y(((const tvec2<A>&)v).y),
-          z(z),
-          w(w) { }
+    explicit
+    constexpr TVec4(const TVec4<A>& v) : x(v.x), y(v.y), z(v.z), w(v.w) { }
 };
 
-// ----------------------------------------------------------------------------------------
-
-typedef tvec4<float> vec4;
+}  // namespace details
 
 // ----------------------------------------------------------------------------------------
-}; // namespace android
 
-#endif /* UI_VEC4_H */
+typedef details::TVec4<double> double4;
+typedef details::TVec4<float> float4;
+typedef details::TVec4<float> vec4;
+typedef details::TVec4<half> half4;
+typedef details::TVec4<int32_t> int4;
+typedef details::TVec4<uint32_t> uint4;
+typedef details::TVec4<int16_t> short4;
+typedef details::TVec4<uint16_t> ushort4;
+typedef details::TVec4<int8_t> byte4;
+typedef details::TVec4<uint8_t> ubyte4;
+
+// ----------------------------------------------------------------------------------------
+}  // namespace android
+
+#endif  // UI_VEC4_H_
diff --git a/libs/ui/tests/Android.bp b/libs/ui/tests/Android.bp
index 8cdab8c..9594466 100644
--- a/libs/ui/tests/Android.bp
+++ b/libs/ui/tests/Android.bp
@@ -29,3 +29,13 @@
     name: "mat_test",
     srcs: ["mat_test.cpp"],
 }
+
+cc_test {
+    name: "half_test",
+    srcs: ["half_test.cpp"],
+}
+
+cc_test {
+    name: "quat_test",
+    srcs: ["quat_test.cpp"],
+}
diff --git a/libs/ui/tests/half_test.cpp b/libs/ui/tests/half_test.cpp
new file mode 100644
index 0000000..0ea9265
--- /dev/null
+++ b/libs/ui/tests/half_test.cpp
@@ -0,0 +1,96 @@
+/*
+ * Copyright 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define LOG_TAG "HalfTest"
+
+#include <math.h>
+#include <stdlib.h>
+
+#include <ui/half.h>
+#include <ui/vec4.h>
+
+#include <gtest/gtest.h>
+
+namespace android {
+
+class HalfTest : public testing::Test {
+protected:
+};
+
+TEST_F(HalfTest, Basics) {
+
+    EXPECT_EQ(2, sizeof(half));
+
+    // test +/- zero
+    EXPECT_EQ(0x0000, half( 0.0f).getBits());
+    EXPECT_EQ(0x8000, half(-0.0f).getBits());
+
+    // test nan
+    EXPECT_EQ(0x7e00, half(NAN).getBits());
+
+    // test +/- infinity
+    EXPECT_EQ(0x7C00, half( std::numeric_limits<float>::infinity()).getBits());
+    EXPECT_EQ(0xFC00, half(-std::numeric_limits<float>::infinity()).getBits());
+
+    // test a few known values
+    EXPECT_EQ(0x3C01, half(1.0009765625).getBits());
+    EXPECT_EQ(0xC000, half(-2).getBits());
+    EXPECT_EQ(0x0400, half(6.10352e-5).getBits());
+    EXPECT_EQ(0x7BFF, half(65504).getBits());
+    EXPECT_EQ(0x3555, half(1.0f/3).getBits());
+
+    // numeric limits
+    EXPECT_EQ(0x7C00, std::numeric_limits<half>::infinity().getBits());
+    EXPECT_EQ(0x0400, std::numeric_limits<half>::min().getBits());
+    EXPECT_EQ(0x7BFF, std::numeric_limits<half>::max().getBits());
+    EXPECT_EQ(0xFBFF, std::numeric_limits<half>::lowest().getBits());
+
+    // denormals (flushed to zero)
+    EXPECT_EQ(0x0000, half( 6.09756e-5).getBits());      // if handled, should be: 0x03FF
+    EXPECT_EQ(0x0000, half( 5.96046e-8).getBits());      // if handled, should be: 0x0001
+    EXPECT_EQ(0x8000, half(-6.09756e-5).getBits());      // if handled, should be: 0x83FF
+    EXPECT_EQ(0x8000, half(-5.96046e-8).getBits());      // if handled, should be: 0x8001
+
+    // test all exactly representable integers
+    for (int i=-2048 ; i<= 2048 ; ++i) {
+        half h = i;
+        EXPECT_EQ(i, float(h));
+    }
+}
+
+TEST_F(HalfTest, Literals) {
+    half one = 1.0_hf;
+    half pi = 3.1415926_hf;
+    half minusTwo = -2.0_hf;
+
+    EXPECT_EQ(half(1.0f), one);
+    EXPECT_EQ(half(3.1415926), pi);
+    EXPECT_EQ(half(-2.0f), minusTwo);
+}
+
+
+TEST_F(HalfTest, Vec) {
+    float4 f4(1,2,3,4);
+    half4 h4(f4);
+    half3 h3(f4.xyz);
+    half2 h2(f4.xy);
+
+    EXPECT_EQ(f4, h4);
+    EXPECT_EQ(f4.xyz, h3);
+    EXPECT_EQ(f4.xy, h2);
+}
+
+}; // namespace android
diff --git a/libs/ui/tests/mat_test.cpp b/libs/ui/tests/mat_test.cpp
index a2c63ac..0f8e631 100644
--- a/libs/ui/tests/mat_test.cpp
+++ b/libs/ui/tests/mat_test.cpp
@@ -14,13 +14,17 @@
  * limitations under the License.
  */
 
-#define LOG_TAG "RegionTest"
+#define LOG_TAG "MatTest"
 
 #include <stdlib.h>
-#include <ui/Region.h>
-#include <ui/Rect.h>
+
+#include <limits>
+#include <random>
+#include <functional>
+
 #include <gtest/gtest.h>
 
+#include <ui/mat2.h>
 #include <ui/mat4.h>
 
 namespace android {
@@ -99,11 +103,13 @@
     const mat4 identity;
     mat4 m0;
 
-    ++m0;
-    EXPECT_EQ(mat4( vec4(2,1,1,1), vec4(1,2,1,1), vec4(1,1,2,1), vec4(1,1,1,2) ), m0);
-    EXPECT_EQ(mat4( -vec4(2,1,1,1), -vec4(1,2,1,1), -vec4(1,1,2,1), -vec4(1,1,1,2) ), -m0);
+    m0 = -m0;
+    EXPECT_EQ(mat4(vec4(-1, 0,  0,  0),
+                   vec4(0, -1,  0,  0),
+                   vec4(0,  0, -1,  0),
+                   vec4(0,  0,  0, -1)), m0);
 
-    --m0;
+    m0 = -m0;
     EXPECT_EQ(identity, m0);
 }
 
@@ -112,19 +118,19 @@
     mat4 m0;
     EXPECT_EQ(4, trace(m0));
 
-    mat4 m1(vec4(1,2,3,4), vec4(5,6,7,8), vec4(9,10,11,12), vec4(13,14,15,16));
-    mat4 m2(vec4(1,5,9,13), vec4(2,6,10,14), vec4(3,7,11,15), vec4(4,8,12,16));
+    mat4 m1(vec4(1, 2, 3, 4), vec4(5, 6, 7, 8), vec4(9, 10, 11, 12), vec4(13, 14, 15, 16));
+    mat4 m2(vec4(1, 5, 9, 13), vec4(2, 6, 10, 14), vec4(3, 7, 11, 15), vec4(4, 8, 12, 16));
     EXPECT_EQ(m1, transpose(m2));
     EXPECT_EQ(m2, transpose(m1));
-    EXPECT_EQ(vec4(1,6,11,16), diag(m1));
+    EXPECT_EQ(vec4(1, 6, 11, 16), diag(m1));
 
     EXPECT_EQ(identity, inverse(identity));
 
-    mat4 m3(vec4(4,3,0,0), vec4(3,2,0,0), vec4(0,0,1,0), vec4(0,0,0,1));
+    mat4 m3(vec4(4, 3, 0, 0), vec4(3, 2, 0, 0), vec4(0, 0, 1, 0), vec4(0, 0, 0, 1));
     mat4 m3i(inverse(m3));
     EXPECT_FLOAT_EQ(-2, m3i[0][0]);
-    EXPECT_FLOAT_EQ( 3, m3i[0][1]);
-    EXPECT_FLOAT_EQ( 3, m3i[1][0]);
+    EXPECT_FLOAT_EQ(3,  m3i[0][1]);
+    EXPECT_FLOAT_EQ(3,  m3i[1][0]);
     EXPECT_FLOAT_EQ(-4, m3i[1][1]);
 
     mat4 m3ii(inverse(m3i));
@@ -134,6 +140,553 @@
     EXPECT_FLOAT_EQ(m3[1][1], m3ii[1][1]);
 
     EXPECT_EQ(m1, m1*identity);
+
+
+    for (size_t c=0 ; c<4 ; c++) {
+        for (size_t r=0 ; r<4 ; r++) {
+            EXPECT_FLOAT_EQ(m1[c][r], m1(r, c));
+        }
+    }
 }
 
+TEST_F(MatTest, ElementAccess) {
+    mat4 m(vec4(1, 2, 3, 4), vec4(5, 6, 7, 8), vec4(9, 10, 11, 12), vec4(13, 14, 15, 16));
+    for (size_t c=0 ; c<4 ; c++) {
+        for (size_t r=0 ; r<4 ; r++) {
+            EXPECT_FLOAT_EQ(m[c][r], m(r, c));
+        }
+    }
+
+    m(3,2) = 100;
+    EXPECT_FLOAT_EQ(m[2][3], 100);
+    EXPECT_FLOAT_EQ(m(3, 2), 100);
+}
+
+//------------------------------------------------------------------------------
+// MAT 3
+//------------------------------------------------------------------------------
+
+class Mat3Test : public testing::Test {
+protected:
+};
+
+TEST_F(Mat3Test, Basics) {
+    mat3 m0;
+    EXPECT_EQ(sizeof(mat3), sizeof(float)*9);
+}
+
+TEST_F(Mat3Test, ComparisonOps) {
+    mat3 m0;
+    mat3 m1(2);
+
+    EXPECT_TRUE(m0 == m0);
+    EXPECT_TRUE(m0 != m1);
+    EXPECT_FALSE(m0 != m0);
+    EXPECT_FALSE(m0 == m1);
+}
+
+TEST_F(Mat3Test, Constructors) {
+    mat3 m0;
+    ASSERT_EQ(m0[0].x, 1);
+    ASSERT_EQ(m0[0].y, 0);
+    ASSERT_EQ(m0[0].z, 0);
+    ASSERT_EQ(m0[1].x, 0);
+    ASSERT_EQ(m0[1].y, 1);
+    ASSERT_EQ(m0[1].z, 0);
+    ASSERT_EQ(m0[2].x, 0);
+    ASSERT_EQ(m0[2].y, 0);
+    ASSERT_EQ(m0[2].z, 1);
+
+    mat3 m1(2);
+    mat3 m2(vec3(2));
+    mat3 m3(m2);
+
+    EXPECT_EQ(m1, m2);
+    EXPECT_EQ(m2, m3);
+    EXPECT_EQ(m3, m1);
+}
+
+TEST_F(Mat3Test, ArithmeticOps) {
+    mat3 m0;
+    mat3 m1(2);
+    mat3 m2(vec3(2));
+
+    m1 += m2;
+    EXPECT_EQ(mat3(4), m1);
+
+    m2 -= m1;
+    EXPECT_EQ(mat3(-2), m2);
+
+    m1 *= 2;
+    EXPECT_EQ(mat3(8), m1);
+
+    m1 /= 2;
+    EXPECT_EQ(mat3(4), m1);
+
+    m0 = -m0;
+    EXPECT_EQ(mat3(-1), m0);
+}
+
+TEST_F(Mat3Test, UnaryOps) {
+    const mat3 identity;
+    mat3 m0;
+
+    m0 = -m0;
+    EXPECT_EQ(mat3(vec3(-1, 0,  0),
+                   vec3(0, -1,  0),
+                   vec3(0,  0, -1)), m0);
+
+    m0 = -m0;
+    EXPECT_EQ(identity, m0);
+}
+
+TEST_F(Mat3Test, MiscOps) {
+    const mat3 identity;
+    mat3 m0;
+    EXPECT_EQ(3, trace(m0));
+
+    mat3 m1(vec3(1, 2, 3), vec3(4, 5, 6), vec3(7, 8, 9));
+    mat3 m2(vec3(1, 4, 7), vec3(2, 5, 8), vec3(3, 6, 9));
+    EXPECT_EQ(m1, transpose(m2));
+    EXPECT_EQ(m2, transpose(m1));
+    EXPECT_EQ(vec3(1, 5, 9), diag(m1));
+
+    EXPECT_EQ(identity, inverse(identity));
+
+    mat3 m3(vec3(4, 3, 0), vec3(3, 2, 0), vec3(0, 0, 1));
+    mat3 m3i(inverse(m3));
+    EXPECT_FLOAT_EQ(-2, m3i[0][0]);
+    EXPECT_FLOAT_EQ(3,  m3i[0][1]);
+    EXPECT_FLOAT_EQ(3,  m3i[1][0]);
+    EXPECT_FLOAT_EQ(-4, m3i[1][1]);
+
+    mat3 m3ii(inverse(m3i));
+    EXPECT_FLOAT_EQ(m3[0][0], m3ii[0][0]);
+    EXPECT_FLOAT_EQ(m3[0][1], m3ii[0][1]);
+    EXPECT_FLOAT_EQ(m3[1][0], m3ii[1][0]);
+    EXPECT_FLOAT_EQ(m3[1][1], m3ii[1][1]);
+
+    EXPECT_EQ(m1, m1*identity);
+}
+
+//------------------------------------------------------------------------------
+// MAT 2
+//------------------------------------------------------------------------------
+
+class Mat2Test : public testing::Test {
+protected:
+};
+
+TEST_F(Mat2Test, Basics) {
+    mat2 m0;
+    EXPECT_EQ(sizeof(mat2), sizeof(float)*4);
+}
+
+TEST_F(Mat2Test, ComparisonOps) {
+    mat2 m0;
+    mat2 m1(2);
+
+    EXPECT_TRUE(m0 == m0);
+    EXPECT_TRUE(m0 != m1);
+    EXPECT_FALSE(m0 != m0);
+    EXPECT_FALSE(m0 == m1);
+}
+
+TEST_F(Mat2Test, Constructors) {
+    mat2 m0;
+    ASSERT_EQ(m0[0].x, 1);
+    ASSERT_EQ(m0[0].y, 0);
+    ASSERT_EQ(m0[1].x, 0);
+    ASSERT_EQ(m0[1].y, 1);
+
+    mat2 m1(2);
+    mat2 m2(vec2(2));
+    mat2 m3(m2);
+
+    EXPECT_EQ(m1, m2);
+    EXPECT_EQ(m2, m3);
+    EXPECT_EQ(m3, m1);
+}
+
+TEST_F(Mat2Test, ArithmeticOps) {
+    mat2 m0;
+    mat2 m1(2);
+    mat2 m2(vec2(2));
+
+    m1 += m2;
+    EXPECT_EQ(mat2(4), m1);
+
+    m2 -= m1;
+    EXPECT_EQ(mat2(-2), m2);
+
+    m1 *= 2;
+    EXPECT_EQ(mat2(8), m1);
+
+    m1 /= 2;
+    EXPECT_EQ(mat2(4), m1);
+
+    m0 = -m0;
+    EXPECT_EQ(mat2(-1), m0);
+}
+
+TEST_F(Mat2Test, UnaryOps) {
+    const mat2 identity;
+    mat2 m0;
+
+    m0 = -m0;
+    EXPECT_EQ(mat2(vec2(-1, 0),
+                   vec2(0, -1)), m0);
+
+    m0 = -m0;
+    EXPECT_EQ(identity, m0);
+}
+
+TEST_F(Mat2Test, MiscOps) {
+    const mat2 identity;
+    mat2 m0;
+    EXPECT_EQ(2, trace(m0));
+
+    mat2 m1(vec2(1, 2), vec2(3, 4));
+    mat2 m2(vec2(1, 3), vec2(2, 4));
+    EXPECT_EQ(m1, transpose(m2));
+    EXPECT_EQ(m2, transpose(m1));
+    EXPECT_EQ(vec2(1, 4), diag(m1));
+
+    EXPECT_EQ(identity, inverse(identity));
+
+    EXPECT_EQ(m1, m1*identity);
+}
+
+//------------------------------------------------------------------------------
+// MORE MATRIX TESTS
+//------------------------------------------------------------------------------
+
+template <typename T>
+class MatTestT : public ::testing::Test {
+public:
+};
+
+typedef ::testing::Types<float,float> TestMatrixValueTypes;
+
+TYPED_TEST_CASE(MatTestT, TestMatrixValueTypes);
+
+#define TEST_MATRIX_INVERSE(MATRIX, EPSILON)                                \
+{                                                                           \
+    typedef decltype(MATRIX) MatrixType;                                    \
+    MatrixType inv1 = inverse(MATRIX);                                      \
+    MatrixType ident1 = MATRIX * inv1;                                      \
+    static const MatrixType IDENTITY;                                       \
+    for (size_t row = 0; row < MatrixType::ROW_SIZE; ++row) {               \
+        for (size_t col = 0; col < MatrixType::COL_SIZE; ++col) {           \
+            EXPECT_NEAR(ident1[row][col], IDENTITY[row][col], EPSILON);     \
+        }                                                                   \
+    }                                                                       \
+}
+
+TYPED_TEST(MatTestT, Inverse4) {
+    typedef ::android::details::TMat44<TypeParam> M44T;
+
+    M44T m1(1,  0,  0,  0,
+            0,  1,  0,  0,
+            0,  0,  1,  0,
+            0,  0,  0,  1);
+
+    M44T m2(0,  -1,  0,  0,
+            1,  0,  0,  0,
+            0,  0,  1,  0,
+            0,  0,  0,  1);
+
+    M44T m3(1,  0,  0,  0,
+            0,  2,  0,  0,
+            0,  0,  0,  1,
+            0,  0,  -1,  0);
+
+    M44T m4(
+            4.683281e-01, 1.251189e-02, -8.834660e-01, -4.726541e+00,
+             -8.749647e-01,  1.456563e-01, -4.617587e-01, 3.044795e+00,
+             1.229049e-01,  9.892561e-01, 7.916244e-02, -6.737138e+00,
+             0.000000e+00, 0.000000e+00, 0.000000e+00, 1.000000e+00);
+
+    M44T m5(
+        4.683281e-01, 1.251189e-02, -8.834660e-01, -4.726541e+00,
+        -8.749647e-01,  1.456563e-01, -4.617587e-01, 3.044795e+00,
+        1.229049e-01,  9.892561e-01, 7.916244e-02, -6.737138e+00,
+        1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00);
+
+    TEST_MATRIX_INVERSE(m1, 0);
+    TEST_MATRIX_INVERSE(m2, 0);
+    TEST_MATRIX_INVERSE(m3, 0);
+    TEST_MATRIX_INVERSE(m4, 20.0 * std::numeric_limits<TypeParam>::epsilon());
+    TEST_MATRIX_INVERSE(m5, 20.0 * std::numeric_limits<TypeParam>::epsilon());
+}
+
+//------------------------------------------------------------------------------
+TYPED_TEST(MatTestT, Inverse3) {
+    typedef ::android::details::TMat33<TypeParam> M33T;
+
+    M33T m1(1,  0,  0,
+            0,  1,  0,
+            0,  0,  1);
+
+    M33T m2(0,  -1,  0,
+            1,  0,  0,
+            0,  0,  1);
+
+    M33T m3(2,  0,  0,
+            0,  0,  1,
+            0,  -1,  0);
+
+    M33T m4(
+            4.683281e-01, 1.251189e-02, 0.000000e+00,
+            -8.749647e-01, 1.456563e-01, 0.000000e+00,
+            0.000000e+00, 0.000000e+00, 1.000000e+00);
+
+    M33T m5(
+            4.683281e-01, 1.251189e-02, -8.834660e-01,
+           -8.749647e-01, 1.456563e-01, -4.617587e-01,
+            1.229049e-01, 9.892561e-01, 7.916244e-02);
+
+    TEST_MATRIX_INVERSE(m1, 0);
+    TEST_MATRIX_INVERSE(m2, 0);
+    TEST_MATRIX_INVERSE(m3, 0);
+    TEST_MATRIX_INVERSE(m4, 20.0 * std::numeric_limits<TypeParam>::epsilon());
+    TEST_MATRIX_INVERSE(m5, 20.0 * std::numeric_limits<TypeParam>::epsilon());
+}
+
+//------------------------------------------------------------------------------
+TYPED_TEST(MatTestT, Inverse2) {
+    typedef ::android::details::TMat22<TypeParam> M22T;
+
+    M22T m1(1,  0,
+            0,  1);
+
+    M22T m2(0,  -1,
+            1,  0);
+
+    M22T m3(
+            4.683281e-01, 1.251189e-02,
+            -8.749647e-01, 1.456563e-01);
+
+    M22T m4(
+            4.683281e-01, 1.251189e-02,
+           -8.749647e-01, 1.456563e-01);
+
+    TEST_MATRIX_INVERSE(m1, 0);
+    TEST_MATRIX_INVERSE(m2, 0);
+    TEST_MATRIX_INVERSE(m3, 20.0 * std::numeric_limits<TypeParam>::epsilon());
+    TEST_MATRIX_INVERSE(m4, 20.0 * std::numeric_limits<TypeParam>::epsilon());
+}
+
+//------------------------------------------------------------------------------
+// A macro to help with vector comparisons within floating point range.
+#define EXPECT_VEC_EQ(VEC1, VEC2)                               \
+do {                                                            \
+    const decltype(VEC1) v1 = VEC1;                             \
+    const decltype(VEC2) v2 = VEC2;                             \
+    if (std::is_same<TypeParam,float>::value) {                 \
+        for (size_t i = 0; i < v1.size(); ++i) {                \
+            EXPECT_FLOAT_EQ(v1[i], v2[i]);                      \
+        }                                                       \
+    } else if (std::is_same<TypeParam,float>::value) {          \
+        for (size_t i = 0; i < v1.size(); ++i) {                \
+            EXPECT_DOUBLE_EQ(v1[i], v2[i]);                     \
+        }                                                       \
+    } else {                                                    \
+        for (size_t i = 0; i < v1.size(); ++i) {                \
+            EXPECT_EQ(v1[i], v2[i]);                            \
+        }                                                       \
+    }                                                           \
+} while(0)
+
+//------------------------------------------------------------------------------
+// A macro to help with type comparisons within floating point range.
+#define ASSERT_TYPE_EQ(T1, T2)                                  \
+do {                                                            \
+    const decltype(T1) t1 = T1;                                 \
+    const decltype(T2) t2 = T2;                                 \
+    if (std::is_same<TypeParam,float>::value) {                 \
+        ASSERT_FLOAT_EQ(t1, t2);                                \
+    } else if (std::is_same<TypeParam,float>::value) {         \
+        ASSERT_DOUBLE_EQ(t1, t2);                               \
+    } else {                                                    \
+        ASSERT_EQ(t1, t2);                                      \
+    }                                                           \
+} while(0)
+
+//------------------------------------------------------------------------------
+// Test some translation stuff.
+TYPED_TEST(MatTestT, Translation4) {
+    typedef ::android::details::TMat44<TypeParam> M44T;
+    typedef ::android::details::TVec4<TypeParam> V4T;
+
+    V4T translateBy(-7.3, 1.1, 14.4, 0.0);
+    V4T translation(translateBy[0], translateBy[1], translateBy[2], 1.0);
+    M44T translation_matrix = M44T::translate(translation);
+
+    V4T p1(9.9, 3.1, 41.1, 1.0);
+    V4T p2(-18.0, 0.0, 1.77, 1.0);
+    V4T p3(0, 0, 0, 1);
+    V4T p4(-1000, -1000, 1000, 1.0);
+
+    EXPECT_VEC_EQ(translation_matrix * p1, translateBy + p1);
+    EXPECT_VEC_EQ(translation_matrix * p2, translateBy + p2);
+    EXPECT_VEC_EQ(translation_matrix * p3, translateBy + p3);
+    EXPECT_VEC_EQ(translation_matrix * p4, translateBy + p4);
+}
+
+//------------------------------------------------------------------------------
+template <typename MATRIX>
+static void verifyOrthonormal(const MATRIX& A) {
+    typedef typename MATRIX::value_type T;
+
+    static constexpr T value_eps = T(100) * std::numeric_limits<T>::epsilon();
+
+    const MATRIX prod = A * transpose(A);
+    for (size_t i = 0; i < MATRIX::NUM_COLS; ++i) {
+        for (size_t j = 0; j < MATRIX::NUM_ROWS; ++j) {
+            if (i == j) {
+                ASSERT_NEAR(prod[i][j], T(1), value_eps);
+            } else {
+                ASSERT_NEAR(prod[i][j], T(0), value_eps);
+            }
+        }
+    }
+}
+
+//------------------------------------------------------------------------------
+// Test euler code.
+TYPED_TEST(MatTestT, EulerZYX_44) {
+    typedef ::android::details::TMat44<TypeParam> M44T;
+
+    std::default_random_engine generator(82828);
+    std::uniform_real_distribution<float> distribution(-6.0 * 2.0*M_PI, 6.0 * 2.0*M_PI);
+    auto rand_gen = std::bind(distribution, generator);
+
+    for (size_t i = 0; i < 100; ++i) {
+        M44T m = M44T::eulerZYX(rand_gen(), rand_gen(), rand_gen());
+        verifyOrthonormal(m);
+    }
+
+    M44T m = M44T::eulerZYX(1, 2, 3);
+    verifyOrthonormal(m);
+}
+
+//------------------------------------------------------------------------------
+// Test euler code.
+TYPED_TEST(MatTestT, EulerZYX_33) {
+
+    typedef ::android::details::TMat33<TypeParam> M33T;
+
+    std::default_random_engine generator(112233);
+    std::uniform_real_distribution<float> distribution(-6.0 * 2.0*M_PI, 6.0 * 2.0*M_PI);
+    auto rand_gen = std::bind(distribution, generator);
+
+    for (size_t i = 0; i < 100; ++i) {
+        M33T m = M33T::eulerZYX(rand_gen(), rand_gen(), rand_gen());
+        verifyOrthonormal(m);
+    }
+
+    M33T m = M33T::eulerZYX(1, 2, 3);
+    verifyOrthonormal(m);
+}
+
+//------------------------------------------------------------------------------
+// Test to quaternion with post translation.
+TYPED_TEST(MatTestT, ToQuaternionPostTranslation) {
+
+    typedef ::android::details::TMat44<TypeParam> M44T;
+    typedef ::android::details::TVec4<TypeParam> V4T;
+    typedef ::android::details::TQuaternion<TypeParam> QuatT;
+
+    std::default_random_engine generator(112233);
+    std::uniform_real_distribution<float> distribution(-6.0 * 2.0*M_PI, 6.0 * 2.0*M_PI);
+    auto rand_gen = std::bind(distribution, generator);
+
+    for (size_t i = 0; i < 100; ++i) {
+        M44T r = M44T::eulerZYX(rand_gen(), rand_gen(), rand_gen());
+        M44T t = M44T::translate(V4T(rand_gen(), rand_gen(), rand_gen(), 1));
+        QuatT qr = r.toQuaternion();
+        M44T tr = t * r;
+        QuatT qtr = tr.toQuaternion();
+
+        ASSERT_TYPE_EQ(qr.x, qtr.x);
+        ASSERT_TYPE_EQ(qr.y, qtr.y);
+        ASSERT_TYPE_EQ(qr.z, qtr.z);
+        ASSERT_TYPE_EQ(qr.w, qtr.w);
+    }
+
+    M44T r = M44T::eulerZYX(1, 2, 3);
+    M44T t = M44T::translate(V4T(20, -15, 2, 1));
+    QuatT qr = r.toQuaternion();
+    M44T tr = t * r;
+    QuatT qtr = tr.toQuaternion();
+
+    ASSERT_TYPE_EQ(qr.x, qtr.x);
+    ASSERT_TYPE_EQ(qr.y, qtr.y);
+    ASSERT_TYPE_EQ(qr.z, qtr.z);
+    ASSERT_TYPE_EQ(qr.w, qtr.w);
+}
+
+//------------------------------------------------------------------------------
+// Test to quaternion with post translation.
+TYPED_TEST(MatTestT, ToQuaternionPointTransformation33) {
+    static constexpr TypeParam value_eps =
+            TypeParam(1000) * std::numeric_limits<TypeParam>::epsilon();
+
+    typedef ::android::details::TMat33<TypeParam> M33T;
+    typedef ::android::details::TVec3<TypeParam> V3T;
+    typedef ::android::details::TQuaternion<TypeParam> QuatT;
+
+    std::default_random_engine generator(112233);
+    std::uniform_real_distribution<float> distribution(-100.0, 100.0);
+    auto rand_gen = std::bind(distribution, generator);
+
+    for (size_t i = 0; i < 100; ++i) {
+        M33T r = M33T::eulerZYX(rand_gen(), rand_gen(), rand_gen());
+        QuatT qr = r.toQuaternion();
+        V3T p(rand_gen(), rand_gen(), rand_gen());
+
+        V3T pr = r * p;
+        V3T pq = qr * p;
+
+        ASSERT_NEAR(pr.x, pq.x, value_eps);
+        ASSERT_NEAR(pr.y, pq.y, value_eps);
+        ASSERT_NEAR(pr.z, pq.z, value_eps);
+    }
+}
+
+//------------------------------------------------------------------------------
+// Test to quaternion with post translation.
+TYPED_TEST(MatTestT, ToQuaternionPointTransformation44) {
+    static constexpr TypeParam value_eps =
+            TypeParam(1000) * std::numeric_limits<TypeParam>::epsilon();
+
+    typedef ::android::details::TMat44<TypeParam> M44T;
+    typedef ::android::details::TVec4<TypeParam> V4T;
+    typedef ::android::details::TVec3<TypeParam> V3T;
+    typedef ::android::details::TQuaternion<TypeParam> QuatT;
+
+    std::default_random_engine generator(992626);
+    std::uniform_real_distribution<float> distribution(-100.0, 100.0);
+    auto rand_gen = std::bind(distribution, generator);
+
+    for (size_t i = 0; i < 100; ++i) {
+        M44T r = M44T::eulerZYX(rand_gen(), rand_gen(), rand_gen());
+        QuatT qr = r.toQuaternion();
+        V3T p(rand_gen(), rand_gen(), rand_gen());
+
+        V4T pr = r * V4T(p.x, p.y, p.z, 1);
+        pr.x /= pr.w;
+        pr.y /= pr.w;
+        pr.z /= pr.w;
+        V3T pq = qr * p;
+
+        ASSERT_NEAR(pr.x, pq.x, value_eps);
+        ASSERT_NEAR(pr.y, pq.y, value_eps);
+        ASSERT_NEAR(pr.z, pq.z, value_eps);
+    }
+}
+
+#undef TEST_MATRIX_INVERSE
+
 }; // namespace android
diff --git a/libs/ui/tests/quat_test.cpp b/libs/ui/tests/quat_test.cpp
new file mode 100644
index 0000000..f5cb659
--- /dev/null
+++ b/libs/ui/tests/quat_test.cpp
@@ -0,0 +1,301 @@
+/*
+ * Copyright 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define LOG_TAG "QuatTest"
+
+#include <math.h>
+#include <stdlib.h>
+
+#include <random>
+#include <functional>
+
+#include <ui/quat.h>
+#include <ui/mat4.h>
+#include <ui/vec3.h>
+#include <ui/vec4.h>
+
+#include <gtest/gtest.h>
+
+namespace android {
+
+class QuatTest : public testing::Test {
+protected:
+};
+
+TEST_F(QuatTest, Basics) {
+    quatd q;
+    double4& v(q.xyzw);
+
+    EXPECT_EQ(sizeof(quatd), sizeof(double)*4);
+    EXPECT_EQ(reinterpret_cast<void*>(&q), reinterpret_cast<void*>(&v));
+}
+
+TEST_F(QuatTest, Constructors) {
+    quatd q0;
+    EXPECT_EQ(q0.x, 0);
+    EXPECT_EQ(q0.y, 0);
+    EXPECT_EQ(q0.z, 0);
+    EXPECT_EQ(q0.w, 0);
+
+    quatd q1(1);
+    EXPECT_EQ(q1.x, 0);
+    EXPECT_EQ(q1.y, 0);
+    EXPECT_EQ(q1.z, 0);
+    EXPECT_EQ(q1.w, 1);
+
+    quatd q2(1, 2, 3, 4);
+    EXPECT_EQ(q2.x, 2);
+    EXPECT_EQ(q2.y, 3);
+    EXPECT_EQ(q2.z, 4);
+    EXPECT_EQ(q2.w, 1);
+
+    quatd q3(q2);
+    EXPECT_EQ(q3.x, 2);
+    EXPECT_EQ(q3.y, 3);
+    EXPECT_EQ(q3.z, 4);
+    EXPECT_EQ(q3.w, 1);
+
+    quatd q4(q3.xyz, 42);
+    EXPECT_EQ(q4.x, 2);
+    EXPECT_EQ(q4.y, 3);
+    EXPECT_EQ(q4.z, 4);
+    EXPECT_EQ(q4.w, 42);
+
+    quatd q5(double3(q2.xy, 42), 24);
+    EXPECT_EQ(q5.x, 2);
+    EXPECT_EQ(q5.y, 3);
+    EXPECT_EQ(q5.z, 42);
+    EXPECT_EQ(q5.w, 24);
+
+    quatd q6;
+    q6 = 12;
+    EXPECT_EQ(q6.x, 0);
+    EXPECT_EQ(q6.y, 0);
+    EXPECT_EQ(q6.z, 0);
+    EXPECT_EQ(q6.w, 12);
+
+    quatd q7 = 1 + 2_id + 3_jd + 4_kd;
+    EXPECT_EQ(q7.x, 2);
+    EXPECT_EQ(q7.y, 3);
+    EXPECT_EQ(q7.z, 4);
+    EXPECT_EQ(q7.w, 1);
+
+    quatf qf(2);
+    EXPECT_EQ(qf.x, 0);
+    EXPECT_EQ(qf.y, 0);
+    EXPECT_EQ(qf.z, 0);
+    EXPECT_EQ(qf.w, 2);
+}
+
+TEST_F(QuatTest, Access) {
+    quatd q0(1, 2, 3, 4);
+    q0.x = 10;
+    q0.y = 20;
+    q0.z = 30;
+    q0.w = 40;
+    EXPECT_EQ(q0.x, 10);
+    EXPECT_EQ(q0.y, 20);
+    EXPECT_EQ(q0.z, 30);
+    EXPECT_EQ(q0.w, 40);
+
+    q0[0] = 100;
+    q0[1] = 200;
+    q0[2] = 300;
+    q0[3] = 400;
+    EXPECT_EQ(q0.x, 100);
+    EXPECT_EQ(q0.y, 200);
+    EXPECT_EQ(q0.z, 300);
+    EXPECT_EQ(q0.w, 400);
+
+    q0.xyz = double3(1, 2, 3);
+    EXPECT_EQ(q0.x, 1);
+    EXPECT_EQ(q0.y, 2);
+    EXPECT_EQ(q0.z, 3);
+    EXPECT_EQ(q0.w, 400);
+}
+
+TEST_F(QuatTest, UnaryOps) {
+    quatd q0(1, 2, 3, 4);
+
+    q0 += 1;
+    EXPECT_EQ(q0.x, 2);
+    EXPECT_EQ(q0.y, 3);
+    EXPECT_EQ(q0.z, 4);
+    EXPECT_EQ(q0.w, 2);
+
+    q0 -= 1;
+    EXPECT_EQ(q0.x, 2);
+    EXPECT_EQ(q0.y, 3);
+    EXPECT_EQ(q0.z, 4);
+    EXPECT_EQ(q0.w, 1);
+
+    q0 *= 2;
+    EXPECT_EQ(q0.x, 4);
+    EXPECT_EQ(q0.y, 6);
+    EXPECT_EQ(q0.z, 8);
+    EXPECT_EQ(q0.w, 2);
+
+    q0 /= 2;
+    EXPECT_EQ(q0.x, 2);
+    EXPECT_EQ(q0.y, 3);
+    EXPECT_EQ(q0.z, 4);
+    EXPECT_EQ(q0.w, 1);
+
+    quatd q1(10, 20, 30, 40);
+
+    q0 += q1;
+    EXPECT_EQ(q0.x, 22);
+    EXPECT_EQ(q0.y, 33);
+    EXPECT_EQ(q0.z, 44);
+    EXPECT_EQ(q0.w, 11);
+
+    q0 -= q1;
+    EXPECT_EQ(q0.x, 2);
+    EXPECT_EQ(q0.y, 3);
+    EXPECT_EQ(q0.z, 4);
+    EXPECT_EQ(q0.w, 1);
+
+    q1 = -q1;
+    EXPECT_EQ(q1.x, -20);
+    EXPECT_EQ(q1.y, -30);
+    EXPECT_EQ(q1.z, -40);
+    EXPECT_EQ(q1.w, -10);
+
+    // TODO(mathias): multiplies
+}
+
+TEST_F(QuatTest, ComparisonOps) {
+    quatd q0(1, 2, 3, 4);
+    quatd q1(10, 20, 30, 40);
+
+    EXPECT_TRUE(q0 == q0);
+    EXPECT_TRUE(q0 != q1);
+    EXPECT_FALSE(q0 != q0);
+    EXPECT_FALSE(q0 == q1);
+}
+
+TEST_F(QuatTest, ArithmeticOps) {
+    quatd q0(1, 2, 3, 4);
+    quatd q1(10, 20, 30, 40);
+
+    quatd q2(q0 + q1);
+    EXPECT_EQ(q2.x, 22);
+    EXPECT_EQ(q2.y, 33);
+    EXPECT_EQ(q2.z, 44);
+    EXPECT_EQ(q2.w, 11);
+
+    q0 = q1 * 2;
+    EXPECT_EQ(q0.x, 40);
+    EXPECT_EQ(q0.y, 60);
+    EXPECT_EQ(q0.z, 80);
+    EXPECT_EQ(q0.w, 20);
+
+    q0 = 2 * q1;
+    EXPECT_EQ(q0.x, 40);
+    EXPECT_EQ(q0.y, 60);
+    EXPECT_EQ(q0.z, 80);
+    EXPECT_EQ(q0.w, 20);
+
+    quatf qf(2);
+    q0 = q1 * qf;
+    EXPECT_EQ(q0.x, 40);
+    EXPECT_EQ(q0.y, 60);
+    EXPECT_EQ(q0.z, 80);
+    EXPECT_EQ(q0.w, 20);
+
+    EXPECT_EQ(1_id * 1_id, quat(-1));
+    EXPECT_EQ(1_jd * 1_jd, quat(-1));
+    EXPECT_EQ(1_kd * 1_kd, quat(-1));
+    EXPECT_EQ(1_id * 1_jd * 1_kd, quat(-1));
+}
+
+TEST_F(QuatTest, ArithmeticFunc) {
+    quatd q(1, 2, 3, 4);
+    quatd qc(conj(q));
+    __attribute__((unused)) quatd qi(inverse(q));
+    quatd qn(normalize(q));
+
+    EXPECT_EQ(qc.x, -2);
+    EXPECT_EQ(qc.y, -3);
+    EXPECT_EQ(qc.z, -4);
+    EXPECT_EQ(qc.w,  1);
+
+    EXPECT_EQ(~q, qc);
+    EXPECT_EQ(length(q), length(qc));
+    EXPECT_EQ(sqrt(30), length(q));
+    EXPECT_FLOAT_EQ(1, length(qn));
+    EXPECT_FLOAT_EQ(1, dot(qn, qn));
+
+    quatd qr = quatd::fromAxisAngle(double3(0, 0, 1), M_PI / 2);
+    EXPECT_EQ(mat4d(qr).toQuaternion(), qr);
+    EXPECT_EQ(1_id, mat4d(1_id).toQuaternion());
+    EXPECT_EQ(1_jd, mat4d(1_jd).toQuaternion());
+    EXPECT_EQ(1_kd, mat4d(1_kd).toQuaternion());
+
+
+    EXPECT_EQ(qr, log(exp(qr)));
+
+    quatd qq = qr * qr;
+    quatd q2 = pow(qr, 2);
+    EXPECT_NEAR(qq.x, q2.x, 1e-15);
+    EXPECT_NEAR(qq.y, q2.y, 1e-15);
+    EXPECT_NEAR(qq.z, q2.z, 1e-15);
+    EXPECT_NEAR(qq.w, q2.w, 1e-15);
+
+    quatd qa = quatd::fromAxisAngle(double3(0, 0, 1), 0);
+    quatd qb = quatd::fromAxisAngle(double3(0, 0, 1), M_PI / 2);
+    quatd qs = slerp(qa, qb, 0.5);
+    qr = quatd::fromAxisAngle(double3(0, 0, 1), M_PI / 4);
+    EXPECT_FLOAT_EQ(qr.x, qs.x);
+    EXPECT_FLOAT_EQ(qr.y, qs.y);
+    EXPECT_FLOAT_EQ(qr.z, qs.z);
+    EXPECT_FLOAT_EQ(qr.w, qs.w);
+
+    qs = nlerp(qa, qb, 0.5);
+    EXPECT_FLOAT_EQ(qr.x, qs.x);
+    EXPECT_FLOAT_EQ(qr.y, qs.y);
+    EXPECT_FLOAT_EQ(qr.z, qs.z);
+    EXPECT_FLOAT_EQ(qr.w, qs.w);
+}
+
+TEST_F(QuatTest, MultiplicationExhaustive) {
+    static constexpr double value_eps = double(1000) * std::numeric_limits<double>::epsilon();
+
+    std::default_random_engine generator(171717);
+    std::uniform_real_distribution<double> distribution(-10.0, 10.0);
+    auto rand_gen = std::bind(distribution, generator);
+
+    for (size_t i = 0; i < (1024 * 1024); ++i) {
+        double3 axis_a = normalize(double3(rand_gen(), rand_gen(), rand_gen()));
+        double angle_a = rand_gen();
+        quatd a = quatd::fromAxisAngle(axis_a, angle_a);
+
+        double3 axis_b = normalize(double3(rand_gen(), rand_gen(), rand_gen()));
+        double angle_b = rand_gen();
+        quatd b = quatd::fromAxisAngle(axis_b, angle_b);
+
+        quatd ab = a * b;
+        quatd ab_other(a.w * b.xyz + b.w * a.xyz + cross(a.xyz, b.xyz),
+            (a.w * b.w) - dot(a.xyz, b.xyz));
+
+        ASSERT_NEAR(ab.x, ab_other.x, value_eps);
+        ASSERT_NEAR(ab.y, ab_other.y, value_eps);
+        ASSERT_NEAR(ab.z, ab_other.z, value_eps);
+        ASSERT_NEAR(ab.w, ab_other.w, value_eps);
+    }
+}
+
+}; // namespace android
diff --git a/libs/ui/tests/vec_test.cpp b/libs/ui/tests/vec_test.cpp
index 454c999..586d1a8 100644
--- a/libs/ui/tests/vec_test.cpp
+++ b/libs/ui/tests/vec_test.cpp
@@ -14,13 +14,11 @@
  * limitations under the License.
  */
 
-#define LOG_TAG "RegionTest"
+#define LOG_TAG "VecTest"
 
 #include <math.h>
 #include <stdlib.h>
 
-#include <ui/Region.h>
-#include <ui/Rect.h>
 #include <ui/vec4.h>
 
 #include <gtest/gtest.h>
@@ -37,7 +35,7 @@
     EXPECT_EQ(sizeof(vec4), sizeof(float)*4);
     EXPECT_EQ(sizeof(vec3), sizeof(float)*3);
     EXPECT_EQ(sizeof(vec2), sizeof(float)*2);
-    EXPECT_EQ((void*)&v3, (void*)&v4);
+    EXPECT_EQ(reinterpret_cast<void*>(&v3), reinterpret_cast<void*>(&v4));
 }
 
 TEST_F(VecTest, Constructors) {
@@ -53,7 +51,7 @@
     EXPECT_EQ(v1.z, 1);
     EXPECT_EQ(v1.w, 1);
 
-    vec4 v2(1,2,3,4);
+    vec4 v2(1, 2, 3, 4);
     EXPECT_EQ(v2.x, 1);
     EXPECT_EQ(v2.y, 2);
     EXPECT_EQ(v2.z, 3);
@@ -77,15 +75,16 @@
     EXPECT_EQ(v5.z, 42);
     EXPECT_EQ(v5.w, 24);
 
-    tvec4<double> vd(2);
-    EXPECT_EQ(vd.x, 2);
-    EXPECT_EQ(vd.y, 2);
-    EXPECT_EQ(vd.z, 2);
-    EXPECT_EQ(vd.w, 2);
+    float4 vf(2);
+    EXPECT_EQ(vf.x, 2);
+    EXPECT_EQ(vf.y, 2);
+    EXPECT_EQ(vf.z, 2);
+    EXPECT_EQ(vf.w, 2);
 }
 
 TEST_F(VecTest, Access) {
-    vec4 v0(1,2,3,4);
+    vec4 v0(1, 2, 3, 4);
+
     v0.x = 10;
     v0.y = 20;
     v0.z = 30;
@@ -104,7 +103,7 @@
     EXPECT_EQ(v0.z, 300);
     EXPECT_EQ(v0.w, 400);
 
-    v0.xyz = vec3(1,2,3);
+    v0.xyz = vec3(1, 2, 3);
     EXPECT_EQ(v0.x, 1);
     EXPECT_EQ(v0.y, 2);
     EXPECT_EQ(v0.z, 3);
@@ -112,7 +111,7 @@
 }
 
 TEST_F(VecTest, UnaryOps) {
-    vec4 v0(1,2,3,4);
+    vec4 v0(1, 2, 3, 4);
 
     v0 += 1;
     EXPECT_EQ(v0.x, 2);
@@ -164,41 +163,23 @@
     EXPECT_EQ(v0.z, 3);
     EXPECT_EQ(v0.w, 4);
 
-    ++v0;
-    EXPECT_EQ(v0.x, 2);
-    EXPECT_EQ(v0.y, 3);
-    EXPECT_EQ(v0.z, 4);
-    EXPECT_EQ(v0.w, 5);
-
-    ++++v0;
-    EXPECT_EQ(v0.x, 4);
-    EXPECT_EQ(v0.y, 5);
-    EXPECT_EQ(v0.z, 6);
-    EXPECT_EQ(v0.w, 7);
-
-    --v1;
-    EXPECT_EQ(v1.x,  9);
-    EXPECT_EQ(v1.y, 19);
-    EXPECT_EQ(v1.z, 29);
-    EXPECT_EQ(v1.w, 39);
-
     v1 = -v1;
-    EXPECT_EQ(v1.x,  -9);
-    EXPECT_EQ(v1.y, -19);
-    EXPECT_EQ(v1.z, -29);
-    EXPECT_EQ(v1.w, -39);
+    EXPECT_EQ(v1.x, -10);
+    EXPECT_EQ(v1.y, -20);
+    EXPECT_EQ(v1.z, -30);
+    EXPECT_EQ(v1.w, -40);
 
-    tvec4<double> dv(1,2,3,4);
-    v1 += dv;
-    EXPECT_EQ(v1.x,  -8);
-    EXPECT_EQ(v1.y, -17);
-    EXPECT_EQ(v1.z, -26);
-    EXPECT_EQ(v1.w, -35);
+    float4 fv(1, 2, 3, 4);
+    v1 += fv;
+    EXPECT_EQ(v1.x,  -9);
+    EXPECT_EQ(v1.y, -18);
+    EXPECT_EQ(v1.z, -27);
+    EXPECT_EQ(v1.w, -36);
 }
 
 TEST_F(VecTest, ComparisonOps) {
-    vec4 v0(1,2,3,4);
-    vec4 v1(10,20,30,40);
+    vec4 v0(1, 2, 3, 4);
+    vec4 v1(10, 20, 30, 40);
 
     EXPECT_TRUE(v0 == v0);
     EXPECT_TRUE(v0 != v1);
@@ -207,8 +188,8 @@
 }
 
 TEST_F(VecTest, ArithmeticOps) {
-    vec4 v0(1,2,3,4);
-    vec4 v1(10,20,30,40);
+    vec4 v0(1, 2, 3, 4);
+    vec4 v1(10, 20, 30, 40);
 
     vec4 v2(v0 + v1);
     EXPECT_EQ(v2.x, 11);
@@ -228,8 +209,8 @@
     EXPECT_EQ(v0.z, 60);
     EXPECT_EQ(v0.w, 80);
 
-    tvec4<double> vd(2);
-    v0 = v1 * vd;
+    float4 vf(2);
+    v0 = v1 * vf;
     EXPECT_EQ(v0.x, 20);
     EXPECT_EQ(v0.y, 40);
     EXPECT_EQ(v0.z, 60);
@@ -239,19 +220,19 @@
 TEST_F(VecTest, ArithmeticFunc) {
     vec3 east(1, 0, 0);
     vec3 north(0, 1, 0);
-    vec3 up( cross(east, north) );
-    EXPECT_EQ(up, vec3(0,0,1));
+    vec3 up(cross(east, north));
+    EXPECT_EQ(up, vec3(0, 0, 1));
     EXPECT_EQ(dot(east, north), 0);
     EXPECT_EQ(length(east), 1);
     EXPECT_EQ(distance(east, north), sqrtf(2));
 
-    vec3 v0(1,2,3);
+    vec3 v0(1, 2, 3);
     vec3 vn(normalize(v0));
     EXPECT_FLOAT_EQ(1, length(vn));
     EXPECT_FLOAT_EQ(length(v0), dot(v0, vn));
 
-    tvec3<double> vd(east);
-    EXPECT_EQ(length(vd), 1);
+    float3 vf(east);
+    EXPECT_EQ(length(vf), 1);
 }
 
 }; // namespace android