Pixelflinger: Add AArch64 support to pixelflinger JIT.

See the comment block at the top of Aarch64Assembler.cpp
for an overview of how AArch64 support has been implemented.

In addition, this commit contains:
[x] AArch64 inline asm versions of the gglMulx series of
    functions, and a new unit test bench to test them

[x] Assembly implementations of scanline_col32cb16blend
    and scanline_t32cb16blend for AArch64, with a unit
    test bench

Change-Id: I915cded9e1d39d9a2a70bf8a0394b8a0064d1eb4
Signed-off-by: Ashok Bhat <ashok.bhat@arm.com>
diff --git a/include/private/pixelflinger/ggl_fixed.h b/include/private/pixelflinger/ggl_fixed.h
index 217ec04..d0493f3 100644
--- a/include/private/pixelflinger/ggl_fixed.h
+++ b/include/private/pixelflinger/ggl_fixed.h
@@ -457,6 +457,69 @@
     return u.res;
 }
 
+#elif defined(__aarch64__)
+
+// inline AArch64 implementations
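+//
+// As a reference, gglMulx below computes, in effect (illustrative C
+// equivalent, assuming 0 < shift < 32):
+//     (GGLfixed)(((int64_t)x * y + (1LL << (shift - 1))) >> shift)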
+
+inline GGLfixed gglMulx(GGLfixed x, GGLfixed y, int shift) CONST;
+inline GGLfixed gglMulx(GGLfixed x, GGLfixed y, int shift)
+{
+    GGLfixed result;
+    GGLfixed round;
+
+    asm("mov    %x[round], #1                        \n"
+        "lsl    %x[round], %x[round], %x[shift]      \n"
+        "lsr    %x[round], %x[round], #1             \n"
+        "smaddl %x[result], %w[x], %w[y],%x[round]   \n"
+        "lsr    %x[result], %x[result], %x[shift]    \n"
+        : [round]"=&r"(round), [result]"=&r"(result) \
+        : [x]"r"(x), [y]"r"(y), [shift] "r"(shift)   \
+        :
+       );
+    return result;
+}
+inline GGLfixed gglMulAddx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) CONST;
+inline GGLfixed gglMulAddx(GGLfixed x, GGLfixed y, GGLfixed a, int shift)
+{
+    GGLfixed result;
+    asm("smull  %x[result], %w[x], %w[y]                     \n"
+        "lsr    %x[result], %x[result], %x[shift]            \n"
+        "add    %w[result], %w[result], %w[a]                \n"
+        : [result]"=&r"(result)
+        : [x]"r"(x), [y]"r"(y), [a]"r"(a), [shift]"r"(shift)
+        :
+        );
+    return result;
+}
+
+inline GGLfixed gglMulSubx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) CONST;
+inline GGLfixed gglMulSubx(GGLfixed x, GGLfixed y, GGLfixed a, int shift)
+{
+    GGLfixed result;
+
+    asm("smull  %x[result], %w[x], %w[y]                     \n"
+        "lsr    %x[result], %x[result], %x[shift]            \n"
+        "sub    %w[result], %w[result], %w[a]                \n"
+        : [result]"=&r"(result)
+        : [x]"r"(x), [y]"r"(y), [a]"r"(a), [shift]"r"(shift)
+        :
+        );
+    return result;
+}
+inline int64_t gglMulii(int32_t x, int32_t y) CONST;
+inline int64_t gglMulii(int32_t x, int32_t y)
+{
+    int64_t res;
+    asm("smull  %x0, %w1, %w2 \n"
+        : "=r"(res)
+        : "%r"(x), "r"(y)
+        :
+        );
+    return res;
+}
+
 #else // ----------------------------------------------------------------------
 
 inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) CONST;
@@ -498,7 +561,7 @@
 inline int32_t gglClz(int32_t x) CONST;
 inline int32_t gglClz(int32_t x)
 {
-#if (defined(__arm__) && !defined(__thumb__)) || defined(__mips__)
+#if (defined(__arm__) && !defined(__thumb__)) || defined(__mips__) || defined(__aarch64__)
     return __builtin_clz(x);
 #else
     if (!x) return 32;
@@ -554,6 +617,8 @@
     // clamps to zero in one instruction, but gcc won't generate it and
     // replace it by a cmp + movlt (it's quite amazing actually).
     asm("bic %0, %1, %1, asr #31\n" : "=r"(c) : "r"(c));
+#elif defined(__aarch64__)
+    asm("bic %w0, %w1, %w1, asr #31\n" : "=r"(c) : "r"(c));
 #else
     c &= ~(c>>31);
 #endif
diff --git a/libpixelflinger/Android.mk b/libpixelflinger/Android.mk
index 7f20e5b..0f502c0 100644
--- a/libpixelflinger/Android.mk
+++ b/libpixelflinger/Android.mk
@@ -9,13 +9,11 @@
 PIXELFLINGER_SRC_FILES:= \
     codeflinger/ARMAssemblerInterface.cpp \
     codeflinger/ARMAssemblerProxy.cpp \
-    codeflinger/ARMAssembler.cpp \
     codeflinger/CodeCache.cpp \
     codeflinger/GGLAssembler.cpp \
     codeflinger/load_store.cpp \
     codeflinger/blending.cpp \
     codeflinger/texturing.cpp \
-    codeflinger/disassem.c \
 	codeflinger/tinyutils/SharedBuffer.cpp \
 	codeflinger/tinyutils/VectorImpl.cpp \
 	fixed.cpp.arm \
@@ -39,6 +37,8 @@
 endif
 
 ifeq ($(TARGET_ARCH),arm)
+PIXELFLINGER_SRC_FILES += codeflinger/ARMAssembler.cpp
+PIXELFLINGER_SRC_FILES += codeflinger/disassem.c
 # special optimization flags for pixelflinger
 PIXELFLINGER_CFLAGS += -fstrict-aliasing -fomit-frame-pointer
 endif
@@ -52,6 +52,14 @@
 
 LOCAL_SHARED_LIBRARIES := libcutils liblog
 
+ifeq ($(TARGET_ARCH),aarch64)
+PIXELFLINGER_SRC_FILES += arch-aarch64/t32cb16blend.S
+PIXELFLINGER_SRC_FILES += arch-aarch64/col32cb16blend.S
+PIXELFLINGER_SRC_FILES += codeflinger/Aarch64Assembler.cpp
+PIXELFLINGER_SRC_FILES += codeflinger/Aarch64Disassembler.cpp
+PIXELFLINGER_CFLAGS += -fstrict-aliasing -fomit-frame-pointer
+endif
+
 #
 # Shared library
 #
diff --git a/libpixelflinger/arch-aarch64/col32cb16blend.S b/libpixelflinger/arch-aarch64/col32cb16blend.S
new file mode 100644
index 0000000..aa969a4
--- /dev/null
+++ b/libpixelflinger/arch-aarch64/col32cb16blend.S
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+    .text
+    .align
+
+    .global scanline_col32cb16blend_aarch64
+
+//
+// This function alpha blends a fixed color into a destination scanline, using
+// the formula:
+//
+//     d = s + (((0x100 - (a + (a >> 7))) * d) >> 8)
+//
+// where d is the destination pixel,
+//       s is the source color,
+//       a is the alpha channel of the source color.
+//
+
+// x0 = destination buffer pointer
+// w1 = color value
+// w2 = count
+
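+// Expressed as a C prototype (illustrative):
+//     void scanline_col32cb16blend_aarch64(uint16_t* dst, uint32_t color,
+//                                          uint32_t count);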
+
+scanline_col32cb16blend_aarch64:
+
+    lsr         w5, w1, #24                     // shift down alpha
+    mov         w9, #0xff                       // create mask
+    add         w5, w5, w5, lsr #7              // add in top bit
+    mov         w4, #256                        // create #0x100
+    sub         w5, w4, w5                      // invert alpha
+    and         w10, w1, #0xff                  // extract red
+    and         w12, w9, w1, lsr #8             // extract green
+    and         w4,  w9, w1, lsr #16            // extract blue
+    lsl         w10, w10, #5                    // prescale red
+    lsl         w12, w12, #6                    // prescale green
+    lsl         w4,  w4,  #5                    // prescale blue
+    lsr         w9,  w9,  #2                    // create dest green mask
+
+1:
+    ldrh        w8, [x0]                        // load dest pixel
+    subs        w2, w2, #1                      // decrement loop counter
+    lsr         w6, w8, #11                     // extract dest red
+    and         w7, w9, w8, lsr #5              // extract dest green
+    and         w8, w8, #0x1f                   // extract dest blue
+
+    madd        w6, w6, w5, w10                 // dest red * alpha + src red
+    madd        w7, w7, w5, w12                 // dest green * alpha + src green
+    madd        w8, w8, w5, w4                  // dest blue * alpha + src blue
+
+    lsr         w6, w6, #8                      // shift down red
+    lsr         w7, w7, #8                      // shift down green
+    lsl         w6, w6, #11                     // shift red into 565
+    orr         w6, w6, w7, lsl #5              // shift green into 565
+    orr         w6, w6, w8, lsr #8              // shift blue into 565
+
+    strh        w6, [x0], #2                    // store pixel to dest, update ptr
+    b.ne        1b                              // if count != 0, loop
+
+    ret
diff --git a/libpixelflinger/arch-aarch64/t32cb16blend.S b/libpixelflinger/arch-aarch64/t32cb16blend.S
new file mode 100644
index 0000000..b62ed36
--- /dev/null
+++ b/libpixelflinger/arch-aarch64/t32cb16blend.S
@@ -0,0 +1,213 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+    .text
+    .align
+
+    .global scanline_t32cb16blend_aarch64
+
+/*
+ * .macro pixel
+ *
+ *  This macro alpha blends an original RGB565 pixel, located in either
+ *  the top or bottom 16 bits of the DREG register, with the SRC 32-bit
+ *  pixel value, and writes the result to the FB register
+ *
+ * \DREG is a 32-bit register containing *two* original destination RGB565
+ *       pixels, with the even one in the low-16 bits, and the odd one in the
+ *       high 16 bits.
+ *
+ * \SRC is a 32-bit 0xAABBGGRR pixel value, with pre-multiplied colors.
+ *
+ * \FB is a target register that will contain the blended pixel values.
+ *
+ * \ODD is either 0 or 1 and indicates whether we're blending the lower
+ *      or upper 16-bit pixel of DREG into FB
+ *
+ *
+ * clobbered: w6, w7, w16, w17, w18
+ *
+ */
+
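+// In C terms, each channel is blended roughly as follows (illustrative):
+//     ia   = 0x100 - (sA + (sA >> 7));               // inverse src alpha
+//     chan = min(s_chan + ((ia * d_chan) >> 8), chan_max);
+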
+.macro pixel,   DREG, SRC, FB, ODD
+
+    // SRC = 0xAABBGGRR
+    lsr     w7, \SRC, #24               // sA
+    add     w7, w7, w7, lsr #7          // sA + (sA >> 7)
+    mov     w6, #0x100
+    sub     w7, w6, w7                  // sA = 0x100 - (sA+(sA>>7))
+
+1:
+
+.if \ODD //Blending odd pixel present in top 16 bits of DREG register
+
+    // red
+    lsr     w16, \DREG, #(16 + 11)
+    mul     w16, w7, w16
+    lsr     w6, \SRC, #3
+    and     w6, w6, #0x1F
+    add     w16, w6, w16, lsr #8
+    cmp     w16, #0x1F
+    orr     w17, \FB, #(0x1F<<(16 + 11))
+    orr     w18, \FB, w16, lsl #(16 + 11)
+    csel    \FB, w17, w18, hi
+        // green
+        and     w6, \DREG, #(0x3F<<(16 + 5))
+        lsr     w17,w6,#(16+5)
+        mul     w6, w7, w17
+        lsr     w16, \SRC, #(8+2)
+        and     w16, w16, #0x3F
+        add     w6, w16, w6, lsr #8
+        cmp     w6, #0x3F
+        orr     w17, \FB, #(0x3F<<(16 + 5))
+        orr     w18, \FB, w6, lsl #(16 + 5)
+        csel    \FB, w17, w18, hi
+            // blue
+            and     w16, \DREG, #(0x1F << 16)
+            lsr     w17,w16,#16
+            mul     w16, w7, w17
+            lsr     w6, \SRC, #(8+8+3)
+            and     w6, w6, #0x1F
+            add     w16, w6, w16, lsr #8
+            cmp     w16, #0x1F
+            orr     w17, \FB, #(0x1F << 16)
+            orr     w18, \FB, w16, lsl #16
+            csel    \FB, w17, w18, hi
+
+.else //Blending even pixel present in bottom 16 bits of DREG register
+
+    // red
+    lsr     w16, \DREG, #11
+    and     w16, w16, #0x1F
+    mul     w16, w7, w16
+    lsr     w6, \SRC, #3
+    and     w6, w6, #0x1F
+    add     w16, w6, w16, lsr #8
+    cmp     w16, #0x1F
+    mov     w17, #(0x1F<<11)
+    lsl     w18, w16, #11
+    csel    \FB, w17, w18, hi
+
+
+        // green
+        and     w6, \DREG, #(0x3F<<5)
+        mul     w6, w7, w6
+        lsr     w16, \SRC, #(8+2)
+        and     w16, w16, #0x3F
+        add     w6, w16, w6, lsr #(5+8)
+        cmp     w6, #0x3F
+        orr     w17, \FB, #(0x3F<<5)
+        orr     w18, \FB, w6, lsl #5
+        csel    \FB, w17, w18, hi
+
+            // blue
+            and     w16, \DREG, #0x1F
+            mul     w16, w7, w16
+            lsr     w6, \SRC, #(8+8+3)
+            and     w6, w6, #0x1F
+            add     w16, w6, w16, lsr #8
+            cmp     w16, #0x1F
+            orr     w17, \FB, #0x1F
+            orr     w18, \FB, w16
+            csel    \FB, w17, w18, hi
+
+.endif // End of odd/even pixel blending
+
+.endm // End of pixel macro
+
+
+// x0:  dst ptr
+// x1:  src ptr
+// w2:  count
+// w3:  d
+// w4:  s0
+// w5:  s1
+// w6:  pixel
+// w7:  pixel
+// w8:  free
+// w9:  free
+// w10: free
+// w11: free
+// w12: scratch (blended pixels, FB)
+// w14: free
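+//
+// Expressed as a C prototype (illustrative):
+//     void scanline_t32cb16blend_aarch64(uint16_t* dst, uint32_t* src,
+//                                        uint32_t count);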
+
+scanline_t32cb16blend_aarch64:
+
+    // align DST to 32 bits
+    tst     x0, #0x3
+    b.eq    aligned
+    subs    w2, w2, #1
+    b.lo    return
+
+last:
+    ldr     w4, [x1], #4
+    ldrh    w3, [x0]
+    pixel   w3, w4, w12, 0
+    strh    w12, [x0], #2
+
+aligned:
+    subs    w2, w2, #2
+    b.lo    9f
+
+    // The main loop is unrolled twice and processes 4 pixels per iteration
+8:
+    ldp     w4, w5, [x1], #8
+    add     x0, x0, #4
+    // if both source pixels are zero, skip them
+    orr     w3, w4, w5
+    cbz     w3, 7f
+
+    // load the destination
+    ldr     w3, [x0, #-4]
+    // stream the destination
+    pixel   w3, w4, w12, 0
+    pixel   w3, w5, w12, 1
+    str     w12, [x0, #-4]
+
+    // 2nd iteration of the loop, don't stream anything
+    subs    w2, w2, #2
+    csel    w4, w5, w4, lt
+    b.lt    9f
+    ldp     w4, w5, [x1], #8
+    add     x0, x0, #4
+    orr     w3, w4, w5
+    cbz     w3, 7f
+    ldr     w3, [x0, #-4]
+    pixel   w3, w4, w12, 0
+    pixel   w3, w5, w12, 1
+    str     w12, [x0, #-4]
+
+7:  subs    w2, w2, #2
+    b.hs    8b
+    mov     w4, w5
+
+9:  adds    w2, w2, #1
+    b.lo    return
+    b       last
+
+return:
+    ret
diff --git a/libpixelflinger/codeflinger/ARMAssemblerInterface.h b/libpixelflinger/codeflinger/ARMAssemblerInterface.h
index e5a9a26..6e0d7c6 100644
--- a/libpixelflinger/codeflinger/ARMAssemblerInterface.h
+++ b/libpixelflinger/codeflinger/ARMAssemblerInterface.h
@@ -63,7 +63,7 @@
     };
 
     enum {
-        CODEGEN_ARCH_ARM = 1, CODEGEN_ARCH_MIPS
+        CODEGEN_ARCH_ARM = 1, CODEGEN_ARCH_MIPS, CODEGEN_ARCH_AARCH64
     };
 
     // -----------------------------------------------------------------------
diff --git a/libpixelflinger/codeflinger/Aarch64Assembler.cpp b/libpixelflinger/codeflinger/Aarch64Assembler.cpp
new file mode 100644
index 0000000..0e4f7df
--- /dev/null
+++ b/libpixelflinger/codeflinger/Aarch64Assembler.cpp
@@ -0,0 +1,1242 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#define LOG_TAG "ArmToAarch64Assembler"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <cutils/log.h>
+#include <cutils/properties.h>
+#include <private/pixelflinger/ggl_context.h>
+
+#include "codeflinger/Aarch64Assembler.h"
+#include "codeflinger/CodeCache.h"
+#include "codeflinger/Aarch64Disassembler.h"
+
+
+/*
+** --------------------------------------------
+** Support for Aarch64 in GGLAssembler JIT
+** --------------------------------------------
+**
+** Approach
+** - GGLAssembler and associated files are largely unchanged.
+** - A translator class maps ArmAssemblerInterface calls to
+**   generate AArch64 instructions.
+**
+** ----------------------
+** ArmToAarch64Assembler
+** ----------------------
+**
+** - Subclassed from ArmAssemblerInterface
+**
+** - Translates each ArmAssemblerInterface call to generate
+**   one or more Aarch64 instructions, as necessary.
+**
+** - Does not implement the ArmAssemblerInterface portions unused by
+**   GGLAssembler. It calls NOT_IMPLEMENTED() for such cases, which in
+**   turn logs a fatal message.
+**
+** - Uses the A64_* series of functions to generate machine code for
+**   Aarch64 instructions. These functions also log each instruction,
+**   if the AARCH64_ASM_DEBUG define is set to 1.
+**
+** - Dumps machine code and equivalent assembly if the "debug.pf.disasm"
+**   property is set. It uses aarch64_disassemble to perform disassembly.
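+**   (typically enabled with: adb shell setprop debug.pf.disasm 1)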
+**
+** - Uses registers 13 (SP in ARM), 15 (PC in ARM), 16 and 17 for storing
+**   intermediate results. GGLAssembler does not use SP and PC, as these
+**   registers are marked reserved. The temporary registers are not
+**   saved/restored on the stack, as they are caller-saved in Aarch64.
+**
+** - Uses the CSEL instruction to support conditional execution. The result is
+**   stored in a temporary register and then copied to the target register
+**   if the condition is true.
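+**   For example, an ARM "MOVNE r0, r1" maps roughly to (illustrative):
+**       ORR  W15, W13, W1        // W13 is kept zero, W15 is a temporary
+**       CSEL W0, W15, W0, NE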
+**
+** - In the case of conditional data transfer instructions, a conditional
+**   branch is used to skip over the instruction if the condition is false.
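+**   For example, an ARM "STRNE r0, [r1, r2]" maps roughly to (illustrative):
+**       B.EQ  #8                 // skip the store if the condition fails
+**       STR   W0, [X1, W2, SXTW #0]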
+**
+** - Wherever possible, immediate values are transferred to a temporary
+**   register prior to processing. This simplifies the overall implementation,
+**   as instructions requiring immediate values are converted to
+**   move-immediate instructions followed by a register-register instruction.
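+**   For example, an ARM "ADD r0, r1, #0x12345" maps roughly to
+**   (illustrative):
+**       MOVZ W16, #0x2345
+**       MOVK W16, #0x1, LSL #16
+**       ADD  W0, W1, W16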
+**
+** --------------------------------------------
+** ArmToAarch64Assembler unit test bench
+** --------------------------------------------
+**
+** - Tests ArmToAarch64Assembler interface for all the possible
+**   ways in which GGLAssembler uses ArmAssemblerInterface interface.
+**
+** - Uses test jacket (written in assembly) to set the registers,
+**   condition flags prior to calling generated instruction. It also
+**   copies registers and flags at the end of execution. Caller then
+**   checks if generated code performed correct operation based on
+**   output registers and flags.
+**
+** - Broadly contains three types of tests: (i) data operation tests,
+**   (ii) data transfer tests and (iii) LDM/STM tests.
+**
+** ----------------------
+** Aarch64 disassembler
+** ----------------------
+** - This disassembler disassembles only those machine codes which can be
+**   generated by ArmToAarch64Assembler. It has a unit testbench which
+**   tests all the instructions supported by the disassembler.
+**
+** ------------------------------------------------------------------
+** ARMAssembler/ARMAssemblerInterface/ARMAssemblerProxy changes
+** ------------------------------------------------------------------
+**
+** - In the existing code, addresses were being handled as 32-bit values
+**   in certain places.
+**
+** - Added a new set of functions for address load/store/manipulation.
+**   These are ADDR_LDR, ADDR_STR, ADDR_ADD, ADDR_SUB and they map to
+**   default 32 bit implementations in ARMAssemblerInterface.
+**
+** - ArmToAarch64Assembler maps these functions to the appropriate 64-bit
+**   functions.
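+**   e.g. ADDR_LDR on Aarch64 emits a 64-bit load (illustrative):
+**       LDR Xd, [Xn, Wm, SXTW #0]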
+**
+** ----------------------
+** GGLAssembler changes
+** ----------------------
+** - Since ArmToAarch64Assembler can generate up to 4 Aarch64 instructions
+**   for each call in the worst case, the memory required is set to 4 times
+**   the ARM memory.
+**
+** - Address load/store/manipulation were changed to use new functions
+**   added in the ARMAssemblerInterface.
+**
+*/
+
+
+#define NOT_IMPLEMENTED()  LOG_FATAL("Arm instruction %s not yet implemented\n", __func__)
+
+#define AARCH64_ASM_DEBUG 0
+
+#if AARCH64_ASM_DEBUG
+    #define LOG_INSTR(...) ALOGD("\t" __VA_ARGS__)
+    #define LOG_LABEL(...) ALOGD(__VA_ARGS__)
+#else
+    #define LOG_INSTR(...) ((void)0)
+    #define LOG_LABEL(...) ((void)0)
+#endif
+
+namespace android {
+
+static const char* shift_codes[] =
+{
+    "LSL", "LSR", "ASR", "ROR"
+};
+static const char *cc_codes[] =
+{
+    "EQ", "NE", "CS", "CC", "MI",
+    "PL", "VS", "VC", "HI", "LS",
+    "GE", "LT", "GT", "LE", "AL", "NV"
+};
+
+ArmToAarch64Assembler::ArmToAarch64Assembler(const sp<Assembly>& assembly)
+    :   ARMAssemblerInterface(),
+        mAssembly(assembly)
+{
+    mBase = mPC = (uint32_t *)assembly->base();
+    mDuration = ggl_system_time();
+    mZeroReg = 13;
+    mTmpReg1 = 15;
+    mTmpReg2 = 16;
+    mTmpReg3 = 17;
+}
+
+ArmToAarch64Assembler::ArmToAarch64Assembler(void *base)
+    :   ARMAssemblerInterface(), mAssembly(NULL)
+{
+    mBase = mPC = (uint32_t *)base;
+    mDuration = ggl_system_time();
+    // Regs 13, 15, 16, 17 are used as temporary registers
+    mZeroReg = 13;
+    mTmpReg1 = 15;
+    mTmpReg2 = 16;
+    mTmpReg3 = 17;
+}
+
+ArmToAarch64Assembler::~ArmToAarch64Assembler()
+{
+}
+
+uint32_t* ArmToAarch64Assembler::pc() const
+{
+    return mPC;
+}
+
+uint32_t* ArmToAarch64Assembler::base() const
+{
+    return mBase;
+}
+
+void ArmToAarch64Assembler::reset()
+{
+    if(mAssembly == NULL)
+        mPC = mBase;
+    else
+        mBase = mPC = (uint32_t *)mAssembly->base();
+    mBranchTargets.clear();
+    mLabels.clear();
+    mLabelsInverseMapping.clear();
+    mComments.clear();
+#if AARCH64_ASM_DEBUG
+    ALOGI("RESET\n");
+#endif
+}
+
+int ArmToAarch64Assembler::getCodegenArch()
+{
+    return CODEGEN_ARCH_AARCH64;
+}
+
+// ----------------------------------------------------------------------------
+
+void ArmToAarch64Assembler::disassemble(const char* name)
+{
+    if(name)
+    {
+        printf("%s:\n", name);
+    }
+    size_t count = pc()-base();
+    uint32_t* i = base();
+    while (count--)
+    {
+        ssize_t label = mLabelsInverseMapping.indexOfKey(i);
+        if (label >= 0)
+        {
+            printf("%s:\n", mLabelsInverseMapping.valueAt(label));
+        }
+        ssize_t comment = mComments.indexOfKey(i);
+        if (comment >= 0)
+        {
+            printf("; %s\n", mComments.valueAt(comment));
+        }
+        printf("%p:    %08x    ", i, uint32_t(i[0]));
+        {
+            char instr[256];
+            ::aarch64_disassemble(*i, instr);
+            printf("%s\n", instr);
+        }
+        i++;
+    }
+}
+
+void ArmToAarch64Assembler::comment(const char* string)
+{
+    mComments.add(mPC, string);
+    LOG_INSTR("//%s\n", string);
+}
+
+void ArmToAarch64Assembler::label(const char* theLabel)
+{
+    mLabels.add(theLabel, mPC);
+    mLabelsInverseMapping.add(mPC, theLabel);
+    LOG_LABEL("%s:\n", theLabel);
+}
+
+void ArmToAarch64Assembler::B(int cc, const char* label)
+{
+    mBranchTargets.add(branch_target_t(label, mPC));
+    LOG_INSTR("B%s %s\n", cc_codes[cc], label );
+    *mPC++ = (0x54 << 24) | cc;
+}
+
+void ArmToAarch64Assembler::BL(int cc, const char* label)
+{
+    NOT_IMPLEMENTED(); //Not Required
+}
+
+// ----------------------------------------------------------------------------
+//Prolog/Epilog & Generate...
+// ----------------------------------------------------------------------------
+
+void ArmToAarch64Assembler::prolog()
+{
+    // write prolog code
+    mPrologPC = mPC;
+    *mPC++ = A64_MOVZ_X(mZeroReg,0,0);
+}
+
+void ArmToAarch64Assembler::epilog(uint32_t touched)
+{
+    // write epilog code
+    static const int XLR = 30;
+    *mPC++ = A64_RET(XLR);
+}
+
+int ArmToAarch64Assembler::generate(const char* name)
+{
+    // fixup all the branches
+    size_t count = mBranchTargets.size();
+    while (count--)
+    {
+        const branch_target_t& bt = mBranchTargets[count];
+        uint32_t* target_pc = mLabels.valueFor(bt.label);
+        LOG_ALWAYS_FATAL_IF(!target_pc,
+                "error resolving branch targets, target_pc is null");
+        int32_t offset = int32_t(target_pc - bt.pc);
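+        // offset is in 32-bit instruction units; patch it into the imm19
+        // field (bits 23:5) of the B.cond placeholder emitted by B()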
+        *bt.pc |= (offset & 0x7FFFF) << 5;
+    }
+
+    if(mAssembly != NULL)
+        mAssembly->resize( int(pc()-base())*4 );
+
+    // the instruction cache is flushed by CodeCache
+    const int64_t duration = ggl_system_time() - mDuration;
+    const char * const format = "generated %s (%d ins) at [%p:%p] in %ld ns\n";
+    ALOGI(format, name, int(pc()-base()), base(), pc(), duration);
+
+
+    char value[PROPERTY_VALUE_MAX];
+    property_get("debug.pf.disasm", value, "0");
+    if (atoi(value) != 0)
+    {
+        printf(format, name, int(pc()-base()), base(), pc(), duration);
+        disassemble(name);
+    }
+    return NO_ERROR;
+}
+
+uint32_t* ArmToAarch64Assembler::pcForLabel(const char* label)
+{
+    return mLabels.valueFor(label);
+}
+
+// ----------------------------------------------------------------------------
+// Data Processing...
+// ----------------------------------------------------------------------------
+void ArmToAarch64Assembler::dataProcessingCommon(int opcode,
+        int s, int Rd, int Rn, uint32_t Op2)
+{
+    if(opcode != opSUB && s == 1)
+    {
+        NOT_IMPLEMENTED(); //Not required
+        return;
+    }
+
+    if(opcode != opSUB && opcode != opADD && opcode != opAND &&
+       opcode != opORR && opcode != opMVN)
+    {
+        NOT_IMPLEMENTED(); //Not required
+        return;
+    }
+
+    if(Op2 == OPERAND_REG_IMM && mAddrMode.reg_imm_shift > 31)
+    {
+        NOT_IMPLEMENTED();
+        return;
+    }
+
+    //Store immediate in temporary register and convert
+    //immediate operation into register operation
+    if(Op2 == OPERAND_IMM)
+    {
+        int imm = mAddrMode.immediate;
+        *mPC++ = A64_MOVZ_W(mTmpReg2, imm & 0x0000FFFF, 0);
+        *mPC++ = A64_MOVK_W(mTmpReg2, (imm >> 16) & 0x0000FFFF, 16);
+        Op2 = mTmpReg2;
+    }
+
+    {
+        uint32_t shift;
+        uint32_t amount;
+        uint32_t Rm;
+
+        if(Op2 == OPERAND_REG_IMM)
+        {
+            shift   = mAddrMode.reg_imm_type;
+            amount  = mAddrMode.reg_imm_shift;
+            Rm      = mAddrMode.reg_imm_Rm;
+        }
+        else if(Op2 < OPERAND_REG)
+        {
+            shift   = 0;
+            amount  = 0;
+            Rm      = Op2;
+        }
+        else
+        {
+            NOT_IMPLEMENTED(); //Not required
+            return;
+        }
+
+        switch(opcode)
+        {
+            case opADD: *mPC++ = A64_ADD_W(Rd, Rn, Rm, shift, amount); break;
+            case opAND: *mPC++ = A64_AND_W(Rd, Rn, Rm, shift, amount); break;
+            case opORR: *mPC++ = A64_ORR_W(Rd, Rn, Rm, shift, amount); break;
+            case opMVN: *mPC++ = A64_ORN_W(Rd, Rn, Rm, shift, amount); break;
+            case opSUB: *mPC++ = A64_SUB_W(Rd, Rn, Rm, shift, amount, s);break;
+        };
+
+    }
+}
+
+void ArmToAarch64Assembler::dataProcessing(int opcode, int cc,
+        int s, int Rd, int Rn, uint32_t Op2)
+{
+    uint32_t Wd;
+
+    if(cc != AL)
+        Wd = mTmpReg1;
+    else
+        Wd = Rd;
+
+    if(opcode == opADD || opcode == opAND || opcode == opORR || opcode == opSUB)
+    {
+        dataProcessingCommon(opcode, s, Wd, Rn, Op2);
+    }
+    else if(opcode == opCMP)
+    {
+        dataProcessingCommon(opSUB, 1, mTmpReg3, Rn, Op2);
+    }
+    else if(opcode == opRSB)
+    {
+        dataProcessingCommon(opSUB, s, Wd, Rn, Op2);
+        dataProcessingCommon(opSUB, s, Wd, mZeroReg, Wd);
+    }
+    else if(opcode == opMOV)
+    {
+        dataProcessingCommon(opORR, 0, Wd, mZeroReg, Op2);
+        if(s == 1)
+        {
+            dataProcessingCommon(opSUB, 1, mTmpReg3, Wd, mZeroReg);
+        }
+    }
+    else if(opcode == opMVN)
+    {
+        dataProcessingCommon(opMVN, s, Wd, mZeroReg, Op2);
+    }
+    else if(opcode == opBIC)
+    {
+        dataProcessingCommon(opMVN, s, mTmpReg3, mZeroReg, Op2);
+        dataProcessingCommon(opAND, s, Wd, Rn, mTmpReg3);
+    }
+    else
+    {
+        NOT_IMPLEMENTED();
+        return;
+    }
+
+    if(cc != AL)
+    {
+        *mPC++ = A64_CSEL_W(Rd, mTmpReg1, Rd, cc);
+    }
+}
+// ----------------------------------------------------------------------------
+// Address Processing...
+// ----------------------------------------------------------------------------
+
+void ArmToAarch64Assembler::ADDR_ADD(int cc,
+        int s, int Rd, int Rn, uint32_t Op2)
+{
+    if(cc != AL){ NOT_IMPLEMENTED(); return;} //Not required
+    if(s  != 0) { NOT_IMPLEMENTED(); return;} //Not required
+
+
+    if(Op2 == OPERAND_REG_IMM && mAddrMode.reg_imm_type == LSL)
+    {
+        int Rm = mAddrMode.reg_imm_Rm;
+        int amount = mAddrMode.reg_imm_shift;
+        *mPC++ = A64_ADD_X_Wm_SXTW(Rd, Rn, Rm, amount);
+    }
+    else if(Op2 < OPERAND_REG)
+    {
+        int Rm = Op2;
+        int amount = 0;
+        *mPC++ = A64_ADD_X_Wm_SXTW(Rd, Rn, Rm, amount);
+    }
+    else if(Op2 == OPERAND_IMM)
+    {
+        int imm = mAddrMode.immediate;
+        *mPC++ = A64_MOVZ_W(mTmpReg1, imm & 0x0000FFFF, 0);
+        *mPC++ = A64_MOVK_W(mTmpReg1, (imm >> 16) & 0x0000FFFF, 16);
+
+        int Rm = mTmpReg1;
+        int amount = 0;
+        *mPC++ = A64_ADD_X_Wm_SXTW(Rd, Rn, Rm, amount);
+    }
+    else
+    {
+        NOT_IMPLEMENTED(); //Not required
+    }
+}
+
+void ArmToAarch64Assembler::ADDR_SUB(int cc,
+        int s, int Rd, int Rn, uint32_t Op2)
+{
+    if(cc != AL){ NOT_IMPLEMENTED(); return;} //Not required
+    if(s  != 0) { NOT_IMPLEMENTED(); return;} //Not required
+
+    if(Op2 == OPERAND_REG_IMM && mAddrMode.reg_imm_type == LSR)
+    {
+        *mPC++ = A64_ADD_W(mTmpReg1, mZeroReg, mAddrMode.reg_imm_Rm,
+                           LSR, mAddrMode.reg_imm_shift);
+        *mPC++ = A64_SUB_X_Wm_SXTW(Rd, Rn, mTmpReg1, 0);
+    }
+    else
+    {
+        NOT_IMPLEMENTED(); //Not required
+    }
+}
+
+// ----------------------------------------------------------------------------
+// multiply...
+// ----------------------------------------------------------------------------
+void ArmToAarch64Assembler::MLA(int cc, int s,int Rd, int Rm, int Rs, int Rn)
+{
+    if(cc != AL){ NOT_IMPLEMENTED(); return;} //Not required
+
+    *mPC++ = A64_MADD_W(Rd, Rm, Rs, Rn);
+    if(s == 1)
+        dataProcessingCommon(opSUB, 1, mTmpReg1, Rd, mZeroReg);
+}
+void ArmToAarch64Assembler::MUL(int cc, int s, int Rd, int Rm, int Rs)
+{
+    if(cc != AL){ NOT_IMPLEMENTED(); return;} //Not required
+    if(s  != 0) { NOT_IMPLEMENTED(); return;} //Not required
+    *mPC++ = A64_MADD_W(Rd, Rm, Rs, mZeroReg);
+}
+void ArmToAarch64Assembler::UMULL(int cc, int s,
+        int RdLo, int RdHi, int Rm, int Rs)
+{
+    NOT_IMPLEMENTED(); //Not required
+}
+void ArmToAarch64Assembler::UMUAL(int cc, int s,
+        int RdLo, int RdHi, int Rm, int Rs)
+{
+    NOT_IMPLEMENTED(); //Not required
+}
+void ArmToAarch64Assembler::SMULL(int cc, int s,
+        int RdLo, int RdHi, int Rm, int Rs)
+{
+    NOT_IMPLEMENTED(); //Not required
+}
+void ArmToAarch64Assembler::SMUAL(int cc, int s,
+        int RdLo, int RdHi, int Rm, int Rs)
+{
+    NOT_IMPLEMENTED(); //Not required
+}
+
+// ----------------------------------------------------------------------------
+// branches relative to PC...
+// ----------------------------------------------------------------------------
+void ArmToAarch64Assembler::B(int cc, uint32_t* pc){
+    NOT_IMPLEMENTED(); //Not required
+}
+
+void ArmToAarch64Assembler::BL(int cc, uint32_t* pc){
+    NOT_IMPLEMENTED(); //Not required
+}
+
+void ArmToAarch64Assembler::BX(int cc, int Rn){
+    NOT_IMPLEMENTED(); //Not required
+}
+
+// ----------------------------------------------------------------------------
+// data transfer...
+// ----------------------------------------------------------------------------
+enum dataTransferOp
+{
+    opLDR,opLDRB,opLDRH,opSTR,opSTRB,opSTRH
+};
+
+void ArmToAarch64Assembler::dataTransfer(int op, int cc,
+                            int Rd, int Rn, uint32_t op_type, uint32_t size)
+{
+    const int XSP = 31;
+    if(Rn == SP)
+        Rn = XSP;
+
+    if(op_type == OPERAND_IMM)
+    {
+        int addrReg;
+        int imm = mAddrMode.immediate;
+        if(imm >= 0 && imm < (1<<12))
+            *mPC++ = A64_ADD_IMM_X(mTmpReg1, mZeroReg, imm, 0);
+        else if(imm < 0 && -imm < (1<<12))
+            *mPC++ = A64_SUB_IMM_X(mTmpReg1, mZeroReg, -imm, 0);
+        else
+        {
+            NOT_IMPLEMENTED();
+            return;
+        }
+
+        addrReg = Rn;
+        if(mAddrMode.preindex == true || mAddrMode.postindex == true)
+        {
+            *mPC++ = A64_ADD_X(mTmpReg2, addrReg, mTmpReg1);
+            if(mAddrMode.preindex == true)
+                addrReg = mTmpReg2;
+        }
+
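+        // for a conditional transfer, branch 8 bytes ahead (over the next
+        // instruction) on the inverted condition (cc^1)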
+        if(cc != AL)
+            *mPC++ = A64_B_COND(cc^1, 8);
+
+        *mPC++ = A64_LDRSTR_Wm_SXTW_0(op, size, Rd, addrReg, mZeroReg);
+
+        if(mAddrMode.writeback == true)
+            *mPC++ = A64_CSEL_X(Rn, mTmpReg2, Rn, cc);
+    }
+    else if(op_type == OPERAND_REG_OFFSET)
+    {
+        if(cc != AL)
+            *mPC++ = A64_B_COND(cc^1, 8);
+        *mPC++ = A64_LDRSTR_Wm_SXTW_0(op, size, Rd, Rn, mAddrMode.reg_offset);
+
+    }
+    else if(op_type > OPERAND_UNSUPPORTED)
+    {
+        if(cc != AL)
+            *mPC++ = A64_B_COND(cc^1, 8);
+        *mPC++ = A64_LDRSTR_Wm_SXTW_0(op, size, Rd, Rn, mZeroReg);
+    }
+    else
+    {
+        NOT_IMPLEMENTED(); // Not required
+    }
+}
+void ArmToAarch64Assembler::ADDR_LDR(int cc, int Rd, int Rn, uint32_t op_type)
+{
+    return dataTransfer(opLDR, cc, Rd, Rn, op_type, 64);
+}
+void ArmToAarch64Assembler::ADDR_STR(int cc, int Rd, int Rn, uint32_t op_type)
+{
+    return dataTransfer(opSTR, cc, Rd, Rn, op_type, 64);
+}
+void ArmToAarch64Assembler::LDR(int cc, int Rd, int Rn, uint32_t op_type)
+{
+    return dataTransfer(opLDR, cc, Rd, Rn, op_type);
+}
+void ArmToAarch64Assembler::LDRB(int cc, int Rd, int Rn, uint32_t op_type)
+{
+    return dataTransfer(opLDRB, cc, Rd, Rn, op_type);
+}
+void ArmToAarch64Assembler::STR(int cc, int Rd, int Rn, uint32_t op_type)
+{
+    return dataTransfer(opSTR, cc, Rd, Rn, op_type);
+}
+
+void ArmToAarch64Assembler::STRB(int cc, int Rd, int Rn, uint32_t op_type)
+{
+    return dataTransfer(opSTRB, cc, Rd, Rn, op_type);
+}
+
+void ArmToAarch64Assembler::LDRH(int cc, int Rd, int Rn, uint32_t op_type)
+{
+    return dataTransfer(opLDRH, cc, Rd, Rn, op_type);
+}
+void ArmToAarch64Assembler::LDRSB(int cc, int Rd, int Rn, uint32_t offset)
+{
+    NOT_IMPLEMENTED(); //Not required
+}
+void ArmToAarch64Assembler::LDRSH(int cc, int Rd, int Rn, uint32_t offset)
+{
+    NOT_IMPLEMENTED(); //Not required
+}
+
+void ArmToAarch64Assembler::STRH(int cc, int Rd, int Rn, uint32_t op_type)
+{
+    return dataTransfer(opSTRH, cc, Rd, Rn, op_type);
+}
+
+// ----------------------------------------------------------------------------
+// block data transfer...
+// ----------------------------------------------------------------------------
+void ArmToAarch64Assembler::LDM(int cc, int dir,
+        int Rn, int W, uint32_t reg_list)
+{
+    const int XSP = 31;
+    if(cc != AL || dir != IA || W == 0 || Rn != SP)
+    {
+        NOT_IMPLEMENTED();
+        return;
+    }
+
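+    // pop each register with a 16-byte post-increment, keeping SP
+    // 16-byte aligned as required on Aarch64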
+    for(int i = 0; i < 32; ++i)
+    {
+        if((reg_list & (1 << i)))
+        {
+            int reg = i;
+            int size = 16;
+            *mPC++ = A64_LDR_IMM_PostIndex(reg, XSP, size);
+        }
+    }
+}
+
+void ArmToAarch64Assembler::STM(int cc, int dir,
+        int Rn, int W, uint32_t reg_list)
+{
+    const int XSP = 31;
+    if(cc != AL || dir != DB || W == 0 || Rn != SP)
+    {
+        NOT_IMPLEMENTED();
+        return;
+    }
+
+    for(int i = 31; i >= 0; --i)
+    {
+        if((reg_list & (1 << i)))
+        {
+            int size = -16;
+            int reg  = i;
+            *mPC++ = A64_STR_IMM_PreIndex(reg, XSP, size);
+        }
+    }
+}
+
+// ----------------------------------------------------------------------------
+// special...
+// ----------------------------------------------------------------------------
+void ArmToAarch64Assembler::SWP(int cc, int Rn, int Rd, int Rm)
+{
+    NOT_IMPLEMENTED(); //Not required
+}
+void ArmToAarch64Assembler::SWPB(int cc, int Rn, int Rd, int Rm)
+{
+    NOT_IMPLEMENTED(); //Not required
+}
+void ArmToAarch64Assembler::SWI(int cc, uint32_t comment)
+{
+    NOT_IMPLEMENTED(); //Not required
+}
+
+// ----------------------------------------------------------------------------
+// DSP instructions...
+// ----------------------------------------------------------------------------
+void ArmToAarch64Assembler::PLD(int Rn, uint32_t offset) {
+    NOT_IMPLEMENTED(); //Not required
+}
+
+void ArmToAarch64Assembler::CLZ(int cc, int Rd, int Rm)
+{
+    NOT_IMPLEMENTED(); //Not required
+}
+
+void ArmToAarch64Assembler::QADD(int cc,  int Rd, int Rm, int Rn)
+{
+    NOT_IMPLEMENTED(); //Not required
+}
+
+void ArmToAarch64Assembler::QDADD(int cc,  int Rd, int Rm, int Rn)
+{
+    NOT_IMPLEMENTED(); //Not required
+}
+
+void ArmToAarch64Assembler::QSUB(int cc,  int Rd, int Rm, int Rn)
+{
+    NOT_IMPLEMENTED(); //Not required
+}
+
+void ArmToAarch64Assembler::QDSUB(int cc,  int Rd, int Rm, int Rn)
+{
+    NOT_IMPLEMENTED(); //Not required
+}
+
+// ----------------------------------------------------------------------------
+// 16 x 16 multiplication
+// ----------------------------------------------------------------------------
+void ArmToAarch64Assembler::SMUL(int cc, int xy,
+                int Rd, int Rm, int Rs)
+{
+    if(cc != AL){ NOT_IMPLEMENTED(); return;} //Not required
+
+    if (xy & xyTB)
+        *mPC++ = A64_SBFM_W(mTmpReg1, Rm, 16, 31);
+    else
+        *mPC++ = A64_SBFM_W(mTmpReg1, Rm, 0, 15);
+
+    if (xy & xyBT)
+        *mPC++ = A64_SBFM_W(mTmpReg2, Rs, 16, 31);
+    else
+        *mPC++ = A64_SBFM_W(mTmpReg2, Rs, 0, 15);
+
+    *mPC++ = A64_MADD_W(Rd,mTmpReg1,mTmpReg2, mZeroReg);
+}
+// ----------------------------------------------------------------------------
+// 32 x 16 multiplication
+// ----------------------------------------------------------------------------
+void ArmToAarch64Assembler::SMULW(int cc, int y, int Rd, int Rm, int Rs)
+{
+    if(cc != AL){ NOT_IMPLEMENTED(); return;} //Not required
+
+    if (y & yT)
+        *mPC++ = A64_SBFM_W(mTmpReg1, Rs, 16, 31);
+    else
+        *mPC++ = A64_SBFM_W(mTmpReg1, Rs, 0, 15);
+
+    *mPC++ = A64_SBFM_W(mTmpReg2, Rm, 0, 31);
+    *mPC++ = A64_SMADDL(mTmpReg3,mTmpReg1,mTmpReg2, mZeroReg);
+    *mPC++ = A64_UBFM_X(Rd,mTmpReg3, 16, 47);
+}
+// ----------------------------------------------------------------------------
+// 16 x 16 multiplication and accumulate
+// ----------------------------------------------------------------------------
+void ArmToAarch64Assembler::SMLA(int cc, int xy, int Rd, int Rm, int Rs, int Rn)
+{
+    if(cc != AL){ NOT_IMPLEMENTED(); return;} //Not required
+    if(xy != xyBB) { NOT_IMPLEMENTED(); return;} //Not required
+
+    *mPC++ = A64_SBFM_W(mTmpReg1, Rm, 0, 15);
+    *mPC++ = A64_SBFM_W(mTmpReg2, Rs, 0, 15);
+    *mPC++ = A64_MADD_W(Rd, mTmpReg1, mTmpReg2, Rn);
+}
+
+void ArmToAarch64Assembler::SMLAL(int cc, int xy,
+                int RdHi, int RdLo, int Rs, int Rm)
+{
+    NOT_IMPLEMENTED(); //Not required
+    return;
+}
+
+void ArmToAarch64Assembler::SMLAW(int cc, int y,
+                int Rd, int Rm, int Rs, int Rn)
+{
+    NOT_IMPLEMENTED(); //Not required
+    return;
+}
+
+// ----------------------------------------------------------------------------
+// Byte/half word extract and extend
+// ----------------------------------------------------------------------------
+void ArmToAarch64Assembler::UXTB16(int cc, int Rd, int Rm, int rotate)
+{
+    if(cc != AL){ NOT_IMPLEMENTED(); return;} //Not required
+
+    *mPC++ = A64_EXTR_W(mTmpReg1, Rm, Rm, rotate * 8);
+
+    uint32_t imm = 0x00FF00FF;
+    *mPC++ = A64_MOVZ_W(mTmpReg2, imm & 0xFFFF, 0);
+    *mPC++ = A64_MOVK_W(mTmpReg2, (imm >> 16) & 0x0000FFFF, 16);
+    *mPC++ = A64_AND_W(Rd,mTmpReg1, mTmpReg2);
+}
+
+// ----------------------------------------------------------------------------
+// Bit manipulation
+// ----------------------------------------------------------------------------
+void ArmToAarch64Assembler::UBFX(int cc, int Rd, int Rn, int lsb, int width)
+{
+    if(cc != AL){ NOT_IMPLEMENTED(); return;} //Not required
+    *mPC++ = A64_UBFM_W(Rd, Rn, lsb, lsb + width - 1);
+}
+// ----------------------------------------------------------------------------
+// Shifters...
+// ----------------------------------------------------------------------------
+int ArmToAarch64Assembler::buildImmediate(
+        uint32_t immediate, uint32_t& rot, uint32_t& imm)
+{
+    rot = 0;
+    imm = immediate;
+    return 0; // Always true
+}
+
+
+bool ArmToAarch64Assembler::isValidImmediate(uint32_t immediate)
+{
+    uint32_t rot, imm;
+    return buildImmediate(immediate, rot, imm) == 0;
+}
+
+uint32_t ArmToAarch64Assembler::imm(uint32_t immediate)
+{
+    mAddrMode.immediate = immediate;
+    mAddrMode.writeback = false;
+    mAddrMode.preindex  = false;
+    mAddrMode.postindex = false;
+    return OPERAND_IMM;
+
+}
+
+uint32_t ArmToAarch64Assembler::reg_imm(int Rm, int type, uint32_t shift)
+{
+    mAddrMode.reg_imm_Rm = Rm;
+    mAddrMode.reg_imm_type = type;
+    mAddrMode.reg_imm_shift = shift;
+    return OPERAND_REG_IMM;
+}
+
+uint32_t ArmToAarch64Assembler::reg_rrx(int Rm)
+{
+    NOT_IMPLEMENTED();
+    return OPERAND_UNSUPPORTED;
+}
+
+uint32_t ArmToAarch64Assembler::reg_reg(int Rm, int type, int Rs)
+{
+    NOT_IMPLEMENTED(); //Not required
+    return OPERAND_UNSUPPORTED;
+}
+// ----------------------------------------------------------------------------
+// Addressing modes...
+// ----------------------------------------------------------------------------
+uint32_t ArmToAarch64Assembler::immed12_pre(int32_t immed12, int W)
+{
+    mAddrMode.immediate = immed12;
+    mAddrMode.writeback = W;
+    mAddrMode.preindex  = true;
+    mAddrMode.postindex = false;
+    return OPERAND_IMM;
+}
+
+uint32_t ArmToAarch64Assembler::immed12_post(int32_t immed12)
+{
+    mAddrMode.immediate = immed12;
+    mAddrMode.writeback = true;
+    mAddrMode.preindex  = false;
+    mAddrMode.postindex = true;
+    return OPERAND_IMM;
+}
+
+uint32_t ArmToAarch64Assembler::reg_scale_pre(int Rm, int type,
+        uint32_t shift, int W)
+{
+    if(type != 0 || shift != 0 || W != 0)
+    {
+        NOT_IMPLEMENTED(); //Not required
+        return OPERAND_UNSUPPORTED;
+    }
+    else
+    {
+        mAddrMode.reg_offset = Rm;
+        return OPERAND_REG_OFFSET;
+    }
+}
+
+uint32_t ArmToAarch64Assembler::reg_scale_post(int Rm, int type, uint32_t shift)
+{
+    NOT_IMPLEMENTED(); //Not required
+    return OPERAND_UNSUPPORTED;
+}
+
+uint32_t ArmToAarch64Assembler::immed8_pre(int32_t immed8, int W)
+{
+    mAddrMode.immediate = immed8;
+    mAddrMode.writeback = W;
+    mAddrMode.preindex  = true;
+    mAddrMode.postindex = false;
+    return OPERAND_IMM;
+}
+
+uint32_t ArmToAarch64Assembler::immed8_post(int32_t immed8)
+{
+    mAddrMode.immediate = immed8;
+    mAddrMode.writeback = true;
+    mAddrMode.preindex  = false;
+    mAddrMode.postindex = true;
+    return OPERAND_IMM;
+}
+
+uint32_t ArmToAarch64Assembler::reg_pre(int Rm, int W)
+{
+    if(W != 0)
+    {
+        NOT_IMPLEMENTED(); //Not required
+        return OPERAND_UNSUPPORTED;
+    }
+    else
+    {
+        mAddrMode.reg_offset = Rm;
+        return OPERAND_REG_OFFSET;
+    }
+}
+
+uint32_t ArmToAarch64Assembler::reg_post(int Rm)
+{
+    NOT_IMPLEMENTED(); //Not required
+    return OPERAND_UNSUPPORTED;
+}
+
+// ----------------------------------------------------------------------------
+// A64 instructions
+// ----------------------------------------------------------------------------
+
+static const char * dataTransferOpName[] =
+{
+    "LDR","LDRB","LDRH","STR","STRB","STRH"
+};
+
+static const uint32_t dataTransferOpCode [] =
+{
+    ((0xB8u << 24) | (0x3 << 21) | (0x6 << 13) | (0x0 << 12) |(0x1 << 11)),
+    ((0x38u << 24) | (0x3 << 21) | (0x6 << 13) | (0x1 << 12) |(0x1 << 11)),
+    ((0x78u << 24) | (0x3 << 21) | (0x6 << 13) | (0x0 << 12) |(0x1 << 11)),
+    ((0xB8u << 24) | (0x1 << 21) | (0x6 << 13) | (0x0 << 12) |(0x1 << 11)),
+    ((0x38u << 24) | (0x1 << 21) | (0x6 << 13) | (0x1 << 12) |(0x1 << 11)),
+    ((0x78u << 24) | (0x1 << 21) | (0x6 << 13) | (0x0 << 12) |(0x1 << 11))
+};
+uint32_t ArmToAarch64Assembler::A64_LDRSTR_Wm_SXTW_0(uint32_t op,
+                            uint32_t size, uint32_t Rt,
+                            uint32_t Rn, uint32_t Rm)
+{
+    if(size == 32)
+    {
+        LOG_INSTR("%s W%d, [X%d, W%d, SXTW #0]\n",
+                   dataTransferOpName[op], Rt, Rn, Rm);
+        return(dataTransferOpCode[op] | (Rm << 16) | (Rn << 5) | Rt);
+    }
+    else
+    {
+        LOG_INSTR("%s X%d, [X%d, W%d, SXTW #0]\n",
+                  dataTransferOpName[op], Rt, Rn, Rm);
+        return(dataTransferOpCode[op] | (0x1<<30) | (Rm<<16) | (Rn<<5)|Rt);
+    }
+}
+
+uint32_t ArmToAarch64Assembler::A64_STR_IMM_PreIndex(uint32_t Rt,
+                            uint32_t Rn, int32_t simm)
+{
+    if(Rn == 31)
+        LOG_INSTR("STR W%d, [SP, #%d]!\n", Rt, simm);
+    else
+        LOG_INSTR("STR W%d, [X%d, #%d]!\n", Rt, Rn, simm);
+
+    uint32_t imm9 = (unsigned)(simm) & 0x01FF;
+    return (0xB8 << 24) | (imm9 << 12) | (0x3 << 10) | (Rn << 5) | Rt;
+}
+
+uint32_t ArmToAarch64Assembler::A64_LDR_IMM_PostIndex(uint32_t Rt,
+                            uint32_t Rn, int32_t simm)
+{
+    if(Rn == 31)
+        LOG_INSTR("LDR W%d, [SP], #%d\n",Rt,simm);
+    else
+        LOG_INSTR("LDR W%d, [X%d], #%d\n",Rt, Rn, simm);
+
+    uint32_t imm9 = (unsigned)(simm) & 0x01FF;
+    return (0xB8 << 24) | (0x1 << 22) |
+             (imm9 << 12) | (0x1 << 10) | (Rn << 5) | Rt;
+
+}
+uint32_t ArmToAarch64Assembler::A64_ADD_X_Wm_SXTW(uint32_t Rd,
+                               uint32_t Rn,
+                               uint32_t Rm,
+                               uint32_t amount)
+{
+    LOG_INSTR("ADD X%d, X%d, W%d, SXTW #%d\n", Rd, Rn, Rm, amount);
+    return ((0x8B << 24) | (0x1 << 21) |(Rm << 16) |
+              (0x6 << 13) | (amount << 10) | (Rn << 5) | Rd);
+
+}
+
+uint32_t ArmToAarch64Assembler::A64_SUB_X_Wm_SXTW(uint32_t Rd,
+                               uint32_t Rn,
+                               uint32_t Rm,
+                               uint32_t amount)
+{
+    LOG_INSTR("SUB X%d, X%d, W%d, SXTW #%d\n", Rd, Rn, Rm, amount);
+    return ((0xCB << 24) | (0x1 << 21) |(Rm << 16) |
+            (0x6 << 13) | (amount << 10) | (Rn << 5) | Rd);
+
+}
+
+uint32_t ArmToAarch64Assembler::A64_B_COND(uint32_t cc, uint32_t offset)
+{
+    LOG_INSTR("B.%s #.+%d\n", cc_codes[cc], offset);
+    return (0x54 << 24) | ((offset/4) << 5) | (cc);
+
+}
+uint32_t ArmToAarch64Assembler::A64_ADD_X(uint32_t Rd, uint32_t Rn,
+                                          uint32_t Rm, uint32_t shift,
+                                          uint32_t amount)
+{
+    LOG_INSTR("ADD X%d, X%d, X%d, %s #%d\n",
+               Rd, Rn, Rm, shift_codes[shift], amount);
+    return ((0x8B << 24) | (shift << 22) | ( Rm << 16) |
+            (amount << 10) |(Rn << 5) | Rd);
+}
+uint32_t ArmToAarch64Assembler::A64_ADD_IMM_X(uint32_t Rd, uint32_t Rn,
+                                          uint32_t imm, uint32_t shift)
+{
+    LOG_INSTR("ADD X%d, X%d, #%d, LSL #%d\n", Rd, Rn, imm, shift);
+    return (0x91 << 24) | ((shift/12) << 22) | (imm << 10) | (Rn << 5) | Rd;
+}
+
+uint32_t ArmToAarch64Assembler::A64_SUB_IMM_X(uint32_t Rd, uint32_t Rn,
+                                          uint32_t imm, uint32_t shift)
+{
+    LOG_INSTR("SUB X%d, X%d, #%d, LSL #%d\n", Rd, Rn, imm, shift);
+    return (0xD1 << 24) | ((shift/12) << 22) | (imm << 10) | (Rn << 5) | Rd;
+}
+
+uint32_t ArmToAarch64Assembler::A64_ADD_W(uint32_t Rd, uint32_t Rn,
+                                          uint32_t Rm, uint32_t shift,
+                                          uint32_t amount)
+{
+    LOG_INSTR("ADD W%d, W%d, W%d, %s #%d\n",
+               Rd, Rn, Rm, shift_codes[shift], amount);
+    return ((0x0B << 24) | (shift << 22) | ( Rm << 16) |
+            (amount << 10) |(Rn << 5) | Rd);
+}
+
+uint32_t ArmToAarch64Assembler::A64_SUB_W(uint32_t Rd, uint32_t Rn,
+                                          uint32_t Rm, uint32_t shift,
+                                          uint32_t amount,
+                                          uint32_t setflag)
+{
+    if(setflag == 0)
+    {
+        LOG_INSTR("SUB W%d, W%d, W%d, %s #%d\n",
+               Rd, Rn, Rm, shift_codes[shift], amount);
+        return ((0x4B << 24) | (shift << 22) | ( Rm << 16) |
+                (amount << 10) |(Rn << 5) | Rd);
+    }
+    else
+    {
+        LOG_INSTR("SUBS W%d, W%d, W%d, %s #%d\n",
+                   Rd, Rn, Rm, shift_codes[shift], amount);
+        return ((0x6B << 24) | (shift << 22) | ( Rm << 16) |
+                (amount << 10) |(Rn << 5) | Rd);
+    }
+}
+
+uint32_t ArmToAarch64Assembler::A64_AND_W(uint32_t Rd, uint32_t Rn,
+                                          uint32_t Rm, uint32_t shift,
+                                          uint32_t amount)
+{
+    LOG_INSTR("AND W%d, W%d, W%d, %s #%d\n",
+               Rd, Rn, Rm, shift_codes[shift], amount);
+    return ((0x0A << 24) | (shift << 22) | ( Rm << 16) |
+            (amount << 10) |(Rn << 5) | Rd);
+}
+
+uint32_t ArmToAarch64Assembler::A64_ORR_W(uint32_t Rd, uint32_t Rn,
+                                          uint32_t Rm, uint32_t shift,
+                                          uint32_t amount)
+{
+    LOG_INSTR("ORR W%d, W%d, W%d, %s #%d\n",
+               Rd, Rn, Rm, shift_codes[shift], amount);
+    return ((0x2A << 24) | (shift << 22) | ( Rm << 16) |
+            (amount << 10) |(Rn << 5) | Rd);
+}
+
+uint32_t ArmToAarch64Assembler::A64_ORN_W(uint32_t Rd, uint32_t Rn,
+                                          uint32_t Rm, uint32_t shift,
+                                          uint32_t amount)
+{
+    LOG_INSTR("ORN W%d, W%d, W%d, %s #%d\n",
+               Rd, Rn, Rm, shift_codes[shift], amount);
+    return ((0x2A << 24) | (shift << 22) | (0x1 << 21) | ( Rm << 16) |
+            (amount << 10) |(Rn << 5) | Rd);
+}
+
+uint32_t ArmToAarch64Assembler::A64_CSEL_X(uint32_t Rd, uint32_t Rn,
+                                           uint32_t Rm, uint32_t cond)
+{
+    LOG_INSTR("CSEL X%d, X%d, X%d, %s\n", Rd, Rn, Rm, cc_codes[cond]);
+    return ((0x9A << 24)|(0x1 << 23)|(Rm << 16) |(cond << 12)| (Rn << 5) | Rd);
+}
+
+uint32_t ArmToAarch64Assembler::A64_CSEL_W(uint32_t Rd, uint32_t Rn,
+                                           uint32_t Rm, uint32_t cond)
+{
+    LOG_INSTR("CSEL W%d, W%d, W%d, %s\n", Rd, Rn, Rm, cc_codes[cond]);
+    return ((0x1A << 24)|(0x1 << 23)|(Rm << 16) |(cond << 12)| (Rn << 5) | Rd);
+}
+
+uint32_t ArmToAarch64Assembler::A64_RET(uint32_t Rn)
+{
+    LOG_INSTR("RET X%d\n", Rn);
+    return ((0xD6 << 24) | (0x1 << 22) | (0x1F << 16) | (Rn << 5));
+}
+
+uint32_t ArmToAarch64Assembler::A64_MOVZ_X(uint32_t Rd, uint32_t imm,
+                                         uint32_t shift)
+{
+    LOG_INSTR("MOVZ X%d, #0x%x, LSL #%d\n", Rd, imm, shift);
+    return(0xD2 << 24) | (0x1 << 23) | ((shift/16) << 21) |  (imm << 5) | Rd;
+}
+
+uint32_t ArmToAarch64Assembler::A64_MOVK_W(uint32_t Rd, uint32_t imm,
+                                         uint32_t shift)
+{
+    LOG_INSTR("MOVK W%d, #0x%x, LSL #%d\n", Rd, imm, shift);
+    return (0x72 << 24) | (0x1 << 23) | ((shift/16) << 21) | (imm << 5) | Rd;
+}
+
+uint32_t ArmToAarch64Assembler::A64_MOVZ_W(uint32_t Rd, uint32_t imm,
+                                         uint32_t shift)
+{
+    LOG_INSTR("MOVZ W%d, #0x%x, LSL #%d\n", Rd, imm, shift);
+    return(0x52 << 24) | (0x1 << 23) | ((shift/16) << 21) |  (imm << 5) | Rd;
+}
+
+uint32_t ArmToAarch64Assembler::A64_SMADDL(uint32_t Rd, uint32_t Rn,
+                                           uint32_t Rm, uint32_t Ra)
+{
+    LOG_INSTR("SMADDL X%d, W%d, W%d, X%d\n",Rd, Rn, Rm, Ra);
+    return ((0x9B << 24) | (0x1 << 21) | (Rm << 16)|(Ra << 10)|(Rn << 5) | Rd);
+}
+
+uint32_t ArmToAarch64Assembler::A64_MADD_W(uint32_t Rd, uint32_t Rn,
+                                           uint32_t Rm, uint32_t Ra)
+{
+    LOG_INSTR("MADD W%d, W%d, W%d, W%d\n",Rd, Rn, Rm, Ra);
+    return ((0x1B << 24) | (Rm << 16) | (Ra << 10) |(Rn << 5) | Rd);
+}
+
+uint32_t ArmToAarch64Assembler::A64_SBFM_W(uint32_t Rd, uint32_t Rn,
+                                           uint32_t immr, uint32_t imms)
+{
+    LOG_INSTR("SBFM W%d, W%d, #%d, #%d\n", Rd, Rn, immr, imms);
+    return ((0x13 << 24) | (immr << 16) | (imms << 10) | (Rn << 5) | Rd);
+
+}
+uint32_t ArmToAarch64Assembler::A64_UBFM_W(uint32_t Rd, uint32_t Rn,
+                                           uint32_t immr, uint32_t imms)
+{
+    LOG_INSTR("UBFM W%d, W%d, #%d, #%d\n", Rd, Rn, immr, imms);
+    return ((0x53 << 24) | (immr << 16) | (imms << 10) | (Rn << 5) | Rd);
+
+}
+uint32_t ArmToAarch64Assembler::A64_UBFM_X(uint32_t Rd, uint32_t Rn,
+                                           uint32_t immr, uint32_t imms)
+{
+    LOG_INSTR("UBFM X%d, X%d, #%d, #%d\n", Rd, Rn, immr, imms);
+    return ((0xD3 << 24) | (0x1 << 22) |
+            (immr << 16) | (imms << 10) | (Rn << 5) | Rd);
+
+}
+uint32_t ArmToAarch64Assembler::A64_EXTR_W(uint32_t Rd, uint32_t Rn,
+                                           uint32_t Rm, uint32_t lsb)
+{
+    LOG_INSTR("EXTR W%d, W%d, W%d, #%d\n", Rd, Rn, Rm, lsb);
+    return (0x13 << 24)|(0x1 << 23) | (Rm << 16) | (lsb << 10)|(Rn << 5) | Rd;
+}
+
+}; // namespace android
+
diff --git a/libpixelflinger/codeflinger/Aarch64Assembler.h b/libpixelflinger/codeflinger/Aarch64Assembler.h
new file mode 100644
index 0000000..79c912b
--- /dev/null
+++ b/libpixelflinger/codeflinger/Aarch64Assembler.h
@@ -0,0 +1,290 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef ANDROID_ARMTOAARCH64ASSEMBLER_H
+#define ANDROID_ARMTOAARCH64ASSEMBLER_H
+
+#include <stdint.h>
+#include <sys/types.h>
+
+#include "tinyutils/Vector.h"
+#include "tinyutils/KeyedVector.h"
+#include "tinyutils/smartpointer.h"
+
+#include "tinyutils/smartpointer.h"
+#include "codeflinger/ARMAssemblerInterface.h"
+#include "codeflinger/CodeCache.h"
+
+namespace android {
+
+// ----------------------------------------------------------------------------
+
+class ArmToAarch64Assembler : public ARMAssemblerInterface
+{
+public:
+                ArmToAarch64Assembler(const sp<Assembly>& assembly);
+                ArmToAarch64Assembler(void *base);
+    virtual     ~ArmToAarch64Assembler();
+
+    uint32_t*   base() const;
+    uint32_t*   pc() const;
+
+
+    void        disassemble(const char* name);
+
+    // ------------------------------------------------------------------------
+    // ARMAssemblerInterface...
+    // ------------------------------------------------------------------------
+
+    virtual void    reset();
+
+    virtual int     generate(const char* name);
+    virtual int     getCodegenArch();
+
+    virtual void    prolog();
+    virtual void    epilog(uint32_t touched);
+    virtual void    comment(const char* string);
+
+
+    // -----------------------------------------------------------------------
+    // shifters and addressing modes
+    // -----------------------------------------------------------------------
+
+    // shifters...
+    virtual bool        isValidImmediate(uint32_t immed);
+    virtual int         buildImmediate(uint32_t i, uint32_t& rot, uint32_t& imm);
+
+    virtual uint32_t    imm(uint32_t immediate);
+    virtual uint32_t    reg_imm(int Rm, int type, uint32_t shift);
+    virtual uint32_t    reg_rrx(int Rm);
+    virtual uint32_t    reg_reg(int Rm, int type, int Rs);
+
+    // addressing modes...
+    virtual uint32_t    immed12_pre(int32_t immed12, int W=0);
+    virtual uint32_t    immed12_post(int32_t immed12);
+    virtual uint32_t    reg_scale_pre(int Rm, int type=0, uint32_t shift=0, int W=0);
+    virtual uint32_t    reg_scale_post(int Rm, int type=0, uint32_t shift=0);
+    virtual uint32_t    immed8_pre(int32_t immed8, int W=0);
+    virtual uint32_t    immed8_post(int32_t immed8);
+    virtual uint32_t    reg_pre(int Rm, int W=0);
+    virtual uint32_t    reg_post(int Rm);
+
+
+    virtual void    dataProcessing(int opcode, int cc, int s,
+                                int Rd, int Rn,
+                                uint32_t Op2);
+    virtual void MLA(int cc, int s,
+                int Rd, int Rm, int Rs, int Rn);
+    virtual void MUL(int cc, int s,
+                int Rd, int Rm, int Rs);
+    virtual void UMULL(int cc, int s,
+                int RdLo, int RdHi, int Rm, int Rs);
+    virtual void UMUAL(int cc, int s,
+                int RdLo, int RdHi, int Rm, int Rs);
+    virtual void SMULL(int cc, int s,
+                int RdLo, int RdHi, int Rm, int Rs);
+    virtual void SMUAL(int cc, int s,
+                int RdLo, int RdHi, int Rm, int Rs);
+
+    virtual void B(int cc, uint32_t* pc);
+    virtual void BL(int cc, uint32_t* pc);
+    virtual void BX(int cc, int Rn);
+    virtual void label(const char* theLabel);
+    virtual void B(int cc, const char* label);
+    virtual void BL(int cc, const char* label);
+
+    virtual uint32_t* pcForLabel(const char* label);
+
+    virtual void ADDR_LDR(int cc, int Rd,
+                int Rn, uint32_t offset = 0);
+    virtual void ADDR_ADD(int cc, int s, int Rd,
+                int Rn, uint32_t Op2);
+    virtual void ADDR_SUB(int cc, int s, int Rd,
+                int Rn, uint32_t Op2);
+    virtual void ADDR_STR (int cc, int Rd,
+                int Rn, uint32_t offset = 0);
+
+    virtual void LDR (int cc, int Rd,
+                int Rn, uint32_t offset = 0);
+    virtual void LDRB(int cc, int Rd,
+                int Rn, uint32_t offset = 0);
+    virtual void STR (int cc, int Rd,
+                int Rn, uint32_t offset = 0);
+    virtual void STRB(int cc, int Rd,
+                int Rn, uint32_t offset = 0);
+    virtual void LDRH (int cc, int Rd,
+                int Rn, uint32_t offset = 0);
+    virtual void LDRSB(int cc, int Rd,
+                int Rn, uint32_t offset = 0);
+    virtual void LDRSH(int cc, int Rd,
+                int Rn, uint32_t offset = 0);
+    virtual void STRH (int cc, int Rd,
+                int Rn, uint32_t offset = 0);
+
+
+    virtual void LDM(int cc, int dir,
+                int Rn, int W, uint32_t reg_list);
+    virtual void STM(int cc, int dir,
+                int Rn, int W, uint32_t reg_list);
+
+    virtual void SWP(int cc, int Rn, int Rd, int Rm);
+    virtual void SWPB(int cc, int Rn, int Rd, int Rm);
+    virtual void SWI(int cc, uint32_t comment);
+
+    virtual void PLD(int Rn, uint32_t offset);
+    virtual void CLZ(int cc, int Rd, int Rm);
+    virtual void QADD(int cc, int Rd, int Rm, int Rn);
+    virtual void QDADD(int cc, int Rd, int Rm, int Rn);
+    virtual void QSUB(int cc, int Rd, int Rm, int Rn);
+    virtual void QDSUB(int cc, int Rd, int Rm, int Rn);
+    virtual void SMUL(int cc, int xy,
+                int Rd, int Rm, int Rs);
+    virtual void SMULW(int cc, int y,
+                int Rd, int Rm, int Rs);
+    virtual void SMLA(int cc, int xy,
+                int Rd, int Rm, int Rs, int Rn);
+    virtual void SMLAL(int cc, int xy,
+                int RdHi, int RdLo, int Rs, int Rm);
+    virtual void SMLAW(int cc, int y,
+                int Rd, int Rm, int Rs, int Rn);
+    virtual void UXTB16(int cc, int Rd, int Rm, int rotate);
+    virtual void UBFX(int cc, int Rd, int Rn, int lsb, int width);
+
+private:
+    ArmToAarch64Assembler(const ArmToAarch64Assembler& rhs);
+    ArmToAarch64Assembler& operator = (const ArmToAarch64Assembler& rhs);
+
+    // -----------------------------------------------------------------------
+    // helper functions
+    // -----------------------------------------------------------------------
+
+    void dataTransfer(int operation, int cc, int Rd, int Rn,
+                      uint32_t operand_type, uint32_t size = 32);
+    void dataProcessingCommon(int opcode, int s,
+                      int Rd, int Rn, uint32_t Op2);
+
+    // -----------------------------------------------------------------------
+    // Aarch64 instructions
+    // -----------------------------------------------------------------------
+    uint32_t A64_B_COND(uint32_t cc, uint32_t offset);
+    uint32_t A64_RET(uint32_t Rn);
+
+    uint32_t A64_LDRSTR_Wm_SXTW_0(uint32_t operation,
+                                uint32_t size, uint32_t Rt,
+                                uint32_t Rn, uint32_t Rm);
+
+    uint32_t A64_STR_IMM_PreIndex(uint32_t Rt, uint32_t Rn, int32_t simm);
+    uint32_t A64_LDR_IMM_PostIndex(uint32_t Rt,uint32_t Rn, int32_t simm);
+
+    uint32_t A64_ADD_X_Wm_SXTW(uint32_t Rd, uint32_t Rn, uint32_t Rm,
+                               uint32_t amount);
+    uint32_t A64_SUB_X_Wm_SXTW(uint32_t Rd, uint32_t Rn, uint32_t Rm,
+                               uint32_t amount);
+
+    uint32_t A64_ADD_IMM_X(uint32_t Rd, uint32_t Rn,
+                           uint32_t imm, uint32_t shift = 0);
+    uint32_t A64_SUB_IMM_X(uint32_t Rd, uint32_t Rn,
+                           uint32_t imm, uint32_t shift = 0);
+
+    uint32_t A64_ADD_X(uint32_t Rd, uint32_t Rn,
+                       uint32_t Rm, uint32_t shift = 0, uint32_t amount = 0);
+    uint32_t A64_ADD_W(uint32_t Rd, uint32_t Rn, uint32_t Rm,
+                       uint32_t shift = 0, uint32_t amount = 0);
+    uint32_t A64_SUB_W(uint32_t Rd, uint32_t Rn, uint32_t Rm,
+                       uint32_t shift = 0, uint32_t amount = 0,
+                       uint32_t setflag = 0);
+    uint32_t A64_AND_W(uint32_t Rd, uint32_t Rn,
+                       uint32_t Rm, uint32_t shift = 0, uint32_t amount = 0);
+    uint32_t A64_ORR_W(uint32_t Rd, uint32_t Rn,
+                       uint32_t Rm, uint32_t shift = 0, uint32_t amount = 0);
+    uint32_t A64_ORN_W(uint32_t Rd, uint32_t Rn,
+                       uint32_t Rm, uint32_t shift = 0, uint32_t amount = 0);
+
+    uint32_t A64_MOVZ_W(uint32_t Rd, uint32_t imm, uint32_t shift);
+    uint32_t A64_MOVZ_X(uint32_t Rd, uint32_t imm, uint32_t shift);
+    uint32_t A64_MOVK_W(uint32_t Rd, uint32_t imm, uint32_t shift);
+
+    uint32_t A64_SMADDL(uint32_t Rd, uint32_t Rn, uint32_t Rm, uint32_t Ra);
+    uint32_t A64_MADD_W(uint32_t Rd, uint32_t Rn, uint32_t Rm, uint32_t Ra);
+
+    uint32_t A64_SBFM_W(uint32_t Rd, uint32_t Rn,
+                        uint32_t immr, uint32_t imms);
+    uint32_t A64_UBFM_W(uint32_t Rd, uint32_t Rn,
+                        uint32_t immr, uint32_t imms);
+    uint32_t A64_UBFM_X(uint32_t Rd, uint32_t Rn,
+                        uint32_t immr, uint32_t imms);
+
+    uint32_t A64_EXTR_W(uint32_t Rd, uint32_t Rn, uint32_t Rm, uint32_t lsb);
+    uint32_t A64_CSEL_X(uint32_t Rd, uint32_t Rn, uint32_t Rm, uint32_t cond);
+    uint32_t A64_CSEL_W(uint32_t Rd, uint32_t Rn, uint32_t Rm, uint32_t cond);
+
+    uint32_t*       mBase;
+    uint32_t*       mPC;
+    uint32_t*       mPrologPC;
+    int64_t         mDuration;
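+    // Register numbers reserved as scratch (and zero) registers while
+    // generating AArch64 code.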
+    uint32_t        mTmpReg1, mTmpReg2, mTmpReg3, mZeroReg;
+
+    struct branch_target_t {
+        inline branch_target_t() : label(0), pc(0) { }
+        inline branch_target_t(const char* l, uint32_t* p)
+            : label(l), pc(p) { }
+        const char* label;
+        uint32_t*   pc;
+    };
+
+    sp<Assembly>    mAssembly;
+    Vector<branch_target_t>                 mBranchTargets;
+    KeyedVector< const char*, uint32_t* >   mLabels;
+    KeyedVector< uint32_t*, const char* >   mLabelsInverseMapping;
+    KeyedVector< uint32_t*, const char* >   mComments;
+
+    enum operand_type_t
+    {
+        OPERAND_REG = 0x20,
+        OPERAND_IMM,
+        OPERAND_REG_IMM,
+        OPERAND_REG_OFFSET,
+        OPERAND_UNSUPPORTED
+    };
+
+    struct addr_mode_t {
+        int32_t         immediate;
+        bool            writeback;
+        bool            preindex;
+        bool            postindex;
+        int32_t         reg_imm_Rm;
+        int32_t         reg_imm_type;
+        uint32_t        reg_imm_shift;
+        int32_t         reg_offset;
+    } mAddrMode;
+
+};
+
+}; // namespace android
+
+#endif //ANDROID_ARMTOAARCH64ASSEMBLER_H
diff --git a/libpixelflinger/codeflinger/Aarch64Disassembler.cpp b/libpixelflinger/codeflinger/Aarch64Disassembler.cpp
new file mode 100644
index 0000000..4bb97b4
--- /dev/null
+++ b/libpixelflinger/codeflinger/Aarch64Disassembler.cpp
@@ -0,0 +1,316 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <inttypes.h>
+#include <string.h>
+
+struct disasm_table_entry_t
+{
+    uint32_t       mask;
+    uint32_t       value;
+    const char*    instr_template;
+};
+
+
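+// An instruction matches an entry when (code & mask) == value; the first
+// matching template wins, and each <token> in it is expanded by decode_token().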
+static disasm_table_entry_t disasm_table[] =
+{
+    {0xff000000, 0x91000000, "add <xd|sp>, <xn|sp>, #<imm1>, <shift1>"},
+    {0xff000000, 0xd1000000, "sub <xd|sp>, <xn|sp>, #<imm1>, <shift1>"},
+    {0xff200000, 0x8b000000, "add <xd>, <xn>, <xm>, <shift2> #<amt1>"},
+    {0xff200000, 0x0b000000, "add <wd>, <wn>, <wm>, <shift2> #<amt1>"},
+    {0xff200000, 0x4b000000, "sub <wd>, <wn>, <wm>, <shift2> #<amt1>"},
+    {0xff200000, 0x6b000000, "subs <wd>, <wn>, <wm>, <shift2> #<amt1>"},
+    {0xff200000, 0x0a000000, "and <wd>, <wn>, <wm>, <shift2> #<amt1>"},
+    {0xff200000, 0x2a000000, "orr <wd>, <wn>, <wm>, <shift2> #<amt1>"},
+    {0xff200000, 0x2a200000, "orn <wd>, <wn>, <wm>, <shift2> #<amt1>"},
+    {0xff800000, 0x72800000, "movk <wd>, #<imm2>, lsl #<shift3>"},
+    {0xff800000, 0x52800000, "movz <wd>, #<imm2>, lsl #<shift3>"},
+    {0xff800000, 0xd2800000, "movz <xd>, #<imm2>, lsl #<shift3>"},
+    {0xffe00c00, 0x1a800000, "csel <wd>, <wn>, <wm>, <cond1>"},
+    {0xffe00c00, 0x9a800000, "csel <xd>, <xn>, <xm>, <cond1>"},
+    {0xffe00c00, 0x5a800000, "csinv <wd>, <wn>, <wm>, <cond1>"},
+    {0xffe08000, 0x1b000000, "madd <wd>, <wn>, <wm>, <wa>"},
+    {0xffe08000, 0x9b200000, "smaddl <xd>, <wn>, <wm>, <xa>"},
+    {0xffe04c00, 0xb8604800, "ldr <wt>, [<xn|sp>, <r1><m1>, <ext1> #<amt2>]"},
+    {0xffe04c00, 0xb8204800, "str <wt>, [<xn|sp>, <r1><m1>, <ext1> #<amt2>]"},
+    {0xffe04c00, 0xf8604800, "ldr <xt>, [<xn|sp>, <r1><m1>, <ext1> #<amt3>]"},
+    {0xffe04c00, 0xf8204800, "str <xt>, [<xn|sp>, <r1><m1>, <ext1> #<amt3>]"},
+    {0xffe04c00, 0x38604800, "ldrb <wt>, [<xn|sp>, <r1><m1>, <ext1> <amt5>]"},
+    {0xffe04c00, 0x38204800, "strb <wt>, [<xn|sp>, <r1><m1>, <ext1> <amt5>]"},
+    {0xffe04c00, 0x78604800, "ldrh <wt>, [<xn|sp>, <r1><m1>, <ext1> #<amt6>]"},
+    {0xffe04c00, 0x78204800, "strh <wt>, [<xn|sp>, <r1><m1>, <ext1> #<amt6>]"},
+    {0xffe00c00, 0xb8400400, "ldr <wt>, [<xn|sp>], #<simm1>"},
+    {0xffe00c00, 0xb8000c00, "str <wt>, [<xn|sp>, #<simm1>]!"},
+    {0xffc00000, 0x13000000, "sbfm <wd>, <wn>, #<immr1>, #<imms1>"},
+    {0xffc00000, 0x53000000, "ubfm <wd>, <wn>, #<immr1>, #<imms1>"},
+    {0xffc00000, 0xd3400000, "ubfm <xd>, <xn>, #<immr1>, #<imms1>"},
+    {0xffe00000, 0x13800000, "extr <wd>, <wn>, <wm>, #<lsb1>"},
+    {0xff000000, 0x54000000, "b.<cond2> <label1>"},
+    {0xfffffc1f, 0xd65f0000, "ret <xn>"},
+    {0xffe00000, 0x8b200000, "add <xd|sp>, <xn|sp>, <r2><m1>, <ext2> #<amt4>"},
+    {0xffe00000, 0xcb200000, "sub <xd|sp>, <xn|sp>, <r2><m1>, <ext2> #<amt4>"}
+};
+
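+// Extract the bit-field [msb:lsb] of 'instr'; bits_signed() also sign-extends
+// the field from bit msb.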
+static int32_t bits_signed(uint32_t instr, uint32_t msb, uint32_t lsb)
+{
+    int32_t value;
+    value   = ((int32_t)instr) << (31 - msb);
+    value >>= (31 - msb);
+    value >>= lsb;
+    return value;
+}
+static uint32_t bits_unsigned(uint32_t instr, uint32_t msb, uint32_t lsb)
+{
+    uint32_t width = msb - lsb + 1;
+    uint32_t mask  = (1 << width) - 1;
+    return ((instr >> lsb) & mask);
+}
+
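+// Copy the next template token into 'token': either a whole <placeholder>
+// (from '<' through '>') or the literal text up to the next '<'.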
+static void get_token(const char *instr, uint32_t index, char *token)
+{
+    uint32_t i, j;
+    for(i = index, j = 0; i < strlen(instr); ++i)
+    {
+        if(instr[index] == '<' && instr[i] == '>')
+        {
+            token[j++] = instr[i];
+            break;
+        }
+        else if(instr[index] != '<' && instr[i] == '<')
+        {
+            break;
+        }
+        else
+        {
+            token[j++] = instr[i];
+        }
+    }
+    token[j] = '\0';
+    return;
+}
+
+
+static const char * token_cc_table[] =
+{
+    "eq", "ne", "cs", "cc", "mi",
+    "pl", "vs", "vc", "hi", "ls",
+    "ge", "lt", "gt", "le", "al", "nv"
+};
+
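+// Print a general-purpose register operand; register number 31 names the
+// zero register (wzr/xzr) in these encodings.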
+static void decode_rx_zr_token(uint32_t reg, const char *prefix, char *instr_part)
+{
+    if(reg == 31)
+        sprintf(instr_part, "%s%s", prefix, "zr");
+    else
+        sprintf(instr_part, "%s%d", prefix, reg);
+}
+
+static void decode_token(uint32_t code, char *token, char *instr_part)
+{
+    if(strcmp(token, "<imm1>") == 0)
+        sprintf(instr_part, "0x%x", bits_unsigned(code, 21,10));
+    else if(strcmp(token, "<imm2>") == 0)
+        sprintf(instr_part, "0x%x", bits_unsigned(code, 20,5));
+    else if(strcmp(token, "<shift1>") == 0)
+        sprintf(instr_part, "lsl #%d", bits_unsigned(code, 23,22) * 12);
+    else if(strcmp(token, "<shift2>") == 0)
+    {
+        static const char * shift2_table[] = { "lsl", "lsr", "asr", "ror"};
+        sprintf(instr_part, "%s", shift2_table[bits_unsigned(code, 23,22)]);
+    }
+    else if(strcmp(token, "<shift3>") == 0)
+        sprintf(instr_part, "%d", bits_unsigned(code, 22,21) * 16);
+    else if(strcmp(token, "<amt1>") == 0)
+        sprintf(instr_part, "%d", bits_unsigned(code, 15,10));
+    else if(strcmp(token, "<amt2>") == 0)
+        sprintf(instr_part, "%d", bits_unsigned(code, 12,12) * 2);
+    else if(strcmp(token, "<amt3>") == 0)
+        sprintf(instr_part, "%d", bits_unsigned(code, 12,12) * 3);
+    else if(strcmp(token, "<amt4>") == 0)
+        sprintf(instr_part, "%d", bits_unsigned(code, 12,10));
+    else if(strcmp(token, "<amt5>") == 0)
+    {
+        static const char * amt5_table[] = {"", "#0"};
+        sprintf(instr_part, "%s", amt5_table[bits_unsigned(code, 12,12)]);
+    }
+    else if(strcmp(token, "<amt6>") == 0)
+        sprintf(instr_part, "%d", bits_unsigned(code, 12,12));
+    else if(strcmp(token, "<simm1>") == 0)
+        sprintf(instr_part, "%d", bits_signed(code, 20,12));
+    else if(strcmp(token, "<immr1>") == 0)
+        sprintf(instr_part, "%d", bits_unsigned(code, 21,16));
+    else if(strcmp(token, "<imms1>") == 0)
+        sprintf(instr_part, "%d", bits_unsigned(code, 15,10));
+    else if(strcmp(token, "<lsb1>") == 0)
+        sprintf(instr_part, "%d", bits_unsigned(code, 15,10));
+    else if(strcmp(token, "<cond1>") == 0)
+        sprintf(instr_part, "%s", token_cc_table[bits_unsigned(code, 15,12)]);
+    else if(strcmp(token, "<cond2>") == 0)
+        sprintf(instr_part, "%s", token_cc_table[bits_unsigned(code, 4,0)]);
+    else if(strcmp(token, "<r1>") == 0)
+    {
+        static const char * token_r1_table[] =
+        {
+            "reserved", "reserved", "w", "x",
+            "reserved", "reserved", "w", "x"
+        };
+        sprintf(instr_part, "%s", token_r1_table[bits_unsigned(code, 15,13)]);
+    }
+    else if(strcmp(token, "<r2>") == 0)
+    {
+        static const char * token_r2_table[] =
+        {
+                "w","w","w", "x", "w", "w", "w", "x"
+        };
+        sprintf(instr_part, "%s", token_r2_table[bits_unsigned(code, 15,13)]);
+    }
+    else if(strcmp(token, "<m1>") == 0)
+    {
+        uint32_t reg = bits_unsigned(code, 20,16);
+        if(reg == 31)
+            sprintf(instr_part, "%s", "zr");
+        else
+            sprintf(instr_part, "%d", reg);
+    }
+    else if(strcmp(token, "<ext1>") == 0)
+    {
+        static const char * token_ext1_table[] =
+        {
+             "reserved","reserved","uxtw", "lsl",
+             "reserved","reserved", "sxtw", "sxtx"
+        };
+        sprintf(instr_part, "%s", token_ext1_table[bits_unsigned(code, 15,13)]);
+    }
+    else if(strcmp(token, "<ext2>") == 0)
+    {
+        static const char * token_ext2_table[] =
+        {
+                "uxtb","uxth","uxtw","uxtx",
+                "sxtb","sxth","sxtw","sxtx"
+        };
+        sprintf(instr_part, "%s", token_ext2_table[bits_unsigned(code, 15,13)]);
+    }
+    else if (strcmp(token, "<label1>") == 0)
+    {
+        int32_t offset = bits_signed(code, 23,5) * 4;
+        if(offset > 0)
+            sprintf(instr_part, "#.+%d", offset);
+        else
+            sprintf(instr_part, "#.-%d", -offset);
+    }
+    else if (strcmp(token, "<xn|sp>") == 0)
+    {
+        uint32_t reg = bits_unsigned(code, 9, 5);
+        if(reg == 31)
+            sprintf(instr_part, "%s", "sp");
+        else
+            sprintf(instr_part, "x%d", reg);
+    }
+    else if (strcmp(token, "<xd|sp>") == 0)
+    {
+        uint32_t reg = bits_unsigned(code, 4, 0);
+        if(reg == 31)
+            sprintf(instr_part, "%s", "sp");
+        else
+            sprintf(instr_part, "x%d", reg);
+    }
+    else if (strcmp(token, "<xn>") == 0)
+        decode_rx_zr_token(bits_unsigned(code, 9, 5), "x", instr_part);
+    else if (strcmp(token, "<xd>") == 0)
+        decode_rx_zr_token(bits_unsigned(code, 4, 0), "x", instr_part);
+    else if (strcmp(token, "<xm>") == 0)
+        decode_rx_zr_token(bits_unsigned(code, 20, 16), "x", instr_part);
+    else if (strcmp(token, "<xa>") == 0)
+        decode_rx_zr_token(bits_unsigned(code, 14, 10), "x", instr_part);
+    else if (strcmp(token, "<xt>") == 0)
+        decode_rx_zr_token(bits_unsigned(code, 4, 0), "x", instr_part);
+    else if (strcmp(token, "<wn>") == 0)
+        decode_rx_zr_token(bits_unsigned(code, 9, 5), "w", instr_part);
+    else if (strcmp(token, "<wd>") == 0)
+        decode_rx_zr_token(bits_unsigned(code, 4, 0), "w", instr_part);
+    else if (strcmp(token, "<wm>") == 0)
+        decode_rx_zr_token(bits_unsigned(code, 20, 16), "w", instr_part);
+    else if (strcmp(token, "<wa>") == 0)
+        decode_rx_zr_token(bits_unsigned(code, 14, 10), "w", instr_part);
+    else if (strcmp(token, "<wt>") == 0)
+        decode_rx_zr_token(bits_unsigned(code, 4, 0), "w", instr_part);
+    else
+    {
+        sprintf(instr_part, "error");
+    }
+    return;
+}
+
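+// Disassemble one 32-bit instruction word into the caller-supplied 'instr'
+// buffer. Returns 0 on success, -1 if 'instr' is NULL or the opcode does not
+// match any entry in disasm_table.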
+int aarch64_disassemble(uint32_t code, char* instr)
+{
+    uint32_t i;
+    char token[256];
+    char instr_part[256];
+
+    if(instr == NULL)
+        return -1;
+
+    bool matched = false;
+    disasm_table_entry_t *entry = NULL;
+    for(i = 0; i < sizeof(disasm_table)/sizeof(disasm_table_entry_t); ++i)
+    {
+        entry = &disasm_table[i];
+        if((code & entry->mask) == entry->value)
+        {
+            matched = true;
+            break;
+        }
+    }
+    if(matched == false)
+    {
+        strcpy(instr, "Unknown Instruction");
+        return -1;
+    }
+    else
+    {
+        uint32_t index = 0;
+        uint32_t length = strlen(entry->instr_template);
+        instr[0] = '\0';
+        do
+        {
+            get_token(entry->instr_template, index, token);
+            if(token[0] == '<')
+            {
+                decode_token(code, token, instr_part);
+                strcat(instr, instr_part);
+            }
+            else
+            {
+                strcat(instr, token);
+            }
+            index += strlen(token);
+        }while(index < length);
+        return 0;
+    }
+}
diff --git a/libpixelflinger/codeflinger/Aarch64Disassembler.h b/libpixelflinger/codeflinger/Aarch64Disassembler.h
new file mode 100644
index 0000000..177d692
--- /dev/null
+++ b/libpixelflinger/codeflinger/Aarch64Disassembler.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef ANDROID_AARCH64DISASSEMBLER_H
+#define ANDROID_AARCH64DISASSEMBLER_H
+
+#include <inttypes.h>
+int aarch64_disassemble(uint32_t code, char* instr);
+
+#endif //ANDROID_AARCH64DISASSEMBLER_H
diff --git a/libpixelflinger/codeflinger/CodeCache.cpp b/libpixelflinger/codeflinger/CodeCache.cpp
index 58fde7e..4fe30d9 100644
--- a/libpixelflinger/codeflinger/CodeCache.cpp
+++ b/libpixelflinger/codeflinger/CodeCache.cpp
@@ -34,7 +34,7 @@
 
 // ----------------------------------------------------------------------------
 
-#if defined(__arm__)
+#if defined(__arm__) || defined(__aarch64__)
 #include <unistd.h>
 #include <errno.h>
 #endif
@@ -201,7 +201,7 @@
         mCacheInUse += assemblySize;
         mWhen++;
         // synchronize caches...
-#if defined(__arm__) || defined(__mips__)
+#if defined(__arm__) || defined(__mips__) || defined(__aarch64__)
         const long base = long(assembly->base());
         const long curr = base + long(assembly->size());
         err = cacheflush(base, curr, 0);
diff --git a/libpixelflinger/codeflinger/GGLAssembler.cpp b/libpixelflinger/codeflinger/GGLAssembler.cpp
index 725495f..7f088db 100644
--- a/libpixelflinger/codeflinger/GGLAssembler.cpp
+++ b/libpixelflinger/codeflinger/GGLAssembler.cpp
@@ -901,6 +901,10 @@
         AND( AL, 0, d, s, imm(mask) );
         return;
     }
+    else if (getCodegenArch() == CODEGEN_ARCH_AARCH64) {
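+        // AArch64: imm() can materialize an arbitrary 32-bit mask, so the
+        // mask is applied directly and the negative-logic path is skipped.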
+        AND( AL, 0, d, s, imm(mask) );
+        return;
+    }
 
     int negative_logic = !isValidImmediate(mask);
     if (negative_logic) {
diff --git a/libpixelflinger/scanline.cpp b/libpixelflinger/scanline.cpp
index 96a71f3..bc774f3 100644
--- a/libpixelflinger/scanline.cpp
+++ b/libpixelflinger/scanline.cpp
@@ -31,8 +31,11 @@
 
 #include "codeflinger/CodeCache.h"
 #include "codeflinger/GGLAssembler.h"
+#if defined(__arm__)
 #include "codeflinger/ARMAssembler.h"
-#if defined(__mips__)
+#elif defined(__aarch64__)
+#include "codeflinger/Aarch64Assembler.h"
+#elif defined(__mips__)
 #include "codeflinger/MIPSAssembler.h"
 #endif
 //#include "codeflinger/ARMAssemblerOptimizer.h"
@@ -52,7 +55,7 @@
 #   define ANDROID_CODEGEN      ANDROID_CODEGEN_GENERATED
 #endif
 
-#if defined(__arm__) || defined(__mips__)
+#if defined(__arm__) || defined(__mips__) || defined(__aarch64__)
 #   define ANDROID_ARM_CODEGEN  1
 #else
 #   define ANDROID_ARM_CODEGEN  0
@@ -68,6 +71,8 @@
 
 #ifdef __mips__
 #define ASSEMBLY_SCRATCH_SIZE   4096
+#elif defined(__aarch64__)
+#define ASSEMBLY_SCRATCH_SIZE   8192
 #else
 #define ASSEMBLY_SCRATCH_SIZE   2048
 #endif
@@ -122,6 +127,9 @@
 extern "C" void scanline_t32cb16_arm(uint16_t *dst, uint32_t *src, size_t ct);
 extern "C" void scanline_col32cb16blend_neon(uint16_t *dst, uint32_t *col, size_t ct);
 extern "C" void scanline_col32cb16blend_arm(uint16_t *dst, uint32_t col, size_t ct);
+#elif defined(__aarch64__)
+extern "C" void scanline_t32cb16blend_aarch64(uint16_t*, uint32_t*, size_t);
+extern "C" void scanline_col32cb16blend_aarch64(uint16_t *dst, uint32_t col, size_t ct);
 #elif defined(__mips__)
 extern "C" void scanline_t32cb16blend_mips(uint16_t*, uint32_t*, size_t);
 #endif
@@ -276,6 +284,8 @@
 
 #if defined(__mips__)
 static CodeCache gCodeCache(32 * 1024);
+#elif defined(__aarch64__)
+static CodeCache gCodeCache(48 * 1024);
 #else
 static CodeCache gCodeCache(12 * 1024);
 #endif
@@ -394,6 +404,8 @@
 #endif
 #if defined(__mips__)
         GGLAssembler assembler( new ArmToMipsAssembler(a) );
+#elif defined(__aarch64__)
+        GGLAssembler assembler( new ArmToAarch64Assembler(a) );
 #endif
         // generate the scanline code for the given needs
         int err = assembler.scanline(c->state.needs, c);
@@ -2085,6 +2097,8 @@
 #else  // defined(__ARM_HAVE_NEON) && BYTE_ORDER == LITTLE_ENDIAN
     scanline_col32cb16blend_arm(dst, GGL_RGBA_TO_HOST(c->packed8888), ct);
 #endif // defined(__ARM_HAVE_NEON) && BYTE_ORDER == LITTLE_ENDIAN
+#elif ((ANDROID_CODEGEN >= ANDROID_CODEGEN_ASM) && defined(__aarch64__))
+    scanline_col32cb16blend_aarch64(dst, GGL_RGBA_TO_HOST(c->packed8888), ct);
 #else
     uint32_t s = GGL_RGBA_TO_HOST(c->packed8888);
     int sA = (s>>24);
@@ -2157,7 +2171,7 @@
 
 void scanline_t32cb16blend(context_t* c)
 {
-#if ((ANDROID_CODEGEN >= ANDROID_CODEGEN_ASM) && (defined(__arm__) || defined(__mips)))
+#if ((ANDROID_CODEGEN >= ANDROID_CODEGEN_ASM) && (defined(__arm__) || defined(__mips__) || defined(__aarch64__)))
     int32_t x = c->iterators.xl;
     size_t ct = c->iterators.xr - x;
     int32_t y = c->iterators.y;
@@ -2171,7 +2185,9 @@
 
 #ifdef __arm__
     scanline_t32cb16blend_arm(dst, src, ct);
-#else
+#elif defined(__aarch64__)
+    scanline_t32cb16blend_aarch64(dst, src, ct);
+#elif defined(__mips__)
     scanline_t32cb16blend_mips(dst, src, ct);
 #endif
 #else
diff --git a/libpixelflinger/tests/arch-aarch64/Android.mk b/libpixelflinger/tests/arch-aarch64/Android.mk
new file mode 100644
index 0000000..f096491
--- /dev/null
+++ b/libpixelflinger/tests/arch-aarch64/Android.mk
@@ -0,0 +1,3 @@
+ifeq ($(TARGET_ARCH),aarch64)
+include $(all-subdir-makefiles)
+endif
diff --git a/libpixelflinger/tests/arch-aarch64/assembler/Android.mk b/libpixelflinger/tests/arch-aarch64/assembler/Android.mk
new file mode 100644
index 0000000..10e06c4
--- /dev/null
+++ b/libpixelflinger/tests/arch-aarch64/assembler/Android.mk
@@ -0,0 +1,19 @@
+LOCAL_PATH:= $(call my-dir)
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES:= \
+    aarch64_assembler_test.cpp\
+    asm_test_jacket.S
+
+LOCAL_SHARED_LIBRARIES := \
+    libcutils \
+    libpixelflinger
+
+LOCAL_C_INCLUDES := \
+    system/core/libpixelflinger
+
+LOCAL_MODULE:= test-pixelflinger-aarch64-assembler-test
+
+LOCAL_MODULE_TAGS := tests
+
+include $(BUILD_EXECUTABLE)
diff --git a/libpixelflinger/tests/arch-aarch64/assembler/aarch64_assembler_test.cpp b/libpixelflinger/tests/arch-aarch64/assembler/aarch64_assembler_test.cpp
new file mode 100644
index 0000000..d3e57b3
--- /dev/null
+++ b/libpixelflinger/tests/arch-aarch64/assembler/aarch64_assembler_test.cpp
@@ -0,0 +1,782 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+
+#include <sys/mman.h>
+#include <cutils/ashmem.h>
+#include <cutils/atomic.h>
+
+#define __STDC_FORMAT_MACROS
+#include <inttypes.h>
+
+#include "codeflinger/ARMAssemblerInterface.h"
+#include "codeflinger/Aarch64Assembler.h"
+using namespace android;
+
+#define TESTS_DATAOP_ENABLE             1
+#define TESTS_DATATRANSFER_ENABLE       1
+#define TESTS_LDMSTM_ENABLE             1
+#define TESTS_REG_CORRUPTION_ENABLE     0
+
+void *instrMem;
+uint32_t  instrMemSize = 128 * 1024;
+char     dataMem[8192];
+
+typedef void (*asm_function_t)();
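+// asm_test_jacket (asm_test_jacket.S) runs 'function' with registers preloaded
+// from regs[], then stores the resulting register and condition-flag state
+// back into regs[] and flags[].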
+extern "C" void asm_test_jacket(asm_function_t function,
+                                int64_t regs[], int32_t flags[]);
+
+#define MAX_32BIT (uint32_t)(((uint64_t)1 << 32) - 1)
+const uint32_t NA = 0;
+const uint32_t NUM_REGS = 32;
+const uint32_t NUM_FLAGS = 16;
+
+enum instr_t
+{
+    INSTR_ADD,
+    INSTR_SUB,
+    INSTR_AND,
+    INSTR_ORR,
+    INSTR_RSB,
+    INSTR_BIC,
+    INSTR_CMP,
+    INSTR_MOV,
+    INSTR_MVN,
+    INSTR_MUL,
+    INSTR_MLA,
+    INSTR_SMULBB,
+    INSTR_SMULBT,
+    INSTR_SMULTB,
+    INSTR_SMULTT,
+    INSTR_SMULWB,
+    INSTR_SMULWT,
+    INSTR_SMLABB,
+    INSTR_UXTB16,
+    INSTR_UBFX,
+    INSTR_ADDR_ADD,
+    INSTR_ADDR_SUB,
+    INSTR_LDR,
+    INSTR_LDRB,
+    INSTR_LDRH,
+    INSTR_ADDR_LDR,
+    INSTR_LDM,
+    INSTR_STR,
+    INSTR_STRB,
+    INSTR_STRH,
+    INSTR_ADDR_STR,
+    INSTR_STM
+};
+
+enum shift_t
+{
+    SHIFT_LSL,
+    SHIFT_LSR,
+    SHIFT_ASR,
+    SHIFT_ROR,
+    SHIFT_NONE
+};
+
+enum offset_t
+{
+    REG_SCALE_OFFSET,
+    REG_OFFSET,
+    IMM8_OFFSET,
+    IMM12_OFFSET,
+    NO_OFFSET
+};
+
+enum cond_t
+{
+    EQ, NE, CS, CC, MI, PL, VS, VC, HI, LS, GE, LT, GT, LE, AL, NV,
+    HS = CS,
+    LO = CC
+};
+
+const char * cc_code[] =
+{
+    "EQ", "NE", "CS", "CC", "MI", "PL", "VS", "VC",
+    "HI", "LS","GE","LT", "GT", "LE", "AL", "NV"
+};
+
+
+struct dataOpTest_t
+{
+    uint32_t id;
+    instr_t  op;
+    uint32_t preFlag;
+    cond_t   cond;
+    bool     setFlags;
+    uint64_t RnValue;
+    uint64_t RsValue;
+    bool     immediate;
+    uint32_t immValue;
+    uint64_t RmValue;
+    uint32_t shiftMode;
+    uint32_t shiftAmount;
+    uint64_t RdValue;
+    bool     checkRd;
+    uint64_t postRdValue;
+    bool     checkFlag;
+    uint32_t postFlag;
+};
+
+struct dataTransferTest_t
+{
+    uint32_t id;
+    instr_t op;
+    uint32_t preFlag;
+    cond_t   cond;
+    bool     setMem;
+    uint64_t memOffset;
+    uint64_t memValue;
+    uint64_t RnValue;
+    offset_t offsetType;
+    uint64_t RmValue;
+    uint32_t immValue;
+    bool     writeBack;
+    bool     preIndex;
+    bool     postIndex;
+    uint64_t RdValue;
+    uint64_t postRdValue;
+    uint64_t postRnValue;
+    bool     checkMem;
+    uint64_t postMemOffset;
+    uint32_t postMemLength;
+    uint64_t postMemValue;
+};
+
+
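+// Each row is a dataOpTest_t in declaration order: {id, op, preFlag, cond,
+// setFlags, RnValue, RsValue, immediate, immValue, RmValue, shiftMode,
+// shiftAmount, RdValue, checkRd, postRdValue, checkFlag, postFlag}.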
+dataOpTest_t dataOpTests [] =
+{
+     {0xA000,INSTR_ADD,AL,AL,0,1,NA,1,MAX_32BIT ,NA,NA,NA,NA,1,0,0,0},
+     {0xA001,INSTR_ADD,AL,AL,0,1,NA,1,MAX_32BIT -1,NA,NA,NA,NA,1,MAX_32BIT,0,0},
+     {0xA002,INSTR_ADD,AL,AL,0,1,NA,0,NA,MAX_32BIT ,NA,NA,NA,1,0,0,0},
+     {0xA003,INSTR_ADD,AL,AL,0,1,NA,0,NA,MAX_32BIT -1,NA,NA,NA,1,MAX_32BIT,0,0},
+     {0xA004,INSTR_ADD,AL,AL,0,1,NA,0,0,MAX_32BIT ,SHIFT_LSL,0,NA,1,0,0,0},
+     {0xA005,INSTR_ADD,AL,AL,0,1,NA,0,0,MAX_32BIT ,SHIFT_LSL,31,NA,1,0x80000001,0,0},
+     {0xA006,INSTR_ADD,AL,AL,0,1,NA,0,0,3,SHIFT_LSR,1,NA,1,2,0,0},
+     {0xA007,INSTR_ADD,AL,AL,0,1,NA,0,0,MAX_32BIT ,SHIFT_LSR,31,NA,1,2,0,0},
+     {0xA008,INSTR_ADD,AL,AL,0,0,NA,0,0,3,SHIFT_ASR,1,NA,1,1,0,0},
+     {0xA009,INSTR_ADD,AL,AL,0,1,NA,0,0,MAX_32BIT ,SHIFT_ASR,31,NA,1,0,0,0},
+     {0xA010,INSTR_AND,AL,AL,0,1,NA,1,MAX_32BIT ,0,0,0,NA,1,1,0,0},
+     {0xA011,INSTR_AND,AL,AL,0,1,NA,1,MAX_32BIT -1,0,0,0,NA,1,0,0,0},
+     {0xA012,INSTR_AND,AL,AL,0,1,NA,0,0,MAX_32BIT ,0,0,NA,1,1,0,0},
+     {0xA013,INSTR_AND,AL,AL,0,1,NA,0,0,MAX_32BIT -1,0,0,NA,1,0,0,0},
+     {0xA014,INSTR_AND,AL,AL,0,1,NA,0,0,MAX_32BIT ,SHIFT_LSL,0,NA,1,1,0,0},
+     {0xA015,INSTR_AND,AL,AL,0,1,NA,0,0,MAX_32BIT ,SHIFT_LSL,31,NA,1,0,0,0},
+     {0xA016,INSTR_AND,AL,AL,0,1,NA,0,0,3,SHIFT_LSR,1,NA,1,1,0,0},
+     {0xA017,INSTR_AND,AL,AL,0,1,NA,0,0,MAX_32BIT ,SHIFT_LSR,31,NA,1,1,0,0},
+     {0xA018,INSTR_AND,AL,AL,0,0,NA,0,0,3,SHIFT_ASR,1,NA,1,0,0,0},
+     {0xA019,INSTR_AND,AL,AL,0,1,NA,0,0,MAX_32BIT ,SHIFT_ASR,31,NA,1,1,0,0},
+     {0xA020,INSTR_ORR,AL,AL,0,3,NA,1,MAX_32BIT ,0,0,0,NA,1,MAX_32BIT,0,0},
+     {0xA021,INSTR_ORR,AL,AL,0,2,NA,1,MAX_32BIT -1,0,0,0,NA,1,MAX_32BIT-1,0,0},
+     {0xA022,INSTR_ORR,AL,AL,0,3,NA,0,0,MAX_32BIT ,0,0,NA,1,MAX_32BIT,0,0},
+     {0xA023,INSTR_ORR,AL,AL,0,2,NA,0,0,MAX_32BIT -1,0,0,NA,1,MAX_32BIT-1,0,0},
+     {0xA024,INSTR_ORR,AL,AL,0,1,NA,0,0,MAX_32BIT ,SHIFT_LSL,0,NA,1,MAX_32BIT,0,0},
+     {0xA025,INSTR_ORR,AL,AL,0,1,NA,0,0,MAX_32BIT ,SHIFT_LSL,31,NA,1,0x80000001,0,0},
+     {0xA026,INSTR_ORR,AL,AL,0,1,NA,0,0,3,SHIFT_LSR,1,NA,1,1,0,0},
+     {0xA027,INSTR_ORR,AL,AL,0,0,NA,0,0,MAX_32BIT ,SHIFT_LSR,31,NA,1,1,0,0},
+     {0xA028,INSTR_ORR,AL,AL,0,0,NA,0,0,3,SHIFT_ASR,1,NA,1,1,0,0},
+     {0xA029,INSTR_ORR,AL,AL,0,1,NA,0,0,MAX_32BIT ,SHIFT_ASR,31,NA,1,MAX_32BIT ,0,0},
+     {0xA030,INSTR_CMP,AL,AL,1,0x10000,NA,1,0x10000,0,0,0,NA,0,0,1,HS},
+     {0xA031,INSTR_CMP,AL,AL,1,0x00000,NA,1,0x10000,0,0,0,NA,0,0,1,CC},
+     {0xA032,INSTR_CMP,AL,AL,1,0x00000,NA,0,0,0x10000,0,0,NA,0,0,1,LT},
+     {0xA033,INSTR_CMP,AL,AL,1,0x10000,NA,0,0,0x10000,0,0,NA,0,0,1,EQ},
+     {0xA034,INSTR_CMP,AL,AL,1,0x00000,NA,0,0,0x10000,0,0,NA,0,0,1,LS},
+     {0xA035,INSTR_CMP,AL,AL,1,0x10000,NA,0,0,0x10000,0,0,NA,0,0,1,LS},
+     {0xA036,INSTR_CMP,AL,AL,1,0x10000,NA,0,0,0x00000,0,0,NA,0,0,1,HI},
+     {0xA037,INSTR_CMP,AL,AL,1,0x10000,NA,0,0,0x10000,0,0,NA,0,0,1,HS},
+     {0xA038,INSTR_CMP,AL,AL,1,0x10000,NA,0,0,0x00000,0,0,NA,0,0,1,HS},
+     {0xA039,INSTR_CMP,AL,AL,1,0x10000,NA,0,0,0x00000,0,0,NA,0,0,1,NE},
+     {0xA040,INSTR_CMP,AL,AL,1,0,NA,0,0,MAX_32BIT ,SHIFT_LSR,1,NA,0,0,1,LT},
+     {0xA041,INSTR_CMP,AL,AL,1,1,NA,0,0,MAX_32BIT ,SHIFT_LSR,31,NA,0,0,1,EQ},
+     {0xA042,INSTR_CMP,AL,AL,1,0,NA,0,0,0x10000,SHIFT_LSR,31,NA,0,0,1,LS},
+     {0xA043,INSTR_CMP,AL,AL,1,0x10000,NA,0,0,0x30000,SHIFT_LSR,1,NA,0,0,1,LS},
+     {0xA044,INSTR_CMP,AL,AL,1,0x10000,NA,0,0,0x00000,SHIFT_LSR,31,NA,0,0,1,HI},
+     {0xA045,INSTR_CMP,AL,AL,1,1,NA,0,0,MAX_32BIT ,SHIFT_LSR,31,NA,0,0,1,HS},
+     {0xA046,INSTR_CMP,AL,AL,1,0x10000,NA,0,0,0x2000,SHIFT_LSR,1,NA,0,0,1,HS},
+     {0xA047,INSTR_CMP,AL,AL,1,0,NA,0,0,MAX_32BIT ,SHIFT_LSR,1,NA,0,0,1,NE},
+     {0xA048,INSTR_CMP,AL,AL,1,0,NA,0,0,0x10000,SHIFT_ASR,2,NA,0,0,1,LT},
+     {0xA049,INSTR_CMP,AL,AL,1,MAX_32BIT ,NA,0,0,MAX_32BIT ,SHIFT_ASR,1,NA,0,0,1,EQ},
+     {0xA050,INSTR_CMP,AL,AL,1,MAX_32BIT ,NA,0,0,MAX_32BIT ,SHIFT_ASR,31,NA,0,0,1,LS},
+     {0xA051,INSTR_CMP,AL,AL,1,0,NA,0,0,0x10000,SHIFT_ASR,1,NA,0,0,1,LS},
+     {0xA052,INSTR_CMP,AL,AL,1,0x10000,NA,0,0,0x10000,SHIFT_ASR,1,NA,0,0,1,HI},
+     {0xA053,INSTR_CMP,AL,AL,1,1,NA,0,0,0x10000,SHIFT_ASR,31,NA,0,0,1,HS},
+     {0xA054,INSTR_CMP,AL,AL,1,1,NA,0,0,0x10000,SHIFT_ASR,16,NA,0,0,1,HS},
+     {0xA055,INSTR_CMP,AL,AL,1,1,NA,0,0,MAX_32BIT ,SHIFT_ASR,1,NA,0,0,1,NE},
+     {0xA056,INSTR_MUL,AL,AL,0,0,0x10000,0,0,0x10000,0,0,NA,1,0,0,0},
+     {0xA057,INSTR_MUL,AL,AL,0,0,0x1000,0,0,0x10000,0,0,NA,1,0x10000000,0,0},
+     {0xA058,INSTR_MUL,AL,AL,0,0,MAX_32BIT ,0,0,1,0,0,NA,1,MAX_32BIT ,0,0},
+     {0xA059,INSTR_MLA,AL,AL,0,0x10000,0x10000,0,0,0x10000,0,0,NA,1,0x10000,0,0},
+     {0xA060,INSTR_MLA,AL,AL,0,0x10000,0x1000,0,0,0x10000,0,0,NA,1,0x10010000,0,0},
+     {0xA061,INSTR_MLA,AL,AL,1,1,MAX_32BIT ,0,0,1,0,0,NA,1,0,1,PL},
+     {0xA062,INSTR_MLA,AL,AL,1,0,MAX_32BIT ,0,0,1,0,0,NA,1,MAX_32BIT ,1,MI},
+     {0xA063,INSTR_SUB,AL,AL,1,1 << 16,NA,1,1 << 16,NA,NA,NA,NA,1,0,1,PL},
+     {0xA064,INSTR_SUB,AL,AL,1,(1 << 16) + 1,NA,1,1 << 16,NA,NA,NA,NA,1,1,1,PL},
+     {0xA065,INSTR_SUB,AL,AL,1,0,NA,1,1 << 16,NA,NA,NA,NA,1,(uint32_t)(0 - (1<<16)),1,MI},
+     {0xA066,INSTR_SUB,MI,MI,0,2,NA,0,NA,1,NA,NA,2,1,1,0,NA},
+     {0xA067,INSTR_SUB,EQ,MI,0,2,NA,0,NA,1,NA,NA,2,1,2,0,NA},
+     {0xA068,INSTR_SUB,GT,GE,0,2,NA,1,1,NA,NA,NA,2,1,1,0,NA},
+     {0xA069,INSTR_SUB,LT,GE,0,2,NA,1,1,NA,NA,NA,2,1,2,0,NA},
+     {0xA070,INSTR_SUB,CS,HS,0,2,NA,1,1,NA,NA,NA,2,1,1,0,NA},
+     {0xA071,INSTR_SUB,CC,HS,0,2,NA,1,1,NA,NA,NA,2,1,2,0,NA},
+     {0xA072,INSTR_SUB,AL,AL,0,1,NA,1,1 << 16,0,0,0,NA,1,(uint32_t)(1 - (1 << 16)),0,NA},
+     {0xA073,INSTR_SUB,AL,AL,0,MAX_32BIT,NA,1,1,0,0,0,NA,1,MAX_32BIT  - 1,0,NA},
+     {0xA074,INSTR_SUB,AL,AL,0,1,NA,1,1,0,0,0,NA,1,0,0,NA},
+     {0xA075,INSTR_SUB,AL,AL,0,1,NA,0,NA,1 << 16,0,0,NA,1,(uint32_t)(1 - (1 << 16)),0,NA},
+     {0xA076,INSTR_SUB,AL,AL,0,MAX_32BIT,NA,0,NA,1,0,0,NA,1,MAX_32BIT  - 1,0,NA},
+     {0xA077,INSTR_SUB,AL,AL,0,1,NA,0,NA,1,0,0,NA,1,0,0,NA},
+     {0xA078,INSTR_SUB,AL,AL,0,1,NA,0,NA,1,SHIFT_LSL,16,NA,1,(uint32_t)(1 - (1 << 16)),0,NA},
+     {0xA079,INSTR_SUB,AL,AL,0,0x80000001,NA,0,NA,MAX_32BIT ,SHIFT_LSL,31,NA,1,1,0,NA},
+     {0xA080,INSTR_SUB,AL,AL,0,1,NA,0,NA,3,SHIFT_LSR,1,NA,1,0,0,NA},
+     {0xA081,INSTR_SUB,AL,AL,0,1,NA,0,NA,MAX_32BIT ,SHIFT_LSR,31,NA,1,0,0,NA},
+     {0xA082,INSTR_RSB,GT,GE,0,2,NA,1,0,NA,NA,NA,2,1,(uint32_t)-2,0,NA},
+     {0xA083,INSTR_RSB,LT,GE,0,2,NA,1,0,NA,NA,NA,2,1,2,0,NA},
+     {0xA084,INSTR_RSB,AL,AL,0,1,NA,1,1 << 16,NA,NA,NA,NA,1,(1 << 16) - 1,0,NA},
+     {0xA085,INSTR_RSB,AL,AL,0,MAX_32BIT,NA,1,1,NA,NA,NA,NA,1,(uint32_t) (1 - MAX_32BIT),0,NA},
+     {0xA086,INSTR_RSB,AL,AL,0,1,NA,1,1,NA,NA,NA,NA,1,0,0,NA},
+     {0xA087,INSTR_RSB,AL,AL,0,1,NA,0,NA,1 << 16,0,0,NA,1,(1 << 16) - 1,0,NA},
+     {0xA088,INSTR_RSB,AL,AL,0,MAX_32BIT,NA,0,NA,1,0,0,NA,1,(uint32_t) (1 - MAX_32BIT),0,NA},
+     {0xA089,INSTR_RSB,AL,AL,0,1,NA,0,NA,1,0,0,NA,1,0,0,NA},
+     {0xA090,INSTR_RSB,AL,AL,0,1,NA,0,NA,1,SHIFT_LSL,16,NA,1,(1 << 16) - 1,0,NA},
+     {0xA091,INSTR_RSB,AL,AL,0,0x80000001,NA,0,NA,MAX_32BIT ,SHIFT_LSL,31,NA,1,(uint32_t)-1,0,NA},
+     {0xA092,INSTR_RSB,AL,AL,0,1,NA,0,NA,3,SHIFT_LSR,1,NA,1,0,0,NA},
+     {0xA093,INSTR_RSB,AL,AL,0,1,NA,0,NA,MAX_32BIT ,SHIFT_LSR,31,NA,1,0,0,NA},
+     {0xA094,INSTR_MOV,AL,AL,0,NA,NA,1,0x80000001,NA,NA,NA,NA,1,0x80000001,0,0},
+     {0xA095,INSTR_MOV,AL,AL,0,NA,NA,0,0,0x80000001,0,0,NA,1,0x80000001,0,0},
+     {0xA096,INSTR_MOV,AL,AL,0,NA,NA,0,0,MAX_32BIT ,SHIFT_LSL,1,NA,1,MAX_32BIT -1,0,0},
+     {0xA097,INSTR_MOV,AL,AL,0,NA,NA,0,0,MAX_32BIT ,SHIFT_LSL,31,NA,1,0x80000000,0,0},
+     {0xA098,INSTR_MOV,AL,AL,0,NA,NA,0,0,3,SHIFT_LSR,1,NA,1,1,0,0},
+     {0xA099,INSTR_MOV,AL,AL,0,NA,NA,0,0,MAX_32BIT ,SHIFT_LSR,31,NA,1,1,0,0},
+     {0xA100,INSTR_MOV,AL,AL,0,NA,NA,0,0,3,SHIFT_ASR,1,NA,1,1,0,0},
+     {0xA101,INSTR_MOV,AL,AL,0,NA,NA,0,0,MAX_32BIT ,SHIFT_ASR,31,NA,1,MAX_32BIT ,0,0},
+     {0xA102,INSTR_MOV,AL,AL,0,NA,NA,0,0,3,SHIFT_ROR,1,NA,1,0x80000001,0,0},
+     {0xA103,INSTR_MOV,AL,AL,0,NA,NA,0,0,0x80000001,SHIFT_ROR,31,NA,1,3,0,0},
+     {0xA104,INSTR_MOV,AL,AL,1,NA,NA,0,0,MAX_32BIT -1,SHIFT_ASR,1,NA,1,MAX_32BIT,1,MI},
+     {0xA105,INSTR_MOV,AL,AL,1,NA,NA,0,0,3,SHIFT_ASR,1,NA,1,1,1,PL},
+     {0xA106,INSTR_MOV,PL,MI,0,NA,NA,1,0x80000001,NA,NA,NA,2,1,2,0,0},
+     {0xA107,INSTR_MOV,MI,MI,0,NA,NA,0,0,0x80000001,0,0,2,1,0x80000001,0,0},
+     {0xA108,INSTR_MOV,EQ,LT,0,NA,NA,1,0x80000001,NA,NA,NA,2,1,2,0,0},
+     {0xA109,INSTR_MOV,LT,LT,0,NA,NA,1,0x80000001,NA,NA,NA,2,1,0x80000001,0,0},
+     {0xA110,INSTR_MOV,GT,GE,0,NA,NA,0,0,MAX_32BIT ,SHIFT_LSL,1,2,1,MAX_32BIT -1,0,0},
+     {0xA111,INSTR_MOV,EQ,GE,0,NA,NA,0,0,MAX_32BIT ,SHIFT_LSL,31,2,1,0x80000000,0,0},
+     {0xA112,INSTR_MOV,LT,GE,0,NA,NA,0,0,MAX_32BIT ,SHIFT_LSL,31,2,1,2,0,0},
+     {0xA113,INSTR_MOV,GT,LE,0,NA,NA,0,0,MAX_32BIT ,SHIFT_LSL,1,2,1,2,0,0},
+     {0xA114,INSTR_MOV,EQ,LE,0,NA,NA,1,0x80000001,NA,NA,NA,2,1,0x80000001,0,0},
+     {0xA115,INSTR_MOV,LT,LE,0,NA,NA,0,0,MAX_32BIT ,SHIFT_LSL,31,2,1,0x80000000,0,0},
+     {0xA116,INSTR_MOV,EQ,GT,0,NA,NA,1,0x80000001,NA,NA,NA,2,1,2,0,0},
+     {0xA117,INSTR_MOV,GT,GT,0,NA,NA,1,0x80000001,NA,NA,NA,2,1,0x80000001,0,0},
+     {0xA118,INSTR_MOV,LE,GT,0,NA,NA,1,0x80000001,NA,NA,NA,2,1,2,0,0},
+     {0xA119,INSTR_MOV,EQ,GT,0,NA,NA,0,0,0x80000001,0,0,2,1,2,0,0},
+     {0xA120,INSTR_MOV,GT,GT,0,NA,NA,0,0,0x80000001,0,0,2,1,0x80000001,0,0},
+     {0xA121,INSTR_MOV,LE,GT,0,NA,NA,0,0,0x80000001,0,0,2,1,2,0,0},
+     {0xA122,INSTR_MOV,EQ,GT,0,NA,NA,0,0,MAX_32BIT ,SHIFT_LSL,1,2,1,2,0,0},
+     {0xA123,INSTR_MOV,GT,GT,0,NA,NA,0,0,MAX_32BIT ,SHIFT_LSL,1,2,1,MAX_32BIT -1,0,0},
+     {0xA124,INSTR_MOV,LE,GT,0,NA,NA,0,0,MAX_32BIT ,SHIFT_LSL,1,2,1,2,0,0},
+     {0xA125,INSTR_MOV,LO,HS,0,NA,NA,1,0x80000001,NA,NA,NA,2,1,2,0,0},
+     {0xA126,INSTR_MOV,HS,HS,0,NA,NA,1,0x80000001,NA,NA,NA,2,1,0x80000001,0,0},
+     {0xA127,INSTR_MVN,LO,HS,0,NA,NA,1,MAX_32BIT -1,NA,NA,NA,2,1,2,0,0},
+     {0xA128,INSTR_MVN,HS,HS,0,NA,NA,1,MAX_32BIT -1,NA,NA,NA,2,1,1,0,0},
+     {0xA129,INSTR_MVN,AL,AL,0,NA,NA,1,0,NA,NA,NA,2,1,MAX_32BIT,0,NA},
+     {0xA130,INSTR_MVN,AL,AL,0,NA,NA,0,NA,MAX_32BIT -1,NA,0,2,1,1,0,NA},
+     {0xA131,INSTR_MVN,AL,AL,0,NA,NA,0,NA,0x80000001,NA,0,2,1,0x7FFFFFFE,0,NA},
+     {0xA132,INSTR_BIC,AL,AL,0,1,NA,1,MAX_32BIT ,NA,NA,NA,NA,1,0,0,0},
+     {0xA133,INSTR_BIC,AL,AL,0,1,NA,1,MAX_32BIT -1,NA,NA,NA,NA,1,1,0,0},
+     {0xA134,INSTR_BIC,AL,AL,0,1,NA,0,0,MAX_32BIT ,0,0,NA,1,0,0,0},
+     {0xA135,INSTR_BIC,AL,AL,0,1,NA,0,0,MAX_32BIT -1,0,0,NA,1,1,0,0},
+     {0xA136,INSTR_BIC,AL,AL,0,0xF0,NA,0,0,3,SHIFT_ASR,1,NA,1,0xF0,0,0},
+     {0xA137,INSTR_BIC,AL,AL,0,0xF0,NA,0,0,MAX_32BIT ,SHIFT_ASR,31,NA,1,0,0,0},
+     {0xA138,INSTR_SMULBB,AL,AL,0,NA,0xABCDFFFF,0,NA,0xABCD0001,NA,NA,NA,1,0xFFFFFFFF,0,0},
+     {0xA139,INSTR_SMULBB,AL,AL,0,NA,0xABCD0001,0,NA,0xABCD0FFF,NA,NA,NA,1,0x00000FFF,0,0},
+     {0xA140,INSTR_SMULBB,AL,AL,0,NA,0xABCD0001,0,NA,0xABCDFFFF,NA,NA,NA,1,0xFFFFFFFF,0,0},
+     {0xA141,INSTR_SMULBB,AL,AL,0,NA,0xABCDFFFF,0,NA,0xABCDFFFF,NA,NA,NA,1,1,0,0},
+     {0xA142,INSTR_SMULBT,AL,AL,0,NA,0xFFFFABCD,0,NA,0xABCD0001,NA,NA,NA,1,0xFFFFFFFF,0,0},
+     {0xA143,INSTR_SMULBT,AL,AL,0,NA,0x0001ABCD,0,NA,0xABCD0FFF,NA,NA,NA,1,0x00000FFF,0,0},
+     {0xA144,INSTR_SMULBT,AL,AL,0,NA,0x0001ABCD,0,NA,0xABCDFFFF,NA,NA,NA,1,0xFFFFFFFF,0,0},
+     {0xA145,INSTR_SMULBT,AL,AL,0,NA,0xFFFFABCD,0,NA,0xABCDFFFF,NA,NA,NA,1,1,0,0},
+     {0xA146,INSTR_SMULTB,AL,AL,0,NA,0xABCDFFFF,0,NA,0x0001ABCD,NA,NA,NA,1,0xFFFFFFFF,0,0},
+     {0xA147,INSTR_SMULTB,AL,AL,0,NA,0xABCD0001,0,NA,0x0FFFABCD,NA,NA,NA,1,0x00000FFF,0,0},
+     {0xA148,INSTR_SMULTB,AL,AL,0,NA,0xABCD0001,0,NA,0xFFFFABCD,NA,NA,NA,1,0xFFFFFFFF,0,0},
+     {0xA149,INSTR_SMULTB,AL,AL,0,NA,0xABCDFFFF,0,NA,0xFFFFABCD,NA,NA,NA,1,1,0,0},
+     {0xA150,INSTR_SMULTT,AL,AL,0,NA,0xFFFFABCD,0,NA,0x0001ABCD,NA,NA,NA,1,0xFFFFFFFF,0,0},
+     {0xA151,INSTR_SMULTT,AL,AL,0,NA,0x0001ABCD,0,NA,0x0FFFABCD,NA,NA,NA,1,0x00000FFF,0,0},
+     {0xA152,INSTR_SMULTT,AL,AL,0,NA,0x0001ABCD,0,NA,0xFFFFABCD,NA,NA,NA,1,0xFFFFFFFF,0,0},
+     {0xA153,INSTR_SMULTT,AL,AL,0,NA,0xFFFFABCD,0,NA,0xFFFFABCD,NA,NA,NA,1,1,0,0},
+     {0xA154,INSTR_SMULWB,AL,AL,0,NA,0xABCDFFFF,0,NA,0x0001ABCD,NA,NA,NA,1,0xFFFFFFFE,0,0},
+     {0xA155,INSTR_SMULWB,AL,AL,0,NA,0xABCD0001,0,NA,0x0FFFABCD,NA,NA,NA,1,0x00000FFF,0,0},
+     {0xA156,INSTR_SMULWB,AL,AL,0,NA,0xABCD0001,0,NA,0xFFFFABCD,NA,NA,NA,1,0xFFFFFFFF,0,0},
+     {0xA157,INSTR_SMULWB,AL,AL,0,NA,0xABCDFFFF,0,NA,0xFFFFABCD,NA,NA,NA,1,0,0,0},
+     {0xA158,INSTR_SMULWT,AL,AL,0,NA,0xFFFFABCD,0,NA,0x0001ABCD,NA,NA,NA,1,0xFFFFFFFE,0,0},
+     {0xA159,INSTR_SMULWT,AL,AL,0,NA,0x0001ABCD,0,NA,0x0FFFABCD,NA,NA,NA,1,0x00000FFF,0,0},
+     {0xA160,INSTR_SMULWT,AL,AL,0,NA,0x0001ABCD,0,NA,0xFFFFABCD,NA,NA,NA,1,0xFFFFFFFF,0,0},
+     {0xA161,INSTR_SMULWT,AL,AL,0,NA,0xFFFFABCD,0,NA,0xFFFFABCD,NA,NA,NA,1,0,0,0},
+     {0xA162,INSTR_SMLABB,AL,AL,0,1,0xABCDFFFF,0,NA,0xABCD0001,NA,NA,NA,1,0,0,0},
+     {0xA163,INSTR_SMLABB,AL,AL,0,1,0xABCD0001,0,NA,0xABCD0FFF,NA,NA,NA,1,0x00001000,0,0},
+     {0xA164,INSTR_SMLABB,AL,AL,0,0xFFFFFFFF,0xABCD0001,0,NA,0xABCDFFFF,NA,NA,NA,1,0xFFFFFFFE,0,0},
+     {0xA165,INSTR_SMLABB,AL,AL,0,0xFFFFFFFF,0xABCDFFFF,0,NA,0xABCDFFFF,NA,NA,NA,1,0,0,0},
+     {0xA166,INSTR_UXTB16,AL,AL,0,NA,NA,0,NA,0xABCDEF01,SHIFT_ROR,0,NA,1,0x00CD0001,0,0},
+     {0xA167,INSTR_UXTB16,AL,AL,0,NA,NA,0,NA,0xABCDEF01,SHIFT_ROR,1,NA,1,0x00AB00EF,0,0},
+     {0xA168,INSTR_UXTB16,AL,AL,0,NA,NA,0,NA,0xABCDEF01,SHIFT_ROR,2,NA,1,0x000100CD,0,0},
+     {0xA169,INSTR_UXTB16,AL,AL,0,NA,NA,0,NA,0xABCDEF01,SHIFT_ROR,3,NA,1,0x00EF00AB,0,0},
+     {0xA170,INSTR_UBFX,AL,AL,0,0xABCDEF01,4,0,NA,24,NA,NA,NA,1,0x00BCDEF0,0,0},
+     {0xA171,INSTR_UBFX,AL,AL,0,0xABCDEF01,1,0,NA,2,NA,NA,NA,1,0,0,0},
+     {0xA172,INSTR_UBFX,AL,AL,0,0xABCDEF01,16,0,NA,8,NA,NA,NA,1,0xCD,0,0},
+     {0xA173,INSTR_UBFX,AL,AL,0,0xABCDEF01,31,0,NA,1,NA,NA,NA,1,1,0,0},
+     {0xA174,INSTR_ADDR_ADD,AL,AL,0,0xCFFFFFFFF,NA,0,NA,0x1,SHIFT_LSL,1,NA,1,0xD00000001,0,0},
+     {0xA175,INSTR_ADDR_ADD,AL,AL,0,0x01,NA,0,NA,0x1,SHIFT_LSL,2,NA,1,0x5,0,0},
+     {0xA176,INSTR_ADDR_ADD,AL,AL,0,0xCFFFFFFFF,NA,0,NA,0x1,NA,0,NA,1,0xD00000000,0,0},
+     {0xA177,INSTR_ADDR_SUB,AL,AL,0,0xD00000001,NA,0,NA,0x010000,SHIFT_LSR,15,NA,1,0xCFFFFFFFF,0,0},
+     {0xA178,INSTR_ADDR_SUB,AL,AL,0,0xCFFFFFFFF,NA,0,NA,0x020000,SHIFT_LSR,15,NA,1,0xCFFFFFFFB,0,0},
+     {0xA179,INSTR_ADDR_SUB,AL,AL,0,3,NA,0,NA,0x010000,SHIFT_LSR,15,NA,1,1,0,0},
+};
+
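+// Rows follow dataTransferTest_t declaration order; memOffset, RnValue and
+// postMemOffset are byte offsets into dataMem[].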
+dataTransferTest_t dataTransferTests [] =
+{
+    {0xB000,INSTR_LDR,AL,AL,1,24,0xABCDEF0123456789,0,REG_SCALE_OFFSET,24,NA,NA,NA,NA,NA,0x23456789,0,0,NA,NA,NA},
+    {0xB001,INSTR_LDR,AL,AL,1,4064,0xABCDEF0123456789,0,IMM12_OFFSET,NA,4068,0,1,0,NA,0xABCDEF01,0,0,NA,NA,NA},
+    {0xB002,INSTR_LDR,AL,AL,1,0,0xABCDEF0123456789,0,IMM12_OFFSET,NA,4,1,0,1,NA,0x23456789,4,0,NA,NA,NA},
+    {0xB003,INSTR_LDR,AL,AL,1,0,0xABCDEF0123456789,0,NO_OFFSET,NA,NA,0,0,0,NA,0x23456789,0,0,NA,NA,NA},
+    {0xB004,INSTR_LDRB,AL,AL,1,4064,0xABCDEF0123456789,0,REG_SCALE_OFFSET,4064,NA,NA,NA,NA,NA,0x89,0,0,NA,NA,NA},
+    {0xB005,INSTR_LDRB,AL,AL,1,4064,0xABCDEF0123456789,0,IMM12_OFFSET,NA,4065,0,1,0,NA,0x67,0,0,NA,NA,NA},
+    {0xB006,INSTR_LDRB,AL,AL,1,4064,0xABCDEF0123456789,4065,IMM12_OFFSET,NA,0,0,1,0,NA,0x67,4065,0,NA,NA,NA},
+    {0xB007,INSTR_LDRB,AL,AL,1,4064,0xABCDEF0123456789,4065,IMM12_OFFSET,NA,1,0,1,0,NA,0x45,4065,0,NA,NA,NA},
+    {0xB008,INSTR_LDRB,AL,AL,1,4064,0xABCDEF0123456789,4065,IMM12_OFFSET,NA,2,0,1,0,NA,0x23,4065,0,NA,NA,NA},
+    {0xB009,INSTR_LDRB,AL,AL,1,4064,0xABCDEF0123456789,4065,IMM12_OFFSET,NA,1,1,0,1,NA,0x67,4066,0,NA,NA,NA},
+    {0xB010,INSTR_LDRB,AL,AL,1,4064,0xABCDEF0123456789,0,NO_OFFSET,NA,NA,0,0,0,NA,0x89,0,0,NA,NA,NA},
+    {0xB011,INSTR_LDRH,AL,AL,1,0,0xABCDEF0123456789,0,IMM8_OFFSET,NA,2,1,0,1,NA,0x6789,2,0,NA,NA,NA},
+    {0xB012,INSTR_LDRH,AL,AL,1,4064,0xABCDEF0123456789,0,REG_OFFSET,4064,0,0,1,0,NA,0x6789,0,0,NA,NA,NA},
+    {0xB013,INSTR_LDRH,AL,AL,1,4064,0xABCDEF0123456789,0,REG_OFFSET,4066,0,0,1,0,NA,0x2345,0,0,NA,NA,NA},
+    {0xB014,INSTR_LDRH,AL,AL,1,0,0xABCDEF0123456789,0,NO_OFFSET,NA,0,0,0,0,NA,0x6789,0,0,NA,NA,NA},
+    {0xB015,INSTR_LDRH,AL,AL,1,0,0xABCDEF0123456789,2,NO_OFFSET,NA,0,0,0,0,NA,0x2345,2,0,NA,NA,NA},
+    {0xB016,INSTR_ADDR_LDR,AL,AL,1,4064,0xABCDEF0123456789,0,IMM12_OFFSET,NA,4064,0,1,0,NA,0xABCDEF0123456789,0,0,NA,NA,NA},
+    {0xB017,INSTR_STR,AL,AL,1,2,0xDEADBEEFDEADBEEF,4,IMM12_OFFSET,NA,4,1,0,1,0xABCDEF0123456789,0xABCDEF0123456789,8,1,2,8,0xDEAD23456789BEEF},
+    {0xB018,INSTR_STR,AL,AL,1,2,0xDEADBEEFDEADBEEF,4,NO_OFFSET,NA,NA,0,0,0,0xABCDEF0123456789,0xABCDEF0123456789,4,1,2,8,0xDEAD23456789BEEF},
+    {0xB019,INSTR_STR,AL,AL,1,4066,0xDEADBEEFDEADBEEF,4,IMM12_OFFSET,NA,4064,0,1,0,0xABCDEF0123456789,0xABCDEF0123456789,4,1,4066,8,0xDEAD23456789BEEF},
+    {0xB020,INSTR_STRB,AL,AL,1,0,0xDEADBEEFDEADBEEF,1,IMM12_OFFSET,NA,0,0,1,0,0xABCDEF0123456789,0xABCDEF0123456789,1,1,0,8,0xDEADBEEFDEAD89EF},
+    {0xB021,INSTR_STRB,AL,AL,1,0,0xDEADBEEFDEADBEEF,1,IMM12_OFFSET,NA,1,0,1,0,0xABCDEF0123456789,0xABCDEF0123456789,1,1,0,8,0xDEADBEEFDE89BEEF},
+    {0xB022,INSTR_STRB,AL,AL,1,0,0xDEADBEEFDEADBEEF,1,IMM12_OFFSET,NA,2,0,1,0,0xABCDEF0123456789,0xABCDEF0123456789,1,1,0,8,0xDEADBEEF89ADBEEF},
+    {0xB023,INSTR_STRB,AL,AL,1,0,0xDEADBEEFDEADBEEF,1,IMM12_OFFSET,NA,4,1,0,1,0xABCDEF0123456789,0xABCDEF0123456789,5,1,0,8,0xDEADBEEFDEAD89EF},
+    {0xB024,INSTR_STRB,AL,AL,1,0,0xDEADBEEFDEADBEEF,1,NO_OFFSET,NA,NA,0,0,0,0xABCDEF0123456789,0xABCDEF0123456789,1,1,0,8,0xDEADBEEFDEAD89EF},
+    {0xB025,INSTR_STRH,AL,AL,1,4066,0xDEADBEEFDEADBEEF,4070,IMM12_OFFSET,NA,2,1,0,1,0xABCDEF0123456789,0xABCDEF0123456789,4072,1,4066,8,0xDEAD6789DEADBEEF},
+    {0xB026,INSTR_STRH,AL,AL,1,4066,0xDEADBEEFDEADBEEF,4070,NO_OFFSET,NA,NA,0,0,0,0xABCDEF0123456789,0xABCDEF0123456789,4070,1,4066,8,0xDEAD6789DEADBEEF},
+    {0xB027,INSTR_STRH,EQ,NE,1,4066,0xDEADBEEFDEADBEEF,4070,NO_OFFSET,NA,NA,0,0,0,0xABCDEF0123456789,0xABCDEF0123456789,4070,1,4066,8,0xDEADBEEFDEADBEEF},
+    {0xB028,INSTR_STRH,NE,NE,1,4066,0xDEADBEEFDEADBEEF,4070,NO_OFFSET,NA,NA,0,0,0,0xABCDEF0123456789,0xABCDEF0123456789,4070,1,4066,8,0xDEAD6789DEADBEEF},
+    {0xB029,INSTR_STRH,NE,EQ,1,4066,0xDEADBEEFDEADBEEF,4070,NO_OFFSET,NA,NA,0,0,0,0xABCDEF0123456789,0xABCDEF0123456789,4070,1,4066,8,0xDEADBEEFDEADBEEF},
+    {0xB030,INSTR_STRH,EQ,EQ,1,4066,0xDEADBEEFDEADBEEF,4070,NO_OFFSET,NA,NA,0,0,0,0xABCDEF0123456789,0xABCDEF0123456789,4070,1,4066,8,0xDEAD6789DEADBEEF},
+    {0xB031,INSTR_STRH,HI,LS,1,4066,0xDEADBEEFDEADBEEF,4070,NO_OFFSET,NA,NA,0,0,0,0xABCDEF0123456789,0xABCDEF0123456789,4070,1,4066,8,0xDEADBEEFDEADBEEF},
+    {0xB032,INSTR_STRH,LS,LS,1,4066,0xDEADBEEFDEADBEEF,4070,NO_OFFSET,NA,NA,0,0,0,0xABCDEF0123456789,0xABCDEF0123456789,4070,1,4066,8,0xDEAD6789DEADBEEF},
+    {0xB033,INSTR_STRH,LS,HI,1,4066,0xDEADBEEFDEADBEEF,4070,NO_OFFSET,NA,NA,0,0,0,0xABCDEF0123456789,0xABCDEF0123456789,4070,1,4066,8,0xDEADBEEFDEADBEEF},
+    {0xB034,INSTR_STRH,HI,HI,1,4066,0xDEADBEEFDEADBEEF,4070,NO_OFFSET,NA,NA,0,0,0,0xABCDEF0123456789,0xABCDEF0123456789,4070,1,4066,8,0xDEAD6789DEADBEEF},
+    {0xB035,INSTR_STRH,CC,HS,1,4066,0xDEADBEEFDEADBEEF,4070,NO_OFFSET,NA,NA,0,0,0,0xABCDEF0123456789,0xABCDEF0123456789,4070,1,4066,8,0xDEADBEEFDEADBEEF},
+    {0xB036,INSTR_STRH,CS,HS,1,4066,0xDEADBEEFDEADBEEF,4070,NO_OFFSET,NA,NA,0,0,0,0xABCDEF0123456789,0xABCDEF0123456789,4070,1,4066,8,0xDEAD6789DEADBEEF},
+    {0xB037,INSTR_STRH,GE,LT,1,4066,0xDEADBEEFDEADBEEF,4070,NO_OFFSET,NA,NA,0,0,0,0xABCDEF0123456789,0xABCDEF0123456789,4070,1,4066,8,0xDEADBEEFDEADBEEF},
+    {0xB038,INSTR_STRH,LT,LT,1,4066,0xDEADBEEFDEADBEEF,4070,NO_OFFSET,NA,NA,0,0,0,0xABCDEF0123456789,0xABCDEF0123456789,4070,1,4066,8,0xDEAD6789DEADBEEF},
+    {0xB039,INSTR_ADDR_STR,AL,AL,1,4064,0xDEADBEEFDEADBEEF,4,IMM12_OFFSET,NA,4060,0,1,0,0xABCDEF0123456789,0xABCDEF0123456789,4,1,4064,8,0xABCDEF0123456789},
+};
+
+
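+// The tests JIT-write instructions into instrMem; synchronize the data and
+// instruction caches before branching to the generated code.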
+int flushcache()
+{
+    const long base = long(instrMem);
+    const long curr = base + long(instrMemSize);
+    return cacheflush(base, curr, 0);
+}
+void dataOpTest(dataOpTest_t test, ARMAssemblerInterface *a64asm, uint32_t Rd = 0,
+                uint32_t Rn = 1, uint32_t Rm = 2, uint32_t Rs = 3)
+{
+    int64_t  regs[NUM_REGS] = {0};
+    int32_t  flags[NUM_FLAGS] = {0};
+    int64_t  savedRegs[NUM_REGS] = {0};
+    uint32_t i;
+    uint32_t op2;
+
+    for(i = 0; i < NUM_REGS; ++i)
+    {
+        regs[i] = i;
+    }
+
+    regs[Rd] = test.RdValue;
+    regs[Rn] = test.RnValue;
+    regs[Rs] = test.RsValue;
+    flags[test.preFlag] = 1;
+    a64asm->reset();
+    a64asm->prolog();
+    if(test.immediate == true)
+    {
+        op2 = a64asm->imm(test.immValue);
+    }
+    else if(test.immediate == false && test.shiftAmount == 0)
+    {
+        op2 = Rm;
+        regs[Rm] = test.RmValue;
+    }
+    else
+    {
+        op2 = a64asm->reg_imm(Rm, test.shiftMode, test.shiftAmount);
+        regs[Rm] = test.RmValue;
+    }
+    switch(test.op)
+    {
+    case INSTR_ADD: a64asm->ADD(test.cond, test.setFlags, Rd,Rn,op2); break;
+    case INSTR_SUB: a64asm->SUB(test.cond, test.setFlags, Rd,Rn,op2); break;
+    case INSTR_RSB: a64asm->RSB(test.cond, test.setFlags, Rd,Rn,op2); break;
+    case INSTR_AND: a64asm->AND(test.cond, test.setFlags, Rd,Rn,op2); break;
+    case INSTR_ORR: a64asm->ORR(test.cond, test.setFlags, Rd,Rn,op2); break;
+    case INSTR_BIC: a64asm->BIC(test.cond, test.setFlags, Rd,Rn,op2); break;
+    case INSTR_MUL: a64asm->MUL(test.cond, test.setFlags, Rd,Rm,Rs); break;
+    case INSTR_MLA: a64asm->MLA(test.cond, test.setFlags, Rd,Rm,Rs,Rn); break;
+    case INSTR_CMP: a64asm->CMP(test.cond, Rn,op2); break;
+    case INSTR_MOV: a64asm->MOV(test.cond, test.setFlags,Rd,op2); break;
+    case INSTR_MVN: a64asm->MVN(test.cond, test.setFlags,Rd,op2); break;
+    case INSTR_SMULBB:a64asm->SMULBB(test.cond, Rd,Rm,Rs); break;
+    case INSTR_SMULBT:a64asm->SMULBT(test.cond, Rd,Rm,Rs); break;
+    case INSTR_SMULTB:a64asm->SMULTB(test.cond, Rd,Rm,Rs); break;
+    case INSTR_SMULTT:a64asm->SMULTT(test.cond, Rd,Rm,Rs); break;
+    case INSTR_SMULWB:a64asm->SMULWB(test.cond, Rd,Rm,Rs); break;
+    case INSTR_SMULWT:a64asm->SMULWT(test.cond, Rd,Rm,Rs); break;
+    case INSTR_SMLABB:a64asm->SMLABB(test.cond, Rd,Rm,Rs,Rn); break;
+    case INSTR_UXTB16:a64asm->UXTB16(test.cond, Rd,Rm,test.shiftAmount); break;
+    case INSTR_UBFX:
+    {
+        int32_t lsb   = test.RsValue;
+        int32_t width = test.RmValue;
+        a64asm->UBFX(test.cond, Rd,Rn,lsb, width);
+        break;
+    }
+    case INSTR_ADDR_ADD: a64asm->ADDR_ADD(test.cond, test.setFlags, Rd,Rn,op2); break;
+    case INSTR_ADDR_SUB: a64asm->ADDR_SUB(test.cond, test.setFlags, Rd,Rn,op2); break;
+    default: printf("Error"); return;
+    }
+    a64asm->epilog(0);
+    flushcache();
+
+    asm_function_t asm_function = (asm_function_t)(instrMem);
+
+    for(i = 0; i < NUM_REGS; ++i)
+        savedRegs[i] = regs[i];
+
+    asm_test_jacket(asm_function, regs, flags);
+
+    /* Check that all registers except Rd are unchanged */
+    for(i = 0; i < NUM_REGS; ++i)
+    {
+        if(i == Rd) continue;
+        if(regs[i] != savedRegs[i])
+        {
+            printf("Test %x failed Reg(%d) tampered Expected(0x%"PRIx64"),"
+                   "Actual(0x%"PRIx64") t\n", test.id, i, savedRegs[i], regs[i]);
+            return;
+        }
+    }
+
+    if(test.checkRd == 1 && (uint64_t)regs[Rd] != test.postRdValue)
+    {
+        printf("Test %x failed, Expected(%"PRIx64"), Actual(%"PRIx64")\n",
+                test.id, test.postRdValue, regs[Rd]);
+    }
+    else if(test.checkFlag == 1 && flags[test.postFlag] == 0)
+    {
+        printf("Test %x failed Flag(%s) NOT set\n",
+                test.id,cc_code[test.postFlag]);
+    }
+    else
+    {
+        printf("Test %x passed\n", test.id);
+    }
+}
+
+
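+// Like dataOpTest(), but points Rn at dataMem, optionally pre-seeds
+// eight bytes of memory, selects an addressing mode for the transfer,
+// and afterwards verifies Rd, any write-back into Rn, and the bytes
+// stored to memory (reassembled little-endian below).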
+void dataTransferTest(dataTransferTest_t test, ARMAssemblerInterface *a64asm,
+                      uint32_t Rd = 0, uint32_t Rn = 1,uint32_t Rm = 2)
+{
+    int64_t regs[NUM_REGS] = {0};
+    int64_t savedRegs[NUM_REGS] = {0};
+    int32_t flags[NUM_FLAGS] = {0};
+    uint32_t i;
+    for(i = 0; i < NUM_REGS; ++i)
+    {
+        regs[i] = i;
+    }
+
+    uint32_t op2;
+
+    regs[Rd] = test.RdValue;
+    regs[Rn] = (uint64_t)(&dataMem[test.RnValue]);
+    regs[Rm] = test.RmValue;
+    flags[test.preFlag] = 1;
+
+    if(test.setMem == true)
+    {
+        unsigned char *mem = (unsigned char *)&dataMem[test.memOffset];
+        uint64_t value = test.memValue;
+        for(int j = 0; j < 8; ++j)
+        {
+            mem[j] = value & 0x00FF;
+            value >>= 8;
+        }
+    }
+    a64asm->reset();
+    a64asm->prolog();
+    if(test.offsetType == REG_SCALE_OFFSET)
+    {
+        op2 = a64asm->reg_scale_pre(Rm);
+    }
+    else if(test.offsetType == REG_OFFSET)
+    {
+        op2 = a64asm->reg_pre(Rm);
+    }
+    else if(test.offsetType == IMM12_OFFSET && test.preIndex == true)
+    {
+        op2 = a64asm->immed12_pre(test.immValue, test.writeBack);
+    }
+    else if(test.offsetType == IMM12_OFFSET && test.postIndex == true)
+    {
+        op2 = a64asm->immed12_post(test.immValue);
+    }
+    else if(test.offsetType == IMM8_OFFSET && test.preIndex == true)
+    {
+        op2 = a64asm->immed8_pre(test.immValue, test.writeBack);
+    }
+    else if(test.offsetType == IMM8_OFFSET && test.postIndex == true)
+    {
+        op2 = a64asm->immed8_post(test.immValue);
+    }
+    else if(test.offsetType == NO_OFFSET)
+    {
+        op2 = a64asm->immed12_pre(0);
+    }
+    else
+    {
+        printf("Error - Unknown offset\n"); return;
+    }
+
+    switch(test.op)
+    {
+    case INSTR_LDR:  a64asm->LDR(test.cond, Rd,Rn,op2); break;
+    case INSTR_LDRB: a64asm->LDRB(test.cond, Rd,Rn,op2); break;
+    case INSTR_LDRH: a64asm->LDRH(test.cond, Rd,Rn,op2); break;
+    case INSTR_ADDR_LDR: a64asm->ADDR_LDR(test.cond, Rd,Rn,op2); break;
+    case INSTR_STR:  a64asm->STR(test.cond, Rd,Rn,op2); break;
+    case INSTR_STRB: a64asm->STRB(test.cond, Rd,Rn,op2); break;
+    case INSTR_STRH: a64asm->STRH(test.cond, Rd,Rn,op2); break;
+    case INSTR_ADDR_STR: a64asm->ADDR_STR(test.cond, Rd,Rn,op2); break;
+    default: printf("Error"); return;
+    }
+    a64asm->epilog(0);
+    flushcache();
+
+    asm_function_t asm_function = (asm_function_t)(instrMem);
+
+    for(i = 0; i < NUM_REGS; ++i)
+        savedRegs[i] = regs[i];
+
+
+    asm_test_jacket(asm_function, regs, flags);
+
+    /* Check that all registers except Rd/Rn are unchanged */
+    for(i = 0; i < NUM_REGS; ++i)
+    {
+        if(i == Rd || i == Rn) continue;
+        if(regs[i] != savedRegs[i])
+        {
+            printf("Test %x failed Reg(%d) tampered"
+                   " Expected(0x%"PRIx64"), Actual(0x%"PRIx64") t\n",
+                   test.id, i, savedRegs[i], regs[i]);
+            return;
+        }
+    }
+
+    if((uint64_t)regs[Rd] != test.postRdValue)
+    {
+        printf("Test %x failed, "
+               "Expected in Rd(0x%"PRIx64"), Actual(0x%"PRIx64")\n",
+               test.id, test.postRdValue, regs[Rd]);
+    }
+    else if((uint64_t)regs[Rn] != (uint64_t)(&dataMem[test.postRnValue]))
+    {
+        printf("Test %x failed, "
+               "Expected in Rn(0x%"PRIx64"), Actual(0x%"PRIx64")\n",
+               test.id, test.postRnValue, regs[Rn] - (uint64_t)dataMem);
+    }
+    else if(test.checkMem == true)
+    {
+        unsigned char *addr = (unsigned char *)&dataMem[test.postMemOffset];
+        uint64_t value;
+        value = 0;
+        for(uint32_t j = 0; j < test.postMemLength; ++j)
+            value = (value << 8) | addr[test.postMemLength-j-1];
+        if(value != test.postMemValue)
+        {
+            printf("Test %x failed, "
+                   "Expected in Mem(0x%"PRIx64"), Actual(0x%"PRIx64")\n",
+                   test.id, test.postMemValue, value);
+        }
+        else
+        {
+            printf("Test %x passed\n", test.id);
+        }
+    }
+    else
+    {
+        printf("Test %x passed\n", test.id);
+    }
+}
+
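+// STM(DB) pushes the registers selected by each bit pattern, the MOV
+// loop then overwrites the first 16 registers with 0x31, and LDM(IA)
+// pops them back; every register named in the pattern must return with
+// its original value.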
+void dataTransferLDMSTM(ARMAssemblerInterface *a64asm)
+{
+    int64_t regs[NUM_REGS] = {0};
+    int32_t flags[NUM_FLAGS] = {0};
+    const uint32_t numArmv7Regs = 16;
+
+    uint32_t Rn = ARMAssemblerInterface::SP;
+
+    uint32_t patterns[] =
+    {
+        0x5A03,
+        0x4CF0,
+        0x1EA6,
+        0x0DBF,
+    };
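+    // Each pattern is a 16-bit mask selecting which of the 16 ARMv7
+    // registers take part in the STM/LDM pair below.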
+
+    uint32_t i, j;
+    for(i = 0; i < sizeof(patterns)/sizeof(uint32_t); ++i)
+    {
+        for(j = 0; j < NUM_REGS; ++j)
+        {
+            regs[j] = j;
+        }
+        a64asm->reset();
+        a64asm->prolog();
+        a64asm->STM(AL,ARMAssemblerInterface::DB,Rn,1,patterns[i]);
+        for(j = 0; j < numArmv7Regs; ++j)
+        {
+            uint32_t op2 = a64asm->imm(0x31);
+            a64asm->MOV(AL, 0,j,op2);
+        }
+        a64asm->LDM(AL,ARMAssemblerInterface::IA,Rn,1,patterns[i]);
+        a64asm->epilog(0);
+        flushcache();
+
+        asm_function_t asm_function = (asm_function_t)(instrMem);
+        asm_test_jacket(asm_function, regs, flags);
+
+        for(j = 0; j < numArmv7Regs; ++j)
+        {
+            if((1 << j) & patterns[i])
+            {
+                if(regs[j] != j)
+                {
+                    printf("LDM/STM Test %x failed "
+                           "Reg%d expected(0x%x) Actual(0x%"PRIx64") \n",
+                            patterns[i],j,j,regs[j]);
+                    break;
+                }
+            }
+        }
+        if(j == numArmv7Regs)
+            printf("LDM/STM Test %x passed\n", patterns[i]);
+    }
+}
+
+int main(void)
+{
+    uint32_t i;
+
+    /* Allocate memory to store instructions generated by ArmToAarch64Assembler */
+    {
+        int fd = ashmem_create_region("code cache", instrMemSize);
+        if(fd < 0)
+            printf("Creating code cache, ashmem_create_region "
+                                "failed with error '%s'\n", strerror(errno));
+        instrMem = mmap(NULL, instrMemSize,
+                                    PROT_READ | PROT_WRITE | PROT_EXEC,
+                                MAP_PRIVATE, fd, 0);
+        if(instrMem == MAP_FAILED)
+        {
+            printf("Mapping code cache failed with error '%s'\n",
+                   strerror(errno));
+            return -1;
+        }
+    }
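+    // The mapping must be PROT_EXEC: the generated code is executed in
+    // place through asm_test_jacket().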
+
+    ArmToAarch64Assembler a64asm(instrMem);
+
+    if(TESTS_DATAOP_ENABLE)
+    {
+        printf("Running data processing tests\n");
+        for(i = 0; i < sizeof(dataOpTests)/sizeof(dataOpTest_t); ++i)
+            dataOpTest(dataOpTests[i], &a64asm);
+    }
+
+    if(TESTS_DATATRANSFER_ENABLE)
+    {
+        printf("Running data transfer tests\n");
+        for(i = 0; i < sizeof(dataTransferTests)/sizeof(dataTransferTest_t); ++i)
+            dataTransferTest(dataTransferTests[i], &a64asm);
+    }
+
+    if(TESTS_LDMSTM_ENABLE)
+    {
+        printf("Running LDM/STM tests\n");
+        dataTransferLDMSTM(&a64asm);
+    }
+
+
+    if(TESTS_REG_CORRUPTION_ENABLE)
+    {
+        uint32_t reg_list[] = {0,1,12,14};
+        uint32_t Rd, Rm, Rs, Rn;
+        uint32_t i;
+        uint32_t numRegs = sizeof(reg_list)/sizeof(uint32_t);
+
+        printf("Running Register corruption tests\n");
+        for(i = 0; i < sizeof(dataOpTests)/sizeof(dataOpTest_t); ++i)
+        {
+            for(Rd = 0; Rd < numRegs; ++Rd)
+            {
+                for(Rn = 0; Rn < numRegs; ++Rn)
+                {
+                    for(Rm = 0; Rm < numRegs; ++Rm)
+                    {
+                        for(Rs = 0; Rs < numRegs;++Rs)
+                        {
+                            if(Rd == Rn || Rd == Rm || Rd == Rs) continue;
+                            if(Rn == Rm || Rn == Rs) continue;
+                            if(Rm == Rs) continue;
+                            printf("Testing combination Rd(%d), Rn(%d),"
+                                   " Rm(%d), Rs(%d): ",
+                                   reg_list[Rd], reg_list[Rn], reg_list[Rm], reg_list[Rs]);
+                            dataOpTest(dataOpTests[i], &a64asm, reg_list[Rd],
+                                       reg_list[Rn], reg_list[Rm], reg_list[Rs]);
+                        }
+                    }
+                }
+            }
+        }
+    }
+    return 0;
+}
diff --git a/libpixelflinger/tests/arch-aarch64/assembler/asm_test_jacket.S b/libpixelflinger/tests/arch-aarch64/assembler/asm_test_jacket.S
new file mode 100644
index 0000000..a1392c2
--- /dev/null
+++ b/libpixelflinger/tests/arch-aarch64/assembler/asm_test_jacket.S
@@ -0,0 +1,221 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+    .text
+    .align
+
+    .global asm_test_jacket
+
+    // Set the register and flag values
+    // Calls the asm function
+    // Reads the register/flag values to output register
+
+    // Parameters
+    // X0 - Function to jump
+    // X1 - register values array
+    // X2 - flag values array
+asm_test_jacket:
+    // Save registers to stack
+    stp    x29, x30, [sp,#-16]!
+    stp    x27, x28, [sp,#-16]!
+
+    mov x30, x0
+    mov x28, x1
+    mov x27, x2
+
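+    // Each block below handles one slot of the flag array: when that
+    // slot is set, a compare against a known constant is executed so
+    // the corresponding condition holds, then control skips to bt_end.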
+    //Set the flags based on flag array
+    //EQ
+    ldr w0, [x27,#0]
+    cmp w0, #1
+    b.ne bt_aeq
+    cmp w0,#1
+    b bt_end
+bt_aeq:
+
+    //NE
+    ldr w0, [x27,#4]
+    cmp w0, #1
+    b.ne bt_ane
+    cmp w0,#2
+    b bt_end
+bt_ane:
+
+    //CS
+    ldr w0, [x27,#8]
+    cmp w0, #1
+    b.ne bt_acs
+    cmp w0,#0
+    b bt_end
+bt_acs:
+
+    //CC
+    ldr w0, [x27,#12]
+    cmp w0, #1
+    b.ne bt_acc
+    cmp w0,#2
+    b bt_end
+bt_acc:
+
+    //MI
+    ldr w0, [x27,#16]
+    cmp w0, #1
+    b.ne bt_ami
+    subs w0,w0,#2
+    b bt_end
+bt_ami:
+
+    //PL
+    ldr w0, [x27,#20]
+    cmp w0, #1
+    b.ne bt_apl
+    subs w0,w0,#0
+    b bt_end
+bt_apl:
+    //HI - (C==1) && (Z==0)
+    ldr w0, [x27,#32]
+    cmp w0, #1
+    b.ne bt_ahi
+    cmp w0,#0
+    b bt_end
+bt_ahi:
+
+    //LS - (C==0) || (Z==1)
+    ldr w0, [x27,#36]
+    cmp w0, #1
+    b.ne bt_als
+    cmp w0,#1
+    b bt_end
+bt_als:
+
+    //GE
+    ldr w0, [x27,#40]
+    cmp w0, #1
+    b.ne bt_age
+    cmp w0,#0
+    b bt_end
+bt_age:
+
+    //LT
+    ldr w0, [x27,#44]
+    cmp w0, #1
+    b.ne bt_alt
+    cmp w0,#2
+    b bt_end
+bt_alt:
+
+    //GT
+    ldr w0, [x27,#48]
+    cmp w0, #1
+    b.ne bt_agt
+    cmp w0,#0
+    b bt_end
+bt_agt:
+
+    //LE
+    ldr w0, [x27,#52]
+    cmp w0, #1
+    b.ne bt_ale
+    cmp w0,#2
+    b bt_end
+bt_ale:
+
+
+bt_end:
+
+    // Load the registers from reg array
+    ldr x0, [x28,#0]
+    ldr x1, [x28,#8]
+    ldr x2, [x28,#16]
+    ldr x3, [x28,#24]
+    ldr x4, [x28,#32]
+    ldr x5, [x28,#40]
+    ldr x6, [x28,#48]
+    ldr x7, [x28,#56]
+    ldr x8, [x28,#64]
+    ldr x9, [x28,#72]
+    ldr x10, [x28,#80]
+    ldr x11, [x28,#88]
+    ldr x12, [x28,#96]
+    ldr x14, [x28,#112]
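+    // Note: x13 (array offset 104) is not loaded or restored by this
+    // jacket.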
+
+    // Call the function
+    blr x30
+
+    // Save the registers to reg array
+    str x0, [x28,#0]
+    str x1, [x28,#8]
+    str x2, [x28,#16]
+    str x3, [x28,#24]
+    str x4, [x28,#32]
+    str x5, [x28,#40]
+    str x6, [x28,#48]
+    str x7, [x28,#56]
+    str x8, [x28,#64]
+    str x9, [x28,#72]
+    str x10, [x28,#80]
+    str x11, [x28,#88]
+    str x12, [x28,#96]
+    str x14, [x28,#112]
+
+    //Set the flags array based on result flags
+    movz w0, #0
+    movz w1, #1
+    csel w2, w1, w0, EQ
+    str w2, [x27,#0]
+    csel w2, w1, w0, NE
+    str w2, [x27,#4]
+    csel w2, w1, w0, CS
+    str w2, [x27,#8]
+    csel w2, w1, w0, CC
+    str w2, [x27,#12]
+    csel w2, w1, w0, MI
+    str w2, [x27,#16]
+    csel w2, w1, w0, PL
+    str w2, [x27,#20]
+    csel w2, w1, w0, VS
+    str w2, [x27,#24]
+    csel w2, w1, w0, VC
+    str w2, [x27,#28]
+    csel w2, w1, w0, HI
+    str w2, [x27,#32]
+    csel w2, w1, w0, LS
+    str w2, [x27,#36]
+    csel w2, w1, w0, GE
+    str w2, [x27,#40]
+    csel w2, w1, w0, LT
+    str w2, [x27,#44]
+    csel w2, w1, w0, GT
+    str w2, [x27,#48]
+    csel w2, w1, w0, LE
+    str w2, [x27,#52]
+
+    // Restore registers from stack
+    ldp    x27, x28, [sp],#16
+    ldp    x29, x30, [sp],#16
+    ret
+
diff --git a/libpixelflinger/tests/arch-aarch64/col32cb16blend/Android.mk b/libpixelflinger/tests/arch-aarch64/col32cb16blend/Android.mk
new file mode 100644
index 0000000..7445fc8
--- /dev/null
+++ b/libpixelflinger/tests/arch-aarch64/col32cb16blend/Android.mk
@@ -0,0 +1,16 @@
+LOCAL_PATH:= $(call my-dir)
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES:= \
+    col32cb16blend_test.c \
+    ../../../arch-aarch64/col32cb16blend.S
+
+LOCAL_SHARED_LIBRARIES :=
+
+LOCAL_C_INCLUDES :=
+
+LOCAL_MODULE:= test-pixelflinger-aarch64-col32cb16blend
+
+LOCAL_MODULE_TAGS := tests
+
+include $(BUILD_EXECUTABLE)
diff --git a/libpixelflinger/tests/arch-aarch64/col32cb16blend/col32cb16blend_test.c b/libpixelflinger/tests/arch-aarch64/col32cb16blend/col32cb16blend_test.c
new file mode 100644
index 0000000..f057884
--- /dev/null
+++ b/libpixelflinger/tests/arch-aarch64/col32cb16blend/col32cb16blend_test.c
@@ -0,0 +1,125 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+
+
+#define ARGB_8888_MAX   0xFFFFFFFF
+#define ARGB_8888_MIN   0x00000000
+#define RGB_565_MAX 0xFFFF
+#define RGB_565_MIN 0x0000
+
+struct test_t
+{
+    char name[256];
+    uint32_t src_color;
+    uint16_t dst_color;
+    size_t count;
+};
+
+struct test_t tests[] =
+{
+    {"Count 1, Src=Max, Dst=Min", ARGB_8888_MAX, RGB_565_MIN, 1},
+    {"Count 2, Src=Min, Dst=Max", ARGB_8888_MIN, RGB_565_MAX, 2},
+    {"Count 3, Src=Max, Dst=Max", ARGB_8888_MAX, RGB_565_MAX, 3},
+    {"Count 4, Src=Min, Dst=Min", ARGB_8888_MAX, RGB_565_MAX, 4},
+    {"Count 1, Src=Rand, Dst=Rand", 0x12345678, 0x9ABC, 1},
+    {"Count 2, Src=Rand, Dst=Rand", 0xABCDEF12, 0x2345, 2},
+    {"Count 3, Src=Rand, Dst=Rand", 0x11111111, 0xEDFE, 3},
+    {"Count 4, Src=Rand, Dst=Rand", 0x12345678, 0x9ABC, 4},
+    {"Count 5, Src=Rand, Dst=Rand", 0xEFEFFEFE, 0xFACC, 5},
+    {"Count 10, Src=Rand, Dst=Rand", 0x12345678, 0x9ABC, 10}
+};
+
+void scanline_col32cb16blend_aarch64(uint16_t *dst, uint32_t src, size_t count);
+void scanline_col32cb16blend_c(uint16_t * dst, uint32_t src, size_t count)
+{
+    /* src is unsigned so the alpha extraction cannot sign-extend */
+    int srcAlpha = (src>>24);
+    int f = 0x100 - (srcAlpha + (srcAlpha>>7));
+    while (count--)
+    {
+        uint16_t d = *dst;
+        int dstR = (d>>11)&0x1f;
+        int dstG = (d>>5)&0x3f;
+        int dstB = (d)&0x1f;
+        int srcR = (src >> (   3))&0x1F;
+        int srcG = (src >> ( 8+2))&0x3F;
+        int srcB = (src >> (16+3))&0x1F;
+        srcR += (f*dstR)>>8;
+        srcG += (f*dstG)>>8;
+        srcB += (f*dstB)>>8;
+        *dst++ = (uint16_t)((srcR<<11)|(srcG<<5)|srcB);
+    }
+}
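+
+// Worked example of the blend factor: for srcAlpha = 0xFF,
+// f = 0x100 - (0xFF + (0xFF >> 7)) = 0, so a fully opaque source
+// replaces the destination; for srcAlpha = 0, f = 0x100 and the
+// destination channels survive unscaled.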
+
+void scanline_col32cb16blend_test()
+{
+    uint16_t dst_c[16], dst_asm[16];
+    uint32_t i, j;
+
+    for(i = 0; i < sizeof(tests)/sizeof(struct test_t); ++i)
+    {
+        struct test_t test = tests[i];
+
+        printf("Testing - %s:",test.name);
+
+        memset(dst_c, 0, sizeof(dst_c));
+        memset(dst_asm, 0, sizeof(dst_asm));
+
+        for(j = 0; j < test.count; ++j)
+        {
+            dst_c[j]   = test.dst_color;
+            dst_asm[j] = test.dst_color;
+        }
+
+
+        scanline_col32cb16blend_c(dst_c, test.src_color, test.count);
+        scanline_col32cb16blend_aarch64(dst_asm, test.src_color, test.count);
+
+
+        if(memcmp(dst_c, dst_asm, sizeof(dst_c)) == 0)
+            printf("Passed\n");
+        else
+            printf("Failed\n");
+
+        for(j = 0; j < test.count; ++j)
+        {
+            printf("dst_c[%d] = %x, dst_asm[%d] = %x \n", j, dst_c[j], j, dst_asm[j]);
+        }
+    }
+}
+
+int main()
+{
+    scanline_col32cb16blend_test();
+    return 0;
+}
diff --git a/libpixelflinger/tests/arch-aarch64/disassembler/Android.mk b/libpixelflinger/tests/arch-aarch64/disassembler/Android.mk
new file mode 100644
index 0000000..376c3b7
--- /dev/null
+++ b/libpixelflinger/tests/arch-aarch64/disassembler/Android.mk
@@ -0,0 +1,17 @@
+LOCAL_PATH:= $(call my-dir)
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES:= \
+    aarch64_diassembler_test.cpp \
+    ../../../codeflinger/Aarch64Disassembler.cpp
+
+LOCAL_SHARED_LIBRARIES :=
+
+LOCAL_C_INCLUDES := \
+    system/core/libpixelflinger/codeflinger
+
+LOCAL_MODULE:= test-pixelflinger-aarch64-disassembler-test
+
+LOCAL_MODULE_TAGS := tests
+
+include $(BUILD_EXECUTABLE)
diff --git a/libpixelflinger/tests/arch-aarch64/disassembler/aarch64_diassembler_test.cpp b/libpixelflinger/tests/arch-aarch64/disassembler/aarch64_diassembler_test.cpp
new file mode 100644
index 0000000..17caee1
--- /dev/null
+++ b/libpixelflinger/tests/arch-aarch64/disassembler/aarch64_diassembler_test.cpp
@@ -0,0 +1,321 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#include <stdio.h>
+#include <inttypes.h>
+#include <string.h>
+
+int aarch64_disassemble(uint32_t code, char* instr);
+
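+// Each entry pairs an instruction word with the exact text the
+// disassembler must produce; main() compares with strcmp(), so the
+// spacing inside the strings is significant.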
+struct test_table_entry_t
+{
+     uint32_t code;
+     const char *instr;
+};
+static test_table_entry_t test_table [] =
+{
+    { 0x91000240, "add x0, x18, #0x0, lsl #0"         },
+    { 0x9140041f, "add sp, x0, #0x1, lsl #12"         },
+    { 0x917ffff2, "add x18, sp, #0xfff, lsl #12"      },
+
+    { 0xd13ffe40, "sub x0, x18, #0xfff, lsl #0"       },
+    { 0xd140001f, "sub sp, x0, #0x0, lsl #12"         },
+    { 0xd14007f2, "sub x18, sp, #0x1, lsl #12"        },
+
+    { 0x8b1e0200, "add x0, x16, x30, lsl #0"          },
+    { 0x8b507fdf, "add xzr, x30, x16, lsr #31"        },
+    { 0x8b8043f0, "add x16, xzr, x0, asr #16"         },
+    { 0x8b5f401e, "add x30, x0, xzr, lsr #16"         },
+
+
+    { 0x4b1e0200, "sub w0, w16, w30, lsl #0"          },
+    { 0x4b507fdf, "sub wzr, w30, w16, lsr #31"        },
+    { 0x4b8043f0, "sub w16, wzr, w0, asr #16"         },
+    { 0x4b5f401e, "sub w30, w0, wzr, lsr #16"         },
+
+    { 0x6b1e0200, "subs w0, w16, w30, lsl #0"         },
+    { 0x6b507fdf, "subs wzr, w30, w16, lsr #31"       },
+    { 0x6b8043f0, "subs w16, wzr, w0, asr #16"        },
+    { 0x6b5f401e, "subs w30, w0, wzr, lsr #16"        },
+
+    { 0x0a1e0200, "and w0, w16, w30, lsl #0"          },
+    { 0x0a507fdf, "and wzr, w30, w16, lsr #31"        },
+    { 0x0a8043f0, "and w16, wzr, w0, asr #16"         },
+    { 0x0adf401e, "and w30, w0, wzr, ror #16"         },
+
+    { 0x2a1e0200, "orr w0, w16, w30, lsl #0"          },
+    { 0x2a507fdf, "orr wzr, w30, w16, lsr #31"        },
+    { 0x2a8043f0, "orr w16, wzr, w0, asr #16"         },
+    { 0x2adf401e, "orr w30, w0, wzr, ror #16"         },
+
+    { 0x2a3e0200, "orn w0, w16, w30, lsl #0"          },
+    { 0x2a707fdf, "orn wzr, w30, w16, lsr #31"        },
+    { 0x2aa043f0, "orn w16, wzr, w0, asr #16"         },
+    { 0x2aff401e, "orn w30, w0, wzr, ror #16"         },
+
+    { 0x729fffe0, "movk w0, #0xffff, lsl #0"          },
+    { 0x72a0000f, "movk w15, #0x0, lsl #16"           },
+    { 0x7281fffe, "movk w30, #0xfff, lsl #0"          },
+    { 0x72a0003f, "movk wzr, #0x1, lsl #16"           },
+
+    { 0x529fffe0, "movz w0, #0xffff, lsl #0"          },
+    { 0x52a0000f, "movz w15, #0x0, lsl #16"           },
+    { 0x5281fffe, "movz w30, #0xfff, lsl #0"          },
+    { 0x52a0003f, "movz wzr, #0x1, lsl #16"           },
+
+    { 0xd29fffe0, "movz x0, #0xffff, lsl #0"          },
+    { 0xd2a0000f, "movz x15, #0x0, lsl #16"           },
+    { 0xd2c1fffe, "movz x30, #0xfff, lsl #32"         },
+    { 0xd2e0003f, "movz xzr, #0x1, lsl #48"           },
+
+    { 0x1a8003e0, "csel w0, wzr, w0, eq"              },
+    { 0x1a831001, "csel w1, w0, w3, ne"               },
+    { 0x1a9e2022, "csel w2, w1, w30, cs"              },
+    { 0x1a8a3083, "csel w3, w4, w10, cc"              },
+    { 0x1a8b40e4, "csel w4, w7, w11, mi"              },
+    { 0x1a9b5105, "csel w5, w8, w27, pl"              },
+    { 0x1a846167, "csel w7, w11, w4, vs"              },
+    { 0x1a8671c8, "csel w8, w14, w6, vc"              },
+    { 0x1a878289, "csel w9, w20, w7, hi"              },
+    { 0x1a8c92aa, "csel w10, w21, w12, ls"            },
+    { 0x1a8ea2ce, "csel w14, w22, w14, ge"            },
+    { 0x1a9fb3b2, "csel w18, w29, wzr, lt"            },
+    { 0x1a9fc3d8, "csel w24, w30, wzr, gt"            },
+    { 0x1a82d17e, "csel w30, w11, w2, le"             },
+    { 0x1a81e19f, "csel wzr, w12, w1, al"             },
+
+    { 0x9a8003e0, "csel x0, xzr, x0, eq"              },
+    { 0x9a831001, "csel x1, x0, x3, ne"               },
+    { 0x9a9e2022, "csel x2, x1, x30, cs"              },
+    { 0x9a8a3083, "csel x3, x4, x10, cc"              },
+    { 0x9a8b40e4, "csel x4, x7, x11, mi"              },
+    { 0x9a9b5105, "csel x5, x8, x27, pl"              },
+    { 0x9a846167, "csel x7, x11, x4, vs"              },
+    { 0x9a8671c8, "csel x8, x14, x6, vc"              },
+    { 0x9a878289, "csel x9, x20, x7, hi"              },
+    { 0x9a8c92aa, "csel x10, x21, x12, ls"            },
+    { 0x9a8ea2ce, "csel x14, x22, x14, ge"            },
+    { 0x9a9fb3b2, "csel x18, x29, xzr, lt"            },
+    { 0x9a9fc3d8, "csel x24, x30, xzr, gt"            },
+    { 0x9a82d17e, "csel x30, x11, x2, le"             },
+    { 0x9a81e19f, "csel xzr, x12, x1, al"             },
+
+    { 0x5a8003e0, "csinv w0, wzr, w0, eq"             },
+    { 0x5a831001, "csinv w1, w0, w3, ne"              },
+    { 0x5a9e2022, "csinv w2, w1, w30, cs"             },
+    { 0x5a8a3083, "csinv w3, w4, w10, cc"             },
+    { 0x5a8b40e4, "csinv w4, w7, w11, mi"             },
+    { 0x5a9b5105, "csinv w5, w8, w27, pl"             },
+    { 0x5a846167, "csinv w7, w11, w4, vs"             },
+    { 0x5a8671c8, "csinv w8, w14, w6, vc"             },
+    { 0x5a878289, "csinv w9, w20, w7, hi"             },
+    { 0x5a8c92aa, "csinv w10, w21, w12, ls"           },
+    { 0x5a8ea2ce, "csinv w14, w22, w14, ge"           },
+    { 0x5a9fb3b2, "csinv w18, w29, wzr, lt"           },
+    { 0x5a9fc3d8, "csinv w24, w30, wzr, gt"           },
+    { 0x5a82d17e, "csinv w30, w11, w2, le"            },
+    { 0x5a81e19f, "csinv wzr, w12, w1, al"            },
+
+    { 0x1b1f3fc0, "madd w0, w30, wzr, w15"            },
+    { 0x1b0079ef, "madd w15, w15, w0, w30"            },
+    { 0x1b0f7ffe, "madd w30, wzr, w15, wzr"           },
+    { 0x1b1e001f, "madd wzr, w0, w30, w0"             },
+
+    { 0x9b3f3fc0, "smaddl x0, w30, wzr, x15"          },
+    { 0x9b2079ef, "smaddl x15, w15, w0, x30"          },
+    { 0x9b2f7ffe, "smaddl x30, wzr, w15, xzr"         },
+    { 0x9b3e001f, "smaddl xzr, w0, w30, x0"           },
+
+    { 0xd65f0000, "ret x0"                            },
+    { 0xd65f01e0, "ret x15"                           },
+    { 0xd65f03c0, "ret x30"                           },
+    { 0xd65f03e0, "ret xzr"                           },
+
+    { 0xb87f4be0, "ldr w0, [sp, wzr, uxtw #0]"        },
+    { 0xb87ed80f, "ldr w15, [x0, w30, sxtw #2]"       },
+    { 0xb86fc9fe, "ldr w30, [x15, w15, sxtw #0]"      },
+    { 0xb8605bdf, "ldr wzr, [x30, w0, uxtw #2]"       },
+    { 0xb87febe0, "ldr w0, [sp, xzr, sxtx #0]"        },
+    { 0xb87e780f, "ldr w15, [x0, x30, lsl #2]"        },
+    { 0xb86f69fe, "ldr w30, [x15, x15, lsl #0]"       },
+    { 0xb860fbdf, "ldr wzr, [x30, x0, sxtx #2]"       },
+
+    { 0xb83f4be0, "str w0, [sp, wzr, uxtw #0]"        },
+    { 0xb83ed80f, "str w15, [x0, w30, sxtw #2]"       },
+    { 0xb82fc9fe, "str w30, [x15, w15, sxtw #0]"      },
+    { 0xb8205bdf, "str wzr, [x30, w0, uxtw #2]"       },
+    { 0xb83febe0, "str w0, [sp, xzr, sxtx #0]"        },
+    { 0xb83e780f, "str w15, [x0, x30, lsl #2]"        },
+    { 0xb82f69fe, "str w30, [x15, x15, lsl #0]"       },
+    { 0xb820fbdf, "str wzr, [x30, x0, sxtx #2]"       },
+
+    { 0x787f4be0, "ldrh w0, [sp, wzr, uxtw #0]"       },
+    { 0x787ed80f, "ldrh w15, [x0, w30, sxtw #1]"      },
+    { 0x786fc9fe, "ldrh w30, [x15, w15, sxtw #0]"     },
+    { 0x78605bdf, "ldrh wzr, [x30, w0, uxtw #1]"      },
+    { 0x787febe0, "ldrh w0, [sp, xzr, sxtx #0]"       },
+    { 0x787e780f, "ldrh w15, [x0, x30, lsl #1]"       },
+    { 0x786f69fe, "ldrh w30, [x15, x15, lsl #0]"      },
+    { 0x7860fbdf, "ldrh wzr, [x30, x0, sxtx #1]"      },
+
+    { 0x783f4be0, "strh w0, [sp, wzr, uxtw #0]"       },
+    { 0x783ed80f, "strh w15, [x0, w30, sxtw #1]"      },
+    { 0x782fc9fe, "strh w30, [x15, w15, sxtw #0]"     },
+    { 0x78205bdf, "strh wzr, [x30, w0, uxtw #1]"      },
+    { 0x783febe0, "strh w0, [sp, xzr, sxtx #0]"       },
+    { 0x783e780f, "strh w15, [x0, x30, lsl #1]"       },
+    { 0x782f69fe, "strh w30, [x15, x15, lsl #0]"      },
+    { 0x7820fbdf, "strh wzr, [x30, x0, sxtx #1]"      },
+
+    { 0x387f5be0, "ldrb w0, [sp, wzr, uxtw #0]"       },
+    { 0x387ec80f, "ldrb w15, [x0, w30, sxtw ]"        },
+    { 0x386fd9fe, "ldrb w30, [x15, w15, sxtw #0]"     },
+    { 0x38604bdf, "ldrb wzr, [x30, w0, uxtw ]"        },
+    { 0x387ffbe0, "ldrb w0, [sp, xzr, sxtx #0]"       },
+    { 0x387e780f, "ldrb w15, [x0, x30, lsl #0]"       },
+    { 0x386f79fe, "ldrb w30, [x15, x15, lsl #0]"      },
+    { 0x3860ebdf, "ldrb wzr, [x30, x0, sxtx ]"        },
+
+    { 0x383f5be0, "strb w0, [sp, wzr, uxtw #0]"       },
+    { 0x383ec80f, "strb w15, [x0, w30, sxtw ]"        },
+    { 0x382fd9fe, "strb w30, [x15, w15, sxtw #0]"     },
+    { 0x38204bdf, "strb wzr, [x30, w0, uxtw ]"        },
+    { 0x383ffbe0, "strb w0, [sp, xzr, sxtx #0]"       },
+    { 0x383e780f, "strb w15, [x0, x30, lsl #0]"       },
+    { 0x382f79fe, "strb w30, [x15, x15, lsl #0]"      },
+    { 0x3820ebdf, "strb wzr, [x30, x0, sxtx ]"        },
+
+    { 0xf87f4be0, "ldr x0, [sp, wzr, uxtw #0]"        },
+    { 0xf87ed80f, "ldr x15, [x0, w30, sxtw #3]"       },
+    { 0xf86fc9fe, "ldr x30, [x15, w15, sxtw #0]"      },
+    { 0xf8605bdf, "ldr xzr, [x30, w0, uxtw #3]"       },
+    { 0xf87febe0, "ldr x0, [sp, xzr, sxtx #0]"        },
+    { 0xf87e780f, "ldr x15, [x0, x30, lsl #3]"        },
+    { 0xf86f69fe, "ldr x30, [x15, x15, lsl #0]"       },
+    { 0xf860fbdf, "ldr xzr, [x30, x0, sxtx #3]"       },
+
+    { 0xf83f4be0, "str x0, [sp, wzr, uxtw #0]"        },
+    { 0xf83ed80f, "str x15, [x0, w30, sxtw #3]"       },
+    { 0xf82fc9fe, "str x30, [x15, w15, sxtw #0]"      },
+    { 0xf8205bdf, "str xzr, [x30, w0, uxtw #3]"       },
+    { 0xf83febe0, "str x0, [sp, xzr, sxtx #0]"        },
+    { 0xf83e780f, "str x15, [x0, x30, lsl #3]"        },
+    { 0xf82f69fe, "str x30, [x15, x15, lsl #0]"       },
+    { 0xf820fbdf, "str xzr, [x30, x0, sxtx #3]"       },
+
+    { 0xb85007e0, "ldr w0, [sp], #-256"               },
+    { 0xb840040f, "ldr w15, [x0], #0"                 },
+    { 0xb84015fe, "ldr w30, [x15], #1"                },
+    { 0xb84ff7df, "ldr wzr, [x30], #255"              },
+    { 0xb8100fe0, "str w0, [sp, #-256]!"              },
+    { 0xb8000c0f, "str w15, [x0, #0]!"                },
+    { 0xb8001dfe, "str w30, [x15, #1]!"               },
+    { 0xb80fffdf, "str wzr, [x30, #255]!"             },
+
+    { 0x13017be0, "sbfm w0, wzr, #1, #30"             },
+    { 0x131e7fcf, "sbfm w15, w30, #30, #31"           },
+    { 0x131f01fe, "sbfm w30, w15, #31, #0"            },
+    { 0x1300041f, "sbfm wzr, w0, #0, #1"              },
+
+    { 0x53017be0, "ubfm w0, wzr, #1, #30"             },
+    { 0x531e7fcf, "ubfm w15, w30, #30, #31"           },
+    { 0x531f01fe, "ubfm w30, w15, #31, #0"            },
+    { 0x5300041f, "ubfm wzr, w0, #0, #1"              },
+    { 0xd3417fe0, "ubfm x0, xzr, #1, #31"             },
+    { 0xd35fffcf, "ubfm x15, x30, #31, #63"           },
+    { 0xd35f01fe, "ubfm x30, x15, #31, #0"            },
+    { 0xd340041f, "ubfm xzr, x0, #0, #1"              },
+
+    { 0x139e7be0, "extr w0, wzr, w30, #30"            },
+    { 0x138f7fcf, "extr w15, w30, w15, #31"           },
+    { 0x138001fe, "extr w30, w15, w0, #0"             },
+    { 0x139f041f, "extr wzr, w0, wzr, #1"             },
+
+    { 0x54000020, "b.eq #.+4"                         },
+    { 0x54000201, "b.ne #.+64"                        },
+    { 0x54000802, "b.cs #.+256"                       },
+    { 0x54002003, "b.cc #.+1024"                      },
+    { 0x54008004, "b.mi #.+4096"                      },
+    { 0x54ffffe5, "b.pl #.-4"                         },
+    { 0x54ffff06, "b.vs #.-32"                        },
+    { 0x54fffc07, "b.vc #.-128"                       },
+    { 0x54fff008, "b.hi #.-512"                       },
+    { 0x54000049, "b.ls #.+8"                         },
+    { 0x5400006a, "b.ge #.+12"                        },
+    { 0x5400008b, "b.lt #.+16"                        },
+    { 0x54ffffcc, "b.gt #.-8"                         },
+    { 0x54ffffad, "b.le #.-12"                        },
+    { 0x54ffff8e, "b.al #.-16"                        },
+
+    { 0x8b2001e0, "add x0, x15, w0, uxtb #0"          },
+    { 0x8b2f27cf, "add x15, x30, w15, uxth #1"        },
+    { 0x8b3e4bfe, "add x30, sp, w30, uxtw #2"         },
+    { 0x8b3f6c1f, "add sp, x0, xzr, uxtx #3"          },
+    { 0x8b2091e0, "add x0, x15, w0, sxtb #4"          },
+    { 0x8b2fa3cf, "add x15, x30, w15, sxth #0"        },
+    { 0x8b3ec7fe, "add x30, sp, w30, sxtw #1"         },
+    { 0x8b3fe81f, "add sp, x0, xzr, sxtx #2"          },
+
+    { 0xcb2001e0, "sub x0, x15, w0, uxtb #0"          },
+    { 0xcb2f27cf, "sub x15, x30, w15, uxth #1"        },
+    { 0xcb3e4bfe, "sub x30, sp, w30, uxtw #2"         },
+    { 0xcb3f6c1f, "sub sp, x0, xzr, uxtx #3"          },
+    { 0xcb2091e0, "sub x0, x15, w0, sxtb #4"          },
+    { 0xcb2fa3cf, "sub x15, x30, w15, sxth #0"        },
+    { 0xcb3ec7fe, "sub x30, sp, w30, sxtw #1"         },
+    { 0xcb3fe81f, "sub sp, x0, xzr, sxtx #2"          }
+};
+
+int main()
+{
+    char instr[256];
+    uint32_t failed = 0;
+    for(uint32_t i = 0; i < sizeof(test_table)/sizeof(test_table_entry_t); ++i)
+    {
+        test_table_entry_t *test;
+        test = &test_table[i];
+        aarch64_disassemble(test->code, instr);
+        if(strcmp(instr, test->instr) != 0)
+        {
+            printf("Test Failed \n"
+                   "Code     : 0x%0x\n"
+                   "Expected : %s\n"
+                   "Actual   : %s\n", test->code, test->instr, instr);
+            failed++;
+        }
+    }
+    if(failed == 0)
+    {
+        printf("All tests PASSED\n");
+        return 0;
+    }
+    else
+    {
+        printf("%d tests FAILED\n", failed);
+        return -1;
+    }
+}
diff --git a/libpixelflinger/tests/arch-aarch64/t32cb16blend/Android.mk b/libpixelflinger/tests/arch-aarch64/t32cb16blend/Android.mk
new file mode 100644
index 0000000..a67f0e3
--- /dev/null
+++ b/libpixelflinger/tests/arch-aarch64/t32cb16blend/Android.mk
@@ -0,0 +1,16 @@
+LOCAL_PATH:= $(call my-dir)
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES:= \
+    t32cb16blend_test.c \
+    ../../../arch-aarch64/t32cb16blend.S
+
+LOCAL_SHARED_LIBRARIES :=
+
+LOCAL_C_INCLUDES :=
+
+LOCAL_MODULE:= test-pixelflinger-aarch64-t32cb16blend
+
+LOCAL_MODULE_TAGS := tests
+
+include $(BUILD_EXECUTABLE)
diff --git a/libpixelflinger/tests/arch-aarch64/t32cb16blend/t32cb16blend_test.c b/libpixelflinger/tests/arch-aarch64/t32cb16blend/t32cb16blend_test.c
new file mode 100644
index 0000000..bcde3e6
--- /dev/null
+++ b/libpixelflinger/tests/arch-aarch64/t32cb16blend/t32cb16blend_test.c
@@ -0,0 +1,134 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+
+#define ARGB_8888_MAX   0xFFFFFFFF
+#define ARGB_8888_MIN   0x00000000
+#define RGB_565_MAX     0xFFFF
+#define RGB_565_MIN     0x0000
+
+struct test_t
+{
+    char name[256];
+    uint32_t src_color;
+    uint16_t dst_color;
+    size_t count;
+};
+
+struct test_t tests[] =
+{
+    {"Count 0", 0, 0, 0},
+    {"Count 1, Src=Max, Dst=Min", ARGB_8888_MAX, RGB_565_MIN, 1},
+    {"Count 2, Src=Min, Dst=Max", ARGB_8888_MIN, RGB_565_MAX, 2},
+    {"Count 3, Src=Max, Dst=Max", ARGB_8888_MAX, RGB_565_MAX, 3},
+    {"Count 4, Src=Min, Dst=Min", ARGB_8888_MAX, RGB_565_MAX, 4},
+    {"Count 1, Src=Rand, Dst=Rand", 0x12345678, 0x9ABC, 1},
+    {"Count 2, Src=Rand, Dst=Rand", 0xABCDEF12, 0x2345, 2},
+    {"Count 3, Src=Rand, Dst=Rand", 0x11111111, 0xEDFE, 3},
+    {"Count 4, Src=Rand, Dst=Rand", 0x12345678, 0x9ABC, 4},
+    {"Count 5, Src=Rand, Dst=Rand", 0xEFEFFEFE, 0xFACC, 5},
+    {"Count 10, Src=Rand, Dst=Rand", 0x12345678, 0x9ABC, 10}
+
+};
+
+void scanline_t32cb16blend_aarch64(uint16_t*, uint32_t*, size_t);
+void scanline_t32cb16blend_c(uint16_t * dst, uint32_t* src, size_t count)
+{
+    while (count--)
+    {
+        uint16_t d = *dst;
+        uint32_t s = *src++;
+        int dstR = (d>>11)&0x1f;
+        int dstG = (d>>5)&0x3f;
+        int dstB = (d)&0x1f;
+        int srcR = (s >> (   3))&0x1F;
+        int srcG = (s >> ( 8+2))&0x3F;
+        int srcB = (s >> (16+3))&0x1F;
+        int srcAlpha = (s>>24) & 0xFF;
+
+
+        int f = 0x100 - (srcAlpha + ((srcAlpha>>7) & 0x1));
+        srcR += (f*dstR)>>8;
+        srcG += (f*dstG)>>8;
+        srcB += (f*dstB)>>8;
+        srcR = srcR > 0x1F? 0x1F: srcR;
+        srcG = srcG > 0x3F? 0x3F: srcG;
+        srcB = srcB > 0x1F? 0x1F: srcB;
+        *dst++ = (uint16_t)((srcR<<11)|(srcG<<5)|srcB);
+    }
+}
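+
+// The channel sums can exceed the 5/6-bit ranges (src channel plus the
+// scaled dst channel), hence the explicit clamps to 0x1F/0x3F/0x1F
+// before repacking into 565.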
+
+void scanline_t32cb16blend_test()
+{
+    uint16_t dst_c[16], dst_asm[16];
+    uint32_t src[16];
+    uint32_t i, j;
+
+    for(i = 0; i < sizeof(tests)/sizeof(struct test_t); ++i)
+    {
+        struct test_t test = tests[i];
+
+        printf("Testing - %s:",test.name);
+
+        memset(dst_c, 0, sizeof(dst_c));
+        memset(dst_asm, 0, sizeof(dst_asm));
+
+        for(j = 0; j < test.count; ++j)
+        {
+            dst_c[j]   = test.dst_color;
+            dst_asm[j] = test.dst_color;
+            src[j] = test.src_color;
+        }
+
+        scanline_t32cb16blend_c(dst_c,src,test.count);
+        scanline_t32cb16blend_aarch64(dst_asm,src,test.count);
+
+
+        if(memcmp(dst_c, dst_asm, sizeof(dst_c)) == 0)
+            printf("Passed\n");
+        else
+            printf("Failed\n");
+
+        for(j = 0; j < test.count; ++j)
+        {
+            printf("dst_c[%d] = %x, dst_asm[%d] = %x \n", j, dst_c[j], j, dst_asm[j]);
+        }
+    }
+}
+
+int main()
+{
+    scanline_t32cb16blend_test();
+    return 0;
+}
diff --git a/libpixelflinger/tests/codegen/codegen.cpp b/libpixelflinger/tests/codegen/codegen.cpp
index 3d5a040..e8a4f5e 100644
--- a/libpixelflinger/tests/codegen/codegen.cpp
+++ b/libpixelflinger/tests/codegen/codegen.cpp
@@ -10,8 +10,9 @@
 #include "codeflinger/GGLAssembler.h"
 #include "codeflinger/ARMAssembler.h"
 #include "codeflinger/MIPSAssembler.h"
+#include "codeflinger/Aarch64Assembler.h"
 
-#if defined(__arm__) || defined(__mips__)
+#if defined(__arm__) || defined(__mips__) || defined(__aarch64__)
 #   define ANDROID_ARM_CODEGEN  1
 #else
 #   define ANDROID_ARM_CODEGEN  0
@@ -19,6 +20,8 @@
 
 #if defined (__mips__)
 #define ASSEMBLY_SCRATCH_SIZE   4096
+#elif defined(__aarch64__)
+#define ASSEMBLY_SCRATCH_SIZE   8192
 #else
 #define ASSEMBLY_SCRATCH_SIZE   2048
 #endif
@@ -53,13 +56,17 @@
     GGLAssembler assembler( new ArmToMipsAssembler(a) );
 #endif
 
+#if defined(__aarch64__)
+    GGLAssembler assembler( new ArmToAarch64Assembler(a) );
+#endif
+
     int err = assembler.scanline(needs, (context_t*)c);
     if (err != 0) {
         printf("error %08x (%s)\n", err, strerror(-err));
     }
     gglUninit(c);
 #else
-    printf("This test runs only on ARM or MIPS\n");
+    printf("This test runs only on ARM, Aarch64 or MIPS\n");
 #endif
 }
 
diff --git a/libpixelflinger/tests/gglmul/Android.mk b/libpixelflinger/tests/gglmul/Android.mk
new file mode 100644
index 0000000..64f88b7
--- /dev/null
+++ b/libpixelflinger/tests/gglmul/Android.mk
@@ -0,0 +1,16 @@
+LOCAL_PATH:= $(call my-dir)
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES:= \
+	gglmul_test.cpp
+
+LOCAL_SHARED_LIBRARIES :=
+
+LOCAL_C_INCLUDES := \
+	system/core/libpixelflinger
+
+LOCAL_MODULE:= test-pixelflinger-gglmul
+
+LOCAL_MODULE_TAGS := tests
+
+include $(BUILD_EXECUTABLE)
diff --git a/libpixelflinger/tests/gglmul/gglmul_test.cpp b/libpixelflinger/tests/gglmul/gglmul_test.cpp
new file mode 100644
index 0000000..103e4e9
--- /dev/null
+++ b/libpixelflinger/tests/gglmul/gglmul_test.cpp
@@ -0,0 +1,279 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <inttypes.h>
+
+#include "private/pixelflinger/ggl_fixed.h"
+
+// gglClampx() tests
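+// GGLfixed is a 16.16 fixed-point type, so FIXED_ONE is 0x00010000 and
+// gglClampx() is expected to clamp its argument to [0, FIXED_ONE].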
+struct gglClampx_test_t
+{
+    GGLfixed input;
+    GGLfixed output;
+};
+
+gglClampx_test_t gglClampx_tests[] =
+{
+    {FIXED_ONE + 1, FIXED_ONE},
+    {FIXED_ONE, FIXED_ONE},
+    {FIXED_ONE - 1, FIXED_ONE - 1},
+    {1, 1},
+    {0, 0},
+    {FIXED_MIN,0}
+};
+
+void gglClampx_test()
+{
+    uint32_t i;
+
+    printf("Testing gglClampx\n");
+    for(i = 0; i < sizeof(gglClampx_tests)/sizeof(gglClampx_test_t); ++i)
+    {
+        gglClampx_test_t *test = &gglClampx_tests[i];
+        printf("Test input=0x%08x output=0x%08x :",
+                test->input, test->output);
+        if(gglClampx(test->input) == test->output)
+            printf("Passed\n");
+        else
+            printf("Failed\n");
+    }
+}
+
+// gglClz() tests
+struct gglClz_test_t
+{
+    GGLfixed input;
+    GGLfixed output;
+};
+
+gglClz_test_t gglClz_tests[] =
+{
+    {0, 32},
+    {1, 31},
+    {-1,0}
+};
+
+void gglClz_test()
+{
+    uint32_t i;
+
+    printf("Testing gglClz\n");
+    for(i = 0; i < sizeof(gglClz_tests)/sizeof(gglClz_test_t); ++i)
+    {
+        gglClz_test_t *test = &gglClz_tests[i];
+        printf("Test input=0x%08x output=%2d :", test->input, test->output);
+        if(gglClz(test->input) == test->output)
+            printf("Passed\n");
+        else
+            printf("Failed\n");
+    }
+}
+
+// gglMulx() tests
+struct gglMulx_test_t
+{
+    GGLfixed x;
+    GGLfixed y;
+    int      shift;
+};
+
+gglMulx_test_t gglMulx_tests[] =
+{
+    {1,1,1},
+    {0,1,1},
+    {FIXED_ONE,FIXED_ONE,16},
+    {FIXED_MIN,FIXED_MAX,16},
+    {FIXED_MAX,FIXED_MAX,16},
+    {FIXED_MIN,FIXED_MIN,16},
+    {FIXED_HALF,FIXED_ONE,16},
+    {FIXED_MAX,FIXED_MAX,31},
+    {FIXED_ONE,FIXED_MAX,31}
+};
+
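+// gglMulx() rounds to nearest (note the (1 << (shift-1)) term in the
+// expected value below); gglMulAddx() and gglMulSubx() truncate the
+// product before adding or subtracting a.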
+void gglMulx_test()
+{
+    uint32_t i;
+    GGLfixed actual, expected;
+
+    printf("Testing gglMulx\n");
+    for(i = 0; i < sizeof(gglMulx_tests)/sizeof(gglMulx_test_t); ++i)
+    {
+        gglMulx_test_t *test = &gglMulx_tests[i];
+        printf("Test x=0x%08x y=0x%08x shift=%2d :",
+                test->x, test->y, test->shift);
+        actual = gglMulx(test->x, test->y, test->shift);
+        expected =
+          ((int64_t)test->x * test->y + (1 << (test->shift-1))) >> test->shift;
+        if(actual == expected)
+            printf(" Passed\n");
+        else
+            printf(" Failed Actual(0x%08x) Expected(0x%08x)\n",
+                   actual, expected);
+    }
+}
+// gglMulAddx() tests
+struct gglMulAddx_test_t
+{
+    GGLfixed x;
+    GGLfixed y;
+    int      shift;
+    GGLfixed a;
+};
+
+gglMulAddx_test_t gglMulAddx_tests[] =
+{
+    {1,2,1,1},
+    {0,1,1,1},
+    {FIXED_ONE,FIXED_ONE,16, 0},
+    {FIXED_MIN,FIXED_MAX,16, FIXED_HALF},
+    {FIXED_MAX,FIXED_MAX,16, FIXED_MIN},
+    {FIXED_MIN,FIXED_MIN,16, FIXED_MAX},
+    {FIXED_HALF,FIXED_ONE,16,FIXED_ONE},
+    {FIXED_MAX,FIXED_MAX,31, FIXED_HALF},
+    {FIXED_ONE,FIXED_MAX,31, FIXED_HALF}
+};
+
+void gglMulAddx_test()
+{
+    uint32_t i;
+    GGLfixed actual, expected;
+
+    printf("Testing gglMulAddx\n");
+    for(i = 0; i < sizeof(gglMulAddx_tests)/sizeof(gglMulAddx_test_t); ++i)
+    {
+        gglMulAddx_test_t *test = &gglMulAddx_tests[i];
+        printf("Test x=0x%08x y=0x%08x shift=%2d a=0x%08x :",
+                test->x, test->y, test->shift, test->a);
+        actual = gglMulAddx(test->x, test->y,test->a, test->shift);
+        expected = (((int64_t)test->x * test->y) >> test->shift) + test->a;
+
+        if(actual == expected)
+            printf(" Passed\n");
+        else
+            printf(" Failed Actual(0x%08x) Expected(0x%08x)\n",
+                    actual, expected);
+    }
+}
+// gglMulSubx() tests
+struct gglMulSubx_test_t
+{
+    GGLfixed x;
+    GGLfixed y;
+    int      shift;
+    GGLfixed a;
+};
+
+gglMulSubx_test_t gglMulSubx_tests[] =
+{
+    {1,2,1,1},
+    {0,1,1,1},
+    {FIXED_ONE,FIXED_ONE,16, 0},
+    {FIXED_MIN,FIXED_MAX,16, FIXED_HALF},
+    {FIXED_MAX,FIXED_MAX,16, FIXED_MIN},
+    {FIXED_MIN,FIXED_MIN,16, FIXED_MAX},
+    {FIXED_HALF,FIXED_ONE,16,FIXED_ONE},
+    {FIXED_MAX,FIXED_MAX,31, FIXED_HALF},
+    {FIXED_ONE,FIXED_MAX,31, FIXED_HALF}
+};
+
+void gglMulSubx_test()
+{
+    uint32_t i;
+    GGLfixed actual, expected;
+
+    printf("Testing gglMulSubx\n");
+    for(i = 0; i < sizeof(gglMulSubx_tests)/sizeof(gglMulSubx_test_t); ++i)
+    {
+        gglMulSubx_test_t *test = &gglMulSubx_tests[i];
+        printf("Test x=0x%08x y=0x%08x shift=%2d a=0x%08x :",
+                test->x, test->y, test->shift, test->a);
+        actual = gglMulSubx(test->x, test->y, test->a, test->shift);
+        expected = (((int64_t)test->x * test->y) >> test->shift) - test->a;
+
+        if(actual == expected)
+            printf(" Passed\n");
+        else
+            printf(" Failed Actual(0x%08x) Expected(0x%08x)\n",
+                actual, expected);
+    }
+}
+
+// gglMulii() tests
+// INT32_MAX and INT32_MIN come from <stdint.h>; redefining them locally
+// would collide with those macros on a modern toolchain.
+
+struct gglMulii_test_t
+{
+    int32_t x;
+    int32_t y;
+};
+
+gglMulii_test_t gglMulii_tests[] =
+{
+    {1,INT32_MIN},
+    {1,INT32_MAX},
+    {0,INT32_MIN},
+    {0,INT32_MAX},
+    {INT32_MIN, INT32_MAX},
+    {INT32_MAX, INT32_MIN},
+    {INT32_MIN, INT32_MIN},
+    {INT32_MAX, INT32_MAX}
+};
+
+void gglMulii_test()
+{
+    uint32_t i;
+    int64_t actual, expected;
+
+    printf("Testing gglMulii\n");
+    for(i = 0; i < sizeof(gglMulii_tests)/sizeof(gglMulii_test_t); ++i)
+    {
+        gglMulii_test_t *test = &gglMulii_tests[i];
+        printf("Test x=0x%08x y=0x%08x :", test->x, test->y);
+        actual = gglMulii(test->x, test->y);
+        expected = ((int64_t)test->x * test->y);
+
+        if(actual == expected)
+            printf(" Passed\n");
+        else
+            printf(" Failed Actual(%ld) Expected(%ld)\n",
+                    actual, expected);
+    }
+}
+
+int main(int argc, char** argv)
+{
+    gglClampx_test();
+    gglClz_test();
+    gglMulx_test();
+    gglMulAddx_test();
+    gglMulSubx_test();
+    gglMulii_test();
+    return 0;
+}