Merge "Add stub for x86 & x86_64."
diff --git a/cpu_ref/linkloader/include/impl/ELFObject.hxx b/cpu_ref/linkloader/include/impl/ELFObject.hxx
index 412fded..b06ccd0 100644
--- a/cpu_ref/linkloader/include/impl/ELFObject.hxx
+++ b/cpu_ref/linkloader/include/impl/ELFObject.hxx
@@ -506,7 +506,6 @@
 #undef SIGN_EXTEND
 
         void *callee_addr = sym->getAddress(EM_AARCH64);
-        bool try_direct = false;        // Try to use a direct call?
         bool call_via_stub = false;     // Call via a stub (linker veneer).
 
         switch (sym->getType()) {
@@ -524,7 +523,6 @@
                    "sym->getAddress(EM_ARM) function call.");
             abort();
           }
-          try_direct = true;
           break;
 
         case STT_NOTYPE:
@@ -541,16 +539,13 @@
           break;
         }
 
-        uint32_t result = 0;
+        S = reinterpret_cast<int64_t>(callee_addr);
+        uint32_t result = (S + A - P) >> 2;
+
         // See if we can do the branch without a stub.
-        if (try_direct) {
-          S = reinterpret_cast<int64_t>(callee_addr);
-          result = (S + A - P) >> 2;
-          call_via_stub = false;        // Assume it's in range.
-          if (result > 0x01FFFFFF && result < 0xFE000000) {
-            // Not in range, need a stub.
-            call_via_stub = true;
-          }
+        if (result > 0x01FFFFFF && result < 0xFE000000) {
+          // Not in range, need a stub.
+          call_via_stub = true;
         }
 
         // Calling via a stub makes a BL instruction to a stub containing the following code:
@@ -567,6 +562,7 @@
           StubLayout *stub_layout = text->getStubLayout();
 
           if (!stub_layout) {
+            __android_log_print(ANDROID_LOG_ERROR, "rs", "unable to get stub layout\n");
             llvm::errs() << "unable to get stub layout." << "\n";
             abort();
           }
@@ -574,6 +570,7 @@
           void *stub = stub_layout->allocateStub(callee_addr);
 
           if (!stub) {
+            __android_log_print(ANDROID_LOG_ERROR, "rs", "unable to get allocate stub\n");
             llvm::errs() << "unable to allocate stub." << "\n";
             abort();
           }
@@ -584,6 +581,7 @@
           result = (S + A - P) >> 2;
 
           if (result > 0x01FFFFFF && result < 0xFE000000) {
+            __android_log_print(ANDROID_LOG_ERROR, "rs", "stub is still too far\n");
             rsl_assert(0 && "Stub is still too far");
             abort();
           }
@@ -591,7 +589,7 @@
 
         // 'result' contains the offset from PC to the destination address, encoded
         // in the correct form for the BL or B instructions.
-        *inst32 = ((result) & 0x03FFFFFF) | (*inst & 0xFC000000);
+        *inst32 = (result & 0x03FFFFFF) | (*inst & 0xFC000000);
       }
       break;
     case R_AARCH64_MOVW_UABS_G0:
diff --git a/cpu_ref/linkloader/include/impl/ELFSectionProgBits.hxx b/cpu_ref/linkloader/include/impl/ELFSectionProgBits.hxx
index 434e5b5..cdec650 100644
--- a/cpu_ref/linkloader/include/impl/ELFSectionProgBits.hxx
+++ b/cpu_ref/linkloader/include/impl/ELFSectionProgBits.hxx
@@ -43,16 +43,19 @@
   StubLayout *stubs = result->getStubLayout();
   if (stubs) {
     // Compute the maximal possible numbers of stubs
-    std::string reltab_name(".rel" + std::string(sh->getName()));
+    max_num_stubs = 0;
+    for (const char* prefix : {".rel", ".rela"}) {
+      std::string reltab_name(prefix + std::string(sh->getName()));
 
-    ELFSectionRelTableTy const *reltab =
-      static_cast<ELFSectionRelTableTy *>(
-        owner->getSectionByName(reltab_name.c_str()));
+      ELFSectionRelTableTy const *reltab =
+        static_cast<ELFSectionRelTableTy *>(
+          owner->getSectionByName(reltab_name.c_str()));
 
-    if (reltab) {
-      // If we have relocation table, then get the approximation of
-      // maximum numbers of stubs.
-      max_num_stubs = reltab->getMaxNumStubs(owner);
+      if (reltab) {
+        // If we have a relocation table, add its approximation of the
+        // maximum number of stubs.
+        max_num_stubs += reltab->getMaxNumStubs(owner);
+      }
     }
 
     // Compute the stub table size
diff --git a/driver/runtime/Android.mk b/driver/runtime/Android.mk
index fa28a32..cec21d4 100755
--- a/driver/runtime/Android.mk
+++ b/driver/runtime/Android.mk
@@ -82,6 +82,7 @@
 LOCAL_SRC_FILES_64 := $(clcore_files_64)
 
 include $(LOCAL_PATH)/build_bc_lib.mk
+rs_debug_runtime:=
 
 # Build an optimized version of the library for x86 platforms (all have SSE2/3).
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),x86 x86_64))
diff --git a/driver/runtime/build_bc_lib_internal.mk b/driver/runtime/build_bc_lib_internal.mk
index efb6bb6..51e614b 100644
--- a/driver/runtime/build_bc_lib_internal.mk
+++ b/driver/runtime/build_bc_lib_internal.mk
@@ -50,7 +50,6 @@
 ifeq ($(rs_debug_runtime),1)
     bc_cflags += -DRS_DEBUG_RUNTIME
 endif
-rs_debug_runtime:=
 
 bc_src_files := $(LOCAL_SRC_FILES)
 bc_src_files += $(LOCAL_SRC_FILES_$(TARGET_$(LOCAL_2ND_ARCH_VAR_PREFIX)ARCH)) $(LOCAL_SRC_FILES_$(my_32_64_bit_suffix))
diff --git a/driver/runtime/ll64/allocation.ll b/driver/runtime/ll64/allocation.ll
index 6bb5877..fcbf0f2 100644
--- a/driver/runtime/ll64/allocation.ll
+++ b/driver/runtime/ll64/allocation.ll
@@ -1,8 +1,10 @@
 target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
 target triple = "aarch64-none-linux-gnueabi"
 
-declare i8* @rsOffset([1 x i64] %a.coerce, i32 %sizeOf, i32 %x, i32 %y, i32 %z)
-declare i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z)
+%struct.rs_allocation = type { i64*, i64*, i64*, i64* }
+
+declare i8* @rsOffset(%struct.rs_allocation* %a, i32 %sizeOf, i32 %x, i32 %y, i32 %z)
+declare i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z)
 
 ; The loads and stores in this file are annotated with RenderScript-specific
 ; information for the type based alias analysis, such that the TBAA analysis
@@ -28,44 +30,44 @@
 !15 = metadata !{metadata !"allocation", metadata !14}
 
 !21 = metadata !{metadata !"char", metadata !15}
-define void @rsSetElementAtImpl_char([1 x i64] %a.coerce, i8 signext %val, i32 %x, i32 %y, i32 %z) #2 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 1, i32 %x, i32 %y, i32 %z) #10
+define void @rsSetElementAtImpl_char(%struct.rs_allocation* %a, i8 signext %val, i32 %x, i32 %y, i32 %z) #2 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 1, i32 %x, i32 %y, i32 %z) #10
   store i8 %val, i8* %1, align 1, !tbaa !21
   ret void
 }
 
-define signext i8 @rsGetElementAtImpl_char([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #3 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 1, i32 %x, i32 %y, i32 %z) #10
+define signext i8 @rsGetElementAtImpl_char(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #3 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 1, i32 %x, i32 %y, i32 %z) #10
   %2 = load i8* %1, align 1, !tbaa !21
   ret i8 %2
 }
 
 !22 = metadata !{metadata !"char2", metadata !15}
-define void @rsSetElementAtImpl_char2([1 x i64] %a.coerce, <2 x i8> %val, i32 %x, i32 %y, i32 %z) #2 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 2, i32 %x, i32 %y, i32 %z) #10
+define void @rsSetElementAtImpl_char2(%struct.rs_allocation* %a, <2 x i8> %val, i32 %x, i32 %y, i32 %z) #2 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 2, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <2 x i8>*
   store <2 x i8> %val, <2 x i8>* %2, align 2, !tbaa !22
   ret void
 }
 
-define <2 x i8> @rsGetElementAtImpl_char2([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #3 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 2, i32 %x, i32 %y, i32 %z) #10
+define <2 x i8> @rsGetElementAtImpl_char2(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #3 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 2, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <2 x i8>*
   %3 = load <2 x i8>* %2, align 2, !tbaa !22
   ret <2 x i8> %3
 }
 
 !23 = metadata !{metadata !"char3", metadata !15}
-define void @rsSetElementAtImpl_char3([1 x i64] %a.coerce, <3 x i8> %val, i32 %x, i32 %y, i32 %z) #2 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 4, i32 %x, i32 %y, i32 %z) #10
+define void @rsSetElementAtImpl_char3(%struct.rs_allocation* %a, <3 x i8> %val, i32 %x, i32 %y, i32 %z) #2 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #10
   %2 = shufflevector <3 x i8> %val, <3 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
   %3 = bitcast i8* %1 to <4 x i8>*
   store <4 x i8> %2, <4 x i8>* %3, align 4, !tbaa !23
   ret void
 }
 
-define <3 x i8> @rsGetElementAtImpl_char3([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #3 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 4, i32 %x, i32 %y, i32 %z) #10
+define <3 x i8> @rsGetElementAtImpl_char3(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #3 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <4 x i8>*
   %3 = load <4 x i8>* %2, align 4, !tbaa !23
   %4 = shufflevector <4 x i8> %3, <4 x i8> undef, <3 x i32> <i32 0, i32 1, i32 2>
@@ -73,59 +75,59 @@
 }
 
 !24 = metadata !{metadata !"char4", metadata !15}
-define void @rsSetElementAtImpl_char4([1 x i64] %a.coerce, <4 x i8> %val, i32 %x, i32 %y, i32 %z) #2 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 4, i32 %x, i32 %y, i32 %z) #10
+define void @rsSetElementAtImpl_char4(%struct.rs_allocation* %a, <4 x i8> %val, i32 %x, i32 %y, i32 %z) #2 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <4 x i8>*
   store <4 x i8> %val, <4 x i8>* %2, align 4, !tbaa !24
   ret void
 }
 
-define <4 x i8> @rsGetElementAtImpl_char4([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #3 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 4, i32 %x, i32 %y, i32 %z) #10
+define <4 x i8> @rsGetElementAtImpl_char4(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #3 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <4 x i8>*
   %3 = load <4 x i8>* %2, align 4, !tbaa !24
   ret <4 x i8> %3
 }
 
 !25 = metadata !{metadata !"uchar", metadata !15}
-define void @rsSetElementAtImpl_uchar([1 x i64] %a.coerce, i8 zeroext %val, i32 %x, i32 %y, i32 %z) #2 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 1, i32 %x, i32 %y, i32 %z) #10
+define void @rsSetElementAtImpl_uchar(%struct.rs_allocation* %a, i8 zeroext %val, i32 %x, i32 %y, i32 %z) #2 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 1, i32 %x, i32 %y, i32 %z) #10
   store i8 %val, i8* %1, align 1, !tbaa !25
   ret void
 }
 
-define zeroext i8 @rsGetElementAtImpl_uchar([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #3 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 1, i32 %x, i32 %y, i32 %z) #10
+define zeroext i8 @rsGetElementAtImpl_uchar(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #3 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 1, i32 %x, i32 %y, i32 %z) #10
   %2 = load i8* %1, align 1, !tbaa !25
   ret i8 %2
 }
 
 !26 = metadata !{metadata !"uchar2", metadata !15}
-define void @rsSetElementAtImpl_uchar2([1 x i64] %a.coerce, <2 x i8> %val, i32 %x, i32 %y, i32 %z) #2 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 2, i32 %x, i32 %y, i32 %z) #10
+define void @rsSetElementAtImpl_uchar2(%struct.rs_allocation* %a, <2 x i8> %val, i32 %x, i32 %y, i32 %z) #2 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 2, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <2 x i8>*
   store <2 x i8> %val, <2 x i8>* %2, align 2, !tbaa !26
   ret void
 }
 
-define <2 x i8> @rsGetElementAtImpl_uchar2([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #3 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 2, i32 %x, i32 %y, i32 %z) #10
+define <2 x i8> @rsGetElementAtImpl_uchar2(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #3 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 2, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <2 x i8>*
   %3 = load <2 x i8>* %2, align 2, !tbaa !26
   ret <2 x i8> %3
 }
 
 !27 = metadata !{metadata !"uchar3", metadata !15}
-define void @rsSetElementAtImpl_uchar3([1 x i64] %a.coerce, <3 x i8> %val, i32 %x, i32 %y, i32 %z) #2 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 4, i32 %x, i32 %y, i32 %z) #10
+define void @rsSetElementAtImpl_uchar3(%struct.rs_allocation* %a, <3 x i8> %val, i32 %x, i32 %y, i32 %z) #2 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #10
   %2 = shufflevector <3 x i8> %val, <3 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
   %3 = bitcast i8* %1 to <4 x i8>*
   store <4 x i8> %2, <4 x i8>* %3, align 4, !tbaa !27
   ret void
 }
 
-define <3 x i8> @rsGetElementAtImpl_uchar3([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #3 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 4, i32 %x, i32 %y, i32 %z) #10
+define <3 x i8> @rsGetElementAtImpl_uchar3(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #3 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <4 x i8>*
   %3 = load <4 x i8>* %2, align 4, !tbaa !27
   %4 = shufflevector <4 x i8> %3, <4 x i8> undef, <3 x i32> <i32 0, i32 1, i32 2>
@@ -133,61 +135,61 @@
 }
 
 !28 = metadata !{metadata !"uchar4", metadata !15}
-define void @rsSetElementAtImpl_uchar4([1 x i64] %a.coerce, <4 x i8> %val, i32 %x, i32 %y, i32 %z) #2 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 4, i32 %x, i32 %y, i32 %z) #10
+define void @rsSetElementAtImpl_uchar4(%struct.rs_allocation* %a, <4 x i8> %val, i32 %x, i32 %y, i32 %z) #2 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <4 x i8>*
   store <4 x i8> %val, <4 x i8>* %2, align 4, !tbaa !28
   ret void
 }
 
-define <4 x i8> @rsGetElementAtImpl_uchar4([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #3 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 4, i32 %x, i32 %y, i32 %z) #10
+define <4 x i8> @rsGetElementAtImpl_uchar4(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #3 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <4 x i8>*
   %3 = load <4 x i8>* %2, align 4, !tbaa !28
   ret <4 x i8> %3
 }
 
 !29 = metadata !{metadata !"short", metadata !15}
-define void @rsSetElementAtImpl_short([1 x i64] %a.coerce, i16 signext %val, i32 %x, i32 %y, i32 %z) #2 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 2, i32 %x, i32 %y, i32 %z) #10
+define void @rsSetElementAtImpl_short(%struct.rs_allocation* %a, i16 signext %val, i32 %x, i32 %y, i32 %z) #2 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 2, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to i16*
   store i16 %val, i16* %2, align 2, !tbaa !29
   ret void
 }
 
-define signext i16 @rsGetElementAtImpl_short([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #3 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 2, i32 %x, i32 %y, i32 %z) #10
+define signext i16 @rsGetElementAtImpl_short(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #3 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 2, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to i16*
   %3 = load i16* %2, align 2, !tbaa !29
   ret i16 %3
 }
 
 !30 = metadata !{metadata !"short2", metadata !15}
-define void @rsSetElementAtImpl_short2([1 x i64] %a.coerce, <2 x i16> %val, i32 %x, i32 %y, i32 %z) #2 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 4, i32 %x, i32 %y, i32 %z) #10
+define void @rsSetElementAtImpl_short2(%struct.rs_allocation* %a, <2 x i16> %val, i32 %x, i32 %y, i32 %z) #2 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <2 x i16>*
   store <2 x i16> %val, <2 x i16>* %2, align 4, !tbaa !30
   ret void
 }
 
-define <2 x i16> @rsGetElementAtImpl_short2([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #3 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 4, i32 %x, i32 %y, i32 %z) #10
+define <2 x i16> @rsGetElementAtImpl_short2(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #3 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <2 x i16>*
   %3 = load <2 x i16>* %2, align 4, !tbaa !30
   ret <2 x i16> %3
 }
 
 !31 = metadata !{metadata !"short3", metadata !15}
-define void @rsSetElementAtImpl_short3([1 x i64] %a.coerce, <3 x i16> %val, i32 %x, i32 %y, i32 %z) #2 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #10
+define void @rsSetElementAtImpl_short3(%struct.rs_allocation* %a, <3 x i16> %val, i32 %x, i32 %y, i32 %z) #2 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #10
   %2 = shufflevector <3 x i16> %val, <3 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
   %3 = bitcast i8* %1 to <4 x i16>*
   store <4 x i16> %2, <4 x i16>* %3, align 8, !tbaa !31
   ret void
 }
 
-define <3 x i16> @rsGetElementAtImpl_short3([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #3 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #10
+define <3 x i16> @rsGetElementAtImpl_short3(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #3 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <4 x i16>*
   %3 = load <4 x i16>* %2, align 8, !tbaa !31
   %4 = shufflevector <4 x i16> %3, <4 x i16> undef, <3 x i32> <i32 0, i32 1, i32 2>
@@ -195,61 +197,61 @@
 }
 
 !32 = metadata !{metadata !"short4", metadata !15}
-define void @rsSetElementAtImpl_short4([1 x i64] %a.coerce, <4 x i16> %val, i32 %x, i32 %y, i32 %z) #2 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #10
+define void @rsSetElementAtImpl_short4(%struct.rs_allocation* %a, <4 x i16> %val, i32 %x, i32 %y, i32 %z) #2 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <4 x i16>*
   store <4 x i16> %val, <4 x i16>* %2, align 8, !tbaa !32
   ret void
 }
 
-define <4 x i16> @rsGetElementAtImpl_short4([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #3 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #10
+define <4 x i16> @rsGetElementAtImpl_short4(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #3 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <4 x i16>*
   %3 = load <4 x i16>* %2, align 8, !tbaa !32
   ret <4 x i16> %3
 }
 
 !33 = metadata !{metadata !"ushort", metadata !15}
-define void @rsSetElementAtImpl_ushort([1 x i64] %a.coerce, i16 zeroext %val, i32 %x, i32 %y, i32 %z) #2 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 2, i32 %x, i32 %y, i32 %z) #10
+define void @rsSetElementAtImpl_ushort(%struct.rs_allocation* %a, i16 zeroext %val, i32 %x, i32 %y, i32 %z) #2 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 2, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to i16*
   store i16 %val, i16* %2, align 2, !tbaa !33
   ret void
 }
 
-define zeroext i16 @rsGetElementAtImpl_ushort([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #3 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 2, i32 %x, i32 %y, i32 %z) #10
+define zeroext i16 @rsGetElementAtImpl_ushort(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #3 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 2, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to i16*
   %3 = load i16* %2, align 2, !tbaa !33
   ret i16 %3
 }
 
 !34 = metadata !{metadata !"ushort2", metadata !15}
-define void @rsSetElementAtImpl_ushort2([1 x i64] %a.coerce, <2 x i16> %val, i32 %x, i32 %y, i32 %z) #2 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 4, i32 %x, i32 %y, i32 %z) #10
+define void @rsSetElementAtImpl_ushort2(%struct.rs_allocation* %a, <2 x i16> %val, i32 %x, i32 %y, i32 %z) #2 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <2 x i16>*
   store <2 x i16> %val, <2 x i16>* %2, align 4, !tbaa !34
   ret void
 }
 
-define <2 x i16> @rsGetElementAtImpl_ushort2([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #3 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 4, i32 %x, i32 %y, i32 %z) #10
+define <2 x i16> @rsGetElementAtImpl_ushort2(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #3 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <2 x i16>*
   %3 = load <2 x i16>* %2, align 4, !tbaa !34
   ret <2 x i16> %3
 }
 
 !35 = metadata !{metadata !"ushort3", metadata !15}
-define void @rsSetElementAtImpl_ushort3([1 x i64] %a.coerce, <3 x i16> %val, i32 %x, i32 %y, i32 %z) #2 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #10
+define void @rsSetElementAtImpl_ushort3(%struct.rs_allocation* %a, <3 x i16> %val, i32 %x, i32 %y, i32 %z) #2 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #10
   %2 = shufflevector <3 x i16> %val, <3 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
   %3 = bitcast i8* %1 to <4 x i16>*
   store <4 x i16> %2, <4 x i16>* %3, align 8, !tbaa !35
   ret void
 }
 
-define <3 x i16> @rsGetElementAtImpl_ushort3([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #3 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #10
+define <3 x i16> @rsGetElementAtImpl_ushort3(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #3 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <4 x i16>*
   %3 = load <4 x i16>* %2, align 8, !tbaa !35
   %4 = shufflevector <4 x i16> %3, <4 x i16> undef, <3 x i32> <i32 0, i32 1, i32 2>
@@ -257,61 +259,61 @@
 }
 
 !36 = metadata !{metadata !"ushort4", metadata !15}
-define void @rsSetElementAtImpl_ushort4([1 x i64] %a.coerce, <4 x i16> %val, i32 %x, i32 %y, i32 %z) #2 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #10
+define void @rsSetElementAtImpl_ushort4(%struct.rs_allocation* %a, <4 x i16> %val, i32 %x, i32 %y, i32 %z) #2 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <4 x i16>*
   store <4 x i16> %val, <4 x i16>* %2, align 8, !tbaa !36
   ret void
 }
 
-define <4 x i16> @rsGetElementAtImpl_ushort4([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #3 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #10
+define <4 x i16> @rsGetElementAtImpl_ushort4(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #3 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <4 x i16>*
   %3 = load <4 x i16>* %2, align 8, !tbaa !36
   ret <4 x i16> %3
 }
 
 !37 = metadata !{metadata !"int", metadata !15}
-define void @rsSetElementAtImpl_int([1 x i64] %a.coerce, i32 %val, i32 %x, i32 %y, i32 %z) #2 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 4, i32 %x, i32 %y, i32 %z) #10
+define void @rsSetElementAtImpl_int(%struct.rs_allocation* %a, i32 %val, i32 %x, i32 %y, i32 %z) #2 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to i32*
   store i32 %val, i32* %2, align 4, !tbaa !37
   ret void
 }
 
-define i32 @rsGetElementAtImpl_int([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #3 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 4, i32 %x, i32 %y, i32 %z) #10
+define i32 @rsGetElementAtImpl_int(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #3 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to i32*
   %3 = load i32* %2, align 4, !tbaa !37
   ret i32 %3
 }
 
 !38 = metadata !{metadata !"int2", metadata !15}
-define void @rsSetElementAtImpl_int2([1 x i64] %a.coerce, <2 x i32> %val, i32 %x, i32 %y, i32 %z) #2 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #10
+define void @rsSetElementAtImpl_int2(%struct.rs_allocation* %a, <2 x i32> %val, i32 %x, i32 %y, i32 %z) #2 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <2 x i32>*
   store <2 x i32> %val, <2 x i32>* %2, align 8, !tbaa !38
   ret void
 }
 
-define <2 x i32> @rsGetElementAtImpl_int2([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #3 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #10
+define <2 x i32> @rsGetElementAtImpl_int2(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #3 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <2 x i32>*
   %3 = load <2 x i32>* %2, align 8, !tbaa !38
   ret <2 x i32> %3
 }
 
 !39 = metadata !{metadata !"int3", metadata !15}
-define void @rsSetElementAtImpl_int3([1 x i64] %a.coerce, <3 x i32> %val, i32 %x, i32 %y, i32 %z) #2 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 16, i32 %x, i32 %y, i32 %z) #10
+define void @rsSetElementAtImpl_int3(%struct.rs_allocation* %a, <3 x i32> %val, i32 %x, i32 %y, i32 %z) #2 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #10
   %2 = shufflevector <3 x i32> %val, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
   %3 = bitcast i8* %1 to <4 x i32>*
   store <4 x i32> %2, <4 x i32>* %3, align 16, !tbaa !39
   ret void
 }
 
-define <3 x i32> @rsGetElementAtImpl_int3([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #3 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 16, i32 %x, i32 %y, i32 %z) #10
+define <3 x i32> @rsGetElementAtImpl_int3(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #3 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <4 x i32>*
   %3 = load <4 x i32>* %2, align 8, !tbaa !39
   %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
@@ -319,61 +321,61 @@
 }
 
 !40 = metadata !{metadata !"int4", metadata !15}
-define void @rsSetElementAtImpl_int4([1 x i64] %a.coerce, <4 x i32> %val, i32 %x, i32 %y, i32 %z) #2 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 16, i32 %x, i32 %y, i32 %z) #10
+define void @rsSetElementAtImpl_int4(%struct.rs_allocation* %a, <4 x i32> %val, i32 %x, i32 %y, i32 %z) #2 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <4 x i32>*
   store <4 x i32> %val, <4 x i32>* %2, align 16, !tbaa !40
   ret void
 }
 
-define <4 x i32> @rsGetElementAtImpl_int4([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #3 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 16, i32 %x, i32 %y, i32 %z) #10
+define <4 x i32> @rsGetElementAtImpl_int4(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #3 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <4 x i32>*
   %3 = load <4 x i32>* %2, align 16, !tbaa !40
   ret <4 x i32> %3
 }
 
 !41 = metadata !{metadata !"uint", metadata !15}
-define void @rsSetElementAtImpl_uint([1 x i64] %a.coerce, i32 %val, i32 %x, i32 %y, i32 %z) #2 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 4, i32 %x, i32 %y, i32 %z) #10
+define void @rsSetElementAtImpl_uint(%struct.rs_allocation* %a, i32 %val, i32 %x, i32 %y, i32 %z) #2 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to i32*
   store i32 %val, i32* %2, align 4, !tbaa !41
   ret void
 }
 
-define i32 @rsGetElementAtImpl_uint([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #3 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 4, i32 %x, i32 %y, i32 %z) #10
+define i32 @rsGetElementAtImpl_uint(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #3 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to i32*
   %3 = load i32* %2, align 4, !tbaa !41
   ret i32 %3
 }
 
 !42 = metadata !{metadata !"uint2", metadata !15}
-define void @rsSetElementAtImpl_uint2([1 x i64] %a.coerce, <2 x i32> %val, i32 %x, i32 %y, i32 %z) #2 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #10
+define void @rsSetElementAtImpl_uint2(%struct.rs_allocation* %a, <2 x i32> %val, i32 %x, i32 %y, i32 %z) #2 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <2 x i32>*
   store <2 x i32> %val, <2 x i32>* %2, align 8, !tbaa !42
   ret void
 }
 
-define <2 x i32> @rsGetElementAtImpl_uint2([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #3 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #10
+define <2 x i32> @rsGetElementAtImpl_uint2(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #3 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <2 x i32>*
   %3 = load <2 x i32>* %2, align 8, !tbaa !42
   ret <2 x i32> %3
 }
 
 !43 = metadata !{metadata !"uint3", metadata !15}
-define void @rsSetElementAtImpl_uint3([1 x i64] %a.coerce, <3 x i32> %val, i32 %x, i32 %y, i32 %z) #2 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 16, i32 %x, i32 %y, i32 %z) #10
+define void @rsSetElementAtImpl_uint3(%struct.rs_allocation* %a, <3 x i32> %val, i32 %x, i32 %y, i32 %z) #2 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #10
   %2 = shufflevector <3 x i32> %val, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
   %3 = bitcast i8* %1 to <4 x i32>*
   store <4 x i32> %2, <4 x i32>* %3, align 16, !tbaa !43
   ret void
 }
 
-define <3 x i32> @rsGetElementAtImpl_uint3([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #3 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 16, i32 %x, i32 %y, i32 %z) #10
+define <3 x i32> @rsGetElementAtImpl_uint3(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #3 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <4 x i32>*
   %3 = load <4 x i32>* %2, align 8, !tbaa !43
   %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
@@ -381,61 +383,61 @@
 }
 
 !44 = metadata !{metadata !"uint4", metadata !15}
-define void @rsSetElementAtImpl_uint4([1 x i64] %a.coerce, <4 x i32> %val, i32 %x, i32 %y, i32 %z) #2 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 16, i32 %x, i32 %y, i32 %z) #10
+define void @rsSetElementAtImpl_uint4(%struct.rs_allocation* %a, <4 x i32> %val, i32 %x, i32 %y, i32 %z) #2 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <4 x i32>*
   store <4 x i32> %val, <4 x i32>* %2, align 16, !tbaa !44
   ret void
 }
 
-define <4 x i32> @rsGetElementAtImpl_uint4([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #3 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 16, i32 %x, i32 %y, i32 %z) #10
+define <4 x i32> @rsGetElementAtImpl_uint4(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #3 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <4 x i32>*
   %3 = load <4 x i32>* %2, align 16, !tbaa !44
   ret <4 x i32> %3
 }
 
 !45 = metadata !{metadata !"long", metadata !15}
-define void @rsSetElementAtImpl_long([1 x i64] %a.coerce, i64 %val, i32 %x, i32 %y, i32 %z) #2 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #10
+define void @rsSetElementAtImpl_long(%struct.rs_allocation* %a, i64 %val, i32 %x, i32 %y, i32 %z) #2 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to i64*
   store i64 %val, i64* %2, align 8, !tbaa !45
   ret void
 }
 
-define i64 @rsGetElementAtImpl_long([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #3 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #10
+define i64 @rsGetElementAtImpl_long(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #3 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to i64*
   %3 = load i64* %2, align 8, !tbaa !45
   ret i64 %3
 }
 
 !46 = metadata !{metadata !"long2", metadata !15}
-define void @rsSetElementAtImpl_long2([1 x i64] %a.coerce, <2 x i64> %val, i32 %x, i32 %y, i32 %z) #2 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 16, i32 %x, i32 %y, i32 %z) #10
+define void @rsSetElementAtImpl_long2(%struct.rs_allocation* %a, <2 x i64> %val, i32 %x, i32 %y, i32 %z) #2 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <2 x i64>*
   store <2 x i64> %val, <2 x i64>* %2, align 16, !tbaa !46
   ret void
 }
 
-define <2 x i64> @rsGetElementAtImpl_long2([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #3 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 16, i32 %x, i32 %y, i32 %z) #10
+define <2 x i64> @rsGetElementAtImpl_long2(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #3 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <2 x i64>*
   %3 = load <2 x i64>* %2, align 16, !tbaa !46
   ret <2 x i64> %3
 }
 
 !47 = metadata !{metadata !"long3", metadata !15}
-define void @rsSetElementAtImpl_long3([1 x i64] %a.coerce, <3 x i64> %val, i32 %x, i32 %y, i32 %z) #2 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 32, i32 %x, i32 %y, i32 %z) #10
+define void @rsSetElementAtImpl_long3(%struct.rs_allocation* %a, <3 x i64> %val, i32 %x, i32 %y, i32 %z) #2 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #10
   %2 = shufflevector <3 x i64> %val, <3 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
   %3 = bitcast i8* %1 to <4 x i64>*
   store <4 x i64> %2, <4 x i64>* %3, align 32, !tbaa !47
   ret void
 }
 
-define void @rsGetElementAtImpl_long3(<3 x i64>* noalias nocapture sret %agg.result, [1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #2 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 32, i32 %x, i32 %y, i32 %z) #10
+define void @rsGetElementAtImpl_long3(<3 x i64>* noalias nocapture sret %agg.result, %struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <4 x i64>*
   %3 = load <4 x i64>* %2, align 32
   %4 = bitcast <3 x i64>* %agg.result to <4 x i64>*
@@ -444,15 +446,15 @@
 }
 
 !48 = metadata !{metadata !"long4", metadata !15}
-define void @rsSetElementAtImpl_long4([1 x i64] %a.coerce, <4 x i64> %val, i32 %x, i32 %y, i32 %z) #2 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 32, i32 %x, i32 %y, i32 %z) #10
+define void @rsSetElementAtImpl_long4(%struct.rs_allocation* %a, <4 x i64> %val, i32 %x, i32 %y, i32 %z) #2 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <4 x i64>*
   store <4 x i64> %val, <4 x i64>* %2, align 32, !tbaa !48
   ret void
 }
 
-define void @rsGetElementAtImpl_long4(<4 x i64>* noalias nocapture sret %agg.result, [1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #2 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 32, i32 %x, i32 %y, i32 %z) #10
+define void @rsGetElementAtImpl_long4(<4 x i64>* noalias nocapture sret %agg.result, %struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <4 x i64>*
   %3 = load <4 x i64>* %2, align 32, !tbaa !15
   store <4 x i64> %3, <4 x i64>* %agg.result, align 32, !tbaa !48
@@ -460,46 +462,46 @@
 }
 
 !49 = metadata !{metadata !"ulong", metadata !15}
-define void @rsSetElementAtImpl_ulong([1 x i64] %a.coerce, i64 %val, i32 %x, i32 %y, i32 %z) #2 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #10
+define void @rsSetElementAtImpl_ulong(%struct.rs_allocation* %a, i64 %val, i32 %x, i32 %y, i32 %z) #2 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to i64*
   store i64 %val, i64* %2, align 8, !tbaa !49
   ret void
 }
 
-define i64 @rsGetElementAtImpl_ulong([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #3 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #10
+define i64 @rsGetElementAtImpl_ulong(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #3 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to i64*
   %3 = load i64* %2, align 8, !tbaa !49
   ret i64 %3
 }
 
 !50 = metadata !{metadata !"ulong2", metadata !15}
-define void @rsSetElementAtImpl_ulong2([1 x i64] %a.coerce, <2 x i64> %val, i32 %x, i32 %y, i32 %z) #2 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 16, i32 %x, i32 %y, i32 %z) #10
+define void @rsSetElementAtImpl_ulong2(%struct.rs_allocation* %a, <2 x i64> %val, i32 %x, i32 %y, i32 %z) #2 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <2 x i64>*
   store <2 x i64> %val, <2 x i64>* %2, align 16, !tbaa !50
   ret void
 }
 
-define <2 x i64> @rsGetElementAtImpl_ulong2([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #3 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 16, i32 %x, i32 %y, i32 %z) #10
+define <2 x i64> @rsGetElementAtImpl_ulong2(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #3 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <2 x i64>*
   %3 = load <2 x i64>* %2, align 16, !tbaa !50
   ret <2 x i64> %3
 }
 
 !51 = metadata !{metadata !"ulong3", metadata !15}
-define void @rsSetElementAtImpl_ulong3([1 x i64] %a.coerce, <3 x i64> %val, i32 %x, i32 %y, i32 %z) #2 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 32, i32 %x, i32 %y, i32 %z) #10
+define void @rsSetElementAtImpl_ulong3(%struct.rs_allocation* %a, <3 x i64> %val, i32 %x, i32 %y, i32 %z) #2 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #10
   %2 = shufflevector <3 x i64> %val, <3 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
   %3 = bitcast i8* %1 to <4 x i64>*
   store <4 x i64> %2, <4 x i64>* %3, align 32, !tbaa !51
   ret void
 }
 
-define void @rsGetElementAtImpl_ulong3(<3 x i64>* noalias nocapture sret %agg.result, [1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #2 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 32, i32 %x, i32 %y, i32 %z) #10
+define void @rsGetElementAtImpl_ulong3(<3 x i64>* noalias nocapture sret %agg.result, %struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <4 x i64>*
   %3 = load <4 x i64>* %2, align 32
   %4 = bitcast <3 x i64>* %agg.result to <4 x i64>*
@@ -508,15 +510,15 @@
 }
 
 !52 = metadata !{metadata !"ulong4", metadata !15}
-define void @rsSetElementAtImpl_ulong4([1 x i64] %a.coerce, <4 x i64> %val, i32 %x, i32 %y, i32 %z) #2 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 32, i32 %x, i32 %y, i32 %z) #10
+define void @rsSetElementAtImpl_ulong4(%struct.rs_allocation* %a, <4 x i64> %val, i32 %x, i32 %y, i32 %z) #2 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <4 x i64>*
   store <4 x i64> %val, <4 x i64>* %2, align 32, !tbaa !52
   ret void
 }
 
-define void @rsGetElementAtImpl_ulong4(<4 x i64>* noalias nocapture sret %agg.result, [1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #2 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 32, i32 %x, i32 %y, i32 %z) #10
+define void @rsGetElementAtImpl_ulong4(<4 x i64>* noalias nocapture sret %agg.result, %struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <4 x i64>*
   %3 = load <4 x i64>* %2, align 32, !tbaa !15
   store <4 x i64> %3, <4 x i64>* %agg.result, align 32, !tbaa !52
@@ -524,46 +526,46 @@
 }
 
 !53 = metadata !{metadata !"float", metadata !15}
-define void @rsSetElementAtImpl_float([1 x i64] %a.coerce, float %val, i32 %x, i32 %y, i32 %z) #2 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 4, i32 %x, i32 %y, i32 %z) #10
+define void @rsSetElementAtImpl_float(%struct.rs_allocation* %a, float %val, i32 %x, i32 %y, i32 %z) #2 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to float*
   store float %val, float* %2, align 4, !tbaa !53
   ret void
 }
 
-define float @rsGetElementAtImpl_float([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #3 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 4, i32 %x, i32 %y, i32 %z) #10
+define float @rsGetElementAtImpl_float(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #3 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to float*
   %3 = load float* %2, align 4, !tbaa !53
   ret float %3
 }
 
 !54 = metadata !{metadata !"float2", metadata !15}
-define void @rsSetElementAtImpl_float2([1 x i64] %a.coerce, <2 x float> %val, i32 %x, i32 %y, i32 %z) #2 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #10
+define void @rsSetElementAtImpl_float2(%struct.rs_allocation* %a, <2 x float> %val, i32 %x, i32 %y, i32 %z) #2 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <2 x float>*
   store <2 x float> %val, <2 x float>* %2, align 8, !tbaa !54
   ret void
 }
 
-define <2 x float> @rsGetElementAtImpl_float2([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #3 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #10
+define <2 x float> @rsGetElementAtImpl_float2(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #3 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <2 x float>*
   %3 = load <2 x float>* %2, align 8, !tbaa !54
   ret <2 x float> %3
 }
 
 !55 = metadata !{metadata !"float3", metadata !15}
-define void @rsSetElementAtImpl_float3([1 x i64] %a.coerce, <3 x float> %val, i32 %x, i32 %y, i32 %z) #2 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 16, i32 %x, i32 %y, i32 %z) #10
+define void @rsSetElementAtImpl_float3(%struct.rs_allocation* %a, <3 x float> %val, i32 %x, i32 %y, i32 %z) #2 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #10
   %2 = shufflevector <3 x float> %val, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
   %3 = bitcast i8* %1 to <4 x float>*
   store <4 x float> %2, <4 x float>* %3, align 16, !tbaa !55
   ret void
 }
 
-define <3 x float> @rsGetElementAtImpl_float3([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #3 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 16, i32 %x, i32 %y, i32 %z) #10
+define <3 x float> @rsGetElementAtImpl_float3(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #3 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <4 x float>*
   %3 = load <4 x float>* %2, align 8, !tbaa !55
   %4 = shufflevector <4 x float> %3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
@@ -571,53 +573,53 @@
 }
 
 !56 = metadata !{metadata !"float4", metadata !15}
-define void @rsSetElementAtImpl_float4([1 x i64] %a.coerce, <4 x float> %val, i32 %x, i32 %y, i32 %z) #2 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 16, i32 %x, i32 %y, i32 %z) #10
+define void @rsSetElementAtImpl_float4(%struct.rs_allocation* %a, <4 x float> %val, i32 %x, i32 %y, i32 %z) #2 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <4 x float>*
   store <4 x float> %val, <4 x float>* %2, align 16, !tbaa !56
   ret void
 }
 
-define <4 x float> @rsGetElementAtImpl_float4([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #3 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 16, i32 %x, i32 %y, i32 %z) #10
+define <4 x float> @rsGetElementAtImpl_float4(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #3 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <4 x float>*
   %3 = load <4 x float>* %2, align 16, !tbaa !56
   ret <4 x float> %3
 }
 
 !57 = metadata !{metadata !"double", metadata !15}
-define void @rsSetElementAtImpl_double([1 x i64] %a.coerce, double %val, i32 %x, i32 %y, i32 %z) #2 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #10
+define void @rsSetElementAtImpl_double(%struct.rs_allocation* %a, double %val, i32 %x, i32 %y, i32 %z) #2 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to double*
   store double %val, double* %2, align 8, !tbaa !57
   ret void
 }
 
-define double @rsGetElementAtImpl_double([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #3 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #10
+define double @rsGetElementAtImpl_double(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #3 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to double*
   %3 = load double* %2, align 8, !tbaa !57
   ret double %3
 }
 
 !58 = metadata !{metadata !"double2", metadata !15}
-define void @rsSetElementAtImpl_double2([1 x i64] %a.coerce, <2 x double> %val, i32 %x, i32 %y, i32 %z) #2 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 16, i32 %x, i32 %y, i32 %z) #10
+define void @rsSetElementAtImpl_double2(%struct.rs_allocation* %a, <2 x double> %val, i32 %x, i32 %y, i32 %z) #2 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <2 x double>*
   store <2 x double> %val, <2 x double>* %2, align 16, !tbaa !58
   ret void
 }
 
-define <2 x double> @rsGetElementAtImpl_double2([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #3 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 16, i32 %x, i32 %y, i32 %z) #10
+define <2 x double> @rsGetElementAtImpl_double2(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #3 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <2 x double>*
   %3 = load <2 x double>* %2, align 16, !tbaa !58
   ret <2 x double> %3
 }
 
 !59 = metadata !{metadata !"double3", metadata !15}
-define void @rsSetElementAtImpl_double3([1 x i64] %a.coerce, <3 x double> %val, i32 %x, i32 %y, i32 %z) #2 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 32, i32 %x, i32 %y, i32 %z) #10
+define void @rsSetElementAtImpl_double3(%struct.rs_allocation* %a, <3 x double> %val, i32 %x, i32 %y, i32 %z) #2 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #10
   %2 = shufflevector <3 x double> %val, <3 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
   %3 = bitcast i8* %1 to <4 x double>*
   store <4 x double> %2, <4 x double>* %3, align 32, !tbaa !59
@@ -625,8 +627,8 @@
 }
 
 
-define void @rsGetElementAtImpl_double3(<3 x double>* noalias nocapture sret %agg.result, [1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #2 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 32, i32 %x, i32 %y, i32 %z) #10
+define void @rsGetElementAtImpl_double3(<3 x double>* noalias nocapture sret %agg.result, %struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <4 x double>*
   %3 = load <4 x double>* %2, align 32
   %4 = bitcast <3 x double>* %agg.result to <4 x double>*
@@ -635,14 +637,14 @@
 }
 
 !60 = metadata !{metadata !"double4", metadata !15}
-define void @rsSetElementAtImpl_double4([1 x i64] %a.coerce, <4 x double> %val, i32 %x, i32 %y, i32 %z) #2 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 32, i32 %x, i32 %y, i32 %z) #10
+define void @rsSetElementAtImpl_double4(%struct.rs_allocation* %a, <4 x double> %val, i32 %x, i32 %y, i32 %z) #2 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <4 x double>*
   store <4 x double> %val, <4 x double>* %2, align 32, !tbaa !60
   ret void
 }
-define void @rsGetElementAtImpl_double4(<4 x double>* noalias nocapture sret %agg.result, [1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #2 {
-  %1 = tail call i8* @rsOffset([1 x i64] %a.coerce, i32 32, i32 %x, i32 %y, i32 %z) #10
+define void @rsGetElementAtImpl_double4(<4 x double>* noalias nocapture sret %agg.result, %struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2 {
+  %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <4 x double>*
   %3 = load <4 x double>* %2, align 32, !tbaa !15
   store <4 x double> %3, <4 x double>* %agg.result, align 32, !tbaa !60
@@ -650,382 +652,382 @@
 }
 
 
-define <4 x i64> @__rsAllocationVLoadXImpl_long4([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define <4 x i64> @__rsAllocationVLoadXImpl_long4(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <4 x i64>*
   %3 = load <4 x i64>* %2, align 8
   ret <4 x i64> %3
 }
-define <3 x i64> @__rsAllocationVLoadXImpl_long3([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define <3 x i64> @__rsAllocationVLoadXImpl_long3(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <3 x i64>*
   %3 = load <3 x i64>* %2, align 8
   ret <3 x i64> %3
 }
-define <2 x i64> @__rsAllocationVLoadXImpl_long2([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define <2 x i64> @__rsAllocationVLoadXImpl_long2(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <2 x i64>*
   %3 = load <2 x i64>* %2, align 8
   ret <2 x i64> %3
 }
 
-define <4 x i64> @__rsAllocationVLoadXImpl_ulong4([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define <4 x i64> @__rsAllocationVLoadXImpl_ulong4(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <4 x i64>*
   %3 = load <4 x i64>* %2, align 8
   ret <4 x i64> %3
 }
-define <3 x i64> @__rsAllocationVLoadXImpl_ulong3([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define <3 x i64> @__rsAllocationVLoadXImpl_ulong3(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <3 x i64>*
   %3 = load <3 x i64>* %2, align 8
   ret <3 x i64> %3
 }
-define <2 x i64> @__rsAllocationVLoadXImpl_ulong2([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define <2 x i64> @__rsAllocationVLoadXImpl_ulong2(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <2 x i64>*
   %3 = load <2 x i64>* %2, align 8
   ret <2 x i64> %3
 }
 
-define <4 x i32> @__rsAllocationVLoadXImpl_int4([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define <4 x i32> @__rsAllocationVLoadXImpl_int4(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <4 x i32>*
   %3 = load <4 x i32>* %2, align 4
   ret <4 x i32> %3
 }
-define <3 x i32> @__rsAllocationVLoadXImpl_int3([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define <3 x i32> @__rsAllocationVLoadXImpl_int3(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <3 x i32>*
   %3 = load <3 x i32>* %2, align 4
   ret <3 x i32> %3
 }
-define <2 x i32> @__rsAllocationVLoadXImpl_int2([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define <2 x i32> @__rsAllocationVLoadXImpl_int2(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <2 x i32>*
   %3 = load <2 x i32>* %2, align 4
   ret <2 x i32> %3
 }
 
-define <4 x i32> @__rsAllocationVLoadXImpl_uint4([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define <4 x i32> @__rsAllocationVLoadXImpl_uint4(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <4 x i32>*
   %3 = load <4 x i32>* %2, align 4
   ret <4 x i32> %3
 }
-define <3 x i32> @__rsAllocationVLoadXImpl_uint3([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define <3 x i32> @__rsAllocationVLoadXImpl_uint3(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <3 x i32>*
   %3 = load <3 x i32>* %2, align 4
   ret <3 x i32> %3
 }
-define <2 x i32> @__rsAllocationVLoadXImpl_uint2([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define <2 x i32> @__rsAllocationVLoadXImpl_uint2(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <2 x i32>*
   %3 = load <2 x i32>* %2, align 4
   ret <2 x i32> %3
 }
 
-define <4 x i16> @__rsAllocationVLoadXImpl_short4([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define <4 x i16> @__rsAllocationVLoadXImpl_short4(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <4 x i16>*
   %3 = load <4 x i16>* %2, align 2
   ret <4 x i16> %3
 }
-define <3 x i16> @__rsAllocationVLoadXImpl_short3([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define <3 x i16> @__rsAllocationVLoadXImpl_short3(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <3 x i16>*
   %3 = load <3 x i16>* %2, align 2
   ret <3 x i16> %3
 }
-define <2 x i16> @__rsAllocationVLoadXImpl_short2([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define <2 x i16> @__rsAllocationVLoadXImpl_short2(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <2 x i16>*
   %3 = load <2 x i16>* %2, align 2
   ret <2 x i16> %3
 }
 
-define <4 x i16> @__rsAllocationVLoadXImpl_ushort4([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define <4 x i16> @__rsAllocationVLoadXImpl_ushort4(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <4 x i16>*
   %3 = load <4 x i16>* %2, align 2
   ret <4 x i16> %3
 }
-define <3 x i16> @__rsAllocationVLoadXImpl_ushort3([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define <3 x i16> @__rsAllocationVLoadXImpl_ushort3(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <3 x i16>*
   %3 = load <3 x i16>* %2, align 2
   ret <3 x i16> %3
 }
-define <2 x i16> @__rsAllocationVLoadXImpl_ushort2([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define <2 x i16> @__rsAllocationVLoadXImpl_ushort2(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <2 x i16>*
   %3 = load <2 x i16>* %2, align 2
   ret <2 x i16> %3
 }
 
-define <4 x i8> @__rsAllocationVLoadXImpl_char4([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define <4 x i8> @__rsAllocationVLoadXImpl_char4(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <4 x i8>*
   %3 = load <4 x i8>* %2, align 1
   ret <4 x i8> %3
 }
-define <3 x i8> @__rsAllocationVLoadXImpl_char3([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define <3 x i8> @__rsAllocationVLoadXImpl_char3(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <3 x i8>*
   %3 = load <3 x i8>* %2, align 1
   ret <3 x i8> %3
 }
-define <2 x i8> @__rsAllocationVLoadXImpl_char2([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define <2 x i8> @__rsAllocationVLoadXImpl_char2(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <2 x i8>*
   %3 = load <2 x i8>* %2, align 1
   ret <2 x i8> %3
 }
 
-define <4 x i8> @__rsAllocationVLoadXImpl_uchar4([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define <4 x i8> @__rsAllocationVLoadXImpl_uchar4(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <4 x i8>*
   %3 = load <4 x i8>* %2, align 1
   ret <4 x i8> %3
 }
-define <3 x i8> @__rsAllocationVLoadXImpl_uchar3([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define <3 x i8> @__rsAllocationVLoadXImpl_uchar3(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <3 x i8>*
   %3 = load <3 x i8>* %2, align 1
   ret <3 x i8> %3
 }
-define <2 x i8> @__rsAllocationVLoadXImpl_uchar2([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define <2 x i8> @__rsAllocationVLoadXImpl_uchar2(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <2 x i8>*
   %3 = load <2 x i8>* %2, align 1
   ret <2 x i8> %3
 }
 
-define <4 x float> @__rsAllocationVLoadXImpl_float4([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define <4 x float> @__rsAllocationVLoadXImpl_float4(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <4 x float>*
   %3 = load <4 x float>* %2, align 4
   ret <4 x float> %3
 }
-define <3 x float> @__rsAllocationVLoadXImpl_float3([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define <3 x float> @__rsAllocationVLoadXImpl_float3(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <3 x float>*
   %3 = load <3 x float>* %2, align 4
   ret <3 x float> %3
 }
-define <2 x float> @__rsAllocationVLoadXImpl_float2([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define <2 x float> @__rsAllocationVLoadXImpl_float2(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <2 x float>*
   %3 = load <2 x float>* %2, align 4
   ret <2 x float> %3
 }
 
-define <4 x double> @__rsAllocationVLoadXImpl_double4([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define <4 x double> @__rsAllocationVLoadXImpl_double4(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <4 x double>*
   %3 = load <4 x double>* %2, align 8
   ret <4 x double> %3
 }
-define <3 x double> @__rsAllocationVLoadXImpl_double3([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define <3 x double> @__rsAllocationVLoadXImpl_double3(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <3 x double>*
   %3 = load <3 x double>* %2, align 8
   ret <3 x double> %3
 }
-define <2 x double> @__rsAllocationVLoadXImpl_double2([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define <2 x double> @__rsAllocationVLoadXImpl_double2(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <2 x double>*
   %3 = load <2 x double>* %2, align 8
   ret <2 x double> %3
 }
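
Every VLoad wrapper above follows the same three-step pattern: compute the element's byte address with rsOffsetNs(), reinterpret that address as a vector pointer, and load through it. The only substantive change in this hunk is the first parameter: the opaque allocation handle is now passed as %struct.rs_allocation* instead of being coerced into a [1 x i64] register aggregate, which stops working once the handle grows past eight bytes (see the rs_types.rsh change at the end of this patch). A minimal C sketch of the int4 case, assuming illustrative typedefs around the rsOffsetNs signature shown in the IR:

    #include <stdint.h>

    typedef struct rs_allocation rs_allocation;             /* opaque handle */
    typedef int32_t int4 __attribute__((vector_size(16)));  /* <4 x i32>     */

    /* Runtime helper used by every wrapper: returns the byte address of
     * element (x, y, z) inside the allocation. */
    extern uint8_t *rsOffsetNs(rs_allocation *a, uint32_t x, uint32_t y,
                               uint32_t z);

    static int4 vload_int4_sketch(rs_allocation *a,
                                  uint32_t x, uint32_t y, uint32_t z) {
        int4 *p = (int4 *)rsOffsetNs(a, x, y, z);  /* bitcast i8* -> <4 x i32>* */
        return *p;  /* the IR loads with align 4; this sketch assumes natural
                       alignment, so it is illustrative rather than exact */
    }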
 
 
-define void @__rsAllocationVStoreXImpl_long4([1 x i64] %a.coerce, <4 x i64> %val, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define void @__rsAllocationVStoreXImpl_long4(%struct.rs_allocation* %a, <4 x i64> %val, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <4 x i64>*
   store <4 x i64> %val, <4 x i64>* %2, align 8
   ret void
 }
-define void @__rsAllocationVStoreXImpl_long3([1 x i64] %a.coerce, <3 x i64> %val, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define void @__rsAllocationVStoreXImpl_long3(%struct.rs_allocation* %a, <3 x i64> %val, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <3 x i64>*
   store <3 x i64> %val, <3 x i64>* %2, align 8
   ret void
 }
-define void @__rsAllocationVStoreXImpl_long2([1 x i64] %a.coerce, <2 x i64> %val, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define void @__rsAllocationVStoreXImpl_long2(%struct.rs_allocation* %a, <2 x i64> %val, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <2 x i64>*
   store <2 x i64> %val, <2 x i64>* %2, align 8
   ret void
 }
 
-define void @__rsAllocationVStoreXImpl_ulong4([1 x i64] %a.coerce, <4 x i64> %val, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define void @__rsAllocationVStoreXImpl_ulong4(%struct.rs_allocation* %a, <4 x i64> %val, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <4 x i64>*
   store <4 x i64> %val, <4 x i64>* %2, align 8
   ret void
 }
-define void @__rsAllocationVStoreXImpl_ulong3([1 x i64] %a.coerce, <3 x i64> %val, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define void @__rsAllocationVStoreXImpl_ulong3(%struct.rs_allocation* %a, <3 x i64> %val, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <3 x i64>*
   store <3 x i64> %val, <3 x i64>* %2, align 8
   ret void
 }
-define void @__rsAllocationVStoreXImpl_ulong2([1 x i64] %a.coerce, <2 x i64> %val, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define void @__rsAllocationVStoreXImpl_ulong2(%struct.rs_allocation* %a, <2 x i64> %val, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <2 x i64>*
   store <2 x i64> %val, <2 x i64>* %2, align 8
   ret void
 }
 
-define void @__rsAllocationVStoreXImpl_int4([1 x i64] %a.coerce, <4 x i32> %val, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define void @__rsAllocationVStoreXImpl_int4(%struct.rs_allocation* %a, <4 x i32> %val, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <4 x i32>*
   store <4 x i32> %val, <4 x i32>* %2, align 8
   ret void
 }
-define void @__rsAllocationVStoreXImpl_int3([1 x i64] %a.coerce, <3 x i32> %val, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define void @__rsAllocationVStoreXImpl_int3(%struct.rs_allocation* %a, <3 x i32> %val, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <3 x i32>*
   store <3 x i32> %val, <3 x i32>* %2, align 8
   ret void
 }
-define void @__rsAllocationVStoreXImpl_int2([1 x i64] %a.coerce, <2 x i32> %val, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define void @__rsAllocationVStoreXImpl_int2(%struct.rs_allocation* %a, <2 x i32> %val, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <2 x i32>*
   store <2 x i32> %val, <2 x i32>* %2, align 8
   ret void
 }
 
-define void @__rsAllocationVStoreXImpl_uint4([1 x i64] %a.coerce, <4 x i32> %val, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define void @__rsAllocationVStoreXImpl_uint4(%struct.rs_allocation* %a, <4 x i32> %val, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <4 x i32>*
   store <4 x i32> %val, <4 x i32>* %2, align 8
   ret void
 }
-define void @__rsAllocationVStoreXImpl_uint3([1 x i64] %a.coerce, <3 x i32> %val, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define void @__rsAllocationVStoreXImpl_uint3(%struct.rs_allocation* %a, <3 x i32> %val, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <3 x i32>*
   store <3 x i32> %val, <3 x i32>* %2, align 8
   ret void
 }
-define void @__rsAllocationVStoreXImpl_uint2([1 x i64] %a.coerce, <2 x i32> %val, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define void @__rsAllocationVStoreXImpl_uint2(%struct.rs_allocation* %a, <2 x i32> %val, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <2 x i32>*
   store <2 x i32> %val, <2 x i32>* %2, align 8
   ret void
 }
 
-define void @__rsAllocationVStoreXImpl_short4([1 x i64] %a.coerce, <4 x i16> %val, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define void @__rsAllocationVStoreXImpl_short4(%struct.rs_allocation* %a, <4 x i16> %val, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <4 x i16>*
   store <4 x i16> %val, <4 x i16>* %2, align 8
   ret void
 }
-define void @__rsAllocationVStoreXImpl_short3([1 x i64] %a.coerce, <3 x i16> %val, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define void @__rsAllocationVStoreXImpl_short3(%struct.rs_allocation* %a, <3 x i16> %val, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <3 x i16>*
   store <3 x i16> %val, <3 x i16>* %2, align 8
   ret void
 }
-define void @__rsAllocationVStoreXImpl_short2([1 x i64] %a.coerce, <2 x i16> %val, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define void @__rsAllocationVStoreXImpl_short2(%struct.rs_allocation* %a, <2 x i16> %val, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <2 x i16>*
   store <2 x i16> %val, <2 x i16>* %2, align 8
   ret void
 }
 
-define void @__rsAllocationVStoreXImpl_ushort4([1 x i64] %a.coerce, <4 x i16> %val, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define void @__rsAllocationVStoreXImpl_ushort4(%struct.rs_allocation* %a, <4 x i16> %val, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <4 x i16>*
   store <4 x i16> %val, <4 x i16>* %2, align 8
   ret void
 }
-define void @__rsAllocationVStoreXImpl_ushort3([1 x i64] %a.coerce, <3 x i16> %val, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define void @__rsAllocationVStoreXImpl_ushort3(%struct.rs_allocation* %a, <3 x i16> %val, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <3 x i16>*
   store <3 x i16> %val, <3 x i16>* %2, align 8
   ret void
 }
-define void @__rsAllocationVStoreXImpl_ushort2([1 x i64] %a.coerce, <2 x i16> %val, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define void @__rsAllocationVStoreXImpl_ushort2(%struct.rs_allocation* %a, <2 x i16> %val, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <2 x i16>*
   store <2 x i16> %val, <2 x i16>* %2, align 8
   ret void
 }
 
-define void @__rsAllocationVStoreXImpl_char4([1 x i64] %a.coerce, <4 x i8> %val, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define void @__rsAllocationVStoreXImpl_char4(%struct.rs_allocation* %a, <4 x i8> %val, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <4 x i8>*
   store <4 x i8> %val, <4 x i8>* %2, align 8
   ret void
 }
-define void @__rsAllocationVStoreXImpl_char3([1 x i64] %a.coerce, <3 x i8> %val, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define void @__rsAllocationVStoreXImpl_char3(%struct.rs_allocation* %a, <3 x i8> %val, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <3 x i8>*
   store <3 x i8> %val, <3 x i8>* %2, align 8
   ret void
 }
-define void @__rsAllocationVStoreXImpl_char2([1 x i64] %a.coerce, <2 x i8> %val, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define void @__rsAllocationVStoreXImpl_char2(%struct.rs_allocation* %a, <2 x i8> %val, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <2 x i8>*
   store <2 x i8> %val, <2 x i8>* %2, align 8
   ret void
 }
 
-define void @__rsAllocationVStoreXImpl_uchar4([1 x i64] %a.coerce, <4 x i8> %val, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define void @__rsAllocationVStoreXImpl_uchar4(%struct.rs_allocation* %a, <4 x i8> %val, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <4 x i8>*
   store <4 x i8> %val, <4 x i8>* %2, align 8
   ret void
 }
-define void @__rsAllocationVStoreXImpl_uchar3([1 x i64] %a.coerce, <3 x i8> %val, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define void @__rsAllocationVStoreXImpl_uchar3(%struct.rs_allocation* %a, <3 x i8> %val, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <3 x i8>*
   store <3 x i8> %val, <3 x i8>* %2, align 8
   ret void
 }
-define void @__rsAllocationVStoreXImpl_uchar2([1 x i64] %a.coerce, <2 x i8> %val, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define void @__rsAllocationVStoreXImpl_uchar2(%struct.rs_allocation* %a, <2 x i8> %val, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <2 x i8>*
   store <2 x i8> %val, <2 x i8>* %2, align 8
   ret void
 }
 
-define void @__rsAllocationVStoreXImpl_float4([1 x i64] %a.coerce, <4 x float> %val, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define void @__rsAllocationVStoreXImpl_float4(%struct.rs_allocation* %a, <4 x float> %val, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <4 x float>*
   store <4 x float> %val, <4 x float>* %2, align 8
   ret void
 }
-define void @__rsAllocationVStoreXImpl_float3([1 x i64] %a.coerce, <3 x float> %val, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define void @__rsAllocationVStoreXImpl_float3(%struct.rs_allocation* %a, <3 x float> %val, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <3 x float>*
   store <3 x float> %val, <3 x float>* %2, align 8
   ret void
 }
-define void @__rsAllocationVStoreXImpl_float2([1 x i64] %a.coerce, <2 x float> %val, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define void @__rsAllocationVStoreXImpl_float2(%struct.rs_allocation* %a, <2 x float> %val, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <2 x float>*
   store <2 x float> %val, <2 x float>* %2, align 8
   ret void
 }
 
-define void @__rsAllocationVStoreXImpl_double4([1 x i64] %a.coerce, <4 x double> %val, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define void @__rsAllocationVStoreXImpl_double4(%struct.rs_allocation* %a, <4 x double> %val, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <4 x double>*
   store <4 x double> %val, <4 x double>* %2, align 8
   ret void
 }
-define void @__rsAllocationVStoreXImpl_double3([1 x i64] %a.coerce, <3 x double> %val, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define void @__rsAllocationVStoreXImpl_double3(%struct.rs_allocation* %a, <3 x double> %val, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <3 x double>*
   store <3 x double> %val, <3 x double>* %2, align 8
   ret void
 }
-define void @__rsAllocationVStoreXImpl_double2([1 x i64] %a.coerce, <2 x double> %val, i32 %x, i32 %y, i32 %z) #0 {
-  %1 = tail call i8* @rsOffsetNs([1 x i64] %a.coerce, i32 %x, i32 %y, i32 %z) #10
+define void @__rsAllocationVStoreXImpl_double2(%struct.rs_allocation* %a, <2 x double> %val, i32 %x, i32 %y, i32 %z) #0 {
+  %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #10
   %2 = bitcast i8* %1 to <2 x double>*
   store <2 x double> %val, <2 x double>* %2, align 8
   ret void
 }
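
The VStore wrappers are the mirror image: the same address computation, then a store through the vector-typed pointer. A sketch of the float4 case, under the same illustrative declarations as the load sketch above:

    typedef float float4 __attribute__((vector_size(16)));  /* <4 x float> */

    static void vstore_float4_sketch(rs_allocation *a, float4 val,
                                     uint32_t x, uint32_t y, uint32_t z) {
        float4 *p = (float4 *)rsOffsetNs(a, x, y, z);  /* bitcast to <4 x float>* */
        *p = val;  /* the IR stores with align 8; same alignment caveat as above */
    }

The header change below is what forces the new calling convention: on LP64 the rs_allocation handle becomes four pointers (32 bytes), so it can no longer travel in a single 64-bit register.
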
diff --git a/scriptc/rs_types.rsh b/scriptc/rs_types.rsh
index de7e6aa..33cd7da 100644
--- a/scriptc/rs_types.rsh
+++ b/scriptc/rs_types.rsh
@@ -116,11 +116,11 @@
 typedef int64_t ssize_t;
 #endif
 
-//#ifndef __LP64__
+#ifndef __LP64__
 #define RS_BASE_OBJ typedef struct { const int* const p; } __attribute__((packed, aligned(4)))
-//#else
-//#define RS_BASE_OBJ typedef struct { const int* const p; const int* const r; const int* const v1; const int* const v2; }
-//#endif
+#else
+#define RS_BASE_OBJ typedef struct { const long* const p; const long* const r; const long* const v1; const long* const v2; }
+#endif
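
For reference, a sketch of what the two branches expand to when the macro is applied to a handle type (the "RS_BASE_OBJ rs_element;" usage is assumed from the surrounding header, not shown in this hunk):

    /* 32-bit: a single packed pointer, 4 bytes */
    typedef struct { const int* const p; }
        __attribute__((packed, aligned(4))) rs_element;

    /* LP64: four pointers, 32 bytes, naturally aligned */
    typedef struct { const long* const p; const long* const r;
                     const long* const v1; const long* const v2; } rs_element;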
 
 /**
  * \brief Opaque handle to a RenderScript element.