diff --git a/Android.mk b/Android.mk
index e0556e2..b3f3bc4 100644
--- a/Android.mk
+++ b/Android.mk
@@ -14,7 +14,18 @@
 # limitations under the License.
 #
 LOCAL_PATH := $(call my-dir)
-SLANG_ENABLE_ASSERTIONS := false
+
+FORCE_BUILD_LLVM_DISABLE_NDEBUG ?= false
+# Legality check: FORCE_BUILD_LLVM_DISABLE_NDEBUG must be exactly one word, either "true" or "false". The tested string is <number of words><number of words other than true/false>, so "10" is the only legal result.
+ifneq "$(words $(FORCE_BUILD_LLVM_DISABLE_NDEBUG))$(words $(filter-out true false,$(FORCE_BUILD_LLVM_DISABLE_NDEBUG)))" "10"
+  $(error FORCE_BUILD_LLVM_DISABLE_NDEBUG may only be true, false, or unset)
+endif
+
+FORCE_BUILD_LLVM_DEBUG ?= false
+# Legality check: FORCE_BUILD_LLVM_DEBUG must be exactly one word, either "true" or "false". The tested string is <number of words><number of words other than true/false>, so "10" is the only legal result.
+ifneq "$(words $(FORCE_BUILD_LLVM_DEBUG))$(words $(filter-out true false,$(FORCE_BUILD_LLVM_DEBUG)))" "10"
+  $(error FORCE_BUILD_LLVM_DEBUG may only be true, false, or unset)
+endif
 
 # The prebuilt tools should be used when we are doing app-only build.
 ifeq ($(TARGET_BUILD_APPS),)
@@ -22,7 +33,7 @@
 
 local_cflags_for_slang := -Wall -Werror -std=c++11
 ifeq ($(TARGET_BUILD_VARIANT),eng)
-local_cflags_for_slang += -O0
+local_cflags_for_slang += -O0 -D__ENABLE_INTERNAL_OPTIONS
 else
 ifeq ($(TARGET_BUILD_VARIANT),userdebug)
 else
@@ -34,10 +45,6 @@
 include $(LOCAL_PATH)/rs_version.mk
 local_cflags_for_slang += $(RS_VERSION_DEFINE)
 
-ifeq ($(SLANG_ENABLE_ASSERTIONS),true)
-local_cflags_for_slang += -D_DEBUG -UNDEBUG
-endif
-
 static_libraries_needed_by_slang := \
 	libLLVMBitWriter_2_9 \
 	libLLVMBitWriter_2_9_func \
@@ -74,6 +81,7 @@
 
 LOCAL_SRC_FILES :=	\
 	slang.cpp	\
+	slang_bitcode_gen.cpp	\
 	slang_backend.cpp	\
 	slang_pragma_recorder.cpp	\
 	slang_diagnostic_buffer.cpp
@@ -152,11 +160,13 @@
 	slang_rs_export_element.cpp	\
 	slang_rs_export_var.cpp	\
 	slang_rs_export_func.cpp	\
-	slang_rs_export_foreach.cpp \
+	slang_rs_export_foreach.cpp	\
+	slang_rs_export_reduce.cpp	\
 	slang_rs_object_ref_count.cpp	\
 	slang_rs_reflection.cpp \
 	slang_rs_reflection_cpp.cpp \
 	slang_rs_reflect_utils.cpp \
+	slang_rs_special_func.cpp	\
 	strip_unknown_attributes.cpp
 
 LOCAL_C_INCLUDES += frameworks/compile/libbcc/include
diff --git a/RSCCOptions.td b/RSCCOptions.td
index 96cdaa8..2339cd7 100644
--- a/RSCCOptions.td
+++ b/RSCCOptions.td
@@ -60,8 +60,8 @@
   HelpText<"Build ASTs then convert to LLVM, but emit nothing">;
 }
 
-def m32 : Flag<["-"], "m32">, HelpText<"Emit 32-bit C++ code">;
-def m64 : Flag<["-"], "m64">, HelpText<"Emit 64-bit C++ code">;
+def m32 : Flag<["-"], "m32">, HelpText<"Emit 32-bit code (only for C++, unless eng build)">;
+def m64 : Flag<["-"], "m64">, HelpText<"Emit 64-bit code (only for C++, unless eng build)">;
 
 def emit_g : Flag<["-"], "g">,
   HelpText<"Emit LLVM Debug Metadata">;
@@ -102,6 +102,7 @@
 let Group = M_Group in {
 
   def MD : Flag<["-"], "MD">, HelpText<"Emit .d dependency files">;
+  def MP : Flag<["-"], "MP">, HelpText<"Also emit phony target for dependency files">;
 
   def M : Flag<["-"], "M">;
   def emit_dep : Flag<["-"], "emit-dep">, Alias<M>;
@@ -123,6 +124,21 @@
   HelpText<"Reflect C++ classes">;
 
 //===----------------------------------------------------------------------===//
+// Diagnostic Options
+//===----------------------------------------------------------------------===//
+
+def ast_print : Flag<["-"], "ast-print">,
+  HelpText<"Print clang AST prior to llvm IR generation">;
+
+def debug : Flag<["-"], "debug">,
+  HelpText<"Enable debug output">;
+
+def print_after_all : Flag<["-"], "print-after-all">,
+  HelpText<"Print llvm IR after each pass">;
+def print_before_all : Flag<["-"], "print-before-all">,
+  HelpText<"Print llvm IR before each pass">;
+
+//===----------------------------------------------------------------------===//
 // Misc Options
 //===----------------------------------------------------------------------===//
 
diff --git a/lit-tests/bitcode_wrapper/bitcode_wrapper_test.ll b/lit-tests/bitcode_wrapper/bitcode_wrapper_test.ll
new file mode 100644
index 0000000..77e32c0
--- /dev/null
+++ b/lit-tests/bitcode_wrapper/bitcode_wrapper_test.ll
@@ -0,0 +1,51 @@
+; This test assembles this file to bitcode with all supported target
+; API versions, then checks that the bitcode file was generated and
+; has the right magic number.
+
+; RUN: %llvm-rs-as -target-api 11 %s -o %t11
+; RUN: xxd -ps -l 4 %t11 | FileCheck %s
+; RUN: %llvm-rs-as -target-api 12 %s -o %t12
+; RUN: xxd -ps -l 4 %t12 | FileCheck %s
+; RUN: %llvm-rs-as -target-api 13 %s -o %t13
+; RUN: xxd -ps -l 4 %t13 | FileCheck %s
+; RUN: %llvm-rs-as -target-api 14 %s -o %t14
+; RUN: xxd -ps -l 4 %t14 | FileCheck %s
+; RUN: %llvm-rs-as -target-api 15 %s -o %t15
+; RUN: xxd -ps -l 4 %t15 | FileCheck %s
+; RUN: %llvm-rs-as -target-api 16 %s -o %t16
+; RUN: xxd -ps -l 4 %t16 | FileCheck %s
+; RUN: %llvm-rs-as -target-api 17 %s -o %t17
+; RUN: xxd -ps -l 4 %t17 | FileCheck %s
+; RUN: %llvm-rs-as -target-api 18 %s -o %t18
+; RUN: xxd -ps -l 4 %t18 | FileCheck %s
+; RUN: %llvm-rs-as -target-api 19 %s -o %t19
+; RUN: xxd -ps -l 4 %t19 | FileCheck %s
+; RUN: %llvm-rs-as -target-api 20 %s -o %t20
+; RUN: xxd -ps -l 4 %t20 | FileCheck %s
+; RUN: %llvm-rs-as -target-api 21 %s -o %t21
+; RUN: xxd -ps -l 4 %t21 | FileCheck %s
+; RUN: %llvm-rs-as -target-api 22 %s -o %t22
+; RUN: xxd -ps -l 4 %t22 | FileCheck %s
+; RUN: %llvm-rs-as -target-api 23 %s -o %t23
+; RUN: xxd -ps -l 4 %t23 | FileCheck %s
+
+; RUN: %llvm-rs-as -target-api 0 %s -o %t0
+; RUN: xxd -ps -l 4 %t0 | FileCheck %s
+
+; Check for the magic number.
+
+; CHECK: dec0170b
+
+; ModuleID = 'kernel.bc'
+target datalayout = "e-p:32:32-i64:64-v128:64:128-n32-S64"
+target triple = "armv7-none-linux-gnueabi"
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+!\23pragma = !{!3, !4}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 1, !"min_enum_size", i32 4}
+!2 = !{!"clang version 3.6 "}
+!3 = !{!"version", !"1"}
+!4 = !{!"java_package_name", !"foo"}
diff --git a/lit-tests/debug/debug_disabled.rs b/lit-tests/debug/debug_disabled.rs
index 4485697..b0386ba 100644
--- a/lit-tests/debug/debug_disabled.rs
+++ b/lit-tests/debug/debug_disabled.rs
@@ -1,6 +1,6 @@
 // RUN: %Slang %s
 // RUN: %rs-filecheck-wrapper %s
-// CHECK-NOT: DW_TAG_subprogram
+// CHECK-NOT: MDSubprogram
 
 #pragma version(1)
 #pragma rs java_package_name(foo)
diff --git a/lit-tests/debug/debug_enabled.rs b/lit-tests/debug/debug_enabled.rs
index 4632744..8c4fdc6 100644
--- a/lit-tests/debug/debug_enabled.rs
+++ b/lit-tests/debug/debug_enabled.rs
@@ -1,6 +1,6 @@
 // RUN: %Slang -g %s
 // RUN: %rs-filecheck-wrapper %s
-// CHECK: DW_TAG_subprogram
+// CHECK: MDSubprogram
 
 #pragma version(1)
 #pragma rs java_package_name(foo)
diff --git a/lit-tests/lit.cfg b/lit-tests/lit.cfg
index de16162..88ad6e6 100644
--- a/lit-tests/lit.cfg
+++ b/lit-tests/lit.cfg
@@ -6,9 +6,10 @@
 config.name = 'slang_lit_tests'
 
 # suffixes: A list of file extensions to treat as test files.
-config.suffixes = ['.rs']
+config.suffixes = ['.rs', '.ll']
 
 # testFormat: The test format to use to interpret tests.
+import lit.formats
 config.test_format = lit.formats.ShTest()
 
 # Get the base build directory for the android source tree from environment.
@@ -34,15 +35,17 @@
         return tool
 
     # Otherwise look in the path.
+    import lit.util
     tool = lit.util.which(binary_name, PATH)
 
     if not tool:
-        lit.fatal("couldn't find " + binary_name + " program in " + PATH + " , try setting "
-                  + env_var + " in your environment")
+        lit_config.fatal("couldn't find " + binary_name + " program in " + PATH + " , try setting "
+                         + env_var + " in your environment")
 
     return os.path.abspath(tool)
 
 config.slang = inferTool('llvm-rs-cc', 'SLANG', os.path.join(config.base_path, 'out', 'host', 'linux-x86', 'bin')).replace('\\', '/')
+config.llvm_rs_as = inferTool('llvm-rs-as', 'LLVM_RS_AS', os.path.join(config.base_path, 'out', 'host', 'linux-x86', 'bin')).replace('\\', '/')
 
 config.filecheck = inferTool('FileCheck', 'FILECHECK', config.environment['PATH'])
 config.rs_filecheck_wrapper = inferTool('rs-filecheck-wrapper.sh', 'RS_FILECHECK_WRAPPER', os.path.join(config.base_path, 'frameworks', 'compile', 'slang', 'lit-tests'))
@@ -55,12 +58,14 @@
                      + " -output-dep-dir " + config.test_exec_root \
                      + " -java-reflection-path-base " + config.test_exec_root
 
-if not lit.quiet:
-    lit.note('using slang: %r' % config.slang)
-    lit.note('using FileCheck: %r' % config.filecheck)
-    lit.note('using rs-filecheck-wrapper.sh: %r' % config.rs_filecheck_wrapper)
-    lit.note('using output directory: %r' % config.test_exec_root)
+if not lit_config.quiet:
+    lit_config.note('using slang: %r' % config.slang)
+    lit_config.note('using llvm-rs-as: %r' % config.llvm_rs_as)
+    lit_config.note('using FileCheck: %r' % config.filecheck)
+    lit_config.note('using rs-filecheck-wrapper.sh: %r' % config.rs_filecheck_wrapper)
+    lit_config.note('using output directory: %r' % config.test_exec_root)
 
 # Tools configuration substitutions
 config.substitutions.append( ('%Slang', ' ' + config.slang + ' ' + config.slang_includes + ' ' + config.slang_options ) )
+config.substitutions.append( ('%llvm-rs-as', config.llvm_rs_as) )
 config.substitutions.append( ('%rs-filecheck-wrapper', ' ' + config.rs_filecheck_wrapper + ' ' + config.test_exec_root + ' ' + config.filecheck + ' ') )
diff --git a/lit-tests/reduce_metadata/reduce.rs b/lit-tests/reduce_metadata/reduce.rs
new file mode 100644
index 0000000..82da76d
--- /dev/null
+++ b/lit-tests/reduce_metadata/reduce.rs
@@ -0,0 +1,349 @@
+// Check for generation of reduce metadata.
+
+// RUN: %Slang -target-api 0 %s
+// RUN: %rs-filecheck-wrapper %s
+
+#pragma version(1)
+#pragma rs java_package_name(foo)
+
+// CHECK-NOT: foreach
+// CHECK: !\23rs_export_reduce =
+// CHECK-NOT: foreach
+
+// CHECK: !{!"mul_bool"}
+bool __attribute__((kernel("reduce")))
+mul_bool(bool lhs, bool rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: !{!"mul_char"}
+char __attribute__((kernel("reduce")))
+mul_char(char lhs, char rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: !{!"mul_char2"}
+char2 __attribute__((kernel("reduce")))
+mul_char2(char2 lhs, char2 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: !{!"mul_char3"}
+char3 __attribute__((kernel("reduce")))
+mul_char3(char3 lhs, char3 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: !{!"mul_char4"}
+char4 __attribute__((kernel("reduce")))
+mul_char4(char4 lhs, char4 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: !{!"mul_double"}
+double __attribute__((kernel("reduce")))
+mul_double(double lhs, double rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: !{!"mul_double2"}
+double2 __attribute__((kernel("reduce")))
+mul_double2(double2 lhs, double2 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: !{!"mul_double3"}
+double3 __attribute__((kernel("reduce")))
+mul_double3(double3 lhs, double3 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: !{!"mul_double4"}
+double4 __attribute__((kernel("reduce")))
+mul_double4(double4 lhs, double4 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: !{!"mul_float"}
+float __attribute__((kernel("reduce")))
+mul_float(float lhs, float rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: !{!"mul_float2"}
+float2 __attribute__((kernel("reduce")))
+mul_float2(float2 lhs, float2 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: !{!"mul_float3"}
+float3 __attribute__((kernel("reduce")))
+mul_float3(float3 lhs, float3 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: !{!"mul_float4"}
+float4 __attribute__((kernel("reduce")))
+mul_float4(float4 lhs, float4 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: !{!"mul_int"}
+int __attribute__((kernel("reduce")))
+mul_int(int lhs, int rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: !{!"mul_int2"}
+int2 __attribute__((kernel("reduce")))
+mul_int2(int2 lhs, int2 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: !{!"mul_int3"}
+int3 __attribute__((kernel("reduce")))
+mul_int3(int3 lhs, int3 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: !{!"mul_int4"}
+int4 __attribute__((kernel("reduce")))
+mul_int4(int4 lhs, int4 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: !{!"mul_long"}
+long __attribute__((kernel("reduce")))
+mul_long(long lhs, long rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: !{!"mul_long2"}
+long2 __attribute__((kernel("reduce")))
+mul_long2(long2 lhs, long2 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: !{!"mul_long3"}
+long3 __attribute__((kernel("reduce")))
+mul_long3(long3 lhs, long3 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: !{!"mul_long4"}
+long4 __attribute__((kernel("reduce")))
+mul_long4(long4 lhs, long4 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: !{!"mul_short"}
+short __attribute__((kernel("reduce")))
+mul_short(short lhs, short rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: !{!"mul_short2"}
+short2 __attribute__((kernel("reduce")))
+mul_short2(short2 lhs, short2 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: !{!"mul_short3"}
+short3 __attribute__((kernel("reduce")))
+mul_short3(short3 lhs, short3 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: !{!"mul_short4"}
+short4 __attribute__((kernel("reduce")))
+mul_short4(short4 lhs, short4 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: !{!"mul_uchar"}
+uchar __attribute__((kernel("reduce")))
+mul_uchar(uchar lhs, uchar rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: !{!"mul_uchar2"}
+uchar2 __attribute__((kernel("reduce")))
+mul_uchar2(uchar2 lhs, uchar2 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: !{!"mul_uchar3"}
+uchar3 __attribute__((kernel("reduce")))
+mul_uchar3(uchar3 lhs, uchar3 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: !{!"mul_uchar4"}
+uchar4 __attribute__((kernel("reduce")))
+mul_uchar4(uchar4 lhs, uchar4 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: !{!"mul_uint"}
+uint __attribute__((kernel("reduce")))
+mul_uint(uint lhs, uint rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: !{!"mul_uint2"}
+uint2 __attribute__((kernel("reduce")))
+mul_uint2(uint2 lhs, uint2 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: !{!"mul_uint3"}
+uint3 __attribute__((kernel("reduce")))
+mul_uint3(uint3 lhs, uint3 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: !{!"mul_uint4"}
+uint4 __attribute__((kernel("reduce")))
+mul_uint4(uint4 lhs, uint4 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: !{!"mul_ulong"}
+ulong __attribute__((kernel("reduce")))
+mul_ulong(ulong lhs, ulong rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: !{!"mul_ulong2"}
+ulong2 __attribute__((kernel("reduce")))
+mul_ulong2(ulong2 lhs, ulong2 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: !{!"mul_ulong3"}
+ulong3 __attribute__((kernel("reduce")))
+mul_ulong3(ulong3 lhs, ulong3 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: !{!"mul_ulong4"}
+ulong4 __attribute__((kernel("reduce")))
+mul_ulong4(ulong4 lhs, ulong4 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: !{!"mul_ushort"}
+ushort __attribute__((kernel("reduce")))
+mul_ushort(ushort lhs, ushort rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: !{!"mul_ushort2"}
+ushort2 __attribute__((kernel("reduce")))
+mul_ushort2(ushort2 lhs, ushort2 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: !{!"mul_ushort3"}
+ushort3 __attribute__((kernel("reduce")))
+mul_ushort3(ushort3 lhs, ushort3 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: !{!"mul_ushort4"}
+ushort4 __attribute__((kernel("reduce")))
+mul_ushort4(ushort4 lhs, ushort4 rhs) {
+  return lhs * rhs;
+}
+
+
+struct indirect {
+  bool elem_bool;
+  char elem_char;
+  char2 elem_char2;
+  char3 elem_char3;
+  char4 elem_char4;
+  double elem_double;
+  double2 elem_double2;
+  double3 elem_double3;
+  double4 elem_double4;
+  float elem_float;
+  float2 elem_float2;
+  float3 elem_float3;
+  float4 elem_float4;
+  int elem_int;
+  int2 elem_int2;
+  int3 elem_int3;
+  int4 elem_int4;
+  long elem_long;
+  long2 elem_long2;
+  long3 elem_long3;
+  long4 elem_long4;
+  short elem_short;
+  short2 elem_short2;
+  short3 elem_short3;
+  short4 elem_short4;
+  uchar elem_uchar;
+  uchar2 elem_uchar2;
+  uchar3 elem_uchar3;
+  uchar4 elem_uchar4;
+  uint elem_uint;
+  uint2 elem_uint2;
+  uint3 elem_uint3;
+  uint4 elem_uint4;
+  ulong elem_ulong;
+  ulong2 elem_ulong2;
+  ulong3 elem_ulong3;
+  ulong4 elem_ulong4;
+  ushort elem_ushort;
+  ushort2 elem_ushort2;
+  ushort3 elem_ushort3;
+  ushort4 elem_ushort4;
+};
+
+// CHECK: !{!"mul_indirect"}
+struct indirect __attribute__((kernel("reduce")))
+mul_indirect(struct indirect lhs, struct indirect rhs) {
+  lhs.elem_bool *= rhs.elem_bool;
+  lhs.elem_char *= rhs.elem_char;
+  lhs.elem_char2 *= rhs.elem_char2;
+  lhs.elem_char3 *= rhs.elem_char3;
+  lhs.elem_char4 *= rhs.elem_char4;
+  lhs.elem_double *= rhs.elem_double;
+  lhs.elem_double2 *= rhs.elem_double2;
+  lhs.elem_double3 *= rhs.elem_double3;
+  lhs.elem_double4 *= rhs.elem_double4;
+  lhs.elem_float *= rhs.elem_float;
+  lhs.elem_float2 *= rhs.elem_float2;
+  lhs.elem_float3 *= rhs.elem_float3;
+  lhs.elem_float4 *= rhs.elem_float4;
+  lhs.elem_int *= rhs.elem_int;
+  lhs.elem_int2 *= rhs.elem_int2;
+  lhs.elem_int3 *= rhs.elem_int3;
+  lhs.elem_int4 *= rhs.elem_int4;
+  lhs.elem_long *= rhs.elem_long;
+  lhs.elem_long2 *= rhs.elem_long2;
+  lhs.elem_long3 *= rhs.elem_long3;
+  lhs.elem_long4 *= rhs.elem_long4;
+  lhs.elem_short *= rhs.elem_short;
+  lhs.elem_short2 *= rhs.elem_short2;
+  lhs.elem_short3 *= rhs.elem_short3;
+  lhs.elem_short4 *= rhs.elem_short4;
+  lhs.elem_uchar *= rhs.elem_uchar;
+  lhs.elem_uchar2 *= rhs.elem_uchar2;
+  lhs.elem_uchar3 *= rhs.elem_uchar3;
+  lhs.elem_uchar4 *= rhs.elem_uchar4;
+  lhs.elem_uint *= rhs.elem_uint;
+  lhs.elem_uint2 *= rhs.elem_uint2;
+  lhs.elem_uint3 *= rhs.elem_uint3;
+  lhs.elem_uint4 *= rhs.elem_uint4;
+  lhs.elem_ulong *= rhs.elem_ulong;
+  lhs.elem_ulong2 *= rhs.elem_ulong2;
+  lhs.elem_ulong3 *= rhs.elem_ulong3;
+  lhs.elem_ulong4 *= rhs.elem_ulong4;
+  lhs.elem_ushort *= rhs.elem_ushort;
+  lhs.elem_ushort2 *= rhs.elem_ushort2;
+  lhs.elem_ushort3 *= rhs.elem_ushort3;
+  lhs.elem_ushort4 *= rhs.elem_ushort4;
+  return lhs;
+}
diff --git a/lit-tests/rs-filecheck-wrapper.sh b/lit-tests/rs-filecheck-wrapper.sh
index 816c80a..8f6d718 100755
--- a/lit-tests/rs-filecheck-wrapper.sh
+++ b/lit-tests/rs-filecheck-wrapper.sh
@@ -9,4 +9,6 @@
 
 FILECHECK_INPUTFILE=`basename $SOURCEFILE | sed 's/\.rs\$/.ll/'`
 
-$FILECHECK -input-file $OUTDIR/$FILECHECK_INPUTFILE $SOURCEFILE
+# This runs FileCheck on both the 32 bit and the 64 bit bitcode files; '&&' keeps a failed 32 bit check from being masked by the 64 bit one.
+$FILECHECK -input-file $OUTDIR/bc32/$FILECHECK_INPUTFILE $SOURCEFILE && \
+$FILECHECK -input-file $OUTDIR/bc64/$FILECHECK_INPUTFILE $SOURCEFILE
diff --git a/lit-tests/run-lit-tests.sh b/lit-tests/run-lit-tests.sh
new file mode 100755
index 0000000..28a9696
--- /dev/null
+++ b/lit-tests/run-lit-tests.sh
@@ -0,0 +1,6 @@
+#!/bin/bash -e
+# Runs the slang lit tests with llvm-lit; any arguments given to this script are forwarded to llvm-lit unchanged.
+LIT_PATH="${ANDROID_BUILD_TOP:?ANDROID_BUILD_TOP is not set}/frameworks/compile/libbcc/tests/debuginfo/llvm-lit"
+TESTS="$ANDROID_BUILD_TOP/frameworks/compile/slang/lit-tests"
+
+"$LIT_PATH" "$TESTS" "$@"
diff --git a/llvm-rs-as.cpp b/llvm-rs-as.cpp
index 1f81b14..c63a1ac 100644
--- a/llvm-rs-as.cpp
+++ b/llvm-rs-as.cpp
@@ -29,9 +29,8 @@
 #include "llvm/Support/SystemUtils.h"
 #include "llvm/Support/ToolOutputFile.h"
 
-#include "BitWriter_3_2/ReaderWriter_3_2.h"
-#include "BitWriter_2_9/ReaderWriter_2_9.h"
-#include "BitWriter_2_9_func/ReaderWriter_2_9_func.h"
+#include "slang_bitcode_gen.h"
+#include "slang_version.h"
 
 #include <memory>
 using namespace llvm;
@@ -49,6 +48,11 @@
 static cl::opt<bool>
 DisableOutput("disable-output", cl::desc("Disable output"), cl::init(false));
 
+static cl::opt<uint32_t>
+TargetAPI("target-api", cl::desc("Specify RenderScript target API version "
+                                 "(0 = development API) (default is 0)"),
+          cl::init(0));
+
 static cl::opt<bool>
 DumpAsm("d", cl::desc("Print assembly as parsed"), cl::Hidden);
 
@@ -56,20 +60,8 @@
 DisableVerify("disable-verify", cl::Hidden,
               cl::desc("Do not run verifier on input LLVM (dangerous!)"));
 
-enum BCVersion {
-  BC29, BC29Func, BC32, BCHEAD
-};
 
-cl::opt<BCVersion> BitcodeVersion("bitcode-version",
-  cl::desc("Set the bitcode version to be written:"),
-  cl::values(
-    clEnumValN(BC29, "BC29", "Version 2.9"),
-     clEnumVal(BC29Func,     "Version 2.9 func"),
-     clEnumVal(BC32,         "Version 3.2"),
-     clEnumVal(BCHEAD,       "Most current version"),
-    clEnumValEnd), cl::init(BC32));
-
-static void WriteOutputFile(const Module *M) {
+static void WriteOutputFile(const Module *M, uint32_t ModuleTargetAPI) {
   // Infer the output filename if needed.
   if (OutputFilename.empty()) {
     if (InputFilename == "-") {
@@ -97,24 +89,15 @@
   }
 
   if (Force || !CheckBitcodeOutputToConsole(Out->os(), true)) {
-    switch(BitcodeVersion) {
-      case BC29:
-        llvm_2_9::WriteBitcodeToFile(M, Out->os());
-        break;
-      case BC29Func:
-        llvm_2_9_func::WriteBitcodeToFile(M, Out->os());
-        break;
-      case BC32:
-        llvm_3_2::WriteBitcodeToFile(M, Out->os());
-        break;
-      case BCHEAD:
-        llvm::WriteBitcodeToFile(M, Out->os());
-        break;
+    slang::writeBitcode(Out->os(), *M,
+        /* TargetAPI = */ ModuleTargetAPI,
+        /* OptimizationLevel = */ 3);
+
+    if (!Out->os().has_error()) {
+      // Declare success.
+      Out->keep();
     }
   }
-
-  // Declare success.
-  Out->keep();
 }
 
 int main(int argc, char **argv) {
@@ -125,6 +108,18 @@
   llvm_shutdown_obj Y;  // Call llvm_shutdown() on exit.
   cl::ParseCommandLineOptions(argc, argv, "llvm .ll -> .bc assembler\n");
 
+  // Check target API.
+  uint32_t ActualTargetAPI = (TargetAPI == 0) ? RS_DEVELOPMENT_API : TargetAPI;
+
+  if (ActualTargetAPI != RS_DEVELOPMENT_API &&
+      (ActualTargetAPI < SLANG_MINIMUM_TARGET_API ||
+       ActualTargetAPI > SLANG_MAXIMUM_TARGET_API)) {
+    errs() << "target API level '" << ActualTargetAPI << "' is out of range "
+           << "('" << SLANG_MINIMUM_TARGET_API << "' - '"
+           << SLANG_MAXIMUM_TARGET_API << "')\n";
+    return 1;
+  }
+
   // Parse the file now...
   SMDiagnostic Err;
   std::unique_ptr<Module> M(parseAssemblyFile(InputFilename, Err, Context));
@@ -147,7 +142,7 @@
   if (DumpAsm) errs() << "Here's the assembly:\n" << *M.get();
 
   if (!DisableOutput)
-    WriteOutputFile(M.get());
+    WriteOutputFile(M.get(), ActualTargetAPI);
 
   return 0;
 }
diff --git a/llvm-rs-cc.cpp b/llvm-rs-cc.cpp
index 905f2e7..1a89d62 100644
--- a/llvm-rs-cc.cpp
+++ b/llvm-rs-cc.cpp
@@ -33,6 +33,7 @@
 #include "llvm/Support/Signals.h"
 #include "llvm/Target/TargetMachine.h"
 
+#include "os_sep.h"
 #include "rs_cc_options.h"
 #include "slang.h"
 #include "slang_assert.h"
diff --git a/rs_cc_options.cpp b/rs_cc_options.cpp
index 94d1453..ef8c2d2 100644
--- a/rs_cc_options.cpp
+++ b/rs_cc_options.cpp
@@ -153,6 +153,12 @@
       Opts.mOutputType = Slang::OT_Bitcode;
       break;
     }
+    case OPT_MP: {
+      Opts.mEmitDependency = true;
+      Opts.mOutputType = Slang::OT_Bitcode;
+      Opts.mEmitPhonyDependency = true;
+      break;
+    }
     default: { slangAssert(false && "Invalid option in M group!"); }
     }
   }
@@ -222,11 +228,22 @@
       }
     }
   } else if (lastBitwidthArg) {
-    // -m32/-m64 are forbidden for non-C++ reflection paths.
-    DiagEngine.Report(
-        DiagEngine.getCustomDiagID(clang::DiagnosticsEngine::Error,
-                                   "cannot use -m32/-m64 without specifying "
-                                   "C++ reflection (-reflect-c++)"));
+      // -m32/-m64 are forbidden for non-C++ reflection paths for non-eng builds
+      // (they would make it too easy for a developer to accidentally create and
+      // release an APK that has 32-bit or 64-bit bitcode but not both).
+#ifdef __ENABLE_INTERNAL_OPTIONS
+      if (lastBitwidthArg->getOption().matches(OPT_m32)) {
+        Opts.mBitWidth = 32;
+      } else {
+        Opts.mBitWidth = 64;
+      }
+      Opts.mEmit3264 = false;
+#else
+      DiagEngine.Report(
+          DiagEngine.getCustomDiagID(clang::DiagnosticsEngine::Error,
+                                     "cannot use -m32/-m64 without specifying "
+                                     "C++ reflection (-reflect-c++)"));
+#endif
   }
 
   Opts.mDependencyOutputDir =
@@ -237,6 +254,25 @@
   Opts.mShowVersion = Args->hasArg(OPT_version);
   Opts.mDebugEmission = Args->hasArg(OPT_emit_g);
   Opts.mVerbose = Args->hasArg(OPT_verbose);
+  Opts.mASTPrint = Args->hasArg(OPT_ast_print);
+
+  // Delegate options: re-issue selected flags to LLVM's own command-line
+  // parser (llvm::cl), spelled as "-" followed by the option's name.
+  std::vector<std::string> DelegatedStrings;
+  for (unsigned Opt : std::vector<unsigned>{OPT_debug, OPT_print_after_all, OPT_print_before_all}) {
+    if (Args->hasArg(Opt)) {
+      // TODO: Don't assume that the option begins with "-"; determine this programmatically instead.
+      DelegatedStrings.push_back(std::string("-") + std::string(OptParser->getOptionName(Opt)));
+      slangAssert(OptParser->getOptionKind(Opt) == llvm::opt::Option::FlagClass);
+    }
+  }
+  if (!DelegatedStrings.empty()) {
+    std::vector<const char *> DelegatedCStrs;
+    DelegatedCStrs.push_back(*ArgVector.data()); // program name
+    for (const std::string &String : DelegatedStrings)  // c_str() stays valid: DelegatedStrings is not modified below
+      DelegatedCStrs.push_back(String.c_str());
+    llvm::cl::ParseCommandLineOptions(DelegatedCStrs.size(), DelegatedCStrs.data());
+  }
 
   // If we are emitting both 32-bit and 64-bit bitcode, we must embed it.
 
@@ -253,11 +289,10 @@
     Opts.mTargetAPI = UINT_MAX;
   }
 
-  Opts.mEmit3264 =
-      (Opts.mTargetAPI >= 21) && (Opts.mBitcodeStorage != BCST_CPP_CODE);
-  if (Opts.mEmit3264) {
+  if ((Opts.mTargetAPI < 21) || (Opts.mBitcodeStorage == BCST_CPP_CODE))
+    Opts.mEmit3264 = false;
+  if (Opts.mEmit3264)
     Opts.mBitcodeStorage = BCST_JAVA_CODE;
-  }
 
   if (DiagEngine.hasErrorOccurred()) {
     llvm::errs() << DiagsBuffer.str();
diff --git a/rs_cc_options.h b/rs_cc_options.h
index e45dae0..d27732e 100644
--- a/rs_cc_options.h
+++ b/rs_cc_options.h
@@ -75,6 +75,10 @@
   // Emit output dependency file for each input file.
   bool mEmitDependency;
 
+  // Emit phony targets for each header dependency, which can avoid make errors
+  // when the header gets deleted. See -MP option of cc.
+  bool mEmitPhonyDependency;
+
   // The output directory for writing dependency files
   // (i.e. out/target/common/obj/APPS/.../src/renderscript).
   std::string mDependencyOutputDir;
@@ -98,6 +102,9 @@
   // Display verbose information about the compilation on stdout.
   bool mVerbose;
 
+  // Print the clang AST prior to LLVM IR generation (-ast-print).
+  bool mASTPrint;
+
   // Emit both 32-bit and 64-bit bitcode (embedded in the reflected sources).
   bool mEmit3264;
 
@@ -106,13 +113,15 @@
     mBitWidth = 32;
     mBitcodeStorage = slang::BCST_APK_RESOURCE;
     mEmitDependency = 0;
+    mEmitPhonyDependency = 0;
     mShowHelp = 0;
     mShowVersion = 0;
     mTargetAPI = RS_VERSION;
     mDebugEmission = 0;
     mOptimizationLevel = llvm::CodeGenOpt::Aggressive;
     mVerbose = false;
-    mEmit3264 = false;
+    mASTPrint = false;
+    mEmit3264 = true;
   }
 };
 
diff --git a/slang.cpp b/slang.cpp
index 2c38359..02a77bb 100644
--- a/slang.cpp
+++ b/slang.cpp
@@ -226,9 +226,9 @@
 }
 
 clang::ASTConsumer *
-Slang::createBackend(const clang::CodeGenOptions &CodeGenOpts,
+Slang::createBackend(const RSCCOptions &Opts, const clang::CodeGenOptions &CodeGenOpts,
                      llvm::raw_ostream *OS, OutputType OT) {
-  return new Backend(mRSContext, &getDiagnostics(), CodeGenOpts,
+  return new Backend(mRSContext, &getDiagnostics(), Opts, CodeGenOpts,
                      getTargetOptions(), &mPragmas, OS, OT, getSourceManager(),
                      mAllowRSPrefix, mIsFilterscript);
 }
@@ -339,7 +339,7 @@
   return true;
 }
 
-int Slang::generateDepFile() {
+int Slang::generateDepFile(bool PhonyTarget) {
   if (mDiagEngine->hasErrorOccurred())
     return 1;
   if (mDOS.get() == nullptr)
@@ -348,6 +348,8 @@
   // Initialize options for generating dependency file
   clang::DependencyOutputOptions DepOpts;
   DepOpts.IncludeSystemHeaders = 1;
+  if (PhonyTarget)
+    DepOpts.UsePhonyTargets = 1;
   DepOpts.OutputFile = mDepOutputFileName;
   DepOpts.Targets = mAdditionalDepTargets;
   DepOpts.Targets.push_back(mDepTargetBCFileName);
@@ -386,7 +388,7 @@
   return mDiagEngine->hasErrorOccurred() ? 1 : 0;
 }
 
-int Slang::compile() {
+int Slang::compile(const RSCCOptions &Opts) {
   if (mDiagEngine->hasErrorOccurred())
     return 1;
   if (mOS.get() == nullptr)
@@ -396,7 +398,7 @@
   createPreprocessor();
   createASTContext();
 
-  mBackend.reset(createBackend(CodeGenOpts, &mOS->os(), mOT));
+  mBackend.reset(createBackend(Opts, CodeGenOpts, &mOS->os(), mOT));
 
   // Inform the diagnostic client we are processing a source file
   mDiagClient->BeginSourceFile(LangOpts, mPP.get());
@@ -534,8 +536,9 @@
           llvm::StringMapEntry<ReflectedDefinitionTy>::Create(RDKey);
       ME->setValue(std::make_pair(ERT, CurInputFile));
 
-      if (!ReflectedDefinitions.insert(ME))
-        delete ME;
+      if (!ReflectedDefinitions.insert(ME)) {
+        slangAssert(false && "Type shouldn't be in map yet!");
+      }
 
       // Take the ownership of ERT such that it won't be freed in ~RSContext().
       ERT->keep();
@@ -669,7 +672,9 @@
 
     mIsFilterscript = isFilterscript(InputFile);
 
-    if (Slang::compile() > 0)
+    CodeGenOpts.MainFileName = mInputFileName;
+
+    if (Slang::compile(Opts) > 0)
       return false;
 
     if (!Opts.mJavaReflectionPackageName.empty()) {
@@ -745,7 +750,7 @@
       if (SuppressAllWarnings) {
         getDiagnostics().setSuppressAllDiagnostics(true);
       }
-      if (generateDepFile() > 0)
+      if (generateDepFile(Opts.mEmitPhonyDependency) > 0)
         return false;
       if (SuppressAllWarnings) {
         getDiagnostics().setSuppressAllDiagnostics(false);
diff --git a/slang.h b/slang.h
index 0848451..ac128a9 100644
--- a/slang.h
+++ b/slang.h
@@ -189,7 +189,8 @@
   void initPreprocessor();
   void initASTContext();
 
-  clang::ASTConsumer *createBackend(const clang::CodeGenOptions &CodeGenOpts,
+  clang::ASTConsumer *createBackend(const RSCCOptions &Opts,
+                                    const clang::CodeGenOptions &CodeGenOpts,
                                     llvm::raw_ostream *OS,
                                     OutputType OT);
 
@@ -237,9 +238,9 @@
     mGeneratedFileNames.push_back(GeneratedFileName);
   }
 
-  int generateDepFile();
+  int generateDepFile(bool PhonyTarget);
 
-  int compile();
+  int compile(const RSCCOptions &Opts);
 
   char const *getErrorMessage() { return mDiagClient->str().c_str(); }
 
diff --git a/slang_backend.cpp b/slang_backend.cpp
index 8f4a255..0936494 100644
--- a/slang_backend.cpp
+++ b/slang_backend.cpp
@@ -19,8 +19,6 @@
 #include <string>
 #include <vector>
 
-#include "bcinfo/BitcodeWrapper.h"
-
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/Decl.h"
 #include "clang/AST/DeclGroup.h"
@@ -64,17 +62,18 @@
 
 #include "slang_assert.h"
 #include "slang.h"
+#include "slang_bitcode_gen.h"
 #include "slang_rs_context.h"
 #include "slang_rs_export_foreach.h"
 #include "slang_rs_export_func.h"
+#include "slang_rs_export_reduce.h"
 #include "slang_rs_export_type.h"
 #include "slang_rs_export_var.h"
 #include "slang_rs_metadata.h"
 
+#include "rs_cc_options.h"
+
 #include "strip_unknown_attributes.h"
-#include "BitWriter_2_9/ReaderWriter_2_9.h"
-#include "BitWriter_2_9_func/ReaderWriter_2_9_func.h"
-#include "BitWriter_3_2/ReaderWriter_3_2.h"
 
 namespace slang {
 
@@ -138,17 +137,10 @@
   // Target Machine Options
   llvm::TargetOptions Options;
 
-  Options.NoFramePointerElim = mCodeGenOpts.DisableFPElim;
-
-  // Use hardware FPU.
-  //
-  // FIXME: Need to detect the CPU capability and decide whether to use softfp.
-  // To use softfp, change following 2 lines to
-  //
-  // Options.FloatABIType = llvm::FloatABI::Soft;
-  // Options.UseSoftFloat = true;
-  Options.FloatABIType = llvm::FloatABI::Hard;
-  Options.UseSoftFloat = false;
+  // Use soft-float ABI for ARM (which is the target used by Slang during code
+  // generation).  Codegen still uses hardware FPU by default.  To use software
+  // floating point, add 'soft-float' feature to FeaturesStr below.
+  Options.FloatABIType = llvm::FloatABI::Soft;
 
   // BCC needs all unknown symbols resolved at compilation time. So we don't
   // need any relocation model.
@@ -215,7 +207,7 @@
 }
 
 Backend::Backend(RSContext *Context, clang::DiagnosticsEngine *DiagEngine,
-                 const clang::CodeGenOptions &CodeGenOpts,
+                 const RSCCOptions &Opts, const clang::CodeGenOptions &CodeGenOpts,
                  const clang::TargetOptions &TargetOpts, PragmaList *Pragmas,
                  llvm::raw_ostream *OS, Slang::OutputType OT,
                  clang::SourceManager &SourceMgr, bool AllowRSPrefix,
@@ -224,11 +216,12 @@
       mOT(OT), mGen(nullptr), mPerFunctionPasses(nullptr),
       mPerModulePasses(nullptr), mCodeGenPasses(nullptr),
       mBufferOutStream(*mpOS), mContext(Context),
-      mSourceMgr(SourceMgr), mAllowRSPrefix(AllowRSPrefix),
+      mSourceMgr(SourceMgr), mASTPrint(Opts.mASTPrint), mAllowRSPrefix(AllowRSPrefix),
       mIsFilterscript(IsFilterscript), mExportVarMetadata(nullptr),
       mExportFuncMetadata(nullptr), mExportForEachNameMetadata(nullptr),
-      mExportForEachSignatureMetadata(nullptr), mExportTypeMetadata(nullptr),
-      mRSObjectSlotsMetadata(nullptr), mRefCount(mContext->getASTContext()),
+      mExportForEachSignatureMetadata(nullptr), mExportReduceMetadata(nullptr),
+      mExportTypeMetadata(nullptr), mRSObjectSlotsMetadata(nullptr),
+      mRefCount(mContext->getASTContext()),
       mASTChecker(Context, Context->getTargetAPI(), IsFilterscript),
       mLLVMContext(llvm::getGlobalContext()), mDiagEngine(*DiagEngine),
       mCodeGenOpts(CodeGenOpts), mPragmas(Pragmas) {
@@ -241,25 +234,12 @@
   mpModule = mGen->GetModule();
 }
 
-// Encase the Bitcode in a wrapper containing RS version information.
-void Backend::WrapBitcode(llvm::raw_string_ostream &Bitcode) {
-  bcinfo::AndroidBitcodeWrapper wrapper;
-  size_t actualWrapperLen = bcinfo::writeAndroidBitcodeWrapper(
-      &wrapper, Bitcode.str().length(), getTargetAPI(),
-      SlangVersion::CURRENT, mCodeGenOpts.OptimizationLevel);
-
-  slangAssert(actualWrapperLen > 0);
-
-  // Write out the bitcode wrapper.
-  mBufferOutStream.write(reinterpret_cast<char*>(&wrapper), actualWrapperLen);
-
-  // Write out the actual encoded bitcode.
-  mBufferOutStream << Bitcode.str();
-}
-
 void Backend::HandleTranslationUnit(clang::ASTContext &Ctx) {
   HandleTranslationUnitPre(Ctx);
 
+  if (mASTPrint)
+    Ctx.getTranslationUnitDecl()->dump();
+
   mGen->HandleTranslationUnit(Ctx);
 
   // Here, we complete a translation unit (whole translation unit is now in LLVM
@@ -346,40 +326,8 @@
       break;
     }
     case Slang::OT_Bitcode: {
-      llvm::legacy::PassManager *BCEmitPM = new llvm::legacy::PassManager();
-      std::string BCStr;
-      llvm::raw_string_ostream Bitcode(BCStr);
-      unsigned int TargetAPI = getTargetAPI();
-      switch (TargetAPI) {
-        case SLANG_HC_TARGET_API:
-        case SLANG_HC_MR1_TARGET_API:
-        case SLANG_HC_MR2_TARGET_API: {
-          // Pre-ICS targets must use the LLVM 2.9 BitcodeWriter
-          BCEmitPM->add(llvm_2_9::createBitcodeWriterPass(Bitcode));
-          break;
-        }
-        case SLANG_ICS_TARGET_API:
-        case SLANG_ICS_MR1_TARGET_API: {
-          // ICS targets must use the LLVM 2.9_func BitcodeWriter
-          BCEmitPM->add(llvm_2_9_func::createBitcodeWriterPass(Bitcode));
-          break;
-        }
-        default: {
-          if (TargetAPI != SLANG_DEVELOPMENT_TARGET_API &&
-              (TargetAPI < SLANG_MINIMUM_TARGET_API ||
-               TargetAPI > SLANG_MAXIMUM_TARGET_API)) {
-            slangAssert(false && "Invalid target API value");
-          }
-          // Switch to the 3.2 BitcodeWriter by default, and don't use
-          // LLVM's included BitcodeWriter at all (for now).
-          BCEmitPM->add(llvm_3_2::createBitcodeWriterPass(Bitcode));
-          //BCEmitPM->add(llvm::createBitcodeWriterPass(Bitcode));
-          break;
-        }
-      }
-
-      BCEmitPM->run(*mpModule);
-      WrapBitcode(Bitcode);
+      writeBitcode(mBufferOutStream, *mpModule, getTargetAPI(),
+                   mCodeGenOpts.OptimizationLevel);
       break;
     }
     case Slang::OT_Nothing: {
@@ -778,6 +726,26 @@
   }
 }
 
+// Emit one metadata operand per exported reduce-style kernel; each operand
+// holds only that kernel's name.
+void Backend::dumpExportReduceInfo(llvm::Module *M) {
+  // Fetch (or create) the RS_EXPORT_REDUCE_MN node the first time through.
+  if (mExportReduceMetadata == nullptr)
+    mExportReduceMetadata = M->getOrInsertNamedMetadata(RS_EXPORT_REDUCE_MN);
+
+  const auto KernelsEnd = mContext->export_reduce_end();
+  for (auto Kernel = mContext->export_reduce_begin(); Kernel != KernelsEnd;
+       ++Kernel) {
+    // Each operand is a one-element list holding the kernel's name.
+    llvm::SmallVector<llvm::Metadata *, 1> Operand;
+    Operand.push_back(
+        llvm::MDString::get(mLLVMContext, (*Kernel)->getName().c_str()));
+
+    mExportReduceMetadata->addOperand(
+        llvm::MDNode::get(mLLVMContext, Operand));
+  }
+}
+
 void Backend::dumpExportTypeInfo(llvm::Module *M) {
   llvm::SmallVector<llvm::Metadata *, 1> ExportTypeInfo;
 
@@ -856,6 +824,9 @@
   if (mContext->hasExportForEach())
     dumpExportForEachInfo(M);
 
+  if (mContext->hasExportReduce())
+    dumpExportReduceInfo(M);
+
   if (mContext->hasExportType())
     dumpExportTypeInfo(M);
 }
diff --git a/slang_backend.h b/slang_backend.h
index e3dbdef..83912fd 100644
--- a/slang_backend.h
+++ b/slang_backend.h
@@ -81,12 +81,12 @@
   void CreateModulePasses();
   bool CreateCodeGenPasses();
 
-  void WrapBitcode(llvm::raw_string_ostream &Bitcode);
-
   RSContext *mContext;
 
   clang::SourceManager &mSourceMgr;
 
+  bool mASTPrint;
+
   bool mAllowRSPrefix;
 
   bool mIsFilterscript;
@@ -95,6 +95,7 @@
   llvm::NamedMDNode *mExportFuncMetadata;
   llvm::NamedMDNode *mExportForEachNameMetadata;
   llvm::NamedMDNode *mExportForEachSignatureMetadata;
+  llvm::NamedMDNode *mExportReduceMetadata;
   llvm::NamedMDNode *mExportTypeMetadata;
   llvm::NamedMDNode *mRSObjectSlotsMetadata;
 
@@ -107,6 +108,7 @@
   void dumpExportVarInfo(llvm::Module *M);
   void dumpExportFunctionInfo(llvm::Module *M);
   void dumpExportForEachInfo(llvm::Module *M);
+  void dumpExportReduceInfo(llvm::Module *M);
   void dumpExportTypeInfo(llvm::Module *M);
 
  protected:
@@ -136,6 +138,7 @@
  public:
   Backend(RSContext *Context,
             clang::DiagnosticsEngine *DiagEngine,
+            const RSCCOptions &Opts,
             const clang::CodeGenOptions &CodeGenOpts,
             const clang::TargetOptions &TargetOpts,
             PragmaList *Pragmas,
diff --git a/slang_bitcode_gen.cpp b/slang_bitcode_gen.cpp
new file mode 100644
index 0000000..83d96bf
--- /dev/null
+++ b/slang_bitcode_gen.cpp
@@ -0,0 +1,81 @@
+/*
+ * Copyright 2015, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "bcinfo/BitcodeWrapper.h"
+
+#include "llvm/Support/raw_ostream.h"
+
+#include "BitWriter_2_9/ReaderWriter_2_9.h"
+#include "BitWriter_2_9_func/ReaderWriter_2_9_func.h"
+#include "BitWriter_3_2/ReaderWriter_3_2.h"
+
+#include "slang_assert.h"
+#include "slang_bitcode_gen.h"
+#include "slang_version.h"
+
+namespace slang {
+
+// Serialize M with the bitcode writer that matches TargetAPI, then send the
+// RS bitcode wrapper followed by that bitcode to Out.
+void writeBitcode(llvm::raw_ostream &Out,
+                  const llvm::Module &M,
+                  uint32_t TargetAPI,
+                  uint32_t OptimizationLevel) {
+  // Buffer the bitcode in memory: the wrapper needs its length up front.
+  std::string Payload;
+  llvm::raw_string_ostream PayloadOS(Payload);
+
+  const bool IsPreICS = TargetAPI == SLANG_HC_TARGET_API ||
+                        TargetAPI == SLANG_HC_MR1_TARGET_API ||
+                        TargetAPI == SLANG_HC_MR2_TARGET_API;
+  const bool IsICS = TargetAPI == SLANG_ICS_TARGET_API ||
+                     TargetAPI == SLANG_ICS_MR1_TARGET_API;
+
+  if (IsPreICS) {
+    // Pre-ICS targets must use the LLVM 2.9 BitcodeWriter
+    llvm_2_9::WriteBitcodeToFile(&M, PayloadOS);
+  } else if (IsICS) {
+    // ICS targets must use the LLVM 2.9_func BitcodeWriter
+    llvm_2_9_func::WriteBitcodeToFile(&M, PayloadOS);
+  } else {
+    const bool IsDevelopment = TargetAPI == SLANG_DEVELOPMENT_TARGET_API;
+    const bool InReleasedRange = TargetAPI >= SLANG_MINIMUM_TARGET_API &&
+                                 TargetAPI <= SLANG_MAXIMUM_TARGET_API;
+    if (!IsDevelopment && !InReleasedRange) {
+      slangAssert(false && "Invalid target API value");
+    }
+    // Every other target gets the 3.2 BitcodeWriter; LLVM's included
+    // BitcodeWriter is not used at all (for now).
+    llvm_3_2::WriteBitcodeToFile(&M, PayloadOS);
+  }
+
+  // str() flushes pending output into Payload before we measure it.
+  const size_t PayloadLen = PayloadOS.str().length();
+  const uint32_t SlangVer = SlangVersion::CURRENT;
+
+  // Fill in the wrapper that precedes the bitcode in the output.
+  bcinfo::AndroidBitcodeWrapper Header;
+  size_t ActualWrapperLen = bcinfo::writeAndroidBitcodeWrapper(
+      &Header, PayloadLen, TargetAPI, SlangVer, OptimizationLevel);
+
+  slangAssert(ActualWrapperLen > 0);
+
+  // Wrapper first, bitcode second.
+  Out.write(reinterpret_cast<char*>(&Header), ActualWrapperLen);
+  Out << Payload;
+}
+
+}  // namespace slang
diff --git a/slang_bitcode_gen.h b/slang_bitcode_gen.h
new file mode 100644
index 0000000..cc0e9f6
--- /dev/null
+++ b/slang_bitcode_gen.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2015, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _FRAMEWORKS_COMPILE_SLANG_SLANG_BITCODE_GEN_H_  // NOLINT
+#define _FRAMEWORKS_COMPILE_SLANG_SLANG_BITCODE_GEN_H_
+
+#include <cstdint>
+
+namespace llvm {
+  class raw_ostream;
+  class Module;
+}
+
+namespace slang {
+
+// Write out the LLVM bitcode for a module, encased in a wrapper
+// containing RS version information.
+void writeBitcode(llvm::raw_ostream &Out,
+                  const llvm::Module &M,
+                  uint32_t TargetAPI,
+                  uint32_t OptimizationLevel);
+
+} // end namespace slang
+
+#endif  // _FRAMEWORKS_COMPILE_SLANG_SLANG_BITCODE_GEN_H_  NOLINT
diff --git a/slang_rs_check_ast.cpp b/slang_rs_check_ast.cpp
index 7c96291..3d3e886 100644
--- a/slang_rs_check_ast.cpp
+++ b/slang_rs_check_ast.cpp
@@ -14,11 +14,14 @@
  * limitations under the License.
  */
 
+#include "clang/AST/Attr.h"
+
 #include "slang_rs_check_ast.h"
 
 #include "slang_assert.h"
 #include "slang.h"
 #include "slang_rs_export_foreach.h"
+#include "slang_rs_export_reduce.h"
 #include "slang_rs_export_type.h"
 
 namespace slang {
@@ -148,6 +151,31 @@
     return;
   }
 
+  if (FD->hasAttr<clang::KernelAttr>()) {
+    // Validate that the kernel attribute is not used with static.
+    if (FD->getStorageClass() == clang::SC_Static) {
+      Context->ReportError(FD->getLocation(),
+                           "Invalid use of attribute kernel with "
+                           "static function declaration: %0")
+        << FD->getName();
+      mValid = false;
+    }
+
+    // We allow no arguments to the attribute, or an expected single
+    // argument. If there is an expected single argument, we verify
+    // that it is one of the recognized kernel kinds.
+    llvm::StringRef KernelKind =
+      FD->getAttr<clang::KernelAttr>()->getKernelKind();
+
+    if (!KernelKind.empty() && !KernelKind.equals("reduce")) {
+      Context->ReportError(FD->getLocation(),
+                           "Unknown kernel attribute argument '%0' "
+                           "in declaration of function '%1'")
+        << KernelKind << FD->getName();
+      mValid = false;
+    }
+  }
+
   clang::QualType resultType = FD->getReturnType().getCanonicalType();
   bool isExtern = (FD->getFormalLinkage() == clang::ExternalLinkage);
 
@@ -169,7 +197,8 @@
   }
 
   bool saveKernel = mInKernel;
-  mInKernel = RSExportForEach::isRSForEachFunc(mTargetAPI, Context, FD);
+  mInKernel = RSExportForEach::isRSForEachFunc(mTargetAPI, FD) ||
+              RSExportReduce::isRSReduceFunc(mTargetAPI, FD);
 
   if (clang::Stmt *Body = FD->getBody()) {
     Visit(Body);
diff --git a/slang_rs_context.cpp b/slang_rs_context.cpp
index 81b3b49..94eb6be 100644
--- a/slang_rs_context.cpp
+++ b/slang_rs_context.cpp
@@ -34,11 +34,13 @@
 #include "slang_assert.h"
 #include "slang_rs_export_foreach.h"
 #include "slang_rs_export_func.h"
+#include "slang_rs_export_reduce.h"
 #include "slang_rs_export_type.h"
 #include "slang_rs_export_var.h"
 #include "slang_rs_exportable.h"
 #include "slang_rs_pragma_handler.h"
 #include "slang_rs_reflection.h"
+#include "slang_rs_special_func.h"
 
 namespace slang {
 
@@ -98,25 +100,37 @@
     return false;
   }
 
-  if (RSExportForEach::isSpecialRSFunc(mTargetAPI, FD)) {
+  // Specialized function
+  if (RSSpecialFunc::isSpecialRSFunc(mTargetAPI, FD)) {
     // Do not reflect specialized functions like init, dtor, or graphics root.
-    return RSExportForEach::validateSpecialFuncDecl(mTargetAPI, this, FD);
-  } else if (RSExportForEach::isRSForEachFunc(mTargetAPI, this, FD)) {
-    RSExportForEach *EFE = RSExportForEach::Create(this, FD);
-    if (EFE == nullptr)
-      return false;
-    else
+    return RSSpecialFunc::validateSpecialFuncDecl(mTargetAPI, this, FD);
+  }
+
+  // Foreach kernel
+  if (RSExportForEach::isRSForEachFunc(mTargetAPI, FD)) {
+    if (auto *EFE = RSExportForEach::Create(this, FD)) {
       mExportForEach.push_back(EFE);
+      return true;
+    }
+    return false;
+  }
+
+  // Reduce kernel
+  if (RSExportReduce::isRSReduceFunc(mTargetAPI, FD)) {
+    if (auto *ER = RSExportReduce::Create(this, FD)) {
+      mExportReduce.push_back(ER);
+      return true;
+    }
+    return false;
+  }
+
+  // Invokable
+  if (auto *EF = RSExportFunc::Create(this, FD)) {
+    mExportFuncs.push_back(EF);
     return true;
   }
 
-  RSExportFunc *EF = RSExportFunc::Create(this, FD);
-  if (EF == nullptr)
-    return false;
-  else
-    mExportFuncs.push_back(EF);
-
-  return true;
+  return false;
 }
 
 
@@ -258,7 +272,7 @@
   if (mExportTypes.insert(NewItem)) {
     return true;
   } else {
-    free(NewItem);
+    NewItem->Destroy(mExportTypes.getAllocator());
     return false;
   }
 }
diff --git a/slang_rs_context.h b/slang_rs_context.h
index 8e9b577..8cced4d 100644
--- a/slang_rs_context.h
+++ b/slang_rs_context.h
@@ -48,6 +48,7 @@
   class RSExportVar;
   class RSExportFunc;
   class RSExportForEach;
+  class RSExportReduce;
   class RSExportType;
 
 class RSContext {
@@ -60,6 +61,7 @@
   typedef std::list<RSExportVar*> ExportVarList;
   typedef std::list<RSExportFunc*> ExportFuncList;
   typedef std::list<RSExportForEach*> ExportForEachList;
+  typedef std::list<RSExportReduce*> ExportReduceList;
   typedef llvm::StringMap<RSExportType*> ExportTypeMap;
 
  private:
@@ -100,6 +102,7 @@
   ExportVarList mExportVars;
   ExportFuncList mExportFuncs;
   ExportForEachList mExportForEach;
+  ExportReduceList mExportReduce;
   ExportTypeMap mExportTypes;
 
  public:
@@ -198,6 +201,15 @@
   }
   inline bool hasExportForEach() const { return !mExportForEach.empty(); }
 
+  typedef ExportReduceList::const_iterator const_export_reduce_iterator;
+  const_export_reduce_iterator export_reduce_begin() const {
+    return mExportReduce.begin();  // start of the exported reduce kernel list
+  }
+  const_export_reduce_iterator export_reduce_end() const {
+    return mExportReduce.end();  // past-the-end of the reduce kernel list
+  }
+  inline bool hasExportReduce() const { return !mExportReduce.empty(); }
+
   typedef ExportTypeMap::iterator export_type_iterator;
   typedef ExportTypeMap::const_iterator const_export_type_iterator;
   export_type_iterator export_types_begin() { return mExportTypes.begin(); }
diff --git a/slang_rs_export_foreach.cpp b/slang_rs_export_foreach.cpp
index d539e13..6a2d89e 100644
--- a/slang_rs_export_foreach.cpp
+++ b/slang_rs_export_foreach.cpp
@@ -30,6 +30,7 @@
 #include "slang_assert.h"
 #include "slang_rs_context.h"
 #include "slang_rs_export_type.h"
+#include "slang_rs_special_func.h"
 #include "slang_version.h"
 
 namespace {
@@ -89,8 +90,15 @@
   return ret;
 }
 
+// Returns true only for a non-null declaration whose name is exactly "root".
+// A null FD is tolerated and yields false.
+bool isRootRSFunc(const clang::FunctionDecl *FD) {
+  const bool HasDecl = (FD != nullptr);
+  return HasDecl && FD->getName().equals("root");
 }
 
+} // end anonymous namespace
+
 namespace slang {
 
 // This function takes care of additional validation and construction of
@@ -533,14 +541,20 @@
     }
   }
 
-  if (FE->hasIns()) {
+  // Construct type information about inputs and outputs. Return null when
+  // there is an error exporting types.
 
+  bool TypeExportError = false;
+
+  if (FE->hasIns()) {
     for (InIter BI = FE->mIns.begin(), EI = FE->mIns.end(); BI != EI; BI++) {
       const clang::Type *T = (*BI)->getType().getCanonicalType().getTypePtr();
       RSExportType *InExportType = RSExportType::Create(Context, T);
 
-      if (FE->mIsKernelStyle) {
-        slangAssert(InExportType != nullptr);
+      // It is not an error if we don't export an input type for legacy
+      // kernels. This can happen in the case of a void pointer.
+      if (FE->mIsKernelStyle && !InExportType) {
+        TypeExportError = true;
       }
 
       FE->mInTypes.push_back(InExportType);
@@ -548,12 +562,21 @@
   }
 
   if (FE->mIsKernelStyle && FE->mHasReturnType) {
-    const clang::Type *T = FE->mResultType.getTypePtr();
-    FE->mOutType = RSExportType::Create(Context, T);
-    slangAssert(FE->mOutType);
+    const clang::Type *ReturnType = FE->mResultType.getTypePtr();
+    FE->mOutType = RSExportType::Create(Context, ReturnType);
+    TypeExportError |= !FE->mOutType;
   } else if (FE->mOut) {
-    const clang::Type *T = FE->mOut->getType().getCanonicalType().getTypePtr();
-    FE->mOutType = RSExportType::Create(Context, T);
+    const clang::Type *OutType =
+        FE->mOut->getType().getCanonicalType().getTypePtr();
+    FE->mOutType = RSExportType::Create(Context, OutType);
+    // It is not an error if we don't export an output type.
+    // This can happen in the case of a void pointer.
+  }
+
+  if (TypeExportError) {
+    slangAssert(Context->getDiagnostics()->hasErrorOccurred() &&
+                "Error exporting type but no diagnostic message issued!");
+    return nullptr;
   }
 
   return FE;
@@ -567,54 +590,16 @@
   return FE;
 }
 
-bool RSExportForEach::isGraphicsRootRSFunc(unsigned int targetAPI,
-                                           const clang::FunctionDecl *FD) {
-  if (FD->hasAttr<clang::KernelAttr>()) {
-    return false;
-  }
-
-  if (!isRootRSFunc(FD)) {
-    return false;
-  }
-
-  if (FD->getNumParams() == 0) {
-    // Graphics root function
-    return true;
-  }
-
-  // Check for legacy graphics root function (with single parameter).
-  if ((targetAPI < SLANG_ICS_TARGET_API) && (FD->getNumParams() == 1)) {
-    const clang::QualType &IntType = FD->getASTContext().IntTy;
-    if (FD->getReturnType().getCanonicalType() == IntType) {
-      return true;
-    }
-  }
-
-  return false;
-}
-
 bool RSExportForEach::isRSForEachFunc(unsigned int targetAPI,
-                                      slang::RSContext* Context,
                                       const clang::FunctionDecl *FD) {
-  slangAssert(Context && FD);
-  bool hasKernelAttr = FD->hasAttr<clang::KernelAttr>();
+  slangAssert(FD);
 
-  if (FD->getStorageClass() == clang::SC_Static) {
-    if (hasKernelAttr) {
-      Context->ReportError(FD->getLocation(),
-                           "Invalid use of attribute kernel with "
-                           "static function declaration: %0")
-          << FD->getName();
-    }
-    return false;
+  // Anything tagged as a kernel("") is definitely used with ForEach.
+  if (auto *Kernel = FD->getAttr<clang::KernelAttr>()) {
+    return Kernel->getKernelKind().empty();
   }
 
-  // Anything tagged as a kernel is definitely used with ForEach.
-  if (hasKernelAttr) {
-    return true;
-  }
-
-  if (isGraphicsRootRSFunc(targetAPI, FD)) {
+  if (RSSpecialFunc::isGraphicsRootRSFunc(targetAPI, FD)) {
     return false;
   }
 
@@ -642,57 +627,4 @@
   return false;
 }
 
-bool
-RSExportForEach::validateSpecialFuncDecl(unsigned int targetAPI,
-                                         slang::RSContext *Context,
-                                         clang::FunctionDecl const *FD) {
-  slangAssert(Context && FD);
-  bool valid = true;
-  const clang::ASTContext &C = FD->getASTContext();
-  const clang::QualType &IntType = FD->getASTContext().IntTy;
-
-  if (isGraphicsRootRSFunc(targetAPI, FD)) {
-    if ((targetAPI < SLANG_ICS_TARGET_API) && (FD->getNumParams() == 1)) {
-      // Legacy graphics root function
-      const clang::ParmVarDecl *PVD = FD->getParamDecl(0);
-      clang::QualType QT = PVD->getType().getCanonicalType();
-      if (QT != IntType) {
-        Context->ReportError(PVD->getLocation(),
-                             "invalid parameter type for legacy "
-                             "graphics root() function: %0")
-            << PVD->getType();
-        valid = false;
-      }
-    }
-
-    // Graphics root function, so verify that it returns an int
-    if (FD->getReturnType().getCanonicalType() != IntType) {
-      Context->ReportError(FD->getLocation(),
-                           "root() is required to return "
-                           "an int for graphics usage");
-      valid = false;
-    }
-  } else if (isInitRSFunc(FD) || isDtorRSFunc(FD)) {
-    if (FD->getNumParams() != 0) {
-      Context->ReportError(FD->getLocation(),
-                           "%0(void) is required to have no "
-                           "parameters")
-          << FD->getName();
-      valid = false;
-    }
-
-    if (FD->getReturnType().getCanonicalType() != C.VoidTy) {
-      Context->ReportError(FD->getLocation(),
-                           "%0(void) is required to have a void "
-                           "return type")
-          << FD->getName();
-      valid = false;
-    }
-  } else {
-    slangAssert(false && "must be called on root, init or .rs.dtor function!");
-  }
-
-  return valid;
-}
-
 }  // namespace slang
diff --git a/slang_rs_export_foreach.h b/slang_rs_export_foreach.h
index 581d8a1..033e9ed 100644
--- a/slang_rs_export_foreach.h
+++ b/slang_rs_export_foreach.h
@@ -160,48 +160,8 @@
     return mParamPacketType->fields_end();
   }
 
-  inline static bool isInitRSFunc(const clang::FunctionDecl *FD) {
-    if (!FD) {
-      return false;
-    }
-    const llvm::StringRef Name = FD->getName();
-    static llvm::StringRef FuncInit("init");
-    return Name.equals(FuncInit);
-  }
-
-  inline static bool isRootRSFunc(const clang::FunctionDecl *FD) {
-    if (!FD) {
-      return false;
-    }
-    const llvm::StringRef Name = FD->getName();
-    static llvm::StringRef FuncRoot("root");
-    return Name.equals(FuncRoot);
-  }
-
-  inline static bool isDtorRSFunc(const clang::FunctionDecl *FD) {
-    if (!FD) {
-      return false;
-    }
-    const llvm::StringRef Name = FD->getName();
-    static llvm::StringRef FuncDtor(".rs.dtor");
-    return Name.equals(FuncDtor);
-  }
-
-  static bool isGraphicsRootRSFunc(unsigned int targetAPI,
-                                   const clang::FunctionDecl *FD);
-
-  static bool isRSForEachFunc(unsigned int targetAPI, slang::RSContext *Context,
+  static bool isRSForEachFunc(unsigned int targetAPI,
                               const clang::FunctionDecl *FD);
-
-  inline static bool isSpecialRSFunc(unsigned int targetAPI,
-                                     const clang::FunctionDecl *FD) {
-    return isGraphicsRootRSFunc(targetAPI, FD) || isInitRSFunc(FD) ||
-           isDtorRSFunc(FD);
-  }
-
-  static bool validateSpecialFuncDecl(unsigned int targetAPI,
-                                      slang::RSContext *Context,
-                                      const clang::FunctionDecl *FD);
 };  // RSExportForEach
 
 }  // namespace slang
diff --git a/slang_rs_export_reduce.cpp b/slang_rs_export_reduce.cpp
new file mode 100644
index 0000000..778be31
--- /dev/null
+++ b/slang_rs_export_reduce.cpp
@@ -0,0 +1,180 @@
+/*
+ * Copyright 2015, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "slang_rs_export_reduce.h"
+
+#include <algorithm>
+#include <string>
+
+#include "clang/AST/Attr.h"
+
+#include "slang_assert.h"
+#include "slang_rs_context.h"
+#include "slang_rs_export_type.h"
+#include "slang_version.h"
+
+
+namespace {
+// Reduce-style kernels are only available at the development API level.
+bool haveReduceInTargetAPI(unsigned int TargetAPI) {
+  const bool TargetsDevelopmentAPI = (TargetAPI == RS_DEVELOPMENT_API);
+  return TargetsDevelopmentAPI;
+}
+} // end anonymous namespace
+
+
+namespace slang {
+
+// Validate the signature of a reduce kernel and, if it is valid, populate
+// this exportable from it.
+//
+// The declaration FD must satisfy all of the following:
+//  - the target API supports reduce kernels
+//  - exactly 2 parameters
+//  - non-void, non-pointer return type
+//  - every parameter has the same canonical type as the return type
+//  - no parameter has pointer type.
+//
+// Checks are not short-circuited: each violation is reported through
+// Context->ReportError(), so one call may emit several diagnostics. The only
+// early exit is a declaration with no parameters at all.
+//
+// On success, this function returns true after setting mType to the exported
+// return type and copying FD's two parameters into mIns.
+//
+// Returns false if any check failed or if the type could not be exported.
+bool RSExportReduce::validateAndConstructParams(
+    RSContext *Context, const clang::FunctionDecl *FD) {
+  slangAssert(Context && FD);
+  bool Valid = true;
+
+  // Validate API version. Keep going on failure so later errors surface too.
+  if (!haveReduceInTargetAPI(Context->getTargetAPI())) {
+    Context->ReportError(FD->getLocation(),
+                         "Reduce-style kernel %0() unsupported in SDK level %1")
+      << FD->getName() << Context->getTargetAPI();
+    Valid = false;
+  }
+
+  // Validate parameter count.
+  if (FD->getNumParams() != 2) {
+    Context->ReportError(FD->getLocation(),
+                         "Reduce-style kernel %0() must take 2 parameters "
+                         "(found %1).")
+      << FD->getName() << FD->getNumParams();
+    Valid = false;
+  }
+
+  // Validate return type: it may be neither void nor a pointer.
+  const clang::QualType ReturnTy = FD->getReturnType().getCanonicalType();
+
+  if (ReturnTy->isVoidType()) {
+    Context->ReportError(FD->getLocation(),
+                         "Reduce-style kernel %0() cannot return void")
+      << FD->getName();
+    Valid = false;
+  } else if (ReturnTy->isPointerType()) {
+    Context->ReportError(FD->getLocation(),
+                         "Reduce-style kernel %0() cannot return a pointer "
+                         "type: %1")
+      << FD->getName() << ReturnTy.getAsString();
+    Valid = false;
+  }
+
+  // Validate parameter types (nothing to compare if there are none).
+  if (FD->getNumParams() == 0) {
+    return false;
+  }
+
+  const clang::ParmVarDecl &FirstParam = *FD->getParamDecl(0);
+  const clang::QualType FirstParamTy = FirstParam.getType().getCanonicalType();
+
+  for (auto PVD = FD->param_begin(), PE = FD->param_end(); PVD != PE; ++PVD) {
+    const clang::ParmVarDecl &Param = **PVD;
+    const clang::QualType ParamTy = Param.getType().getCanonicalType();
+
+    // Check that the parameter is not a pointer.
+    if (ParamTy->isPointerType()) {
+      Context->ReportError(Param.getLocation(),
+                           "Reduce-style kernel %0() cannot have "
+                           "parameter '%1' of pointer type: '%2'")
+        << FD->getName() << Param.getName() << ParamTy.getAsString();
+      Valid = false;
+    }
+
+    // Check for type mismatch between this parameter and the return type.
+    if (ParamTy != ReturnTy) {
+      Context->ReportError(FD->getLocation(),
+                           "Reduce-style kernel %0() return type '%1' is not "
+                           "the same type as parameter '%2' (type '%3')")
+        << FD->getName() << ReturnTy.getAsString() << Param.getName()
+        << ParamTy.getAsString();
+      Valid = false;
+    }
+
+    // Check for type mismatch between parameters. Comparing against the first
+    // parameter suffices; the error is located at that first parameter.
+    if (ParamTy != FirstParamTy) {
+      Context->ReportError(FirstParam.getLocation(),
+                           "In reduce-style kernel %0(): parameter '%1' "
+                           "(type '%2') does not have the same type as "
+                           "parameter '%3' (type '%4')")
+        << FD->getName() << FirstParam.getName() << FirstParamTy.getAsString()
+        << Param.getName() << ParamTy.getAsString();
+      Valid = false;
+    }
+  }
+
+  if (Valid) {
+    // If the validation was successful, then populate the fields of
+    // the exportable.
+    if (!(mType = RSExportType::Create(Context, ReturnTy.getTypePtr()))) {
+      // There was an error exporting the type for the reduce kernel.
+      return false;
+    }
+
+    slangAssert(mIns.size() == 2 && FD->param_end() - FD->param_begin() == 2);
+    std::copy(FD->param_begin(), FD->param_end(), mIns.begin());
+  }
+
+  return Valid;
+}
+
+// Creates the exportable for reduce kernel FD. Returns nullptr when FD's
+// signature does not pass validateAndConstructParams().
+RSExportReduce *RSExportReduce::Create(RSContext *Context,
+                                       const clang::FunctionDecl *FD) {
+  slangAssert(Context && FD);
+
+  const llvm::StringRef Name = FD->getName();
+  slangAssert(!Name.empty() && "Function must have a name");
+
+  RSExportReduce *const Reduce = new RSExportReduce(Context, Name);
+  const bool SignatureOK = Reduce->validateAndConstructParams(Context, FD);
+
+  // Don't delete Reduce when validation fails - it is owned by Context; the
+  // caller simply gets nullptr.
+  return SignatureOK ? Reduce : nullptr;
+}
+
+bool RSExportReduce::isRSReduceFunc(unsigned int /* targetAPI */,
+                                    const clang::FunctionDecl *FD) {
+  slangAssert(FD);
+  const auto *Kernel = FD->getAttr<clang::KernelAttr>();  // null if untagged
+  return Kernel ? Kernel->getKernelKind().equals("reduce") : false;
+}
+
+}  // namespace slang
diff --git a/slang_rs_export_reduce.h b/slang_rs_export_reduce.h
new file mode 100644
index 0000000..9df27ae
--- /dev/null
+++ b/slang_rs_export_reduce.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright 2015, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _FRAMEWORKS_COMPILE_SLANG_SLANG_RS_EXPORT_REDUCE_H_  // NOLINT
+#define _FRAMEWORKS_COMPILE_SLANG_SLANG_RS_EXPORT_REDUCE_H_
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/SmallVector.h"
+
+#include "slang_rs_context.h"
+#include "slang_rs_exportable.h"
+#include "slang_rs_export_type.h"
+
+namespace clang {
+  class FunctionDecl;
+}  // namespace clang
+
+namespace slang {
+
+// Exportable describing one reduce-style kernel, for control-side reflection.
+class RSExportReduce : public RSExportable {
+ public:
+  // Container and iterator types used to expose the kernel's inputs.
+  using InVec = llvm::SmallVectorImpl<const clang::ParmVarDecl*>;
+  using InIter = InVec::const_iterator;
+
+ private:
+  std::string mName;    // name of the kernel function
+  RSExportType *mType;  // input and output type; null until it is set
+  llvm::SmallVector<const clang::ParmVarDecl *, 2> mIns;  // the inputs
+
+  // The constructor is private; Create() is the public factory. A new
+  // object starts with no exported type and two (null) input slots.
+  RSExportReduce(RSContext *Context, const llvm::StringRef &Name)
+    : RSExportable(Context, RSExportable::EX_REDUCE),
+      mName(Name.str()), mType(nullptr), mIns(2) {}
+
+  // Non-copyable.
+  RSExportReduce(const RSExportReduce &) = delete;
+  RSExportReduce &operator=(const RSExportReduce &) = delete;
+
+  // Checks the signature of reduce kernel FD and, if it is acceptable, fills
+  // in this object's type and inputs. Returns false if FD was rejected.
+  bool validateAndConstructParams(RSContext *Context,
+                                  const clang::FunctionDecl *FD);
+
+ public:
+  // Creates the exportable for reduce kernel FD; nullptr if validation fails.
+  static RSExportReduce *Create(RSContext *Context,
+                                const clang::FunctionDecl *FD);
+
+  // Name of the kernel function.
+  const std::string &getName() const { return mName; }
+
+  // The kernel's parameters, in declaration order.
+  const InVec &getIns() const { return mIns; }
+
+  // Type used for both the kernel's inputs and its output.
+  const RSExportType *getType() const { return mType; }
+
+  // True if FD carries a kernel attribute of kind "reduce". targetAPI is
+  // currently unused.
+  static bool isRSReduceFunc(unsigned int targetAPI,
+                             const clang::FunctionDecl *FD);
+
+};  // RSExportReduce
+
+}  // namespace slang
+
+#endif  // _FRAMEWORKS_COMPILE_SLANG_SLANG_RS_EXPORT_REDUCE_H_  NOLINT
diff --git a/slang_rs_exportable.h b/slang_rs_exportable.h
index 0871be3..e8fc11e 100644
--- a/slang_rs_exportable.h
+++ b/slang_rs_exportable.h
@@ -27,7 +27,8 @@
     EX_FUNC,
     EX_TYPE,
     EX_VAR,
-    EX_FOREACH
+    EX_FOREACH,
+    EX_REDUCE
   };
 
  private:
diff --git a/slang_rs_metadata.h b/slang_rs_metadata.h
index 63e7e0f..b84a8cd 100644
--- a/slang_rs_metadata.h
+++ b/slang_rs_metadata.h
@@ -33,4 +33,6 @@
 
 #define RS_EXPORT_FOREACH_MN "#rs_export_foreach"
 
+#define RS_EXPORT_REDUCE_MN "#rs_export_reduce"
+
 #endif  // _FRAMEWORKS_COMPILE_SLANG_SLANG_RS_METADATA_H_  NOLINT
diff --git a/slang_rs_reflection_cpp.cpp b/slang_rs_reflection_cpp.cpp
index 6b40ff5..03f2ee5 100644
--- a/slang_rs_reflection_cpp.cpp
+++ b/slang_rs_reflection_cpp.cpp
@@ -389,7 +389,7 @@
 
     if (ef->hasIns()) {
       // FIXME: Add support for kernels with multiple inputs.
-      assert(ef->getIns().size() == 1);
+      slangAssert(ef->getIns().size() == 1);
       Arguments.push_back(std::make_pair(
           "android::RSC::sp<const android::RSC::Allocation>", "ain"));
     }
@@ -417,7 +417,7 @@
     const RSExportForEach::InTypeVec &InTypes = ef->getInTypes();
     if (ef->hasIns()) {
       // FIXME: Add support for kernels with multiple inputs.
-      assert(ef->getIns().size() == 1);
+      slangAssert(ef->getIns().size() == 1);
       genTypeCheck(InTypes[0], "ain");
     }
     if (OET) {
@@ -437,7 +437,7 @@
 
     if (ef->hasIns()) {
       // FIXME: Add support for kernels with multiple inputs.
-      assert(ef->getIns().size() == 1);
+      slangAssert(ef->getIns().size() == 1);
       mOut << "ain, ";
     } else {
       mOut << "NULL, ";
diff --git a/slang_rs_special_func.cpp b/slang_rs_special_func.cpp
new file mode 100644
index 0000000..56ae590
--- /dev/null
+++ b/slang_rs_special_func.cpp
@@ -0,0 +1,106 @@
+/*
+ * Copyright 2015, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "slang_rs_special_func.h"
+
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/Attr.h"
+
+#include "slang_assert.h"
+#include "slang_version.h"
+
+namespace slang {
+
+bool RSSpecialFunc::isGraphicsRootRSFunc(unsigned int targetAPI,
+                                         const clang::FunctionDecl *FD) {
+  // Returns true iff FD is the graphics root() function. A null FD yields
+  // false, matching isInitRSFunc()/isDtorRSFunc(), so that isSpecialRSFunc()
+  // never dereferences a null declaration. Functions carrying a kernel
+  // attribute, or not named "root", are never graphics roots.
+  if (!FD || FD->hasAttr<clang::KernelAttr>() ||
+      !FD->getName().equals("root")) {
+    return false;
+  }
+
+  switch (FD->getNumParams()) {
+    case 0:
+      // Graphics root function
+      return true;
+    case 1:
+      // Legacy graphics root function (with single parameter): accepted
+      // only below SLANG_ICS_TARGET_API and only when it returns int.
+      return (targetAPI < SLANG_ICS_TARGET_API) &&
+             (FD->getReturnType().getCanonicalType() ==
+              FD->getASTContext().IntTy);
+    default:
+      return false;
+  }
+}
+
+bool
+RSSpecialFunc::validateSpecialFuncDecl(unsigned int targetAPI,
+                                       slang::RSContext *Context,
+                                       clang::FunctionDecl const *FD) {
+  // Reports every signature violation of a root/init/.rs.dtor function.
+  slangAssert(Context && FD);
+  const clang::ASTContext &ASTCtx = FD->getASTContext();
+  const clang::QualType RetTy = FD->getReturnType().getCanonicalType();
+  bool Ok = true;
+
+  if (isGraphicsRootRSFunc(targetAPI, FD)) {
+    // Legacy graphics root function: its one parameter must be an int.
+    if ((targetAPI < SLANG_ICS_TARGET_API) && (FD->getNumParams() == 1)) {
+      const clang::ParmVarDecl *Parm = FD->getParamDecl(0);
+      if (Parm->getType().getCanonicalType() != ASTCtx.IntTy) {
+        Context->ReportError(Parm->getLocation(),
+                             "invalid parameter type for legacy "
+                             "graphics root() function: %0")
+            << Parm->getType();
+        Ok = false;
+      }
+    }
+
+    // Graphics root function, so verify that it returns an int
+    if (RetTy != ASTCtx.IntTy) {
+      Context->ReportError(FD->getLocation(),
+                           "root() is required to return "
+                           "an int for graphics usage");
+      Ok = false;
+    }
+  } else if (isInitRSFunc(FD) || isDtorRSFunc(FD)) {
+    if (FD->getNumParams() != 0) {
+      Context->ReportError(FD->getLocation(),
+                           "%0(void) is required to have no "
+                           "parameters")
+          << FD->getName();
+      Ok = false;
+    }
+
+    if (RetTy != ASTCtx.VoidTy) {
+      Context->ReportError(FD->getLocation(),
+                           "%0(void) is required to have a void "
+                           "return type")
+          << FD->getName();
+      Ok = false;
+    }
+  } else {
+    slangAssert(false && "must be called on root, init or .rs.dtor function!");
+  }
+
+  return Ok;  // true iff nothing was reported above
+}
+
+}  // namespace slang
diff --git a/slang_rs_special_func.h b/slang_rs_special_func.h
new file mode 100644
index 0000000..7390871
--- /dev/null
+++ b/slang_rs_special_func.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright 2015, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _FRAMEWORKS_COMPILE_SLANG_SLANG_RS_SPECIAL_FUNC_H_
+#define _FRAMEWORKS_COMPILE_SLANG_SLANG_RS_SPECIAL_FUNC_H_
+
+#include "llvm/ADT/StringRef.h"
+
+#include "clang/AST/Decl.h"
+
+#include "slang_rs_context.h"
+
+namespace slang {
+
+namespace RSSpecialFunc {
+
+inline bool isInitRSFunc(const clang::FunctionDecl *FD) {
+  // True iff FD is non-null and its name is exactly "init".
+  static llvm::StringRef InitName("init");
+  if (FD == nullptr) {
+    return false;
+  }
+  return FD->getName().equals(InitName);
+}
+
+inline bool isDtorRSFunc(const clang::FunctionDecl *FD) {
+  // True iff FD is non-null and its name is exactly ".rs.dtor".
+  static llvm::StringRef DtorName(".rs.dtor");
+  if (FD == nullptr) {
+    return false;
+  }
+  return FD->getName().equals(DtorName);
+}
+
+bool isGraphicsRootRSFunc(unsigned int targetAPI,
+                          const clang::FunctionDecl *FD);
+
+inline bool isSpecialRSFunc(unsigned int targetAPI,
+                            const clang::FunctionDecl *FD) {
+  const bool IsRoot = isGraphicsRootRSFunc(targetAPI, FD);  // root() first
+  return IsRoot || isInitRSFunc(FD) || isDtorRSFunc(FD);
+}
+
+bool validateSpecialFuncDecl(unsigned int targetAPI,
+                             slang::RSContext *Context,
+                             const clang::FunctionDecl *FD);
+
+} // namespace RSSpecialFunc
+
+} // namespace slang
+
+#endif  // _FRAMEWORKS_COMPILE_SLANG_SLANG_RS_SPECIAL_FUNC_H_
diff --git a/tests/F_anon_struct_kernel_sig/anon_struct_kernel_sig.rs b/tests/F_anon_struct_kernel_sig/anon_struct_kernel_sig.rs
new file mode 100644
index 0000000..028a328
--- /dev/null
+++ b/tests/F_anon_struct_kernel_sig/anon_struct_kernel_sig.rs
@@ -0,0 +1,21 @@
+#pragma version(1)
+#pragma rs java_package_name(foo)
+
+typedef struct {  // anonymous struct: the expected export error points here
+  int i;
+} myStruct;
+
+/* Test old-style kernel: the anonymous struct is the input pointee type */
+void root(const myStruct *in, int *out) {
+  *out = in->i;
+}
+
+/* Test new-style kernel: the anonymous struct is the return type */
+myStruct RS_KERNEL kernel_returning_myStruct(int in) {
+  myStruct out = { in };
+  return out;
+}
+
+int RS_KERNEL kernel_with_myStruct_param(myStruct in) {  // struct as parameter
+  return in.i;
+}
diff --git a/tests/F_anon_struct_kernel_sig/stderr.txt.expect b/tests/F_anon_struct_kernel_sig/stderr.txt.expect
new file mode 100644
index 0000000..276aaaf
--- /dev/null
+++ b/tests/F_anon_struct_kernel_sig/stderr.txt.expect
@@ -0,0 +1 @@
+anon_struct_kernel_sig.rs:4:9: error: anonymous structures cannot be exported
diff --git a/tests/F_anon_struct_kernel_sig/stdout.txt.expect b/tests/F_anon_struct_kernel_sig/stdout.txt.expect
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tests/F_anon_struct_kernel_sig/stdout.txt.expect
diff --git a/tests/F_kernel_badattr/kernel_badattr.rs b/tests/F_kernel_badattr/kernel_badattr.rs
new file mode 100644
index 0000000..a719bd9
--- /dev/null
+++ b/tests/F_kernel_badattr/kernel_badattr.rs
@@ -0,0 +1,14 @@
+#pragma version(1)
+#pragma rs java_package_name(foo)
+
+int __attribute__((kernel("unimplemented"))) kernel(int arg) {  // expect: unknown kernel attribute argument
+  return 0;
+}
+
+int __attribute__((kernel(7))) kernel2(int arg) {  // expect: 'kernel' attribute requires a string
+  return 0;
+}
+
+int __attribute__((kernel("reduce", 1))) kernel3(int arg) {  // expect: attribute takes no more than 1 argument
+  return 0;
+}
diff --git a/tests/F_kernel_badattr/stderr.txt.expect b/tests/F_kernel_badattr/stderr.txt.expect
new file mode 100644
index 0000000..6cd1c74
--- /dev/null
+++ b/tests/F_kernel_badattr/stderr.txt.expect
@@ -0,0 +1,3 @@
+kernel_badattr.rs:8:27: error: 'kernel' attribute requires a string
+kernel_badattr.rs:12:20: error: 'kernel' attribute takes no more than 1 argument
+kernel_badattr.rs:4:46: error: Unknown kernel attribute argument 'unimplemented' in declaration of function 'kernel'
diff --git a/tests/F_kernel_badattr/stdout.txt.expect b/tests/F_kernel_badattr/stdout.txt.expect
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tests/F_kernel_badattr/stdout.txt.expect
diff --git a/tests/F_reduce_api_unsupported/reduce_api_unsupported.rs b/tests/F_reduce_api_unsupported/reduce_api_unsupported.rs
new file mode 100644
index 0000000..8dfedfb
--- /dev/null
+++ b/tests/F_reduce_api_unsupported/reduce_api_unsupported.rs
@@ -0,0 +1,12 @@
+// -target-api 23
+#pragma version(1)
+#pragma rs java_package_name(foo)
+
+typedef struct foo {
+   int x;
+} foo;
+
+foo __attribute__((kernel("reduce"))) addFoo(foo a, foo b) {  // expect: reduce-style kernel unsupported in SDK level 23
+  foo result = { a.x + b.x };
+  return result;
+}
diff --git a/tests/F_reduce_api_unsupported/stderr.txt.expect b/tests/F_reduce_api_unsupported/stderr.txt.expect
new file mode 100644
index 0000000..0e389a4
--- /dev/null
+++ b/tests/F_reduce_api_unsupported/stderr.txt.expect
@@ -0,0 +1 @@
+reduce_api_unsupported.rs:9:39: error: Reduce-style kernel addFoo() unsupported in SDK level 23
diff --git a/tests/F_reduce_api_unsupported/stdout.txt.expect b/tests/F_reduce_api_unsupported/stdout.txt.expect
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tests/F_reduce_api_unsupported/stdout.txt.expect
diff --git a/tests/F_reduce_non_binary/reduce_non_binary.rs b/tests/F_reduce_non_binary/reduce_non_binary.rs
new file mode 100644
index 0000000..77fdaa1
--- /dev/null
+++ b/tests/F_reduce_non_binary/reduce_non_binary.rs
@@ -0,0 +1,27 @@
+// -target-api 0
+#pragma version(1)
+#pragma rs java_package_name(foo)
+
+/* 0 arguments: expect "must take 2 parameters (found 0)" */
+
+int __attribute__((kernel("reduce"))) kernel0(void) {
+  return 0;
+}
+
+/* 1 argument: expect "must take 2 parameters (found 1)" */
+
+int __attribute__((kernel("reduce"))) kernel1(int arg1) {
+  return 0;
+}
+
+/* 3 arguments: expect "must take 2 parameters (found 3)" */
+
+int __attribute__((kernel("reduce"))) kernel3(int arg1, int arg2, int arg3) {
+  return 0;
+}
+
+/* 4 arguments: expect "must take 2 parameters (found 4)" */
+
+int __attribute__((kernel("reduce"))) kernel4(int arg1, int arg2, int arg3, int arg4) {
+  return 0;
+}
diff --git a/tests/F_reduce_non_binary/stderr.txt.expect b/tests/F_reduce_non_binary/stderr.txt.expect
new file mode 100644
index 0000000..1463a21
--- /dev/null
+++ b/tests/F_reduce_non_binary/stderr.txt.expect
@@ -0,0 +1,4 @@
+reduce_non_binary.rs:7:39: error: Reduce-style kernel kernel0() must take 2 parameters (found 0).
+reduce_non_binary.rs:13:39: error: Reduce-style kernel kernel1() must take 2 parameters (found 1).
+reduce_non_binary.rs:19:39: error: Reduce-style kernel kernel3() must take 2 parameters (found 3).
+reduce_non_binary.rs:25:39: error: Reduce-style kernel kernel4() must take 2 parameters (found 4).
diff --git a/tests/F_reduce_non_binary/stdout.txt.expect b/tests/F_reduce_non_binary/stdout.txt.expect
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tests/F_reduce_non_binary/stdout.txt.expect
diff --git a/tests/F_reduce_param_type_mismatch/reduce_param_type_mismatch.rs b/tests/F_reduce_param_type_mismatch/reduce_param_type_mismatch.rs
new file mode 100644
index 0000000..fd8e1aa
--- /dev/null
+++ b/tests/F_reduce_param_type_mismatch/reduce_param_type_mismatch.rs
@@ -0,0 +1,7 @@
+// -target-api 0
+#pragma version(1)
+#pragma rs java_package_name(foo)
+
+int __attribute__((kernel("reduce"))) kernel1(int arg1, float arg2) {  // expect: return/arg2 and arg1/arg2 type mismatches
+  return 0;
+}
diff --git a/tests/F_reduce_param_type_mismatch/stderr.txt.expect b/tests/F_reduce_param_type_mismatch/stderr.txt.expect
new file mode 100644
index 0000000..cc55737
--- /dev/null
+++ b/tests/F_reduce_param_type_mismatch/stderr.txt.expect
@@ -0,0 +1,2 @@
+reduce_param_type_mismatch.rs:5:39: error: Reduce-style kernel kernel1() return type 'int' is not the same type as parameter 'arg2' (type 'float')
+reduce_param_type_mismatch.rs:5:51: error: In reduce-style kernel kernel1(): parameter 'arg1' (type 'int') does not have the same type as parameter 'arg2' (type 'float')
diff --git a/tests/F_reduce_param_type_mismatch/stdout.txt.expect b/tests/F_reduce_param_type_mismatch/stdout.txt.expect
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tests/F_reduce_param_type_mismatch/stdout.txt.expect
diff --git a/tests/F_reduce_ptr_param/reduce_ptr_param.rs b/tests/F_reduce_ptr_param/reduce_ptr_param.rs
new file mode 100644
index 0000000..f099e19
--- /dev/null
+++ b/tests/F_reduce_ptr_param/reduce_ptr_param.rs
@@ -0,0 +1,7 @@
+// -target-api 0
+#pragma version(1)
+#pragma rs java_package_name(foo)
+
+int __attribute__((kernel("reduce"))) kernel(int *arg1, int *arg2) {  // expect: pointer parameters rejected, plus return-type mismatches
+  return 0;
+}
diff --git a/tests/F_reduce_ptr_param/stderr.txt.expect b/tests/F_reduce_ptr_param/stderr.txt.expect
new file mode 100644
index 0000000..3dc971e
--- /dev/null
+++ b/tests/F_reduce_ptr_param/stderr.txt.expect
@@ -0,0 +1,4 @@
+reduce_ptr_param.rs:5:51: error: Reduce-style kernel kernel() cannot have parameter 'arg1' of pointer type: 'int *'
+reduce_ptr_param.rs:5:39: error: Reduce-style kernel kernel() return type 'int' is not the same type as parameter 'arg1' (type 'int *')
+reduce_ptr_param.rs:5:62: error: Reduce-style kernel kernel() cannot have parameter 'arg2' of pointer type: 'int *'
+reduce_ptr_param.rs:5:39: error: Reduce-style kernel kernel() return type 'int' is not the same type as parameter 'arg2' (type 'int *')
diff --git a/tests/F_reduce_ptr_param/stdout.txt.expect b/tests/F_reduce_ptr_param/stdout.txt.expect
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tests/F_reduce_ptr_param/stdout.txt.expect
diff --git a/tests/F_reduce_ptr_ret_val/reduce_ptr_ret_val.rs b/tests/F_reduce_ptr_ret_val/reduce_ptr_ret_val.rs
new file mode 100644
index 0000000..b1ee9c9
--- /dev/null
+++ b/tests/F_reduce_ptr_ret_val/reduce_ptr_ret_val.rs
@@ -0,0 +1,7 @@
+// -target-api 0
+#pragma version(1)
+#pragma rs java_package_name(foo)
+
+int *__attribute__((kernel("reduce"))) kernel(int arg1, int arg2) {  // expect: pointer return rejected, plus mismatch with each parameter
+  return 0;
+}
diff --git a/tests/F_reduce_ptr_ret_val/stderr.txt.expect b/tests/F_reduce_ptr_ret_val/stderr.txt.expect
new file mode 100644
index 0000000..5e5852e
--- /dev/null
+++ b/tests/F_reduce_ptr_ret_val/stderr.txt.expect
@@ -0,0 +1,3 @@
+reduce_ptr_ret_val.rs:5:40: error: Reduce-style kernel kernel() cannot return a pointer type: int *
+reduce_ptr_ret_val.rs:5:40: error: Reduce-style kernel kernel() return type 'int *' is not the same type as parameter 'arg1' (type 'int')
+reduce_ptr_ret_val.rs:5:40: error: Reduce-style kernel kernel() return type 'int *' is not the same type as parameter 'arg2' (type 'int')
diff --git a/tests/F_reduce_ptr_ret_val/stdout.txt.expect b/tests/F_reduce_ptr_ret_val/stdout.txt.expect
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tests/F_reduce_ptr_ret_val/stdout.txt.expect
diff --git a/tests/F_reduce_ret_type_mismatch/reduce_ret_type_mismatch.rs b/tests/F_reduce_ret_type_mismatch/reduce_ret_type_mismatch.rs
new file mode 100644
index 0000000..e43ca89
--- /dev/null
+++ b/tests/F_reduce_ret_type_mismatch/reduce_ret_type_mismatch.rs
@@ -0,0 +1,7 @@
+// -target-api 0
+#pragma version(1)
+#pragma rs java_package_name(foo)
+
+double __attribute__((kernel("reduce"))) kernel(float arg1, float arg2) {  // expect: return type differs from each parameter
+  return arg1 + arg2;
+}
diff --git a/tests/F_reduce_ret_type_mismatch/stderr.txt.expect b/tests/F_reduce_ret_type_mismatch/stderr.txt.expect
new file mode 100644
index 0000000..e616c94
--- /dev/null
+++ b/tests/F_reduce_ret_type_mismatch/stderr.txt.expect
@@ -0,0 +1,2 @@
+reduce_ret_type_mismatch.rs:5:42: error: Reduce-style kernel kernel() return type 'double' is not the same type as parameter 'arg1' (type 'float')
+reduce_ret_type_mismatch.rs:5:42: error: Reduce-style kernel kernel() return type 'double' is not the same type as parameter 'arg2' (type 'float')
diff --git a/tests/F_reduce_ret_type_mismatch/stdout.txt.expect b/tests/F_reduce_ret_type_mismatch/stdout.txt.expect
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tests/F_reduce_ret_type_mismatch/stdout.txt.expect
diff --git a/tests/F_reduce_void_ret/reduce_void_ret.rs b/tests/F_reduce_void_ret/reduce_void_ret.rs
new file mode 100644
index 0000000..8fecfc2
--- /dev/null
+++ b/tests/F_reduce_void_ret/reduce_void_ret.rs
@@ -0,0 +1,7 @@
+// -target-api 0
+#pragma version(1)
+#pragma rs java_package_name(foo)
+
+void __attribute__((kernel("reduce"))) kernel(int arg1, int arg2) {  // expect: void return rejected, plus mismatch with each parameter
+  return;
+}
diff --git a/tests/F_reduce_void_ret/stderr.txt.expect b/tests/F_reduce_void_ret/stderr.txt.expect
new file mode 100644
index 0000000..ecb3dd1
--- /dev/null
+++ b/tests/F_reduce_void_ret/stderr.txt.expect
@@ -0,0 +1,3 @@
+reduce_void_ret.rs:5:40: error: Reduce-style kernel kernel() cannot return void
+reduce_void_ret.rs:5:40: error: Reduce-style kernel kernel() return type 'void' is not the same type as parameter 'arg1' (type 'int')
+reduce_void_ret.rs:5:40: error: Reduce-style kernel kernel() return type 'void' is not the same type as parameter 'arg2' (type 'int')
diff --git a/tests/F_reduce_void_ret/stdout.txt.expect b/tests/F_reduce_void_ret/stdout.txt.expect
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tests/F_reduce_void_ret/stdout.txt.expect
diff --git a/tests/P_reduce/reduce.rs b/tests/P_reduce/reduce.rs
new file mode 100644
index 0000000..4017f79
--- /dev/null
+++ b/tests/P_reduce/reduce.rs
@@ -0,0 +1,298 @@
+// -target-api 0
+#pragma version(1)
+#pragma rs java_package_name(foo)
+
+bool __attribute__((kernel("reduce")))
+mul_bool(bool lhs, bool rhs) {
+  return lhs * rhs;
+}
+
+char __attribute__((kernel("reduce")))
+mul_char(char lhs, char rhs) {
+  return lhs * rhs;
+}
+
+char2 __attribute__((kernel("reduce")))
+mul_char2(char2 lhs, char2 rhs) {
+  return lhs * rhs;
+}
+
+char3 __attribute__((kernel("reduce")))
+mul_char3(char3 lhs, char3 rhs) {
+  return lhs * rhs;
+}
+
+char4 __attribute__((kernel("reduce")))
+mul_char4(char4 lhs, char4 rhs) {
+  return lhs * rhs;
+}
+
+double __attribute__((kernel("reduce")))
+mul_double(double lhs, double rhs) {
+  return lhs * rhs;
+}
+
+double2 __attribute__((kernel("reduce")))
+mul_double2(double2 lhs, double2 rhs) {
+  return lhs * rhs;
+}
+
+double3 __attribute__((kernel("reduce")))
+mul_double3(double3 lhs, double3 rhs) {
+  return lhs * rhs;
+}
+
+double4 __attribute__((kernel("reduce")))
+mul_double4(double4 lhs, double4 rhs) {
+  return lhs * rhs;
+}
+
+float __attribute__((kernel("reduce")))
+mul_float(float lhs, float rhs) {
+  return lhs * rhs;
+}
+
+float2 __attribute__((kernel("reduce")))
+mul_float2(float2 lhs, float2 rhs) {
+  return lhs * rhs;
+}
+
+float3 __attribute__((kernel("reduce")))
+mul_float3(float3 lhs, float3 rhs) {
+  return lhs * rhs;
+}
+
+float4 __attribute__((kernel("reduce")))
+mul_float4(float4 lhs, float4 rhs) {
+  return lhs * rhs;
+}
+
+int __attribute__((kernel("reduce")))
+mul_int(int lhs, int rhs) {
+  return lhs * rhs;
+}
+
+int2 __attribute__((kernel("reduce")))
+mul_int2(int2 lhs, int2 rhs) {
+  return lhs * rhs;
+}
+
+int3 __attribute__((kernel("reduce")))
+mul_int3(int3 lhs, int3 rhs) {
+  return lhs * rhs;
+}
+
+int4 __attribute__((kernel("reduce")))
+mul_int4(int4 lhs, int4 rhs) {
+  return lhs * rhs;
+}
+
+long __attribute__((kernel("reduce")))
+mul_long(long lhs, long rhs) {
+  return lhs * rhs;
+}
+
+long2 __attribute__((kernel("reduce")))
+mul_long2(long2 lhs, long2 rhs) {
+  return lhs * rhs;
+}
+
+long3 __attribute__((kernel("reduce")))
+mul_long3(long3 lhs, long3 rhs) {
+  return lhs * rhs;
+}
+
+long4 __attribute__((kernel("reduce")))
+mul_long4(long4 lhs, long4 rhs) {
+  return lhs * rhs;
+}
+
+short __attribute__((kernel("reduce")))
+mul_short(short lhs, short rhs) {
+  return lhs * rhs;
+}
+
+short2 __attribute__((kernel("reduce")))
+mul_short2(short2 lhs, short2 rhs) {
+  return lhs * rhs;
+}
+
+short3 __attribute__((kernel("reduce")))
+mul_short3(short3 lhs, short3 rhs) {
+  return lhs * rhs;
+}
+
+short4 __attribute__((kernel("reduce")))
+mul_short4(short4 lhs, short4 rhs) {
+  return lhs * rhs;
+}
+
+uchar __attribute__((kernel("reduce")))
+mul_uchar(uchar lhs, uchar rhs) {
+  return lhs * rhs;
+}
+
+uchar2 __attribute__((kernel("reduce")))
+mul_uchar2(uchar2 lhs, uchar2 rhs) {
+  return lhs * rhs;
+}
+
+uchar3 __attribute__((kernel("reduce")))
+mul_uchar3(uchar3 lhs, uchar3 rhs) {
+  return lhs * rhs;
+}
+
+uchar4 __attribute__((kernel("reduce")))
+mul_uchar4(uchar4 lhs, uchar4 rhs) {
+  return lhs * rhs;
+}
+
+uint __attribute__((kernel("reduce")))
+mul_uint(uint lhs, uint rhs) {
+  return lhs * rhs;
+}
+
+uint2 __attribute__((kernel("reduce")))
+mul_uint2(uint2 lhs, uint2 rhs) {
+  return lhs * rhs;
+}
+
+uint3 __attribute__((kernel("reduce")))
+mul_uint3(uint3 lhs, uint3 rhs) {
+  return lhs * rhs;
+}
+
+uint4 __attribute__((kernel("reduce")))
+mul_uint4(uint4 lhs, uint4 rhs) {
+  return lhs * rhs;
+}
+
+ulong __attribute__((kernel("reduce")))
+mul_ulong(ulong lhs, ulong rhs) {
+  return lhs * rhs;
+}
+
+ulong2 __attribute__((kernel("reduce")))
+mul_ulong2(ulong2 lhs, ulong2 rhs) {
+  return lhs * rhs;
+}
+
+ulong3 __attribute__((kernel("reduce")))
+mul_ulong3(ulong3 lhs, ulong3 rhs) {
+  return lhs * rhs;
+}
+
+ulong4 __attribute__((kernel("reduce")))
+mul_ulong4(ulong4 lhs, ulong4 rhs) {
+  return lhs * rhs;
+}
+
+ushort __attribute__((kernel("reduce")))
+mul_ushort(ushort lhs, ushort rhs) {
+  return lhs * rhs;
+}
+
+ushort2 __attribute__((kernel("reduce")))
+mul_ushort2(ushort2 lhs, ushort2 rhs) {
+  return lhs * rhs;
+}
+
+ushort3 __attribute__((kernel("reduce")))
+mul_ushort3(ushort3 lhs, ushort3 rhs) {
+  return lhs * rhs;
+}
+
+ushort4 __attribute__((kernel("reduce")))
+mul_ushort4(ushort4 lhs, ushort4 rhs) {
+  return lhs * rhs;
+}
+
+struct indirect {
+  bool elem_bool;
+  char elem_char;
+  char2 elem_char2;
+  char3 elem_char3;
+  char4 elem_char4;
+  double elem_double;
+  double2 elem_double2;
+  double3 elem_double3;
+  double4 elem_double4;
+  float elem_float;
+  float2 elem_float2;
+  float3 elem_float3;
+  float4 elem_float4;
+  int elem_int;
+  int2 elem_int2;
+  int3 elem_int3;
+  int4 elem_int4;
+  long elem_long;
+  long2 elem_long2;
+  long3 elem_long3;
+  long4 elem_long4;
+  short elem_short;
+  short2 elem_short2;
+  short3 elem_short3;
+  short4 elem_short4;
+  uchar elem_uchar;
+  uchar2 elem_uchar2;
+  uchar3 elem_uchar3;
+  uchar4 elem_uchar4;
+  uint elem_uint;
+  uint2 elem_uint2;
+  uint3 elem_uint3;
+  uint4 elem_uint4;
+  ulong elem_ulong;
+  ulong2 elem_ulong2;
+  ulong3 elem_ulong3;
+  ulong4 elem_ulong4;
+  ushort elem_ushort;
+  ushort2 elem_ushort2;
+  ushort3 elem_ushort3;
+  ushort4 elem_ushort4;
+};
+
+struct indirect __attribute__((kernel("reduce")))
+mul_indirect(struct indirect lhs, struct indirect rhs) {
+  lhs.elem_bool *= rhs.elem_bool;
+  lhs.elem_char *= rhs.elem_char;
+  lhs.elem_char2 *= rhs.elem_char2;
+  lhs.elem_char3 *= rhs.elem_char3;
+  lhs.elem_char4 *= rhs.elem_char4;
+  lhs.elem_double *= rhs.elem_double;
+  lhs.elem_double2 *= rhs.elem_double2;
+  lhs.elem_double3 *= rhs.elem_double3;
+  lhs.elem_double4 *= rhs.elem_double4;
+  lhs.elem_float *= rhs.elem_float;
+  lhs.elem_float2 *= rhs.elem_float2;
+  lhs.elem_float3 *= rhs.elem_float3;
+  lhs.elem_float4 *= rhs.elem_float4;
+  lhs.elem_int *= rhs.elem_int;
+  lhs.elem_int2 *= rhs.elem_int2;
+  lhs.elem_int3 *= rhs.elem_int3;
+  lhs.elem_int4 *= rhs.elem_int4;
+  lhs.elem_long *= rhs.elem_long;
+  lhs.elem_long2 *= rhs.elem_long2;
+  lhs.elem_long3 *= rhs.elem_long3;
+  lhs.elem_long4 *= rhs.elem_long4;
+  lhs.elem_short *= rhs.elem_short;
+  lhs.elem_short2 *= rhs.elem_short2;
+  lhs.elem_short3 *= rhs.elem_short3;
+  lhs.elem_short4 *= rhs.elem_short4;
+  lhs.elem_uchar *= rhs.elem_uchar;
+  lhs.elem_uchar2 *= rhs.elem_uchar2;
+  lhs.elem_uchar3 *= rhs.elem_uchar3;
+  lhs.elem_uchar4 *= rhs.elem_uchar4;
+  lhs.elem_uint *= rhs.elem_uint;
+  lhs.elem_uint2 *= rhs.elem_uint2;
+  lhs.elem_uint3 *= rhs.elem_uint3;
+  lhs.elem_uint4 *= rhs.elem_uint4;
+  lhs.elem_ulong *= rhs.elem_ulong;
+  lhs.elem_ulong2 *= rhs.elem_ulong2;
+  lhs.elem_ulong3 *= rhs.elem_ulong3;
+  lhs.elem_ulong4 *= rhs.elem_ulong4;
+  lhs.elem_ushort *= rhs.elem_ushort;
+  lhs.elem_ushort2 *= rhs.elem_ushort2;
+  lhs.elem_ushort3 *= rhs.elem_ushort3;
+  lhs.elem_ushort4 *= rhs.elem_ushort4;
+  return lhs;
+}
diff --git a/tests/P_reduce/stderr.txt.expect b/tests/P_reduce/stderr.txt.expect
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tests/P_reduce/stderr.txt.expect
diff --git a/tests/P_reduce/stdout.txt.expect b/tests/P_reduce/stdout.txt.expect
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tests/P_reduce/stdout.txt.expect
diff --git a/tests/P_root_void/root_void.rs b/tests/P_root_void/root_void.rs
new file mode 100644
index 0000000..13bed22
--- /dev/null
+++ b/tests/P_root_void/root_void.rs
@@ -0,0 +1,83 @@
+#pragma version(1)
+#pragma rs java_package_name(foo)
+
+void root(const void *ain, void *aout, const void *usrData,
+          uint32_t x, uint32_t y) {
+}
+
+void in_only(const void *ain) {
+}
+
+void in_x_only(const void *ain, uint32_t x) {
+}
+
+void in_y_only(const void *ain, uint32_t y) {
+}
+
+void in_x_y_only(const void *ain, uint32_t x, uint32_t y) {
+}
+
+void in_usrdata_only(const void *ain, const void *usrData) {
+}
+
+void in_usrdata_x_only(const void *ain, const void *usrData, uint32_t x) {
+}
+
+void in_usrdata_y_only(const void *ain, const void *usrData, uint32_t y) {
+}
+
+void in_usrdata_x_y_only(const void *ain, const void *usrData, uint32_t x,
+                         uint32_t y) {
+}
+
+void out_only(void *aout) {
+}
+
+void out_x_only(void *aout, uint32_t x) {
+}
+
+void out_y_only(void *aout, uint32_t y) {
+}
+
+void out_x_y_only(void *aout, uint32_t x, uint32_t y) {
+}
+
+void out_usrdata_only(void *aout, const void *usrData) {
+}
+
+void out_usrdata_x_only(void *aout, const void *usrData, uint32_t x) {
+}
+
+void out_usrdata_y_only(void *aout, const void *usrData, uint32_t y) {
+}
+
+void out_usrdata_x_y_only(void *aout, const void *usrData, uint32_t x,
+                         uint32_t y) {
+}
+
+void in_out_only(const void *ain, void *aout) {
+}
+
+void in_out_x_only(const void *ain, void *aout, uint32_t x) {
+}
+
+void in_out_y_only(const void *ain, void *aout, uint32_t y) {
+}
+
+void in_out_x_y_only(const void *ain, void *aout, uint32_t x, uint32_t y) {
+}
+
+void in_out_usrdata_only(const void *ain, void *aout, const void *usrData) {
+}
+
+void in_out_usrdata_x_only(const void *ain, void *aout, const void *usrData,
+                           uint32_t x) {
+}
+
+void in_out_usrdata_y_only(const void *ain, void *aout, const void *usrData,
+                           uint32_t y) {
+}
+
+void in_out_usrdata_x_y_only(const void *ain, void *aout, const void *usrData,
+                             uint32_t x, uint32_t y) {
+}
diff --git a/tests/P_root_void/stderr.txt.expect b/tests/P_root_void/stderr.txt.expect
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tests/P_root_void/stderr.txt.expect
diff --git a/tests/P_root_void/stdout.txt.expect b/tests/P_root_void/stdout.txt.expect
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tests/P_root_void/stdout.txt.expect
