am bf37e8d5: am 3c182c60: Merge "Add C++ reflection for reduce-style kernels in slang."

* commit 'bf37e8d524338199a81b70d3a549e5a8043b90e7':
  Add C++ reflection for reduce-style kernels in slang.
diff --git a/lit-tests/reduce_reflection/reduce_reflection_cpp.rs b/lit-tests/reduce_reflection/reduce_reflection_cpp.rs
new file mode 100644
index 0000000..0da4d0a
--- /dev/null
+++ b/lit-tests/reduce_reflection/reduce_reflection_cpp.rs
@@ -0,0 +1,408 @@
+// RUN: %Slang -target-api 0 -reflect-c++ %s
+// RUN: %scriptc-filecheck-wrapper --lang=C++ %s
+
+#pragma version(1)
+#pragma rs java_package_name(foo)
+
+// CHECK: void reduce_mul_half(android::RSC::sp<android::RSC::Allocation> ain,
+// Array variants of kernels with the half type are unsupported.
+// CHECK-NOT: half reduce_mul_half(const half in[],
+half __attribute__((kernel("reduce"))) mul_half(half lhs, half rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: void reduce_mul_half2(android::RSC::sp<android::RSC::Allocation> ain,
+// CHECK-NOT: Half2 reduce_mul_half2(const half in[],
+half2 __attribute__((kernel("reduce"))) mul_half2(half2 lhs, half2 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: void reduce_mul_half3(android::RSC::sp<android::RSC::Allocation> ain,
+// CHECK-NOT: Half3 reduce_mul_half3(const half in[],
+half3 __attribute__((kernel("reduce"))) mul_half3(half3 lhs, half3 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: void reduce_mul_half4(android::RSC::sp<android::RSC::Allocation> ain,
+// CHECK-NOT: Half4 reduce_mul_half4(const half in[],
+half4 __attribute__((kernel("reduce"))) mul_half4(half4 lhs, half4 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: void reduce_mul_bool(android::RSC::sp<android::RSC::Allocation> ain,
+// CHECK: bool reduce_mul_bool(const bool in[],
+bool __attribute__((kernel("reduce")))
+mul_bool(bool lhs, bool rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: void reduce_mul_char(android::RSC::sp<android::RSC::Allocation> ain,
+// CHECK: int8_t reduce_mul_char(const int8_t in[],
+char __attribute__((kernel("reduce")))
+mul_char(char lhs, char rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: void reduce_mul_char2(android::RSC::sp<android::RSC::Allocation> ain,
+// CHECK: Byte2 reduce_mul_char2(const int8_t in[],
+char2 __attribute__((kernel("reduce")))
+mul_char2(char2 lhs, char2 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: void reduce_mul_char3(android::RSC::sp<android::RSC::Allocation> ain,
+// CHECK: Byte3 reduce_mul_char3(const int8_t in[],
+char3 __attribute__((kernel("reduce")))
+mul_char3(char3 lhs, char3 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: void reduce_mul_char4(android::RSC::sp<android::RSC::Allocation> ain,
+// CHECK: Byte4 reduce_mul_char4(const int8_t in[],
+char4 __attribute__((kernel("reduce")))
+mul_char4(char4 lhs, char4 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: void reduce_mul_double(android::RSC::sp<android::RSC::Allocation> ain,
+// CHECK: double reduce_mul_double(const double in[],
+double __attribute__((kernel("reduce")))
+mul_double(double lhs, double rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: void reduce_mul_double2(android::RSC::sp<android::RSC::Allocation> ain,
+// CHECK: Double2 reduce_mul_double2(const double in[],
+double2 __attribute__((kernel("reduce")))
+mul_double2(double2 lhs, double2 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: void reduce_mul_double3(android::RSC::sp<android::RSC::Allocation> ain,
+// CHECK: Double3 reduce_mul_double3(const double in[],
+double3 __attribute__((kernel("reduce")))
+mul_double3(double3 lhs, double3 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: void reduce_mul_double4(android::RSC::sp<android::RSC::Allocation> ain,
+// CHECK: Double4 reduce_mul_double4(const double in[],
+double4 __attribute__((kernel("reduce")))
+mul_double4(double4 lhs, double4 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: void reduce_mul_float(android::RSC::sp<android::RSC::Allocation> ain,
+// CHECK: float reduce_mul_float(const float in[],
+float __attribute__((kernel("reduce")))
+mul_float(float lhs, float rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: void reduce_mul_float2(android::RSC::sp<android::RSC::Allocation> ain,
+// CHECK: Float2 reduce_mul_float2(const float in[],
+float2 __attribute__((kernel("reduce")))
+mul_float2(float2 lhs, float2 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: void reduce_mul_float3(android::RSC::sp<android::RSC::Allocation> ain,
+// CHECK: Float3 reduce_mul_float3(const float in[],
+float3 __attribute__((kernel("reduce")))
+mul_float3(float3 lhs, float3 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: void reduce_mul_float4(android::RSC::sp<android::RSC::Allocation> ain,
+// CHECK: Float4 reduce_mul_float4(const float in[],
+float4 __attribute__((kernel("reduce")))
+mul_float4(float4 lhs, float4 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: void reduce_mul_int(android::RSC::sp<android::RSC::Allocation> ain,
+// CHECK: int32_t reduce_mul_int(const int32_t in[],
+int __attribute__((kernel("reduce")))
+mul_int(int lhs, int rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: void reduce_mul_int2(android::RSC::sp<android::RSC::Allocation> ain,
+// CHECK: Int2 reduce_mul_int2(const int32_t in[],
+int2 __attribute__((kernel("reduce")))
+mul_int2(int2 lhs, int2 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: void reduce_mul_int3(android::RSC::sp<android::RSC::Allocation> ain,
+// CHECK: Int3 reduce_mul_int3(const int32_t in[],
+int3 __attribute__((kernel("reduce")))
+mul_int3(int3 lhs, int3 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: void reduce_mul_int4(android::RSC::sp<android::RSC::Allocation> ain,
+// CHECK: Int4 reduce_mul_int4(const int32_t in[],
+int4 __attribute__((kernel("reduce")))
+mul_int4(int4 lhs, int4 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: void reduce_mul_long(android::RSC::sp<android::RSC::Allocation> ain,
+// CHECK: int64_t reduce_mul_long(const int64_t in[],
+long __attribute__((kernel("reduce")))
+mul_long(long lhs, long rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: void reduce_mul_long2(android::RSC::sp<android::RSC::Allocation> ain,
+// CHECK: Long2 reduce_mul_long2(const int64_t in[],
+long2 __attribute__((kernel("reduce")))
+mul_long2(long2 lhs, long2 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: void reduce_mul_long3(android::RSC::sp<android::RSC::Allocation> ain,
+// CHECK: Long3 reduce_mul_long3(const int64_t in[],
+long3 __attribute__((kernel("reduce")))
+mul_long3(long3 lhs, long3 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: void reduce_mul_long4(android::RSC::sp<android::RSC::Allocation> ain,
+// CHECK: Long4 reduce_mul_long4(const int64_t in[],
+long4 __attribute__((kernel("reduce")))
+mul_long4(long4 lhs, long4 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: void reduce_mul_short(android::RSC::sp<android::RSC::Allocation> ain,
+// CHECK: int16_t reduce_mul_short(const int16_t in[],
+short __attribute__((kernel("reduce")))
+mul_short(short lhs, short rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: void reduce_mul_short2(android::RSC::sp<android::RSC::Allocation> ain,
+// CHECK: Short2 reduce_mul_short2(const int16_t in[],
+short2 __attribute__((kernel("reduce")))
+mul_short2(short2 lhs, short2 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: void reduce_mul_short3(android::RSC::sp<android::RSC::Allocation> ain,
+// CHECK: Short3 reduce_mul_short3(const int16_t in[],
+short3 __attribute__((kernel("reduce")))
+mul_short3(short3 lhs, short3 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: void reduce_mul_short4(android::RSC::sp<android::RSC::Allocation> ain,
+// CHECK: Short4 reduce_mul_short4(const int16_t in[],
+short4 __attribute__((kernel("reduce")))
+mul_short4(short4 lhs, short4 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: void reduce_mul_uchar(android::RSC::sp<android::RSC::Allocation> ain,
+// CHECK: uint8_t reduce_mul_uchar(const uint8_t in[],
+uchar __attribute__((kernel("reduce")))
+mul_uchar(uchar lhs, uchar rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: void reduce_mul_uchar2(android::RSC::sp<android::RSC::Allocation> ain,
+// CHECK: UByte2 reduce_mul_uchar2(const uint8_t in[],
+uchar2 __attribute__((kernel("reduce")))
+mul_uchar2(uchar2 lhs, uchar2 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: void reduce_mul_uchar3(android::RSC::sp<android::RSC::Allocation> ain,
+// CHECK: UByte3 reduce_mul_uchar3(const uint8_t in[],
+uchar3 __attribute__((kernel("reduce")))
+mul_uchar3(uchar3 lhs, uchar3 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: void reduce_mul_uchar4(android::RSC::sp<android::RSC::Allocation> ain,
+// CHECK: UByte4 reduce_mul_uchar4(const uint8_t in[],
+uchar4 __attribute__((kernel("reduce")))
+mul_uchar4(uchar4 lhs, uchar4 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: void reduce_mul_uint(android::RSC::sp<android::RSC::Allocation> ain,
+// CHECK: uint32_t reduce_mul_uint(const uint32_t in[],
+uint __attribute__((kernel("reduce")))
+mul_uint(uint lhs, uint rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: void reduce_mul_uint2(android::RSC::sp<android::RSC::Allocation> ain,
+// CHECK: UInt2 reduce_mul_uint2(const uint32_t in[],
+uint2 __attribute__((kernel("reduce")))
+mul_uint2(uint2 lhs, uint2 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: void reduce_mul_uint3(android::RSC::sp<android::RSC::Allocation> ain,
+// CHECK: UInt3 reduce_mul_uint3(const uint32_t in[],
+uint3 __attribute__((kernel("reduce")))
+mul_uint3(uint3 lhs, uint3 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: void reduce_mul_uint4(android::RSC::sp<android::RSC::Allocation> ain,
+// CHECK: UInt4 reduce_mul_uint4(const uint32_t in[],
+uint4 __attribute__((kernel("reduce")))
+mul_uint4(uint4 lhs, uint4 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: void reduce_mul_ulong(android::RSC::sp<android::RSC::Allocation> ain,
+// CHECK: uint64_t reduce_mul_ulong(const uint64_t in[],
+ulong __attribute__((kernel("reduce")))
+mul_ulong(ulong lhs, ulong rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: void reduce_mul_ulong2(android::RSC::sp<android::RSC::Allocation> ain,
+// CHECK: ULong2 reduce_mul_ulong2(const uint64_t in[],
+ulong2 __attribute__((kernel("reduce")))
+mul_ulong2(ulong2 lhs, ulong2 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: void reduce_mul_ulong3(android::RSC::sp<android::RSC::Allocation> ain,
+// CHECK: ULong3 reduce_mul_ulong3(const uint64_t in[],
+ulong3 __attribute__((kernel("reduce")))
+mul_ulong3(ulong3 lhs, ulong3 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: void reduce_mul_ulong4(android::RSC::sp<android::RSC::Allocation> ain,
+// CHECK: ULong4 reduce_mul_ulong4(const uint64_t in[],
+ulong4 __attribute__((kernel("reduce")))
+mul_ulong4(ulong4 lhs, ulong4 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: void reduce_mul_ushort(android::RSC::sp<android::RSC::Allocation> ain,
+// CHECK: uint16_t reduce_mul_ushort(const uint16_t in[],
+ushort __attribute__((kernel("reduce")))
+mul_ushort(ushort lhs, ushort rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: void reduce_mul_ushort2(android::RSC::sp<android::RSC::Allocation> ain,
+// CHECK: UShort2 reduce_mul_ushort2(const uint16_t in[],
+ushort2 __attribute__((kernel("reduce")))
+mul_ushort2(ushort2 lhs, ushort2 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: void reduce_mul_ushort3(android::RSC::sp<android::RSC::Allocation> ain,
+// CHECK: UShort3 reduce_mul_ushort3(const uint16_t in[],
+ushort3 __attribute__((kernel("reduce")))
+mul_ushort3(ushort3 lhs, ushort3 rhs) {
+  return lhs * rhs;
+}
+
+// CHECK: void reduce_mul_ushort4(android::RSC::sp<android::RSC::Allocation> ain,
+// CHECK: UShort4 reduce_mul_ushort4(const uint16_t in[],
+ushort4 __attribute__((kernel("reduce")))
+mul_ushort4(ushort4 lhs, ushort4 rhs) {
+  return lhs * rhs;
+}
+
+struct indirect {
+  bool elem_bool;
+  char elem_char;
+  char2 elem_char2;
+  char3 elem_char3;
+  char4 elem_char4;
+  double elem_double;
+  double2 elem_double2;
+  double3 elem_double3;
+  double4 elem_double4;
+  float elem_float;
+  float2 elem_float2;
+  float3 elem_float3;
+  float4 elem_float4;
+  int elem_int;
+  int2 elem_int2;
+  int3 elem_int3;
+  int4 elem_int4;
+  long elem_long;
+  long2 elem_long2;
+  long3 elem_long3;
+  long4 elem_long4;
+  short elem_short;
+  short2 elem_short2;
+  short3 elem_short3;
+  short4 elem_short4;
+  uchar elem_uchar;
+  uchar2 elem_uchar2;
+  uchar3 elem_uchar3;
+  uchar4 elem_uchar4;
+  uint elem_uint;
+  uint2 elem_uint2;
+  uint3 elem_uint3;
+  uint4 elem_uint4;
+  ulong elem_ulong;
+  ulong2 elem_ulong2;
+  ulong3 elem_ulong3;
+  ulong4 elem_ulong4;
+  ushort elem_ushort;
+  ushort2 elem_ushort2;
+  ushort3 elem_ushort3;
+  ushort4 elem_ushort4;
+};
+
+// CHECK: void reduce_mul_indirect(android::RSC::sp<android::RSC::Allocation> ain,
+struct indirect __attribute__((kernel("reduce")))
+mul_indirect(struct indirect lhs, struct indirect rhs) {
+  lhs.elem_bool *= rhs.elem_bool;
+  lhs.elem_char *= rhs.elem_char;
+  lhs.elem_char2 *= rhs.elem_char2;
+  lhs.elem_char3 *= rhs.elem_char3;
+  lhs.elem_char4 *= rhs.elem_char4;
+  lhs.elem_double *= rhs.elem_double;
+  lhs.elem_double2 *= rhs.elem_double2;
+  lhs.elem_double3 *= rhs.elem_double3;
+  lhs.elem_double4 *= rhs.elem_double4;
+  lhs.elem_float *= rhs.elem_float;
+  lhs.elem_float2 *= rhs.elem_float2;
+  lhs.elem_float3 *= rhs.elem_float3;
+  lhs.elem_float4 *= rhs.elem_float4;
+  lhs.elem_int *= rhs.elem_int;
+  lhs.elem_int2 *= rhs.elem_int2;
+  lhs.elem_int3 *= rhs.elem_int3;
+  lhs.elem_int4 *= rhs.elem_int4;
+  lhs.elem_long *= rhs.elem_long;
+  lhs.elem_long2 *= rhs.elem_long2;
+  lhs.elem_long3 *= rhs.elem_long3;
+  lhs.elem_long4 *= rhs.elem_long4;
+  lhs.elem_short *= rhs.elem_short;
+  lhs.elem_short2 *= rhs.elem_short2;
+  lhs.elem_short3 *= rhs.elem_short3;
+  lhs.elem_short4 *= rhs.elem_short4;
+  lhs.elem_uchar *= rhs.elem_uchar;
+  lhs.elem_uchar2 *= rhs.elem_uchar2;
+  lhs.elem_uchar3 *= rhs.elem_uchar3;
+  lhs.elem_uchar4 *= rhs.elem_uchar4;
+  lhs.elem_uint *= rhs.elem_uint;
+  lhs.elem_uint2 *= rhs.elem_uint2;
+  lhs.elem_uint3 *= rhs.elem_uint3;
+  lhs.elem_uint4 *= rhs.elem_uint4;
+  lhs.elem_ulong *= rhs.elem_ulong;
+  lhs.elem_ulong2 *= rhs.elem_ulong2;
+  lhs.elem_ulong3 *= rhs.elem_ulong3;
+  lhs.elem_ulong4 *= rhs.elem_ulong4;
+  lhs.elem_ushort *= rhs.elem_ushort;
+  lhs.elem_ushort2 *= rhs.elem_ushort2;
+  lhs.elem_ushort3 *= rhs.elem_ushort3;
+  lhs.elem_ushort4 *= rhs.elem_ushort4;
+  return lhs;
+}
diff --git a/slang_rs_reflection_cpp.cpp b/slang_rs_reflection_cpp.cpp
index 03f2ee5..7d29627 100644
--- a/slang_rs_reflection_cpp.cpp
+++ b/slang_rs_reflection_cpp.cpp
@@ -25,13 +25,13 @@
 #include <algorithm>
 #include <sstream>
 #include <string>
-#include <utility>
 
 #include "os_sep.h"
 #include "slang_rs_context.h"
 #include "slang_rs_export_var.h"
 #include "slang_rs_export_foreach.h"
 #include "slang_rs_export_func.h"
+#include "slang_rs_export_reduce.h"
 #include "slang_rs_reflect_utils.h"
 #include "slang_version.h"
 
@@ -41,9 +41,11 @@
 
 namespace slang {
 
-#define RS_TYPE_ITEM_CLASS_NAME "Item"
-
-#define RS_ELEM_PREFIX "__rs_elem_"
+const char kRsTypeItemClassName[] = "Item";
+const char kRsElemPrefix[] = "__rs_elem_";
+// The name of the Allocation type that is reflected in C++
+const char kAllocationSp[] = "android::RSC::sp<android::RSC::Allocation>";
+const char kConstRsScriptCall[] = "const RsScriptCall";
 
 static const char *GetMatrixTypeName(const RSExportMatrixType *EMT) {
   static const char *MatrixTypeCNameMap[] = {
@@ -75,7 +77,7 @@
         static_cast<const RSExportPointerType *>(ET)->getPointeeType();
 
     if (PointeeType->getClass() != RSExportType::ExportClassRecord)
-      return "android::RSC::sp<android::RSC::Allocation>";
+      return kAllocationSp;
     else
       return PointeeType->getElementName();
   }
@@ -101,7 +103,7 @@
   }
   case RSExportType::ExportClassRecord: {
     // TODO: Fix for C structs!
-    return ET->getElementName() + "." RS_TYPE_ITEM_CLASS_NAME;
+    return ET->getElementName() + "." + kRsTypeItemClassName;
   }
   default: { slangAssert(false && "Unknown class of type"); }
   }
@@ -109,13 +111,23 @@
   return "";
 }
 
+static bool canExportReduceArrayVariant(const RSExportType *Type) {
+  // FIXME: No half types available for C++ reflection yet
+  if (Type->getElementName().find("F16") == 0) {
+    return false;
+  }
+  return Type->getClass() == RSExportType::ExportClassPrimitive ||
+    Type->getClass() == RSExportType::ExportClassVector;
+}
+
 RSReflectionCpp::RSReflectionCpp(const RSContext *Context,
                                  const string &OutputDirectory,
                                  const string &RSSourceFileName,
                                  const string &BitCodeFileName)
     : mRSContext(Context), mRSSourceFilePath(RSSourceFileName),
       mBitCodeFilePath(BitCodeFileName), mOutputDirectory(OutputDirectory),
-      mNextExportVarSlot(0), mNextExportFuncSlot(0), mNextExportForEachSlot(0) {
+      mNextExportVarSlot(0), mNextExportFuncSlot(0), mNextExportForEachSlot(0),
+      mNextExportReduceSlot(0) {
   mCleanedRSFileName = RootNameFromRSFileName(mRSSourceFilePath);
   mClassName = "ScriptC_" + mCleanedRSFileName;
 }
@@ -155,6 +167,7 @@
 
   genFieldsToStoreExportVariableValues();
   genTypeInstancesUsedInForEach();
+  genTypeInstancesUsedInReduce();
   genFieldsForAllocationTypeVerification();
 
   mOut.decreaseIndent();
@@ -167,6 +180,7 @@
 
   genExportVariablesGetterAndSetter();
   genForEachDeclarations();
+  genReduceDeclarations();
   genExportFunctionDeclarations();
 
   mOut.endBlock(true);
@@ -175,9 +189,8 @@
 }
 
 void RSReflectionCpp::genTypeInstancesUsedInForEach() {
-  for (RSContext::const_export_foreach_iterator
-           I = mRSContext->export_foreach_begin(),
-           E = mRSContext->export_foreach_end();
+  for (auto I = mRSContext->export_foreach_begin(),
+            E = mRSContext->export_foreach_end();
        I != E; I++) {
     const RSExportForEach *EF = *I;
     const RSExportType *OET = EF->getOutType();
@@ -196,6 +209,15 @@
   }
 }
 
+// Ensure that the type of the reduce kernel is reflected.
+void RSReflectionCpp::genTypeInstancesUsedInReduce() {
+  for (auto I = mRSContext->export_reduce_begin(),
+            E = mRSContext->export_reduce_end();
+       I != E; ++I) {
+    genTypeInstance((*I)->getType());
+  }
+}
+
 void RSReflectionCpp::genFieldsForAllocationTypeVerification() {
   bool CommentAdded = false;
   for (std::set<std::string>::iterator I = mTypesToCheck.begin(),
@@ -207,7 +229,7 @@
       CommentAdded = true;
     }
     mOut.indent() << "android::RSC::sp<const android::RSC::Element> "
-                  << RS_ELEM_PREFIX << *I << ";\n";
+                  << kRsElemPrefix << *I << ";\n";
   }
 }
 
@@ -266,13 +288,11 @@
     for (RSExportForEach::InIter BI = Ins.begin(), EI = Ins.end();
          BI != EI; BI++) {
 
-      Arguments.push_back(std::make_pair(
-        "android::RSC::sp<const android::RSC::Allocation>", (*BI)->getName()));
+      Arguments.push_back(Argument(kAllocationSp, (*BI)->getName()));
     }
 
     if (ForEach->hasOut() || ForEach->hasReturn()) {
-      Arguments.push_back(std::make_pair(
-          "android::RSC::sp<const android::RSC::Allocation>", "aout"));
+      Arguments.push_back(Argument(kAllocationSp, "aout"));
     }
 
     const RSExportRecordType *ERT = ForEach->getParamPacketType();
@@ -282,7 +302,7 @@
            i != e; i++) {
         RSReflectionTypeData rtd;
         (*i)->getType()->convertToRTD(&rtd);
-        Arguments.push_back(std::make_pair(rtd.type->c_name, (*i)->getName()));
+        Arguments.push_back(Argument(rtd.type->c_name, (*i)->getName()));
       }
     }
     genArguments(Arguments, FunctionStart.length());
@@ -290,6 +310,21 @@
   }
 }
 
+void RSReflectionCpp::genReduceDeclarations() {
+  bool CommentAdded = false;
+  for (auto I = mRSContext->export_reduce_begin(),
+            E = mRSContext->export_reduce_end(); I != E; I++) {
+    if (!CommentAdded) {
+      mOut.comment("For each reduce kernel of the script, there is an entry "
+                   "point to call the reduce kernel.");
+      CommentAdded = true;
+    }
+
+    makeReduceSignatureAllocationVariant(false, *I);
+    makeReduceSignatureArrayVariant(false, *I);
+  }
+}
+
 void RSReflectionCpp::genExportFunctionDeclarations() {
   for (RSContext::const_export_func_iterator
            I = mRSContext->export_funcs_begin(),
@@ -301,80 +336,11 @@
   }
 }
 
-bool RSReflectionCpp::genEncodedBitCode() {
-  FILE *pfin = fopen(mBitCodeFilePath.c_str(), "rb");
-  if (pfin == nullptr) {
-    fprintf(stderr, "Error: could not read file %s\n",
-            mBitCodeFilePath.c_str());
-    return false;
-  }
-
-  unsigned char buf[16];
-  int read_length;
-  mOut.indent() << "static const unsigned char __txt[] =";
-  mOut.startBlock();
-  while ((read_length = fread(buf, 1, sizeof(buf), pfin)) > 0) {
-    mOut.indent();
-    for (int i = 0; i < read_length; i++) {
-      char buf2[16];
-      snprintf(buf2, sizeof(buf2), "0x%02x,", buf[i]);
-      mOut << buf2;
-    }
-    mOut << "\n";
-  }
-  mOut.endBlock(true);
-  mOut << "\n";
-  return true;
-}
-
-bool RSReflectionCpp::writeImplementationFile() {
-  if (!mOut.startFile(mOutputDirectory, mClassName + ".cpp", mRSSourceFilePath,
-                      mRSContext->getLicenseNote(), false,
-                      mRSContext->getVerbose())) {
-    return false;
-  }
-
-  mOut.indent() << "#include \"" << mClassName << ".h\"\n\n";
-
-  genEncodedBitCode();
-  mOut.indent() << "\n\n";
-
-  const std::string &packageName = mRSContext->getReflectJavaPackageName();
-  mOut.indent() << mClassName << "::" << mClassName
-                << "(android::RSC::sp<android::RSC::RS> rs):\n"
-                   "        ScriptC(rs, __txt, sizeof(__txt), \""
-                << mCleanedRSFileName << "\", " << mCleanedRSFileName.length()
-                << ", \"/data/data/" << packageName << "/app\", sizeof(\""
-                << packageName << "\"))";
-  mOut.startBlock();
-  for (std::set<std::string>::iterator I = mTypesToCheck.begin(),
-                                       E = mTypesToCheck.end();
-       I != E; I++) {
-    mOut.indent() << RS_ELEM_PREFIX << *I << " = android::RSC::Element::" << *I
-                  << "(mRS);\n";
-  }
-
-  for (RSContext::const_export_var_iterator I = mRSContext->export_vars_begin(),
-                                            E = mRSContext->export_vars_end();
-       I != E; I++) {
-    const RSExportVar *EV = *I;
-    if (!EV->getInit().isUninit()) {
-      genInitExportVariable(EV->getType(), EV->getName(), EV->getInit());
-    } else {
-      genZeroInitExportVariable(EV->getName());
-    }
-  }
-  mOut.endBlock();
-
-  mOut.indent() << mClassName << "::~" << mClassName << "()";
-  mOut.startBlock();
-  mOut.endBlock();
-
-  // Reflect export for each functions
+// forEach_* implementation
+void RSReflectionCpp::genExportForEachBodies() {
   uint32_t slot = 0;
-  for (RSContext::const_export_foreach_iterator
-           I = mRSContext->export_foreach_begin(),
-           E = mRSContext->export_foreach_end();
+  for (auto I = mRSContext->export_foreach_begin(),
+            E = mRSContext->export_foreach_end();
        I != E; I++, slot++) {
     const RSExportForEach *ef = *I;
     if (ef->isDummyRoot()) {
@@ -390,13 +356,11 @@
     if (ef->hasIns()) {
       // FIXME: Add support for kernels with multiple inputs.
       slangAssert(ef->getIns().size() == 1);
-      Arguments.push_back(std::make_pair(
-          "android::RSC::sp<const android::RSC::Allocation>", "ain"));
+      Arguments.push_back(Argument(kAllocationSp, "ain"));
     }
 
     if (ef->hasOut() || ef->hasReturn()) {
-      Arguments.push_back(std::make_pair(
-          "android::RSC::sp<const android::RSC::Allocation>", "aout"));
+      Arguments.push_back(Argument(kAllocationSp, "aout"));
     }
 
     const RSExportRecordType *ERT = ef->getParamPacketType();
@@ -406,7 +370,7 @@
            i != e; i++) {
         RSReflectionTypeData rtd;
         (*i)->getType()->convertToRTD(&rtd);
-        Arguments.push_back(std::make_pair(rtd.type->c_name, (*i)->getName()));
+        Arguments.push_back(Argument(rtd.type->c_name, (*i)->getName()));
       }
     }
     genArguments(Arguments, FunctionStart.length());
@@ -453,12 +417,111 @@
     mOut << "NULL, 0);\n";
     mOut.endBlock();
   }
+}
 
-  slot = 0;
+// reduce_* implementation
+void RSReflectionCpp::genExportReduceBodies() {
+  for (auto I = mRSContext->export_reduce_begin(),
+            E = mRSContext->export_reduce_end();
+       I != E; ++I) {
+    const RSExportReduce &Reduce = **I;
+    const RSExportType *Type = Reduce.getType();
+
+    // Allocation variant
+    //
+    // void reduce_foo(sp<Allocation> ain, sp<Allocation> aout,
+    //                 const RsScriptCall *sc);
+    makeReduceSignatureAllocationVariant(true, &Reduce);
+    mOut.startBlock();
+
+    // Type check
+    genTypeCheck(Type, "ain");
+    genTypeCheck(Type, "aout");
+
+    // Dimension check
+    gen1DCheck("ain");
+
+    const uint32_t Slot = getNextExportReduceSlot();
+
+    // Call into RenderScript.
+    mOut.indent() << "reduce(" << Slot << ", "
+                  << "ain, aout, sc);\n";
+    mOut.endBlock();
+
+    if (!canExportReduceArrayVariant(Type)) {
+      continue;
+    }
+
+    // Array variant
+    //
+    // Ty reduce_foo(const ElemTy[] in, uint32_t x1, uint32_t x2, size_t inLen);
+    // "Ty" could be different from "ElemTy" in the case of vectors.
+    makeReduceSignatureArrayVariant(true, &Reduce);
+    mOut.startBlock();
+
+    const std::string ReturnType = GetTypeName(Type);
+    const std::string DefaultReturnValue = ReturnType + "()";
+
+    genNullOrEmptyArrayCheck("in", "inLen", DefaultReturnValue);
+
+    RSReflectionTypeData TypeData;
+    Type->convertToRTD(&TypeData);
+    const uint32_t VecSize = TypeData.vecSize;
+    std::string InLength = "inLen";
+    // Adjust the length so that it corresponds to the number of elements in the allocation.
+    if (VecSize > 1) {
+      InLength += " / " + std::to_string(VecSize);
+    }
+    genVectorLengthCompatibilityCheck("inLen", VecSize, DefaultReturnValue);
+
+    mOut.indent() << "if (x1 >= x2 || x2 > " << InLength << ")";
+    mOut.startBlock();
+    mOut.indent() << "mRS->throwError(RS_ERROR_RUNTIME_ERROR, "
+                  << "\"Input bounds are invalid\");\n";
+    mOut.indent() << "return " << DefaultReturnValue << ";\n";
+    mOut.endBlock();
+
+    mOut.indent() << kAllocationSp
+                  << " ain = android::RSC::Allocation::createSized(mRS, "
+                  << kRsElemPrefix << Type->getElementName() << ", "
+                  << "x2 - x1);\n";
+
+    mOut.indent() << "ain->setAutoPadding(true);\n";
+
+    mOut.indent() << kAllocationSp
+                  << " aout = android::RSC::Allocation::createSized(mRS, "
+                  << kRsElemPrefix << Type->getElementName() << ", 1);\n";
+
+    mOut.indent() << "aout->setAutoPadding(true);\n";
+
+    const std::string ArrayElementType = TypeData.type->c_name;
+
+    std::string StartOffset = "x1";
+    if (VecSize > 1) {
+      StartOffset += " * " + std::to_string(VecSize);
+    }
+    mOut.indent() << "ain->copy1DRangeFrom(0, x2 - x1, &in[" << StartOffset << "]);\n";
+    mOut.indent() << "reduce_" << Reduce.getName() << "(ain, aout);\n";
+    mOut.indent() << ArrayElementType << " outArray[" << VecSize << "];\n";
+
+    mOut.indent() << "aout->copy1DRangeTo(0, 1, &outArray[0]);\n";
+
+    mOut.indent() << "return " << ReturnType << "(";
+    for (uint32_t VecElem = 0; VecElem < VecSize; ++VecElem) {
+      if (VecElem > 0) mOut << ", ";
+      mOut << "outArray[" << VecElem << "]";
+    }
+    mOut << ");\n";
+    mOut.endBlock();
+  }
+}
+
+// invoke_* implementation
+void RSReflectionCpp::genExportFunctionBodies() {
+  uint32_t slot = 0;
   // Reflect export function
-  for (RSContext::const_export_func_iterator
-           I = mRSContext->export_funcs_begin(),
-           E = mRSContext->export_funcs_end();
+  for (auto I = mRSContext->export_funcs_begin(),
+            E = mRSContext->export_funcs_end();
        I != E; I++) {
     const RSExportFunc *ef = *I;
 
@@ -483,6 +546,84 @@
 
     slot++;
   }
+}
+
+bool RSReflectionCpp::genEncodedBitCode() {
+  FILE *pfin = fopen(mBitCodeFilePath.c_str(), "rb");
+  if (pfin == nullptr) {
+    fprintf(stderr, "Error: could not read file %s\n",
+            mBitCodeFilePath.c_str());
+    return false;
+  }
+
+  unsigned char buf[16];
+  int read_length;
+  mOut.indent() << "static const unsigned char __txt[] =";
+  mOut.startBlock();
+  while ((read_length = fread(buf, 1, sizeof(buf), pfin)) > 0) {
+    mOut.indent();
+    for (int i = 0; i < read_length; i++) {
+      char buf2[16];
+      snprintf(buf2, sizeof(buf2), "0x%02x,", buf[i]);
+      mOut << buf2;
+    }
+    mOut << "\n";
+  }
+  mOut.endBlock(true);
+  mOut << "\n";
+  return true;
+}
+
+bool RSReflectionCpp::writeImplementationFile() {
+  if (!mOut.startFile(mOutputDirectory, mClassName + ".cpp", mRSSourceFilePath,
+                      mRSContext->getLicenseNote(), false,
+                      mRSContext->getVerbose())) {
+    return false;
+  }
+
+  // Front matter
+  mOut.indent() << "#include \"" << mClassName << ".h\"\n\n";
+
+  genEncodedBitCode();
+  mOut.indent() << "\n\n";
+
+  // Constructor
+  const std::string &packageName = mRSContext->getReflectJavaPackageName();
+  mOut.indent() << mClassName << "::" << mClassName
+                << "(android::RSC::sp<android::RSC::RS> rs):\n"
+                   "        ScriptC(rs, __txt, sizeof(__txt), \""
+                << mCleanedRSFileName << "\", " << mCleanedRSFileName.length()
+                << ", \"/data/data/" << packageName << "/app\", sizeof(\""
+                << packageName << "\"))";
+  mOut.startBlock();
+  for (std::set<std::string>::iterator I = mTypesToCheck.begin(),
+                                       E = mTypesToCheck.end();
+       I != E; I++) {
+    mOut.indent() << kRsElemPrefix << *I << " = android::RSC::Element::" << *I
+                  << "(mRS);\n";
+  }
+
+  for (RSContext::const_export_var_iterator I = mRSContext->export_vars_begin(),
+                                            E = mRSContext->export_vars_end();
+       I != E; I++) {
+    const RSExportVar *EV = *I;
+    if (!EV->getInit().isUninit()) {
+      genInitExportVariable(EV->getType(), EV->getName(), EV->getInit());
+    } else {
+      genZeroInitExportVariable(EV->getName());
+    }
+  }
+  mOut.endBlock();
+
+  // Destructor
+  mOut.indent() << mClassName << "::~" << mClassName << "()";
+  mOut.startBlock();
+  mOut.endBlock();
+
+  // Function bodies
+  genExportForEachBodies();
+  genExportReduceBodies();
+  genExportFunctionBodies();
 
   mOut.closeFile();
   return true;
@@ -546,7 +687,7 @@
     mOut.indent() << "setVar(" << getNextExportVarSlot() << ", ";
     if (EPT->isRSObjectType()) {
       mOut << "v";
-    } else {
+   } else {
       mOut << "&v, sizeof(v)";
     }
     mOut << ");\n";
@@ -677,6 +818,131 @@
   }
 }
 
+void RSReflectionCpp::makeReduceSignatureAllocationVariant(bool IsDefinition,
+                                                           const RSExportReduce *ER) {
+  // Reflects the allocation-based entry point:
+  //   void reduce_foo(sp<Allocation> ain, sp<Allocation> aout,
+  //                   const RsScriptCall *sc = nullptr);
+  // A definition is qualified with the class name, drops the default value
+  // and is left open after ")"; a declaration is terminated with ");".
+  const std::string Scope = IsDefinition ? mClassName + "::" : std::string();
+  const std::string Prologue = "void " + Scope + "reduce_" + ER->getName() + "(";
+
+  // Only the declaration may carry the default value for the trailing parameter.
+  const std::string ScriptCallDefault = IsDefinition ? "" : "nullptr";
+  ArgumentList Params;
+  Params.push_back(Argument(kAllocationSp, "ain"));
+  Params.push_back(Argument(kAllocationSp, "aout"));
+  Params.push_back(Argument(kConstRsScriptCall, "*sc", ScriptCallDefault));
+
+  mOut.indent() << Prologue;
+  genArguments(Params, Prologue.length());
+
+  // Close the parameter list; a declaration also ends the statement.
+  mOut << ")";
+  if (!IsDefinition) {
+    mOut << ";\n\n";
+  }
+}
+
+void RSReflectionCpp::makeReduceSignatureArrayVariant(bool IsDefinition,
+                                                      const RSExportReduce *ER) {
+  // Array variant: Ty reduce_foo(const ElemTy in[], uint32_t x1, uint32_t x2, size_t inLen);
+  // "Ty" could be different from "ElemTy" in the case of vectors.
+
+  const RSExportType *Type = ER->getType();
+  if (!canExportReduceArrayVariant(Type)) {  // No array variant is reflected for this type.
+      return;
+  }
+
+  RSReflectionTypeData TypeData;
+  Type->convertToRTD(&TypeData);
+
+  const std::string ReturnType = GetTypeName(Type);  // "Ty" in the comment above.
+  std::string FunctionStart = ReturnType + " ";
+  if (IsDefinition) {
+    FunctionStart += mClassName +  "::";  // Definitions are qualified with the class name.
+  }
+  FunctionStart += "reduce_" + ER->getName() + "(";
+
+  const std::string ArrayElementType = TypeData.type->c_name;  // "ElemTy" in the comment above.
+
+  ArgumentList Arguments{
+    Argument("const " + ArrayElementType, "in[]"),
+    Argument("uint32_t", "x1"),
+    Argument("uint32_t", "x2"),
+    Argument("size_t", "inLen")
+  };
+
+  mOut.indent() << FunctionStart;
+  genArguments(Arguments, FunctionStart.size());
+
+  if (IsDefinition) {
+    mOut << ")";  // Left open: nothing after ")" is written here for a definition.
+  } else {
+    mOut << ");\n\n";
+  }
+
+  if (!IsDefinition) {
+    // We reflect three more variants in the header. First, there is
+    //
+    //   Ty reduce_foo(const ElemTy in[], size_t inLen);
+    //
+    // Note the inLen is the number of primitive elements in the array, as opposed to the
+    // bounds whose units are allocation elements. The other variants use templates to infer
+    // the array length statically:
+    //
+    //   template<size_t inLen> Ty reduce_foo(const ElemTy (&in)[inLen]);
+    //   template<size_t inLen> Ty reduce_foo(const ElemTy (&in)[inLen], uint32_t x1, uint32_t x2);
+
+    // Generate inLen variant; it forwards with x1 = 0 and x2 = inLen (or inLen / VecSize).
+    const uint32_t VecSize = TypeData.vecSize;
+    std::string X2 = "inLen";
+
+    const std::string FunctionName = ER->getName();
+
+    auto ForwardReduce = [this, &FunctionName](const std::string &x1,  // Emits return and "}".
+                                               const std::string &x2,
+                                               const std::string &inLen) {
+      this->mOut.indent() << "    return reduce_" << FunctionName << "(in, "
+                          << x1 << ", " << x2 << ", " << inLen << ");\n";
+      this->mOut.indent() << "}\n\n";
+    };
+
+    const std::string DefaultValue = ReturnType + "()";  // Returned by the generated length check.
+
+    ArgumentList InLenVariantArguments{
+      Argument("const " + ArrayElementType, "in[]"), Argument("size_t", "inLen")
+    };
+    mOut.indent() << FunctionStart;
+    genArguments(InLenVariantArguments, FunctionStart.size());
+    mOut << ") {\n";
+    if (VecSize > 1) {  // Vectors: check inLen against VecSize; x2 becomes inLen / VecSize.
+      genVectorLengthCompatibilityCheck("inLen", VecSize, DefaultValue, 2);
+      X2 += " / " + std::to_string(VecSize);
+    }
+    ForwardReduce("0", X2, "inLen");
+
+    // Generate template variants: first forwards to (in, inLen), second to the 4-argument form.
+    ArgumentList TemplateVariantArguments{
+      Argument("const " + ArrayElementType, "(&in)[inLen]")
+    };
+
+    mOut.indent() << "template<size_t inLen>\n";
+    mOut.indent() << FunctionStart;
+    genArguments(TemplateVariantArguments, FunctionStart.size());
+    mOut << ") {\n        return reduce_" << FunctionName << "(in, inLen);\n    }\n\n";
+
+    TemplateVariantArguments.push_back(Argument("uint32_t", "x1"));
+    TemplateVariantArguments.push_back(Argument("uint32_t", "x2"));
+    mOut.indent() << "template<size_t inLen>\n";
+    mOut.indent() << FunctionStart;
+    genArguments(TemplateVariantArguments, FunctionStart.size());
+    mOut << ") {\n";
+    ForwardReduce("x1", "x2", "inLen");
+  }
+}
+
 void RSReflectionCpp::genArguments(const ArgumentList &Arguments, int Offset) {
   bool FirstArg = true;
 
@@ -689,7 +955,10 @@
       FirstArg = false;
     }
 
-    mOut << I->first << " " << I->second;
+    mOut << I->Type << " " << I->Name;
+    if (!I->DefaultValue.empty()) {
+      mOut << " = " << I->DefaultValue;
+    }
   }
 }
 
@@ -822,7 +1091,7 @@
   if (!TypeName.empty()) {
     mOut.indent() << "if (!" << VarName
                   << "->getType()->getElement()->isCompatible("
-                  << RS_ELEM_PREFIX << TypeName << "))";
+                  << kRsElemPrefix << TypeName << "))";
     mOut.startBlock();
     mOut.indent() << "mRS->throwError(RS_ERROR_RUNTIME_ERROR, "
                      "\"Incompatible type\");\n";
@@ -831,6 +1100,60 @@
   }
 }
 
+// Emit a guard on VarName's Type: throw and return unless it has no Y, faces, or mipmaps.
+void RSReflectionCpp::gen1DCheck(const std::string &VarName) {
+  mOut.indent() << "// check that " << VarName << " is 1d\n";
+  mOut.indent() << "sp<const Type> t0 = " << VarName << "->getType();\n";  // Fixed local name "t0".
+  mOut.indent() << "if (t0->getY() != 0 ||\n";
+  mOut.indent() << "    t0->hasFaces()  ||\n";
+  mOut.indent() << "    t0->hasMipmaps())";
+  mOut.startBlock();
+  mOut.indent() << "mRS->throwError(RS_ERROR_INVALID_PARAMETER, "
+                << "\"" << VarName << " is not 1D!\");\n";
+  mOut.indent() << "return;\n";  // Bare "return;": the enclosing generated function must be void.
+  mOut.endBlock();
+}
+
+// Generates code rejecting an array length that is not a whole multiple of the vector size.
+void RSReflectionCpp::genVectorLengthCompatibilityCheck(const std::string &Length,
+                                                        unsigned VecSize,
+                                                        const std::string &ValueToReturn,
+                                                        unsigned IndentLevels) {
+  const std::string Divisor = std::to_string(VecSize);
+  // Starts a generated line by applying mOut.indent() IndentLevels times.
+  auto StartLine = [this, IndentLevels]() -> std::ofstream& {
+    for (unsigned Remaining = IndentLevels; Remaining != 0; --Remaining) {
+      mOut.indent();
+    }
+    return mOut;
+  };
+
+  StartLine() << "// Verify that the array length is a multiple of the vector size.\n";
+  StartLine() << "if (" << Length << " % " << Divisor << " != 0) {\n";
+  StartLine() << "    mRS->throwError(RS_ERROR_INVALID_PARAMETER, "
+                 "\"Input array length is not a multiple of " << Divisor << "\");\n";
+  StartLine() << "    return " << ValueToReturn << ";\n";
+  StartLine() << "}\n\n";
+}
+
+// Generates code that rejects a null array pointer or a zero array length.
+void RSReflectionCpp::genNullOrEmptyArrayCheck(const std::string &ArrayName,
+                                               const std::string &Length,
+                                               const std::string &ValueToReturn) {
+  // Both guards have the same shape: raise the error, then return ValueToReturn.
+  auto EmitGuard = [this, &ValueToReturn](const std::string &Condition,
+                                          const std::string &Message) {
+    mOut.indent() << "if (" << Condition << ") {\n";
+    mOut.indent() << "    mRS->throwError(RS_ERROR_INVALID_PARAMETER, \"" << Message << "\");\n";
+    mOut.indent() << "    return " << ValueToReturn << ";\n";
+    mOut.indent() << "}\n\n";
+  };
+
+  mOut.indent() << "// Verify that the array is non-null and non-empty.\n";
+  EmitGuard(ArrayName + " == nullptr", "Input array is null");
+  EmitGuard(Length + " == 0", "Input array is zero-length");
+}
+
 void RSReflectionCpp::genTypeInstanceFromPointer(const RSExportType *ET) {
   if (ET->getClass() == RSExportType::ExportClassPointer) {
     // For pointer parameters to original forEach kernels.
diff --git a/slang_rs_reflection_cpp.h b/slang_rs_reflection_cpp.h
index f451ce6..9c55ad8 100644
--- a/slang_rs_reflection_cpp.h
+++ b/slang_rs_reflection_cpp.h
@@ -36,8 +36,14 @@
   bool reflect();
 
  private:
-  // List of of (type, name) pairs.
-  typedef std::vector<std::pair<std::string, std::string> > ArgumentList;
+  struct Argument {  // One formal parameter of a reflected function signature.
+    std::string Type;          // Text written before the name.
+    std::string Name;          // Declarator text written after the type, e.g. "in[]" or "*sc".
+    std::string DefaultValue;  // Written as " = <value>" after the name when non-empty.
+    Argument(const std::string &Type, const std::string &Name, const std::string &DefaultValue = "")
+      : Type(Type), Name(Name), DefaultValue(DefaultValue) {}  // By const-ref: one copy per field.
+  };
+  typedef std::vector<Argument> ArgumentList;
 
   // Information coming from the compiler about the code we're reflecting.
   const RSContext *mRSContext;
@@ -58,6 +64,7 @@
   unsigned int mNextExportVarSlot;
   unsigned int mNextExportFuncSlot;
   unsigned int mNextExportForEachSlot;
+  unsigned int mNextExportReduceSlot;
 
   // Generated RS Elements for type-checking code.
   std::set<std::string> mTypesToCheck;
@@ -66,6 +73,7 @@
     mNextExportVarSlot = 0;
     mNextExportFuncSlot = 0;
     mNextExportForEachSlot = 0;
+    mNextExportReduceSlot = 0;
     mTypesToCheck.clear();
   }
 
@@ -84,17 +92,37 @@
     return mNextExportForEachSlot++;
   }
 
+  inline unsigned int getNextExportReduceSlot() {
+    return mNextExportReduceSlot++;  // Post-increment: yields the current slot, then advances.
+  }
+
   bool writeHeaderFile();
   bool writeImplementationFile();
+
+  // Write out signatures both in the header and implementation.
   void makeFunctionSignature(bool isDefinition, const RSExportFunc *ef);
+  void makeReduceSignatureAllocationVariant(bool isDefinition, const RSExportReduce *er);
+  void makeReduceSignatureArrayVariant(bool isDefinition, const RSExportReduce *er);
+
   bool genEncodedBitCode();
   void genFieldsToStoreExportVariableValues();
   void genTypeInstancesUsedInForEach();
+  void genTypeInstancesUsedInReduce();
   void genFieldsForAllocationTypeVerification();
+
+  // Write out the code for the getters and setters.
   void genExportVariablesGetterAndSetter();
+
+  // Write out the code for the declaration of the kernel entry points.
   void genForEachDeclarations();
+  void genReduceDeclarations();
   void genExportFunctionDeclarations();
 
+  // Write out code for the definitions of the kernel entry points.
+  void genExportForEachBodies();
+  void genExportReduceBodies();
+  void genExportFunctionBodies();
+
   bool startScriptHeader();
 
   // Write out code for an export variable initialization.
@@ -128,7 +156,20 @@
   // Generate a runtime type check for VarName.
   void genTypeCheck(const RSExportType *ET, const char *VarName);
 
-  // Generate a type instance for a given forEach argument type.
+  // Generate a runtime check that VarName is 1-dimensional.
+  void gen1DCheck(const std::string &VarName);
+
+  // Generate a runtime check that ArrayName is non-null and Length is nonzero.
+  void genNullOrEmptyArrayCheck(const std::string &ArrayName, const std::string &Length,
+                                const std::string &ValueToReturn);
+
+  // Generate a runtime check that the array length expression Length is a
+  // multiple of the vector size VecSize.
+  void genVectorLengthCompatibilityCheck(const std::string &Length, unsigned VecSize,
+                                         const std::string &ValueToReturn,
+                                         unsigned IndentLevels = 1);
+
+  // Generate a type instance for a given type.
   void genTypeInstanceFromPointer(const RSExportType *ET);
   void genTypeInstance(const RSExportType *ET);
 
diff --git a/tests/P_reduce_cpp/reduce_cpp.rs b/tests/P_reduce_cpp/reduce_cpp.rs
new file mode 100644
index 0000000..a4ddeb5
--- /dev/null
+++ b/tests/P_reduce_cpp/reduce_cpp.rs
@@ -0,0 +1,8 @@
+// -target-api 0 -reflect-c++
+
+#pragma version(1)
+#pragma rs java_package_name(foo)
+
+int __attribute__((kernel("reduce"))) add(int a, int b) {  // Reduce-style kernel under test.
+  return a + b;
+}
diff --git a/tests/P_reduce_cpp/stderr.txt.expect b/tests/P_reduce_cpp/stderr.txt.expect
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tests/P_reduce_cpp/stderr.txt.expect
diff --git a/tests/P_reduce_cpp/stdout.txt.expect b/tests/P_reduce_cpp/stdout.txt.expect
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tests/P_reduce_cpp/stdout.txt.expect