Add a basic implementation of the reduce kernel API to the CPU
reference implementation.

Bug: 22631253

For now, this just runs a serial reduction on one thread.

Change-Id: I34c96d24bb6f44274de72bb53160abcf79d143b0
diff --git a/rsDriverLoader.cpp b/rsDriverLoader.cpp
index 125a6df..43e4294 100644
--- a/rsDriverLoader.cpp
+++ b/rsDriverLoader.cpp
@@ -70,6 +70,7 @@
     ret &= fn(RS_HAL_SCRIPT_INVOKE_FUNCTION, (void **)&rsc->mHal.funcs.script.invokeFunction);
     ret &= fn(RS_HAL_SCRIPT_INVOKE_ROOT, (void **)&rsc->mHal.funcs.script.invokeRoot);
     ret &= fn(RS_HAL_SCRIPT_INVOKE_FOR_EACH, (void **)&rsc->mHal.funcs.script.invokeForEach);
+    ret &= fn(RS_HAL_SCRIPT_INVOKE_REDUCE, (void **)&rsc->mHal.funcs.script.invokeReduce);
     ret &= fn(RS_HAL_SCRIPT_INVOKE_INIT, (void **)&rsc->mHal.funcs.script.invokeInit);
     ret &= fn(RS_HAL_SCRIPT_INVOKE_FREE_CHILDREN, (void **)&rsc->mHal.funcs.script.invokeFreeChildren);
     ret &= fn(RS_HAL_SCRIPT_SET_GLOBAL_VAR, (void **)&rsc->mHal.funcs.script.setGlobalVar);