Adds TENSOR_FLOAT16 support for the RNN operation.

Bug: 118608628
Test: NeuralNetworksTest_static
Change-Id: I9e87c27169a046e10357bba27ee9999595dd7170
Merged-In: I9e87c27169a046e10357bba27ee9999595dd7170
(cherry picked from commit d49668d85f5776f19caa9461eee569c3e4a27c44)
diff --git a/nn/common/Utils.cpp b/nn/common/Utils.cpp
index 8d98417..f40a458 100644
--- a/nn/common/Utils.cpp
+++ b/nn/common/Utils.cpp
@@ -1696,19 +1696,38 @@
                 logInvalidInOutNumber(6, 2);
                 return ANEURALNETWORKS_BAD_DATA;
             }
-            std::vector<OperandType> inExpectedTypes = {OperandType::TENSOR_FLOAT32,
-                                                        OperandType::TENSOR_FLOAT32,
-                                                        OperandType::TENSOR_FLOAT32,
-                                                        OperandType::TENSOR_FLOAT32,
-                                                        OperandType::TENSOR_FLOAT32,
-                                                        OperandType::INT32};
-            std::vector<OperandType> outExpectedTypes = {OperandType::TENSOR_FLOAT32,
-                                                         OperandType::TENSOR_FLOAT32};
-            NN_RETURN_IF_ERROR(validateHalVersion(opType, halVersion, HalVersion::V1_0));
-            return validateOperationOperandTypes(operands,
-                                                 inputCount, inputIndexes,
-                                                 inExpectedTypes,
-                                                 outputCount, outputIndexes,
+            OperandType inputType = operands[inputIndexes[0]].type;
+            std::vector<OperandType> inExpectedTypes;
+            std::vector<OperandType> outExpectedTypes;
+            if (inputType == OperandType::TENSOR_FLOAT32) {
+                NN_RETURN_IF_ERROR(validateHalVersion(opType, halVersion, HalVersion::V1_0));
+                inExpectedTypes = {
+                        OperandType::TENSOR_FLOAT32, OperandType::TENSOR_FLOAT32,
+                        OperandType::TENSOR_FLOAT32, OperandType::TENSOR_FLOAT32,
+                        OperandType::TENSOR_FLOAT32, OperandType::INT32,
+                };
+                outExpectedTypes = {
+                        OperandType::TENSOR_FLOAT32,
+                        OperandType::TENSOR_FLOAT32,
+                };
+            } else if (inputType == OperandType::TENSOR_FLOAT16) {
+                NN_RETURN_IF_ERROR(validateHalVersion(opType, halVersion, HalVersion::V1_2));
+                inExpectedTypes = {
+                        OperandType::TENSOR_FLOAT16, OperandType::TENSOR_FLOAT16,
+                        OperandType::TENSOR_FLOAT16, OperandType::TENSOR_FLOAT16,
+                        OperandType::TENSOR_FLOAT16, OperandType::INT32,
+                };
+                outExpectedTypes = {
+                        OperandType::TENSOR_FLOAT16,
+                        OperandType::TENSOR_FLOAT16,
+                };
+            } else {
+                LOG(ERROR) << "Unsupported input tensor type for operation "
+                           << getOperationName(opType);
+                return ANEURALNETWORKS_BAD_DATA;
+            }
+            return validateOperationOperandTypes(operands, inputCount, inputIndexes,
+                                                 inExpectedTypes, outputCount, outputIndexes,
                                                  outExpectedTypes);
         }
         case ANEURALNETWORKS_SVDF: {
diff --git a/nn/common/operations/RNN.cpp b/nn/common/operations/RNN.cpp
index 4d7a4c9..6e39216 100644
--- a/nn/common/operations/RNN.cpp
+++ b/nn/common/operations/RNN.cpp
@@ -17,6 +17,7 @@
 #include "RNN.h"
 
 #include "CpuExecutor.h"
+#include "CpuOperationUtils.h"
 #include "HalInterfaces.h"
 
 #include "Tracing.h"
@@ -82,65 +83,108 @@
 }
 
 bool RNN::Eval() {
-  NNTRACE_COMP("RNN::Eval");
+    switch (input_->type) {  // Choose the evaluation path from the input operand's type.
+        case OperandType::TENSOR_FLOAT16: {  // Goes through temporary float vectors.
+            std::vector<float> inputDataFloat32(getNumberOfElements(input_->shape()));
+            convertFloat16ToFloat32(reinterpret_cast<_Float16*>(input_->buffer), &inputDataFloat32);
+            std::vector<float> hiddenStateDataFloat32(
+                    getNumberOfElements(hidden_state_in_->shape()));
+            convertFloat16ToFloat32(reinterpret_cast<_Float16*>(hidden_state_in_->buffer),
+                                    &hiddenStateDataFloat32);
+            std::vector<float> biasDataFloat32(getNumberOfElements(bias_->shape()));
+            convertFloat16ToFloat32(reinterpret_cast<_Float16*>(bias_->buffer), &biasDataFloat32);
+            std::vector<float> weightsDataFloat32(getNumberOfElements(weights_->shape()));
+            convertFloat16ToFloat32(reinterpret_cast<_Float16*>(weights_->buffer),
+                                    &weightsDataFloat32);
+            std::vector<float> recurrentWeightsDataFloat32(
+                    getNumberOfElements(recurrent_weights_->shape()));
+            convertFloat16ToFloat32(reinterpret_cast<_Float16*>(recurrent_weights_->buffer),
+                                    &recurrentWeightsDataFloat32);
+            std::vector<float> outputDataFloat32(getNumberOfElements(output_->shape()));
+            std::vector<float> hiddenStateOutputDataFloat32(
+                    getNumberOfElements(hidden_state_out_->shape()));
 
-  const float* bias_ptr = reinterpret_cast<float*>(bias_->buffer);
+            EvalFloat32(inputDataFloat32.data(), hiddenStateDataFloat32.data(),
+                        biasDataFloat32.data(), weightsDataFloat32.data(),
+                        recurrentWeightsDataFloat32.data(), outputDataFloat32.data(),
+                        hiddenStateOutputDataFloat32.data());
+            convertFloat32ToFloat16(outputDataFloat32,  // Results go to the operand buffers.
+                                    reinterpret_cast<_Float16*>(output_->buffer));
+            convertFloat32ToFloat16(hiddenStateOutputDataFloat32,
+                                    reinterpret_cast<_Float16*>(hidden_state_out_->buffer));
+            break;
+        }
+        case OperandType::TENSOR_FLOAT32: {  // Operand buffers are passed as float directly.
+            EvalFloat32(reinterpret_cast<float*>(input_->buffer),
+                        reinterpret_cast<float*>(hidden_state_in_->buffer),
+                        reinterpret_cast<float*>(bias_->buffer),
+                        reinterpret_cast<float*>(weights_->buffer),
+                        reinterpret_cast<float*>(recurrent_weights_->buffer),
+                        reinterpret_cast<float*>(output_->buffer),
+                        reinterpret_cast<float*>(hidden_state_out_->buffer));
+            break;
+        }
+        default: {  // Every other operand type is logged and reported as a failure.
+            LOG(ERROR) << "Unsupported data type: " << static_cast<int>(input_->type);
+            return false;
+        }
+    }
+    return true;  // NOTE(review): the bool returned by EvalFloat32 is not checked above.
+}
 
-  const uint32_t batch_size = input_->shape().dimensions[0];
-  const uint32_t num_units = weights_->shape().dimensions[0];
-  const uint32_t input_size = input_->shape().dimensions[1];
-  const uint32_t input_weights_stride = weights_->shape().dimensions[1];
-  const uint32_t recurrent_weights_stride =
-      recurrent_weights_->shape().dimensions[1];
+bool RNN::EvalFloat32(const float* inputData, const float* hiddenStateInputData,
+                      const float* biasData, const float* weightsData,
+                      const float* recurrentWeightsData, float* outputData,
+                      float* hiddenStateOutputData) {  // Float path called from Eval().
+    NNTRACE_COMP("RNN::Eval");
 
-  // For each batch
-  for (uint32_t b = 0; b < batch_size; b++) {
-    // Initialize the pointer to input, output and bias.
-    const float* input_ptr_batch =
-        reinterpret_cast<float*>(input_->buffer) + b * input_size;
-    const float* hidden_state_in_ptr_batch =
-        reinterpret_cast<float*>(hidden_state_in_->buffer) + b * num_units;
-    float* output_ptr_batch =
-        reinterpret_cast<float*>(output_->buffer) + b * num_units;
-    float* hidden_state_out_ptr_batch =
-        reinterpret_cast<float*>(hidden_state_out_->buffer) + b * num_units;
+    const uint32_t batch_size = input_->shape().dimensions[0];  // Sizes come from member shapes.
+    const uint32_t num_units = weights_->shape().dimensions[0];
+    const uint32_t input_size = input_->shape().dimensions[1];
+    const uint32_t input_weights_stride = weights_->shape().dimensions[1];
+    const uint32_t recurrent_weights_stride = recurrent_weights_->shape().dimensions[1];
 
-    // Initialize input_weights and recurrent_weights.
-    const float* input_weights_ptr = reinterpret_cast<float*>(weights_->buffer);
-    const float* recurrent_weights_ptr =
-        reinterpret_cast<float*>(recurrent_weights_->buffer);
+    // For each batch
+    for (uint32_t b = 0; b < batch_size; b++) {
+        // Initialize the pointer to input, output and bias.
+        const float* input_ptr_batch = inputData + b * input_size;
+        const float* hidden_state_in_ptr_batch = hiddenStateInputData + b * num_units;
+        float* output_ptr_batch = outputData + b * num_units;
+        float* hidden_state_out_ptr_batch = hiddenStateOutputData + b * num_units;
 
-    // Output = bias
-    for (uint32_t o = 0; o < num_units; o++) {
-      output_ptr_batch[o] = bias_ptr[o];
+        // Initialize input_weights and recurrent_weights.
+        const float* input_weights_ptr = weightsData;
+        const float* recurrent_weights_ptr = recurrentWeightsData;
+
+        // Output = bias
+        for (uint32_t o = 0; o < num_units; o++) {
+            output_ptr_batch[o] = biasData[o];
+        }
+
+        // Output += input * input_weights
+        for (uint32_t o = 0; o < num_units; o++) {
+            for (uint32_t i = 0; i < input_size; i++) {
+                output_ptr_batch[o] += input_ptr_batch[i] * input_weights_ptr[i];
+            }
+            input_weights_ptr += input_weights_stride;  // Next unit's input weights.
+        }
+
+        // Output += recurrent_weights * hidden_state
+        for (uint32_t o = 0; o < num_units; o++) {
+            for (uint32_t h = 0; h < num_units; h++) {
+                output_ptr_batch[o] += hidden_state_in_ptr_batch[h] * recurrent_weights_ptr[h];
+            }
+            recurrent_weights_ptr += recurrent_weights_stride;  // Next unit's recurrent weights.
+        }
+
+        // Output = activation(Output) and update hidden_state
+        for (uint32_t o = 0; o < num_units; o++) {
+            output_ptr_batch[o] = (ActivationFunctor(activation_))(output_ptr_batch[o]);
+            hidden_state_out_ptr_batch[o] = output_ptr_batch[o];  // Copy into the state output.
+        }
     }
 
-    // Output += input * input_weights
-    for (uint32_t o = 0; o < num_units; o++) {
-      for (uint32_t i = 0; i < input_size; i++) {
-        output_ptr_batch[o] += input_ptr_batch[i] * input_weights_ptr[i];
-      }
-      input_weights_ptr += input_weights_stride;
-    }
-
-    // Output += recurrent_weights * hidden_state
-    for (uint32_t o = 0; o < num_units; o++) {
-      for (uint32_t h = 0; h < num_units; h++) {
-        output_ptr_batch[o] +=
-            hidden_state_in_ptr_batch[h] * recurrent_weights_ptr[h];
-      }
-      recurrent_weights_ptr += recurrent_weights_stride;
-    }
-
-    // Output = activation(Output) and update hidden_state
-    for (uint32_t o = 0; o < num_units; o++) {
-      output_ptr_batch[o] =
-          (ActivationFunctor(activation_))(output_ptr_batch[o]);
-      hidden_state_out_ptr_batch[o] = output_ptr_batch[o];
-    }
-  }
-
-  return true;
+    return true;  // Only return statement: this function always reports success.
 }
 
 }  // namespace nn
diff --git a/nn/common/operations/RNN.h b/nn/common/operations/RNN.h
index da31f46..9eaa03b 100644
--- a/nn/common/operations/RNN.h
+++ b/nn/common/operations/RNN.h
@@ -48,6 +48,10 @@
   static constexpr int kOutputTensor = 1;
 
  private:
+  bool EvalFloat32(const float* inputData, const float* hiddenStateInputData, const float* biasData,
+                   const float* weightsData, const float* recurrentWeightsData, float* outputData,
+                   float* hiddenStateOutputData);  // Float computation invoked by Eval().
+
   ActivationFn activation_;
 
   const RunTimeOperandInfo *input_;
diff --git a/nn/runtime/include/NeuralNetworks.h b/nn/runtime/include/NeuralNetworks.h
index 4847142..d27747a 100644
--- a/nn/runtime/include/NeuralNetworks.h
+++ b/nn/runtime/include/NeuralNetworks.h
@@ -1488,27 +1488,26 @@
      *   argument (if not “NONE”).
      *
      * Supported tensor {@link OperandCode}:
+     * * {@link ANEURALNETWORKS_TENSOR_FLOAT16} (since API level 29)
      * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
      *
+     * The input and output tensors must all be the same type.
+     *
      * Inputs:
      * * 0: input.
-     *      A 2-D tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32} of shape
-     *      [batch_size, input_size], where “batch_size” corresponds to the
-     *      batching dimension, and “input_size” is the size of the input.
+     *      A 2-D tensor of shape [batch_size, input_size], where “batch_size”
+     *      corresponds to the batching dimension, and “input_size” is the size
+     *      of the input.
      * * 1: weights.
-     *      A 2-D tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32}, of shape
-     *      [num_units, input_size], where “num_units” corresponds to the
-     *      number of units.
+     *      A 2-D tensor of shape [num_units, input_size], where “num_units”
+     *      corresponds to the number of units.
      * * 2: recurrent_weights.
-     *      A 2-D tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32}, of shape
-     *      [num_units, num_units], with columns corresponding to the weights
-     *      from each unit.
+     *      A 2-D tensor of shape [num_units, num_units], with columns
+     *      corresponding to the weights from each unit.
      * * 3: bias.
-     *      A 1-D tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32}, of shape
-     *      [num_units].
+     *      A 1-D tensor of shape [num_units].
      * * 4: hidden state (in).
-     *      A 2-D tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32}, of shape
-     *      [batch_size, num_units].
+     *      A 2-D tensor of shape [batch_size, num_units].
      * * 5: fused_activation_function.
      *      An optional {@link FuseCode} value indicating the
      *      activation function. If “NONE” is specified then it results in a
@@ -1516,13 +1515,11 @@
      *
      * Outputs:
      * * 0: hidden state (out).
-     *      A 2-D tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32}, of shape
-     *      [batch_size, num_units].
+     *      A 2-D tensor of shape [batch_size, num_units].
      *
      * * 1: output.
-     *      A 2-D tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32}, of shape
-     *      [batch_size, num_units]. This is effectively the same as the
-     *      current state value.
+     *      A 2-D tensor of shape [batch_size, num_units]. This is effectively
+     *      the same as the current state value.
      *
      * Available since API level 27.
      */
diff --git a/nn/runtime/test/TestValidateOperations.cpp b/nn/runtime/test/TestValidateOperations.cpp
index 4b9a575..3ba9fbb 100644
--- a/nn/runtime/test/TestValidateOperations.cpp
+++ b/nn/runtime/test/TestValidateOperations.cpp
@@ -1473,6 +1473,45 @@
     EXPECT_TRUE(multinomialTest.testMutatingOutputOperandCounts());
 }
 
+TEST(OperationValidationTest, RNN_float16) {  // RNN validation with TENSOR_FLOAT16 tensors.
+    uint32_t oneDimensional[1] = {5};  // Dimensions used for the 1-D bias.
+    uint32_t twoDimensional[2] = {5, 5};  // Dimensions used for every 2-D tensor.
+    ANeuralNetworksOperandType floatTensor1D = {.type = ANEURALNETWORKS_TENSOR_FLOAT16,
+                                                .dimensionCount = 1,
+                                                .dimensions = oneDimensional,
+                                                .scale = 0.0f,
+                                                .zeroPoint = 0};
+    ANeuralNetworksOperandType floatTensor2D = {.type = ANEURALNETWORKS_TENSOR_FLOAT16,
+                                                .dimensionCount = 2,
+                                                .dimensions = twoDimensional,
+                                                .scale = 0.0f,
+                                                .zeroPoint = 0};
+    ANeuralNetworksOperandType intScalar = {.type = ANEURALNETWORKS_INT32,
+                                            .dimensionCount = 0,
+                                            .dimensions = nullptr,
+                                            .scale = 0.0f,
+                                            .zeroPoint = 0};
+
+    ANeuralNetworksOperandType input = floatTensor2D;
+    ANeuralNetworksOperandType weights = floatTensor2D;
+    ANeuralNetworksOperandType recurrentWeights = floatTensor2D;
+    ANeuralNetworksOperandType bias = floatTensor1D;
+    ANeuralNetworksOperandType hiddenStateIn = floatTensor2D;
+    ANeuralNetworksOperandType activation = intScalar;  // The only non-tensor input.
+
+    ANeuralNetworksOperandType hiddenStateOut = floatTensor2D;
+    ANeuralNetworksOperandType output = floatTensor2D;
+
+    OperationTestBase rnnTest(ANEURALNETWORKS_RNN,  // Six inputs, then two outputs.
+                              {input, weights, recurrentWeights, bias, hiddenStateIn, activation},
+                              {hiddenStateOut, output});
+
+    EXPECT_TRUE(rnnTest.testMutatingInputOperandCode());
+    EXPECT_TRUE(rnnTest.testMutatingInputOperandCounts());
+    EXPECT_TRUE(rnnTest.testMutatingOutputOperandCode());
+    EXPECT_TRUE(rnnTest.testMutatingOutputOperandCounts());
+}
+
 TEST(OperationValidationTest, RNN_float32) {
     uint32_t oneDimensional[1] = {5};
     uint32_t twoDimensional[2] = {5, 5};
diff --git a/nn/runtime/test/for-cts/TestGeneratedOneFile.cpp b/nn/runtime/test/for-cts/TestGeneratedOneFile.cpp
index 93d5cae..62f6ca9 100644
--- a/nn/runtime/test/for-cts/TestGeneratedOneFile.cpp
+++ b/nn/runtime/test/for-cts/TestGeneratedOneFile.cpp
@@ -409,6 +409,7 @@
 #include "../generated/tests/relu_float16_2.mod.py.cpp"
 #include "../generated/tests/reshape_float16.mod.py.cpp"
 #include "../generated/tests/resize_bilinear_v1_2.mod.py.cpp"
+#include "../generated/tests/rnn_float16.mod.py.cpp"
 #include "../generated/tests/roi_align.mod.py.cpp"
 #include "../generated/tests/roi_pooling.mod.py.cpp"
 #include "../generated/tests/rotated_bbox_transform.mod.py.cpp"
diff --git a/nn/runtime/test/generated/all_generated_V1_2_vts_tests.cpp b/nn/runtime/test/generated/all_generated_V1_2_vts_tests.cpp
index d8aeb1c..1b2eb76 100644
--- a/nn/runtime/test/generated/all_generated_V1_2_vts_tests.cpp
+++ b/nn/runtime/test/generated/all_generated_V1_2_vts_tests.cpp
@@ -18463,6 +18463,29 @@
 }
 
 
+// Generated from: rnn_float16.mod.py.
+namespace rnn_float16 {
+// Generated rnn_float16 test
+#include "examples/rnn_float16.example.cpp"
+// Generated model constructor
+#include "vts_models/rnn_float16.model.cpp"
+} // namespace rnn_float16
+
+TEST_F(NeuralnetworksHidlTest, rnn_float16) {
+  generated_tests::Execute(device,
+                           rnn_float16::createTestModel,
+                           rnn_float16::is_ignored,
+                           rnn_float16::get_examples());
+}
+
+TEST_F(ValidationTest, rnn_float16) {
+  const Model model = rnn_float16::createTestModel();
+  const std::vector<Request> requests = createRequests(rnn_float16::get_examples());
+  validateModel(model);
+  validateRequests(model, requests);
+}
+
+
 // Generated from: roi_align.mod.py.
 namespace roi_align {
 // Generated roi_align test
diff --git a/nn/runtime/test/generated/examples/rnn_float16.example.cpp b/nn/runtime/test/generated/examples/rnn_float16.example.cpp
new file mode 100644
index 0000000..a932c87
--- /dev/null
+++ b/nn/runtime/test/generated/examples/rnn_float16.example.cpp
@@ -0,0 +1,43 @@
+// clang-format off
+// Generated file (from: rnn_float16.mod.py). Do not edit
+std::vector<MixedTypedExample>& get_examples() {
+static std::vector<MixedTypedExample> examples = {
+// Begin of an example
+{
+.operands = {
+//Input(s)
+{ // See tools/test_generator/include/TestHarness.h:MixedTyped
+  // int -> FLOAT32 map
+  {},
+  // int -> INT32 map
+  {},
+  // int -> QUANT8_ASYMM map
+  {},
+  // int -> QUANT16_SYMM map
+  {},
+  // int -> FLOAT16 map
+  {{0, {0.23689353f, 0.285385f, 0.037029743f, -0.19858193f, -0.27569133f, 0.43773448f, 0.60379338f, 0.35562468f, 0.23689353f, 0.285385f, 0.037029743f, -0.19858193f, -0.27569133f, 0.43773448f, 0.60379338f, 0.35562468f}}, {1, {0.461459f, 0.153381f, 0.529743f, -0.00371218f, 0.676267f, -0.211346f, 0.317493f, 0.969689f, -0.343251f, 0.186423f, 0.398151f, 0.152399f, 0.448504f, 0.317662f, 0.523556f, -0.323514f, 0.480877f, 0.333113f, -0.757714f, -0.674487f, -0.643585f, 0.217766f, -0.0251462f, 0.79512f, -0.595574f, -0.422444f, 0.371572f, -0.452178f, -0.556069f, -0.482188f, -0.685456f, -0.727851f, 0.841829f, 0.551535f, -0.232336f, 0.729158f, -0.00294906f, -0.69754f, 0.766073f, -0.178424f, 0.369513f, -0.423241f, 0.548547f, -0.0152023f, -0.757482f, -0.85491f, 0.251331f, -0.989183f, 0.306261f, -0.340716f, 0.886103f, -0.0726757f, -0.723523f, -0.784303f, 0.0354295f, 0.566564f, -0.485469f, -0.620498f, 0.832546f, 0.697884f, -0.279115f, 0.294415f, -0.584313f, 0.548772f, 0.0648819f, 0.968726f, 0.723834f, -0.0080452f, -0.350386f, -0.272803f, 0.115121f, -0.412644f, -0.824713f, -0.992843f, -0.592904f, -0.417893f, 0.863791f, -0.423461f, -0.147601f, -0.770664f, -0.479006f, 0.654782f, 0.587314f, -0.639158f, 0.816969f, -0.337228f, 0.659878f, 0.73107f, 0.754768f, -0.337042f, 0.0960841f, 0.368357f, 0.244191f, -0.817703f, -0.211223f, 0.442012f, 0.37225f, -0.623598f, -0.405423f, 0.455101f, 0.673656f, -0.145345f, -0.511346f, -0.901675f, -0.81252f, -0.127006f, 0.809865f, -0.721884f, 0.636255f, 0.868989f, -0.347973f, -0.10179f, -0.777449f, 0.917274f, 0.819286f, 0.206218f, -0.00785118f, 0.167141f, 0.45872f, 0.972934f, -0.276798f, 0.837861f, 0.747958f, -0.0151566f, -0.330057f, -0.469077f, 0.277308f, 0.415818f}}, {2, {0.1f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.1f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.1f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 
0.0f, 0.0f, 0.0f, 0.1f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.1f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.1f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.1f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.1f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.1f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.1f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.1f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.1f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.1f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.1f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.1f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.1f}}, {3, {0.065691948f, -0.69055247f, 0.1107955f, -0.97084129f, -0.23957068f, -0.23566568f, -0.389184f, 0.47481549f, -0.4791103f, 0.29931796f, 0.10463274f, 0.83918178f, 0.37197268f, 0.61957061f, 0.3956964f, -0.37609905f}}, {4, {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}}},
+  // int -> BOOL8 map
+  {},
+},
+//Output(s)
+{ // See tools/test_generator/include/TestHarness.h:MixedTyped
+  // int -> FLOAT32 map
+  {},
+  // int -> INT32 map
+  {},
+  // int -> QUANT8_ASYMM map
+  {},
+  // int -> QUANT16_SYMM map
+  {},
+  // int -> FLOAT16 map
+  {{0, {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}}, {1, {0.496726f, 0.0f, 0.965996f, 0.0f, 0.0584254f, 0.0f, 0.0f, 0.12315f, 0.0f, 0.0f, 0.612266f, 0.456601f, 0.0f, 0.52286f, 1.16099f, 0.0291232f, 0.496726f, 0.0f, 0.965996f, 0.0f, 0.0584254f, 0.0f, 0.0f, 0.12315f, 0.0f, 0.0f, 0.612266f, 0.456601f, 0.0f, 0.52286f, 1.16099f, 0.0291232f}}},
+  // int -> BOOL8 map
+  {},
+}
+},
+}, // End of an example
+};
+return examples;
+};
+
diff --git a/nn/runtime/test/generated/models/rnn_float16.model.cpp b/nn/runtime/test/generated/models/rnn_float16.model.cpp
new file mode 100644
index 0000000..3b39a9b
--- /dev/null
+++ b/nn/runtime/test/generated/models/rnn_float16.model.cpp
@@ -0,0 +1,34 @@
+// clang-format off
+// Generated file (from: rnn_float16.mod.py). Do not edit
+void CreateModel(Model *model) {
+  OperandType type0(Type::TENSOR_FLOAT16, {2, 8});
+  OperandType type1(Type::TENSOR_FLOAT16, {16, 8});
+  OperandType type2(Type::TENSOR_FLOAT16, {16, 16});
+  OperandType type3(Type::TENSOR_FLOAT16, {16});
+  OperandType type4(Type::TENSOR_FLOAT16, {2, 16});
+  OperandType type5(Type::INT32, {});
+  // Phase 1, operands
+  auto input = model->addOperand(&type0);
+  auto weights = model->addOperand(&type1);
+  auto recurrent_weights = model->addOperand(&type2);
+  auto bias = model->addOperand(&type3);
+  auto hidden_state_in = model->addOperand(&type4);
+  auto activation_param = model->addOperand(&type5);
+  auto hidden_state_out = model->addOperand(&type4);
+  auto output = model->addOperand(&type4);
+  // Phase 2, operations
+  static int32_t activation_param_init[] = {1};
+  model->setOperandValue(activation_param, activation_param_init, sizeof(int32_t) * 1);
+  model->addOperation(ANEURALNETWORKS_RNN, {input, weights, recurrent_weights, bias, hidden_state_in, activation_param}, {hidden_state_out, output});
+  // Phase 3, inputs and outputs
+  model->identifyInputsAndOutputs(
+    {input, weights, recurrent_weights, bias, hidden_state_in},
+    {hidden_state_out, output});
+  assert(model->isValid());
+}
+
+inline bool is_ignored(int i) {
+  static std::set<int> ignore = {0};
+  return ignore.find(i) != ignore.end();
+}
+
diff --git a/nn/runtime/test/generated/tests/rnn_float16.mod.py.cpp b/nn/runtime/test/generated/tests/rnn_float16.mod.py.cpp
new file mode 100644
index 0000000..59b1b78
--- /dev/null
+++ b/nn/runtime/test/generated/tests/rnn_float16.mod.py.cpp
@@ -0,0 +1,17 @@
+// clang-format off
+// Generated file (from: rnn_float16.mod.py). Do not edit
+#include "../../TestGenerated.h"
+
+namespace rnn_float16 {
+// Generated rnn_float16 test
+#include "generated/examples/rnn_float16.example.cpp"
+// Generated model constructor
+#include "generated/models/rnn_float16.model.cpp"
+} // namespace rnn_float16
+
+TEST_F(GeneratedTests, rnn_float16) {
+    execute(rnn_float16::CreateModel,
+            rnn_float16::is_ignored,
+            rnn_float16::get_examples());
+}
+
diff --git a/nn/runtime/test/generated/vts_models/rnn_float16.model.cpp b/nn/runtime/test/generated/vts_models/rnn_float16.model.cpp
new file mode 100644
index 0000000..6cba0c6
--- /dev/null
+++ b/nn/runtime/test/generated/vts_models/rnn_float16.model.cpp
@@ -0,0 +1,109 @@
+// clang-format off
+// Generated file (from: rnn_float16.mod.py). Do not edit
+// Create the model
+Model createTestModel() {
+    const std::vector<Operand> operands = {
+        {
+            .type = OperandType::TENSOR_FLOAT16,
+            .dimensions = {2, 8},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::TENSOR_FLOAT16,
+            .dimensions = {16, 8},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::TENSOR_FLOAT16,
+            .dimensions = {16, 16},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::TENSOR_FLOAT16,
+            .dimensions = {16},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::TENSOR_FLOAT16,
+            .dimensions = {2, 16},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 0, .length = 4},
+        },
+        {
+            .type = OperandType::TENSOR_FLOAT16,
+            .dimensions = {2, 16},
+            .numberOfConsumers = 0,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_OUTPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::TENSOR_FLOAT16,
+            .dimensions = {2, 16},
+            .numberOfConsumers = 0,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_OUTPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        }
+    };
+
+    const std::vector<Operation> operations = {
+        {
+            .type = OperationType::RNN,
+            .inputs = {0, 1, 2, 3, 4, 5},
+            .outputs = {6, 7},
+        }
+    };
+
+    const std::vector<uint32_t> inputIndexes = {0, 1, 2, 3, 4};
+    const std::vector<uint32_t> outputIndexes = {6, 7};
+    std::vector<uint8_t> operandValues = {
+      1, 0, 0, 0
+    };
+    const std::vector<hidl_memory> pools = {};
+
+    return {
+        .operands = operands,
+        .operations = operations,
+        .inputIndexes = inputIndexes,
+        .outputIndexes = outputIndexes,
+        .operandValues = operandValues,
+        .pools = pools,
+    };
+}
+
+inline bool is_ignored(int i) {
+  static std::set<int> ignore = {0};
+  return ignore.find(i) != ignore.end();
+}
+
diff --git a/nn/runtime/test/specs/V1_2/rnn_float16.mod.py b/nn/runtime/test/specs/V1_2/rnn_float16.mod.py
new file mode 100644
index 0000000..7968c55
--- /dev/null
+++ b/nn/runtime/test/specs/V1_2/rnn_float16.mod.py
@@ -0,0 +1,201 @@
+#
+# Copyright (C) 2018 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+batches = 2  # first dimension of the input, hidden-state and output shapes
+units = 16  # second dimension of the hidden-state/output shapes; first dimension of both weight shapes
+input_size = 8  # second dimension of the input and weights shapes
+
+model = Model()
+
+input = Input("input", "TENSOR_FLOAT16", "{%d, %d}" % (batches, input_size))
+weights = Input("weights", "TENSOR_FLOAT16", "{%d, %d}" % (units, input_size))
+recurrent_weights = Input("recurrent_weights", "TENSOR_FLOAT16", "{%d, %d}" % (units, units))
+bias = Input("bias", "TENSOR_FLOAT16", "{%d}" % (units))
+hidden_state_in = Input("hidden_state_in", "TENSOR_FLOAT16", "{%d, %d}" % (batches, units))
+
+activation_param = Int32Scalar("activation_param", 1)  # Relu
+
+hidden_state_out = IgnoredOutput("hidden_state_out", "TENSOR_FLOAT16", "{%d, %d}" % (batches, units))  # IgnoredOutput, not Output; the example below supplies zeros for it
+output = Output("output", "TENSOR_FLOAT16", "{%d, %d}" % (batches, units))
+
+model = model.Operation("RNN", input, weights, recurrent_weights, bias, hidden_state_in,
+                        activation_param).To([hidden_state_out, output])  # single RNN op; outputs listed hidden state first, then output
+
+input0 = {  # operand values shared by the examples; the loop at the end of the file adds the input and hidden_state_in entries
+    weights: [  # units * input_size = 128 values
+        0.461459,    0.153381,   0.529743,    -0.00371218, 0.676267,   -0.211346,
+       0.317493,    0.969689,   -0.343251,   0.186423,    0.398151,   0.152399,
+       0.448504,    0.317662,   0.523556,    -0.323514,   0.480877,   0.333113,
+       -0.757714,   -0.674487,  -0.643585,   0.217766,    -0.0251462, 0.79512,
+       -0.595574,   -0.422444,  0.371572,    -0.452178,   -0.556069,  -0.482188,
+       -0.685456,   -0.727851,  0.841829,    0.551535,    -0.232336,  0.729158,
+       -0.00294906, -0.69754,   0.766073,    -0.178424,   0.369513,   -0.423241,
+       0.548547,    -0.0152023, -0.757482,   -0.85491,    0.251331,   -0.989183,
+       0.306261,    -0.340716,  0.886103,    -0.0726757,  -0.723523,  -0.784303,
+       0.0354295,   0.566564,   -0.485469,   -0.620498,   0.832546,   0.697884,
+       -0.279115,   0.294415,   -0.584313,   0.548772,    0.0648819,  0.968726,
+       0.723834,    -0.0080452, -0.350386,   -0.272803,   0.115121,   -0.412644,
+       -0.824713,   -0.992843,  -0.592904,   -0.417893,   0.863791,   -0.423461,
+       -0.147601,   -0.770664,  -0.479006,   0.654782,    0.587314,   -0.639158,
+       0.816969,    -0.337228,  0.659878,    0.73107,     0.754768,   -0.337042,
+       0.0960841,   0.368357,   0.244191,    -0.817703,   -0.211223,  0.442012,
+       0.37225,     -0.623598,  -0.405423,   0.455101,    0.673656,   -0.145345,
+       -0.511346,   -0.901675,  -0.81252,    -0.127006,   0.809865,   -0.721884,
+       0.636255,    0.868989,   -0.347973,   -0.10179,    -0.777449,  0.917274,
+       0.819286,    0.206218,   -0.00785118, 0.167141,    0.45872,    0.972934,
+       -0.276798,   0.837861,   0.747958,    -0.0151566,  -0.330057,  -0.469077,
+       0.277308,    0.415818
+    ],
+    recurrent_weights: [  # units * units = 256 values: 0.1 at every 17th position (the diagonal if read row-major), 0 elsewhere
+        0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0.1
+    ],
+    bias: [  # units = 16 values
+        0.065691948, -0.69055247, 0.1107955, -0.97084129, -0.23957068,
+        -0.23566568, -0.389184, 0.47481549, -0.4791103, 0.29931796,
+        0.10463274, 0.83918178, 0.37197268, 0.61957061, 0.3956964,
+        -0.37609905
+    ],
+}
+
+
+test_inputs = [  # 128 values; the loop at the end of the file takes input_size consecutive values per example
+    0.23689353,   0.285385,     0.037029743, -0.19858193,  -0.27569133,
+    0.43773448,   0.60379338,   0.35562468,  -0.69424844,  -0.93421471,
+    -0.87287879,  0.37144363,   -0.62476718, 0.23791671,   0.40060222,
+    0.1356622,    -0.99774903,  -0.98858172, -0.38952237,  -0.47685933,
+    0.31073618,   0.71511042,   -0.63767755, -0.31729108,  0.33468103,
+    0.75801885,   0.30660987,   -0.37354088, 0.77002847,   -0.62747043,
+    -0.68572164,  0.0069220066, 0.65791464,  0.35130811,   0.80834007,
+    -0.61777675,  -0.21095741,  0.41213346,  0.73784804,   0.094794154,
+    0.47791874,   0.86496925,   -0.53376222, 0.85315156,   0.10288584,
+    0.86684,      -0.011186242, 0.10513687,  0.87825835,   0.59929144,
+    0.62827742,   0.18899453,   0.31440187,  0.99059987,   0.87170351,
+    -0.35091716,  0.74861872,   0.17831337,  0.2755419,    0.51864719,
+    0.55084288,   0.58982027,   -0.47443086, 0.20875752,   -0.058871567,
+    -0.66609079,  0.59098077,   0.73017097,  0.74604273,   0.32882881,
+    -0.17503482,  0.22396147,   0.19379807,  0.29120302,   0.077113032,
+    -0.70331609,  0.15804303,   -0.93407321, 0.40182066,   0.036301374,
+    0.66521823,   0.0300982,    -0.7747041,  -0.02038002,  0.020698071,
+    -0.90300065,  0.62870288,   -0.23068321, 0.27531278,   -0.095755219,
+    -0.712036,    -0.17384434,  -0.50593495, -0.18646687,  -0.96508682,
+    0.43519354,   0.14744234,   0.62589407,  0.1653645,    -0.10651493,
+    -0.045277178, 0.99032974,   -0.88255352, -0.85147917,  0.28153265,
+    0.19455957,   -0.55479527,  -0.56042433, 0.26048636,   0.84702539,
+    0.47587705,   -0.074295521, -0.12287641, 0.70117295,   0.90532446,
+    0.89782166,   0.79817224,   0.53402734,  -0.33286154,  0.073485017,
+    -0.56172788,  -0.044897556, 0.89964068,  -0.067662835, 0.76863563,
+    0.93455386,   -0.6324693,   -0.083922029
+]
+
+golden_outputs = [  # 16 blank-line-separated groups of `units` (16) values; the loop at the end of the file takes one group per example
+    0.496726,   0,          0.965996,  0,         0.0584254, 0,
+    0,          0.12315,    0,         0,         0.612266,  0.456601,
+    0,          0.52286,    1.16099,   0.0291232,
+
+    0,          0,          0.524901,  0,         0,         0,
+    0,          1.02116,    0,         1.35762,   0,         0.356909,
+    0.436415,   0.0355727,  0,         0,
+
+    0,          0,          0,         0.262335,  0,         0,
+    0,          1.33992,    0,         2.9739,    0,         0,
+    1.31914,    2.66147,    0,         0,
+
+    0.942568,   0,          0,         0,         0.025507,  0,
+    0,          0,          0.321429,  0.569141,  1.25274,   1.57719,
+    0.8158,     1.21805,    0.586239,  0.25427,
+
+    1.04436,    0,          0.630725,  0,         0.133801,  0.210693,
+    0.363026,   0,          0.533426,  0,         1.25926,   0.722707,
+    0,          1.22031,    1.30117,   0.495867,
+
+    0.222187,   0,          0.72725,   0,         0.767003,  0,
+    0,          0.147835,   0,         0,         0,         0.608758,
+    0.469394,   0.00720298, 0.927537,  0,
+
+    0.856974,   0.424257,   0,         0,         0.937329,  0,
+    0,          0,          0.476425,  0,         0.566017,  0.418462,
+    0.141911,   0.996214,   1.13063,   0,
+
+    0.967899,   0,          0,         0,         0.0831304, 0,
+    0,          1.00378,    0,         0,         0,         1.44818,
+    1.01768,    0.943891,   0.502745,  0,
+
+    0.940135,   0,          0,         0,         0,         0,
+    0,          2.13243,    0,         0.71208,   0.123918,  1.53907,
+    1.30225,    1.59644,    0.70222,   0,
+
+    0.804329,   0,          0.430576,  0,         0.505872,  0.509603,
+    0.343448,   0,          0.107756,  0.614544,  1.44549,   1.52311,
+    0.0454298,  0.300267,   0.562784,  0.395095,
+
+    0.228154,   0,          0.675323,  0,         1.70536,   0.766217,
+    0,          0,          0,         0.735363,  0.0759267, 1.91017,
+    0.941888,   0,          0,         0,
+
+    0,          0,          1.5909,    0,         0,         0,
+    0,          0.5755,     0,         0.184687,  0,         1.56296,
+    0.625285,   0,          0,         0,
+
+    0,          0,          0.0857888, 0,         0,         0,
+    0,          0.488383,   0.252786,  0,         0,         0,
+    1.02817,    1.85665,    0,         0,
+
+    0.00981836, 0,          1.06371,   0,         0,         0,
+    0,          0,          0,         0.290445,  0.316406,  0,
+    0.304161,   1.25079,    0.0707152, 0,
+
+    0.986264,   0.309201,   0,         0,         0,         0,
+    0,          1.64896,    0.346248,  0,         0.918175,  0.78884,
+    0.524981,   1.92076,    2.07013,   0.333244,
+
+    0.415153,   0.210318,   0,         0,         0,         0,
+    0,          2.02616,    0,         0.728256,  0.84183,   0.0907453,
+    0.628881,   3.58099,    1.49974,   0
+]
+
+input_sequence_size = len(test_inputs) // (input_size * batches)
+
+# TODO: enable the other data points after fixing reference issues
+#for i in range(input_sequence_size):
+for i in range(1):
+  input_begin = i * input_size
+  input_end = input_begin + input_size
+  # Feed every batch row the same input_size-long slice (tracks `batches` instead of assuming 2).
+  input0[input] = test_inputs[input_begin:input_end] * batches
+  input0[hidden_state_in] = [0] * (batches * units)
+  output0 = {
+    hidden_state_out: [0] * (batches * units),
+  }
+  golden_start = i * units
+  golden_end = golden_start + units
+  # The expected output likewise repeats one units-long golden slice per batch row.
+  output0[output] = golden_outputs[golden_start:golden_end] * batches
+  Example((input0, output0))