arm_compute v20.05

commit: 6a7771e460abeac7d401d6d38a0fcf0a0d2c3cbe [log] [tgz]
author: Jenkins <bsgcomp@arm.com> Thu May 28 11:28:36 2020 +0100
committer: Jenkins <bsgcomp@arm.com> Thu May 28 11:28:36 2020 +0100
tree: 7dfde1c353a752c65400d4b3854f7675891f86a7
parent: 575c81f38edecaa662f3ee45d04ad8efded4fa81 [diff] [blame]
diff --git a/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp b/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp
index e5f1278..c57fcc9 100644
--- a/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp
+++ b/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -62,6 +62,18 @@
                                      ICLTensor *cell_state_in, const ICLTensor *output_state_in,
                                      ICLTensor *cell_state_out, ICLTensor *output_state_out)
 {
+    configure(CLKernelLibrary::get().get_compile_context(), input, input_to_input_weights, input_to_forget_weights, input_to_cell_weights, input_to_output_weights, recurrent_to_input_weights,
+              recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights, input_gate_bias, forget_gate_bias, cell_bias, output_gate_bias, cell_state_in, output_state_in, cell_state_out,
+              output_state_out);
+}
+
+void CLLSTMLayerQuantized::configure(const CLCompileContext &compile_context, const ICLTensor *input,
+                                     const ICLTensor *input_to_input_weights, const ICLTensor *input_to_forget_weights, const ICLTensor *input_to_cell_weights, const ICLTensor *input_to_output_weights,
+                                     const ICLTensor *recurrent_to_input_weights, const ICLTensor *recurrent_to_forget_weights, const ICLTensor *recurrent_to_cell_weights, const ICLTensor *recurrent_to_output_weights,
+                                     const ICLTensor *input_gate_bias, const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias,
+                                     ICLTensor *cell_state_in, const ICLTensor *output_state_in,
+                                     ICLTensor *cell_state_out, ICLTensor *output_state_out)
+{
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, input_to_input_weights, input_to_forget_weights, input_to_cell_weights, input_to_output_weights,
                                  recurrent_to_input_weights, recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights,
                                  input_gate_bias, forget_gate_bias, cell_bias, output_gate_bias, cell_state_in, output_state_in, cell_state_out, output_state_out);
@@ -107,18 +119,18 @@
     recurrent_weights_vector.emplace_back(recurrent_to_output_weights);
 
     _input_weights.allocator()->init(TensorInfo(TensorShape(input_size, 4 * output_size), 1, DataType::QASYMM8, qweights));
-    _concat_input_weights.configure(inputs_weights_vector, &_input_weights, Window::DimY);
+    _concat_input_weights.configure(compile_context, inputs_weights_vector, &_input_weights, Window::DimY);
 
     _recurrent_weights.allocator()->init(TensorInfo(TensorShape(output_size, 4 * output_size), 1, DataType::QASYMM8, qweights));
-    _concat_recurrent_weights.configure(recurrent_weights_vector, &_recurrent_weights, Window::DimY);
+    _concat_recurrent_weights.configure(compile_context, recurrent_weights_vector, &_recurrent_weights, Window::DimY);
 
     std::vector<const ICLTensor *> weights_vector;
     weights_vector.emplace_back(&_recurrent_weights);
     weights_vector.emplace_back(&_input_weights);
 
     _weights.allocator()->init(TensorInfo(TensorShape(output_size + input_size, 4 * output_size), 1, DataType::QASYMM8, qweights));
-    _concat_weights.configure(weights_vector, &_weights, Window::DimX);
-    _transpose_weights.configure(&_weights, &_weights_transposed);
+    _concat_weights.configure(compile_context, weights_vector, &_weights, Window::DimX);
+    _transpose_weights.configure(compile_context, &_weights, &_weights_transposed);
 
     // Input concatenation
     std::vector<const ICLTensor *> input_vector;
@@ -127,7 +139,7 @@
 
     _memory_group.manage(&_input);
     _input.allocator()->init(TensorInfo(TensorShape(output_size + input_size, batch_size), 1, DataType::QASYMM8, qasymm));
-    _concat_inputs.configure(input_vector, &_input, Window::DimX);
+    _concat_inputs.configure(compile_context, input_vector, &_input, Window::DimX);
 
     // Bias concatenation
     std::vector<const ICLTensor *> bias_vector;
@@ -137,7 +149,7 @@
     bias_vector.emplace_back(output_gate_bias);
 
     _bias.allocator()->init(TensorInfo(TensorShape(4 * output_size), 1, DataType::S32));
-    _concat_bias.configure(bias_vector, &_bias, Window::DimX);
+    _concat_bias.configure(compile_context, bias_vector, &_bias, Window::DimX);
 
     // Invert the offset for gemmlowp
     _input.info()->set_quantization_info(QuantizationInfo(qasymm.uniform().scale, -qasymm.uniform().offset));
@@ -146,7 +158,7 @@
     // Run gemmlowp
     _memory_group.manage(&_output_highp);
     _output_highp.allocator()->init(TensorInfo(TensorShape(4 * output_size, batch_size), 1, DataType::S32));
-    _gemmlowp.configure(&_input, &_weights_transposed, nullptr, &_output_highp);
+    _gemmlowp.configure(compile_context, &_input, &_weights_transposed, nullptr, &_output_highp);
     _input.allocator()->allocate();
 
     // Set the offset back
@@ -162,7 +174,7 @@
     quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift);
 
     _memory_group.manage(&_output_lowp);
-    _output_stage.configure(&_output_highp, &_bias, &_output_lowp, output_multiplier, output_shift);
+    _output_stage.configure(compile_context, &_output_highp, &_bias, &_output_lowp, output_multiplier, output_shift);
     _output_highp.allocator()->allocate();
     _bias.allocator()->allocate();
 
@@ -170,86 +182,86 @@
     if(batch_size > 1)
     {
         _memory_group.manage(&_input_gate_input);
-        _slice_input_tensor.configure(&_output_lowp, &_input_gate_input, { 0, 0 }, { output_size, batch_size });
+        _slice_input_tensor.configure(compile_context, &_output_lowp, &_input_gate_input, { 0, 0 }, { output_size, batch_size });
         _memory_group.manage(&_forget_gate_input);
-        _slice_forget_tensor.configure(&_output_lowp, &_forget_gate_input, { output_size, 0 }, { 2 * output_size, batch_size });
+        _slice_forget_tensor.configure(compile_context, &_output_lowp, &_forget_gate_input, { output_size, 0 }, { 2 * output_size, batch_size });
         _memory_group.manage(&_input_modulation_gate_input);
-        _slice_cell_tensor.configure(&_output_lowp, &_input_modulation_gate_input, { 2 * output_size, 0 }, { 3 * output_size, batch_size });
+        _slice_cell_tensor.configure(compile_context, &_output_lowp, &_input_modulation_gate_input, { 2 * output_size, 0 }, { 3 * output_size, batch_size });
         _memory_group.manage(&_output_gate_input);
-        _slice_output_tensor.configure(&_output_lowp, &_output_gate_input, { 3 * output_size, 0 }, { 4 * output_size, batch_size });
+        _slice_output_tensor.configure(compile_context, &_output_lowp, &_output_gate_input, { 3 * output_size, 0 }, { 4 * output_size, batch_size });
         _output_lowp.allocator()->allocate();
     }
     else
     {
         _memory_group.manage(&_input_gate_input);
-        _slice_input_tensor.configure(&_output_lowp, &_input_gate_input, { 0 }, { output_size });
+        _slice_input_tensor.configure(compile_context, &_output_lowp, &_input_gate_input, { 0 }, { output_size });
         _memory_group.manage(&_forget_gate_input);
-        _slice_forget_tensor.configure(&_output_lowp, &_forget_gate_input, { output_size }, { 2 * output_size });
+        _slice_forget_tensor.configure(compile_context, &_output_lowp, &_forget_gate_input, { output_size }, { 2 * output_size });
         _memory_group.manage(&_input_modulation_gate_input);
-        _slice_cell_tensor.configure(&_output_lowp, &_input_modulation_gate_input, { 2 * output_size }, { 3 * output_size });
+        _slice_cell_tensor.configure(compile_context, &_output_lowp, &_input_modulation_gate_input, { 2 * output_size }, { 3 * output_size });
         _memory_group.manage(&_output_gate_input);
-        _slice_output_tensor.configure(&_output_lowp, &_output_gate_input, { 3 * output_size }, { 4 * output_size });
+        _slice_output_tensor.configure(compile_context, &_output_lowp, &_output_gate_input, { 3 * output_size }, { 4 * output_size });
         _output_lowp.allocator()->allocate();
     }
 
     // Forget gate
     _memory_group.manage(&_forget_gate_output);
     _forget_gate_output.allocator()->init(TensorInfo(_forget_gate_input.info()->tensor_shape(), 1, DataType::QSYMM16, qsymm_0));
-    _sigmoid_forget_gate.configure(&_forget_gate_input, &_forget_gate_output, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
+    _sigmoid_forget_gate.configure(compile_context, &_forget_gate_input, &_forget_gate_output, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
     _forget_gate_input.allocator()->allocate();
 
     // Input gate
     _memory_group.manage(&_input_gate_output);
     _input_gate_output.allocator()->init(TensorInfo(_input_gate_input.info()->tensor_shape(), 1, DataType::QSYMM16, qsymm_0));
-    _sigmoid_input_gate.configure(&_input_gate_input, &_input_gate_output, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
+    _sigmoid_input_gate.configure(compile_context, &_input_gate_input, &_input_gate_output, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
     _input_gate_input.allocator()->allocate();
 
     // Input modulation gate equation
     _memory_group.manage(&_input_modulation_gate_output);
     _input_modulation_gate_output.allocator()->init(TensorInfo(_input_modulation_gate_input.info()->tensor_shape(), 1, DataType::QSYMM16, qsymm_0));
-    _tanh_modulation_gate.configure(&_input_modulation_gate_input, &_input_modulation_gate_output, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f));
+    _tanh_modulation_gate.configure(compile_context, &_input_modulation_gate_input, &_input_modulation_gate_output, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f));
     _input_modulation_gate_input.allocator()->allocate();
 
     // Output gate
     _memory_group.manage(&_output_gate_output);
     _output_gate_output.allocator()->init(TensorInfo(_output_gate_input.info()->tensor_shape(), 1, DataType::QSYMM16, qsymm_0));
-    _sigmoid_output_gate.configure(&_output_gate_input, &_output_gate_output, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
+    _sigmoid_output_gate.configure(compile_context, &_output_gate_input, &_output_gate_output, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
     _output_gate_input.allocator()->allocate();
 
     // Long term memory
     _memory_group.manage(&_cell_state_tmp1);
     _cell_state_tmp1.allocator()->init(TensorInfo(_forget_gate_output.info()->tensor_shape(), 1, DataType::QSYMM16, qsymm_4));
-    _mul_forget_gate_cell_state.configure(&_forget_gate_output, cell_state_in, &_cell_state_tmp1, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
+    _mul_forget_gate_cell_state.configure(compile_context, &_forget_gate_output, cell_state_in, &_cell_state_tmp1, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
     _forget_gate_output.allocator()->allocate();
 
     _memory_group.manage(&_cell_state_tmp2);
     _cell_state_tmp2.allocator()->init(TensorInfo(_input_gate_output.info()->tensor_shape(), 1, DataType::QSYMM16, qsymm_4));
-    _mul_input_gate_input_mod_gate.configure(&_input_gate_output, &_input_modulation_gate_output, &_cell_state_tmp2, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
+    _mul_input_gate_input_mod_gate.configure(compile_context, &_input_gate_output, &_input_modulation_gate_output, &_cell_state_tmp2, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
     _input_modulation_gate_output.allocator()->allocate();
     _input_gate_output.allocator()->allocate();
 
-    _add_cell_state_tmps.configure(&_cell_state_tmp1, &_cell_state_tmp2, cell_state_out, ConvertPolicy::SATURATE);
+    _add_cell_state_tmps.configure(compile_context, &_cell_state_tmp1, &_cell_state_tmp2, cell_state_out, ConvertPolicy::SATURATE);
     _cell_state_tmp1.allocator()->allocate();
     _cell_state_tmp2.allocator()->allocate();
 
     // Short term memory
     _memory_group.manage(&_output_state_tmp);
     _output_state_tmp.allocator()->init(TensorInfo(cell_state_out->info()->tensor_shape(), 1, DataType::QSYMM16, qsymm_0));
-    _tanh_output_state.configure(cell_state_out, &_output_state_tmp, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f));
+    _tanh_output_state.configure(compile_context, cell_state_out, &_output_state_tmp, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f));
 
     _memory_group.manage(&_output_state_out_symm);
     _output_state_out_symm.allocator()->init(TensorInfo(_output_gate_output.info()->tensor_shape(), 1, DataType::QSYMM16, qsymm_0));
-    _mul_output_state_tmp_output_gate.configure(&_output_state_tmp, &_output_gate_output, &_output_state_out_symm, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
+    _mul_output_state_tmp_output_gate.configure(compile_context, &_output_state_tmp, &_output_gate_output, &_output_state_out_symm, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
     _output_gate_output.allocator()->allocate();
     _output_state_tmp.allocator()->allocate();
 
     // Requantize the output state from QSYMM16 to QASYMM8
     _memory_group.manage(&_output_state_out_f32);
     _output_state_out_f32.allocator()->init(TensorInfo(_output_state_out_symm.info()->tensor_shape(), 1, DataType::F32));
-    _dequantize.configure(&_output_state_out_symm, &_output_state_out_f32);
+    _dequantize.configure(compile_context, &_output_state_out_symm, &_output_state_out_f32);
     _output_state_out_symm.allocator()->allocate();
 
-    _quantize.configure(&_output_state_out_f32, output_state_out);
+    _quantize.configure(compile_context, &_output_state_out_f32, output_state_out);
     _output_state_out_f32.allocator()->allocate();
 }
commit	6a7771e460abeac7d401d6d38a0fcf0a0d2c3cbe	[log] [tgz]
author	Jenkins <bsgcomp@arm.com>	Thu May 28 11:28:36 2020 +0100
committer	Jenkins <bsgcomp@arm.com>	Thu May 28 11:28:36 2020 +0100
tree	7dfde1c353a752c65400d4b3854f7675891f86a7
parent	575c81f38edecaa662f3ee45d04ad8efded4fa81 [diff] [blame]