Clarify the expected scale and zeroPoint for affected ops.

  - Remove the OperandType constructor taking f_min and f_max as
    quantization parameters, as it causes confusion for ops like
    LOGISTIC and SOFTMAX.
  - Update the documentation to clearly state the expected scale and
    zeroPoint for LOGISTIC, SOFTMAX, and CONCATENATION.
  - Update the tests to specify scale and zeroPoint directly.
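
  For reference, a minimal sketch (assuming the standard asymmetric
  uint8 mapping; QuantParams and fromMinMax are illustrative names,
  not NNAPI API) of the arithmetic the removed constructor performed,
  showing how the old (f_min, f_max) pairs map to the explicit scale
  and zeroPoint now passed in the tests:

      #include <algorithm>
      #include <cmath>
      #include <cstdint>

      struct QuantParams { float scale; int32_t zeroPoint; };

      // Mirrors the removed constructor: 255 quantized steps for uint8,
      // zero point clamped to [0, 255].
      QuantParams fromMinMax(float f_min, float f_max) {
          float scale = (f_max - f_min) / 255.0f;
          int32_t zeroPoint = static_cast<int32_t>(
                  std::min(255.0f, std::max(0.0f, std::round(-f_min / scale))));
          return {scale, zeroPoint};
      }

      // fromMinMax(0.0f, 127.5f)    -> {0.5f, 0}    (most quant8 tests)
      // fromMinMax(-127.5f, 127.5f) -> {1.0f, 128}  (relu_quant8)
      //
      // The confusion: the natural LOGISTIC/SOFTMAX output range
      // [0.0f, 1.0f] yields scale 1.f / 255, while the kernels require
      // exactly 1.f / 256 with zeroPoint 0. Tests now state this directly:
      //   OperandType type2(Type::TENSOR_QUANT8_ASYMM, {1, 4}, 1.f / 256, 0);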

Bug: 63905942
Test: mm
Test: NeuralNetworksTest passes
Change-Id: Ia450d6ce9509205d22e6383bd7e454afa0568cbb
diff --git a/nn/common/operations/Activation.cpp b/nn/common/operations/Activation.cpp
index 6850b39..091419e 100644
--- a/nn/common/operations/Activation.cpp
+++ b/nn/common/operations/Activation.cpp
@@ -127,7 +127,7 @@
 
 bool logisticQuant8(const uint8_t* inputData, const Shape& inputShape,
                     uint8_t* outputData, const Shape& outputShape) {
-    if (outputShape.offset != 0 || outputShape.scale != 1.f / 255) {
+    if (outputShape.offset != 0 || outputShape.scale != 1.f / 256) {
         LOG(ERROR) << "incorrect scale / offset for output";
         return false;
     }
@@ -176,7 +176,7 @@
         return false;
     }
 
-    if (outputShape.offset != 0 || outputShape.scale != 1.f / 255) {
+    if (outputShape.offset != 0 || outputShape.scale != 1.f / 256) {
         LOG(ERROR) << "incorrect scale / offset for output";
         return false;
     }
diff --git a/nn/runtime/include/NeuralNetworks.h b/nn/runtime/include/NeuralNetworks.h
index 4600b6b..fdfddc0 100644
--- a/nn/runtime/include/NeuralNetworks.h
+++ b/nn/runtime/include/NeuralNetworks.h
@@ -172,10 +172,12 @@
      * Supported tensor rank: up to 4
      *
      * Inputs:
-     * 0 ~ n: The list on n input tensors, of shape [D0, D1, ..., Daxis(i), ..., Dm]
-     * n+1: An INT32 value, specifying the concatenation axis.
-     * n+2: An INT32 value, and has to be one of the {@link FuseCode} values.
-     *      Specifies the activation to invoke on the result of each addition.
+     * * 0 ~ n: The list of n input tensors, of shape [D0, D1, ..., Daxis(i), ..., Dm].
+     *          For inputs of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} type, all
+     *          input tensors must have the same scale and zeroPoint.
+     * * n+1: An INT32 value, specifying the concatenation axis.
+     * * n+2: An INT32 value, and has to be one of the {@link FuseCode} values.
+     *        Specifies the activation to invoke on the result of the concatenation.
      *
      * Outputs:
      * * 0: The output, a tensor of the same type as the input tensors.
@@ -506,7 +508,9 @@
      * * 0: A tensor, specifying the input.
      *
      * Outputs:
-     * * 0: The output tensor of same shape as input0, with range [0.0, 1.0].
+     * * 0: The output tensor of same shape as input0.
+     *      For {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} type,
+     *      the scale must be 1.f / 256 and the zeroPoint must be 0.
      */
     ANEURALNETWORKS_LOGISTIC = 14,
 
@@ -872,7 +876,9 @@
      * * 1: A FLOAT32 value, specifying the positive scaling factor for the exponent, beta.
      *
      * Outputs:
-     * * 0: The output tensor of same shape as input0, with range [0.0, 1.0].
+     * * 0: The output tensor of same shape as input0.
+     *      For {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} type,
+     *      the scale must be 1.f / 256 and the zeroPoint must be 0.
      */
     ANEURALNETWORKS_SOFTMAX = 25,
 
diff --git a/nn/runtime/include/NeuralNetworksWrapper.h b/nn/runtime/include/NeuralNetworksWrapper.h
index 133e754..3521fc7 100644
--- a/nn/runtime/include/NeuralNetworksWrapper.h
+++ b/nn/runtime/include/NeuralNetworksWrapper.h
@@ -56,31 +56,15 @@
     // int32_t type;
     std::vector<uint32_t> dimensions;
 
-    OperandType(Type type, const std::vector<uint32_t>& d) : dimensions(d) {
+    OperandType(Type type, const std::vector<uint32_t>& d,
+                float scale = 0.0f, int32_t zeroPoint = 0) : dimensions(d) {
         operandType.type = static_cast<int32_t>(type);
-        operandType.scale = 0.0f;
-        operandType.zeroPoint = 0;
+        operandType.scale = scale;
+        operandType.zeroPoint = zeroPoint;
 
         operandType.dimensionCount = static_cast<uint32_t>(dimensions.size());
         operandType.dimensions = dimensions.data();
     }
-
-    OperandType(Type type, float scale, const std::vector<uint32_t>& d) : OperandType(type, d) {
-        operandType.scale = scale;
-    }
-
-    OperandType(Type type, float f_min, float f_max, const std::vector<uint32_t>& d)
-        : OperandType(type, d) {
-        uint8_t q_min = std::numeric_limits<uint8_t>::min();
-        uint8_t q_max = std::numeric_limits<uint8_t>::max();
-        float range = q_max - q_min;
-        float scale = (f_max - f_min) / range;
-        int32_t zeroPoint =
-                    fmin(q_max, fmax(q_min, static_cast<uint8_t>(round(q_min - f_min / scale))));
-
-        operandType.scale = scale;
-        operandType.zeroPoint = zeroPoint;
-    }
 };
 
 class Memory {
diff --git a/nn/runtime/test/generated/models/avg_pool_quant8.model.cpp b/nn/runtime/test/generated/models/avg_pool_quant8.model.cpp
index ea3bf49..562aa39 100644
--- a/nn/runtime/test/generated/models/avg_pool_quant8.model.cpp
+++ b/nn/runtime/test/generated/models/avg_pool_quant8.model.cpp
@@ -1,7 +1,7 @@
 // Generated file (from: avg_pool_quant8.mod.py). Do not edit
 void CreateModel(Model *model) {
   OperandType type1(Type::INT32, {});
-  OperandType type0(Type::TENSOR_QUANT8_ASYMM, 0.0f, 127.5f, {1, 2, 2, 1});
+  OperandType type0(Type::TENSOR_QUANT8_ASYMM, {1, 2, 2, 1}, 0.5f, 0);
   // Phase 1, operands
   auto op1 = model->addOperand(&type0);
   auto cons1 = model->addOperand(&type1);
diff --git a/nn/runtime/test/generated/models/concat_quant8.model.cpp b/nn/runtime/test/generated/models/concat_quant8.model.cpp
index 30e771d..4b3ff8e 100644
--- a/nn/runtime/test/generated/models/concat_quant8.model.cpp
+++ b/nn/runtime/test/generated/models/concat_quant8.model.cpp
@@ -1,8 +1,8 @@
 // Generated file (from: concat_quant8.mod.py). Do not edit
 void CreateModel(Model *model) {
   OperandType type1(Type::INT32, {});
-  OperandType type0(Type::TENSOR_QUANT8_ASYMM, 0.0f, 127.5f, {2, 3});
-  OperandType type2(Type::TENSOR_QUANT8_ASYMM, 0.0f, 127.5f, {2, 6});
+  OperandType type0(Type::TENSOR_QUANT8_ASYMM, {2, 3}, 0.5f, 0);
+  OperandType type2(Type::TENSOR_QUANT8_ASYMM, {2, 6}, 0.5f, 0);
   // Phase 1, operands
   auto op1 = model->addOperand(&type0);
   auto op2 = model->addOperand(&type0);
diff --git a/nn/runtime/test/generated/models/conv_quant8.model.cpp b/nn/runtime/test/generated/models/conv_quant8.model.cpp
index e148228..5e48efd 100644
--- a/nn/runtime/test/generated/models/conv_quant8.model.cpp
+++ b/nn/runtime/test/generated/models/conv_quant8.model.cpp
@@ -1,10 +1,10 @@
 // Generated file (from: conv_quant8.mod.py). Do not edit
 void CreateModel(Model *model) {
   OperandType type3(Type::INT32, {});
-  OperandType type1(Type::TENSOR_QUANT8_ASYMM, 0.0f, 127.5f, {1, 2, 2, 1});
-  OperandType type0(Type::TENSOR_QUANT8_ASYMM, 0.0f, 127.5f, {1, 3, 3, 1});
-  OperandType type4(Type::TENSOR_QUANT8_ASYMM, 0.0f, 255.0f, {1, 2, 2, 1});
-  OperandType type2(Type::TENSOR_QUANT8_ASYMM, 0.0f, 63.75f, {1});
+  OperandType type1(Type::TENSOR_QUANT8_ASYMM, {1, 2, 2, 1}, 0.5f, 0);
+  OperandType type4(Type::TENSOR_QUANT8_ASYMM, {1, 2, 2, 1}, 1.f, 0);
+  OperandType type0(Type::TENSOR_QUANT8_ASYMM, {1, 3, 3, 1}, 0.5f, 0);
+  OperandType type2(Type::TENSOR_QUANT8_ASYMM, {1}, 0.25f, 0);
   // Phase 1, operands
   auto op1 = model->addOperand(&type0);
   auto op2 = model->addOperand(&type1);
diff --git a/nn/runtime/test/generated/models/depth_to_space_quant8_1.model.cpp b/nn/runtime/test/generated/models/depth_to_space_quant8_1.model.cpp
index 53aa350..fe2b0fb 100644
--- a/nn/runtime/test/generated/models/depth_to_space_quant8_1.model.cpp
+++ b/nn/runtime/test/generated/models/depth_to_space_quant8_1.model.cpp
@@ -1,7 +1,7 @@
 // Generated file (from: depth_to_space_quant8_1.mod.py). Do not edit
 void CreateModel(Model *model) {
   OperandType type1(Type::INT32, {});
-  OperandType type0(Type::TENSOR_QUANT8_ASYMM, 0.0f, 127.5f, {1, 1, 1, 8});
+  OperandType type0(Type::TENSOR_QUANT8_ASYMM, {1, 1, 1, 8}, 0.5f, 0);
   OperandType type2(Type::TENSOR_QUANT8_ASYMM, {1, 2, 2, 2});
   // Phase 1, operands
   auto input = model->addOperand(&type0);
diff --git a/nn/runtime/test/generated/models/depthwise_conv_2d_quant8.model.cpp b/nn/runtime/test/generated/models/depthwise_conv_2d_quant8.model.cpp
index f2842c8..4aa76cf 100644
--- a/nn/runtime/test/generated/models/depthwise_conv_2d_quant8.model.cpp
+++ b/nn/runtime/test/generated/models/depthwise_conv_2d_quant8.model.cpp
@@ -1,9 +1,9 @@
 // Generated file (from: depthwise_conv_2d_quant8.mod.py). Do not edit
 void CreateModel(Model *model) {
   OperandType type2(Type::INT32, {});
-  OperandType type1(Type::TENSOR_INT32, 0.0f, 63.75f, {2});
-  OperandType type0(Type::TENSOR_QUANT8_ASYMM, 0.0f, 127.5f, {1, 2, 2, 2});
-  OperandType type3(Type::TENSOR_QUANT8_ASYMM, 0.0f, 255.0f, {2});
+  OperandType type1(Type::TENSOR_INT32, {2}, 0.25f, 0);
+  OperandType type0(Type::TENSOR_QUANT8_ASYMM, {1, 2, 2, 2}, 0.5f, 0);
+  OperandType type3(Type::TENSOR_QUANT8_ASYMM, {2}, 1.f, 0);
   // Phase 1, operands
   auto op1 = model->addOperand(&type0);
   auto op2 = model->addOperand(&type0);
diff --git a/nn/runtime/test/generated/models/dequantize.model.cpp b/nn/runtime/test/generated/models/dequantize.model.cpp
index 202f263..f0194b1 100644
--- a/nn/runtime/test/generated/models/dequantize.model.cpp
+++ b/nn/runtime/test/generated/models/dequantize.model.cpp
@@ -1,7 +1,7 @@
 // Generated file (from: dequantize.mod.py). Do not edit
 void CreateModel(Model *model) {
   OperandType type1(Type::TENSOR_FLOAT32, {1, 2, 2, 1});
-  OperandType type0(Type::TENSOR_QUANT8_ASYMM, 0.0f, 255.0f, {1, 2, 2, 1});
+  OperandType type0(Type::TENSOR_QUANT8_ASYMM, {1, 2, 2, 1}, 1.f, 0);
   // Phase 1, operands
   auto op1 = model->addOperand(&type0);
   auto op2 = model->addOperand(&type1);
diff --git a/nn/runtime/test/generated/models/fully_connected_quant8.model.cpp b/nn/runtime/test/generated/models/fully_connected_quant8.model.cpp
index 2b2e8a5..4a644fa 100644
--- a/nn/runtime/test/generated/models/fully_connected_quant8.model.cpp
+++ b/nn/runtime/test/generated/models/fully_connected_quant8.model.cpp
@@ -1,10 +1,10 @@
 // Generated file (from: fully_connected_quant8.mod.py). Do not edit
 void CreateModel(Model *model) {
   OperandType type4(Type::INT32, {});
-  OperandType type1(Type::TENSOR_QUANT8_ASYMM, 0.0f, 127.5f, {1, 1});
-  OperandType type0(Type::TENSOR_QUANT8_ASYMM, 0.0f, 127.5f, {3});
-  OperandType type3(Type::TENSOR_QUANT8_ASYMM, 0.0f, 255.0f,{3});
-  OperandType type2(Type::TENSOR_QUANT8_ASYMM, 0.0f, 63.75f, {1});
+  OperandType type1(Type::TENSOR_QUANT8_ASYMM, {1, 1}, 0.5f, 0);
+  OperandType type2(Type::TENSOR_QUANT8_ASYMM, {1}, 0.25f, 0);
+  OperandType type0(Type::TENSOR_QUANT8_ASYMM, {3}, 0.5f, 0);
+  OperandType type3(Type::TENSOR_QUANT8_ASYMM, {3}, 1.f, 0);
   // Phase 1, operands
   auto op1 = model->addOperand(&type0);
   auto op2 = model->addOperand(&type1);
diff --git a/nn/runtime/test/generated/models/hashtable_lookup_quant8.model.cpp b/nn/runtime/test/generated/models/hashtable_lookup_quant8.model.cpp
index f7f1575..7483868 100644
--- a/nn/runtime/test/generated/models/hashtable_lookup_quant8.model.cpp
+++ b/nn/runtime/test/generated/models/hashtable_lookup_quant8.model.cpp
@@ -2,8 +2,8 @@
 void CreateModel(Model *model) {
   OperandType type1(Type::TENSOR_FLOAT32, {3});
   OperandType type0(Type::TENSOR_FLOAT32, {4});
-  OperandType type2(Type::TENSOR_QUANT8_ASYMM, 0.0f, 127.5f, {3, 2});
-  OperandType type3(Type::TENSOR_QUANT8_ASYMM, 0.0f, 127.5f, {4, 2});
+  OperandType type2(Type::TENSOR_QUANT8_ASYMM, {3, 2}, 0.5f, 0);
+  OperandType type3(Type::TENSOR_QUANT8_ASYMM, {4, 2}, 0.5f, 0);
   // Phase 1, operands
   auto lookup = model->addOperand(&type0);
   auto key = model->addOperand(&type1);
diff --git a/nn/runtime/test/generated/models/max_pool_quant8.model.cpp b/nn/runtime/test/generated/models/max_pool_quant8.model.cpp
index fff1a61..da3e993 100644
--- a/nn/runtime/test/generated/models/max_pool_quant8.model.cpp
+++ b/nn/runtime/test/generated/models/max_pool_quant8.model.cpp
@@ -1,7 +1,7 @@
 // Generated file (from: max_pool_quant8.mod.py). Do not edit
 void CreateModel(Model *model) {
   OperandType type1(Type::INT32, {});
-  OperandType type0(Type::TENSOR_QUANT8_ASYMM, 0.0f, 127.5f, {1, 2, 2, 1});
+  OperandType type0(Type::TENSOR_QUANT8_ASYMM, {1, 2, 2, 1}, 0.5f, 0);
   // Phase 1, operands
   auto op1 = model->addOperand(&type0);
   auto cons1 = model->addOperand(&type1);
diff --git a/nn/runtime/test/generated/models/relu1_quant8.model.cpp b/nn/runtime/test/generated/models/relu1_quant8.model.cpp
index 124ba4e..75a407d 100644
--- a/nn/runtime/test/generated/models/relu1_quant8.model.cpp
+++ b/nn/runtime/test/generated/models/relu1_quant8.model.cpp
@@ -1,6 +1,6 @@
 // Generated file (from: relu1_quant8.mod.py). Do not edit
 void CreateModel(Model *model) {
-  OperandType type0(Type::TENSOR_QUANT8_ASYMM, 0.0f, 127.5f, {1, 2, 2, 1});
+  OperandType type0(Type::TENSOR_QUANT8_ASYMM, {1, 2, 2, 1}, 0.5f, 0);
   // Phase 1, operands
   auto op1 = model->addOperand(&type0);
   auto op2 = model->addOperand(&type0);
diff --git a/nn/runtime/test/generated/models/relu6_quant8.model.cpp b/nn/runtime/test/generated/models/relu6_quant8.model.cpp
index c8d3e15..fa0b41e 100644
--- a/nn/runtime/test/generated/models/relu6_quant8.model.cpp
+++ b/nn/runtime/test/generated/models/relu6_quant8.model.cpp
@@ -1,6 +1,6 @@
 // Generated file (from: relu6_quant8.mod.py). Do not edit
 void CreateModel(Model *model) {
-  OperandType type0(Type::TENSOR_QUANT8_ASYMM, 0.0f, 127.5f, {1, 2, 2, 1});
+  OperandType type0(Type::TENSOR_QUANT8_ASYMM, {1, 2, 2, 1}, 0.5f, 0);
   // Phase 1, operands
   auto op1 = model->addOperand(&type0);
   auto op2 = model->addOperand(&type0);
diff --git a/nn/runtime/test/generated/models/relu_quant8.model.cpp b/nn/runtime/test/generated/models/relu_quant8.model.cpp
index 77e5f32..3c2423b 100644
--- a/nn/runtime/test/generated/models/relu_quant8.model.cpp
+++ b/nn/runtime/test/generated/models/relu_quant8.model.cpp
@@ -1,6 +1,6 @@
 // Generated file (from: relu_quant8.mod.py). Do not edit
 void CreateModel(Model *model) {
-  OperandType type0(Type::TENSOR_QUANT8_ASYMM, -127.5f, 127.5f, {1, 2, 2, 1});
+  OperandType type0(Type::TENSOR_QUANT8_ASYMM, {1, 2, 2, 1}, 1.f, 128);
   // Phase 1, operands
   auto op1 = model->addOperand(&type0);
   auto op2 = model->addOperand(&type0);
diff --git a/nn/runtime/test/generated/models/reshape_quant8.model.cpp b/nn/runtime/test/generated/models/reshape_quant8.model.cpp
index 6eeb96f..19f30cc 100644
--- a/nn/runtime/test/generated/models/reshape_quant8.model.cpp
+++ b/nn/runtime/test/generated/models/reshape_quant8.model.cpp
@@ -1,8 +1,8 @@
 // Generated file (from: reshape_quant8.mod.py). Do not edit
 void CreateModel(Model *model) {
   OperandType type1(Type::TENSOR_INT32, {1});
-  OperandType type0(Type::TENSOR_QUANT8_ASYMM, 0.0f, 255.0f, {1, 1, 3, 3});
-  OperandType type2(Type::TENSOR_QUANT8_ASYMM, 0.0f, 255.0f, {9});
+  OperandType type0(Type::TENSOR_QUANT8_ASYMM, {1, 1, 3, 3}, 1.f, 0);
+  OperandType type2(Type::TENSOR_QUANT8_ASYMM, {9}, 1.f, 0);
   // Phase 1, operands
   auto op1 = model->addOperand(&type0);
   auto op2 = model->addOperand(&type1);
diff --git a/nn/runtime/test/generated/models/sigmoid_quant8.model.cpp b/nn/runtime/test/generated/models/sigmoid_quant8.model.cpp
index 21afe65..4f7009f 100644
--- a/nn/runtime/test/generated/models/sigmoid_quant8.model.cpp
+++ b/nn/runtime/test/generated/models/sigmoid_quant8.model.cpp
@@ -1,7 +1,7 @@
 // Generated file (from: sigmoid_quant8.mod.py). Do not edit
 void CreateModel(Model *model) {
-  OperandType type1(Type::TENSOR_QUANT8_ASYMM, 0.0f, 1.0f, {1, 2, 2, 1});
-  OperandType type0(Type::TENSOR_QUANT8_ASYMM, 0.0f, 127.0f, {1, 2, 2, 1});
+  OperandType type0(Type::TENSOR_QUANT8_ASYMM, {1, 2, 2, 1}, 0.5f, 0);
+  OperandType type1(Type::TENSOR_QUANT8_ASYMM, {1, 2, 2, 1}, 1.f / 256, 0);
   // Phase 1, operands
   auto op1 = model->addOperand(&type0);
   auto op3 = model->addOperand(&type1);
diff --git a/nn/runtime/test/generated/models/softmax_quant8_1.model.cpp b/nn/runtime/test/generated/models/softmax_quant8_1.model.cpp
index 6c4f9a0..04e664b 100644
--- a/nn/runtime/test/generated/models/softmax_quant8_1.model.cpp
+++ b/nn/runtime/test/generated/models/softmax_quant8_1.model.cpp
@@ -1,8 +1,8 @@
 // Generated file (from: softmax_quant8_1.mod.py). Do not edit
 void CreateModel(Model *model) {
   OperandType type1(Type::FLOAT32, {});
-  OperandType type2(Type::TENSOR_QUANT8_ASYMM, 0.0f, 1.0f, {1, 4});
-  OperandType type0(Type::TENSOR_QUANT8_ASYMM, 0.0f, 127.5f, {1, 4});
+  OperandType type0(Type::TENSOR_QUANT8_ASYMM, {1, 4}, 0.5f, 0);
+  OperandType type2(Type::TENSOR_QUANT8_ASYMM, {1, 4}, 1.f / 256, 0);
   // Phase 1, operands
   auto input = model->addOperand(&type0);
   auto beta = model->addOperand(&type1);
diff --git a/nn/runtime/test/generated/models/softmax_quant8_2.model.cpp b/nn/runtime/test/generated/models/softmax_quant8_2.model.cpp
index 4dfca7d..d51b685 100644
--- a/nn/runtime/test/generated/models/softmax_quant8_2.model.cpp
+++ b/nn/runtime/test/generated/models/softmax_quant8_2.model.cpp
@@ -1,8 +1,8 @@
 // Generated file (from: softmax_quant8_2.mod.py). Do not edit
 void CreateModel(Model *model) {
   OperandType type1(Type::FLOAT32, {});
-  OperandType type2(Type::TENSOR_QUANT8_ASYMM, 0.0f, 1.0f, {2, 5});
-  OperandType type0(Type::TENSOR_QUANT8_ASYMM, 0.0f, 127.5f, {2, 5});
+  OperandType type0(Type::TENSOR_QUANT8_ASYMM, {2, 5}, 0.5f, 0);
+  OperandType type2(Type::TENSOR_QUANT8_ASYMM, {2, 5}, 1.f / 256, 0);
   // Phase 1, operands
   auto input = model->addOperand(&type0);
   auto beta = model->addOperand(&type1);
diff --git a/nn/runtime/test/generated/models/space_to_depth_quant8_1.model.cpp b/nn/runtime/test/generated/models/space_to_depth_quant8_1.model.cpp
index c9b1a4b..fc3c8e0 100644
--- a/nn/runtime/test/generated/models/space_to_depth_quant8_1.model.cpp
+++ b/nn/runtime/test/generated/models/space_to_depth_quant8_1.model.cpp
@@ -1,8 +1,8 @@
 // Generated file (from: space_to_depth_quant8_1.mod.py). Do not edit
 void CreateModel(Model *model) {
   OperandType type1(Type::INT32, {});
-  OperandType type0(Type::TENSOR_QUANT8_ASYMM, 0.0f, 127.5f, {1, 2, 2, 2});
   OperandType type2(Type::TENSOR_QUANT8_ASYMM, {1, 1, 1, 8});
+  OperandType type0(Type::TENSOR_QUANT8_ASYMM, {1, 2, 2, 2}, 0.5f, 0);
   // Phase 1, operands
   auto input = model->addOperand(&type0);
   auto radius = model->addOperand(&type1);
diff --git a/nn/runtime/test/generated/vts_models/sigmoid_quant8.model.cpp b/nn/runtime/test/generated/vts_models/sigmoid_quant8.model.cpp
index 12ba2ea..eb5ba1b 100644
--- a/nn/runtime/test/generated/vts_models/sigmoid_quant8.model.cpp
+++ b/nn/runtime/test/generated/vts_models/sigmoid_quant8.model.cpp
@@ -6,7 +6,7 @@
             .type = OperandType::TENSOR_QUANT8_ASYMM,
             .dimensions = {1, 2, 2, 1},
             .numberOfConsumers = 1,
-            .scale = 0.4980392156862745f,
+            .scale = 0.5f,
             .zeroPoint = 0,
             .lifetime = OperandLifeTime::MODEL_INPUT,
             .location = {.poolIndex = 0, .offset = 0, .length = 0},
@@ -15,7 +15,7 @@
             .type = OperandType::TENSOR_QUANT8_ASYMM,
             .dimensions = {1, 2, 2, 1},
             .numberOfConsumers = 0,
-            .scale = 0.4980392156862745f,
+            .scale = 1.f / 256,
             .zeroPoint = 0,
             .lifetime = OperandLifeTime::MODEL_OUTPUT,
             .location = {.poolIndex = 0, .offset = 0, .length = 0},
diff --git a/nn/runtime/test/generated/vts_models/softmax_quant8_1.model.cpp b/nn/runtime/test/generated/vts_models/softmax_quant8_1.model.cpp
index 7cd6e27..faf8df3 100644
--- a/nn/runtime/test/generated/vts_models/softmax_quant8_1.model.cpp
+++ b/nn/runtime/test/generated/vts_models/softmax_quant8_1.model.cpp
@@ -24,7 +24,7 @@
             .type = OperandType::TENSOR_QUANT8_ASYMM,
             .dimensions = {1, 4},
             .numberOfConsumers = 0,
-            .scale = 0.5f,
+            .scale = 1.f / 256,
             .zeroPoint = 0,
             .lifetime = OperandLifeTime::MODEL_OUTPUT,
             .location = {.poolIndex = 0, .offset = 0, .length = 0},
@@ -42,7 +42,7 @@
     const std::vector<uint32_t> inputIndexes = {0};
     const std::vector<uint32_t> outputIndexes = {2};
     std::vector<uint8_t> operandValues = {
-      0, 0, 0, 0
+      172, 197, 39, 55
     };
     const std::vector<hidl_memory> pools = {};
 
diff --git a/nn/runtime/test/generated/vts_models/softmax_quant8_2.model.cpp b/nn/runtime/test/generated/vts_models/softmax_quant8_2.model.cpp
index 1d342c5..7ff021a 100644
--- a/nn/runtime/test/generated/vts_models/softmax_quant8_2.model.cpp
+++ b/nn/runtime/test/generated/vts_models/softmax_quant8_2.model.cpp
@@ -24,7 +24,7 @@
             .type = OperandType::TENSOR_QUANT8_ASYMM,
             .dimensions = {2, 5},
             .numberOfConsumers = 0,
-            .scale = 0.5f,
+            .scale = 1.f / 256,
             .zeroPoint = 0,
             .lifetime = OperandLifeTime::MODEL_OUTPUT,
             .location = {.poolIndex = 0, .offset = 0, .length = 0},
diff --git a/nn/runtime/test/specs/avg_pool_quant8.mod.py b/nn/runtime/test/specs/avg_pool_quant8.mod.py
index 88da209..e40a02c 100644
--- a/nn/runtime/test/specs/avg_pool_quant8.mod.py
+++ b/nn/runtime/test/specs/avg_pool_quant8.mod.py
@@ -1,10 +1,10 @@
 # model
 model = Model()
-i1 = Input("op1", "TENSOR_QUANT8_ASYMM", "0.0f, 127.5f, {1, 2, 2, 1}")
+i1 = Input("op1", "TENSOR_QUANT8_ASYMM", "{1, 2, 2, 1}, 0.5f, 0")
 cons1 = Int32Scalar("cons1", 1)
 pad0 = Int32Scalar("pad0", 0)
 act = Int32Scalar("act", 0)
-o = Output("op3", "TENSOR_QUANT8_ASYMM", "0.0f, 127.5f, {1, 2, 2, 1}")
+o = Output("op3", "TENSOR_QUANT8_ASYMM", "{1, 2, 2, 1}, 0.5f, 0")
 model = model.Operation("AVERAGE_POOL_2D", i1, pad0, pad0, pad0, pad0, cons1, cons1, cons1, cons1, act).To(o)
 
 # Example 1. Input in operand 0,
diff --git a/nn/runtime/test/specs/concat_quant8.mod.py b/nn/runtime/test/specs/concat_quant8.mod.py
index c9e7340..b9256f6 100644
--- a/nn/runtime/test/specs/concat_quant8.mod.py
+++ b/nn/runtime/test/specs/concat_quant8.mod.py
@@ -1,10 +1,10 @@
 # model
 model = Model()
-i1 = Input("op1", "TENSOR_QUANT8_ASYMM", "0.0f, 127.5f, {2, 3}") # input 0
-i2 = Input("op2", "TENSOR_QUANT8_ASYMM", "0.0f, 127.5f, {2, 3}") # input 1
+i1 = Input("op1", "TENSOR_QUANT8_ASYMM", "{2, 3}, 0.5f, 0") # input 0
+i2 = Input("op2", "TENSOR_QUANT8_ASYMM", "{2, 3}, 0.5f, 0") # input 1
 axis1 = Int32Scalar("axis1", 1)
 act0 = Int32Scalar("act0", 0)
-r = Output("result", "TENSOR_QUANT8_ASYMM", "0.0f, 127.5f, {2, 6}") # output
+r = Output("result", "TENSOR_QUANT8_ASYMM", "{2, 6}, 0.5f, 0") # output
 model = model.Operation("CONCATENATION", i1, i2, axis1, act0).To(r)
 
 # Example 1.
diff --git a/nn/runtime/test/specs/conv_quant8.mod.py b/nn/runtime/test/specs/conv_quant8.mod.py
index 615601e..9793987 100644
--- a/nn/runtime/test/specs/conv_quant8.mod.py
+++ b/nn/runtime/test/specs/conv_quant8.mod.py
@@ -1,13 +1,13 @@
 model = Model()
-i1 = Input("op1", "TENSOR_QUANT8_ASYMM", "0.0f, 127.5f, {1, 3, 3, 1}")
-f1 = Input("op2", "TENSOR_QUANT8_ASYMM", "0.0f, 127.5f, {1, 2, 2, 1}")
-b1 = Input("op3", "TENSOR_QUANT8_ASYMM", "0.0f, 63.75f, {1}")
+i1 = Input("op1", "TENSOR_QUANT8_ASYMM", "{1, 3, 3, 1}, 0.5f, 0")
+f1 = Input("op2", "TENSOR_QUANT8_ASYMM", "{1, 2, 2, 1}, 0.5f, 0")
+b1 = Input("op3", "TENSOR_QUANT8_ASYMM", "{1}, 0.25f, 0")
 pad0 = Int32Scalar("pad0", 0)
 act = Int32Scalar("act", 0)
 stride = Int32Scalar("stride", 1)
 # output dimension:
 #     (i1.height - f1.height + 1) x (i1.width - f1.width + 1)
-output = Output("op4", "TENSOR_QUANT8_ASYMM", "0.0f, 255.0f, {1, 2, 2, 1}")
+output = Output("op4", "TENSOR_QUANT8_ASYMM", "{1, 2, 2, 1}, 1.f, 0")
 
 model = model.Operation("CONV_2D", i1, f1, b1, pad0, pad0, pad0, pad0, stride, stride, act).To(output)
 
diff --git a/nn/runtime/test/specs/depth_to_space_quant8_1.mod.py b/nn/runtime/test/specs/depth_to_space_quant8_1.mod.py
index d062e10..c9414a8 100644
--- a/nn/runtime/test/specs/depth_to_space_quant8_1.mod.py
+++ b/nn/runtime/test/specs/depth_to_space_quant8_1.mod.py
@@ -1,5 +1,5 @@
 model = Model()
-i1 = Input("input", "TENSOR_QUANT8_ASYMM", "0.0f, 127.5f, {1, 1, 1, 8}")
+i1 = Input("input", "TENSOR_QUANT8_ASYMM", "{1, 1, 1, 8}, 0.5f, 0")
 block = Int32Scalar("radius", 2)
 output = Output("output", "TENSOR_QUANT8_ASYMM", "{1, 2, 2, 2}")
 
diff --git a/nn/runtime/test/specs/depthwise_conv_2d_quant8.mod.py b/nn/runtime/test/specs/depthwise_conv_2d_quant8.mod.py
index 457baa6..653caac 100644
--- a/nn/runtime/test/specs/depthwise_conv_2d_quant8.mod.py
+++ b/nn/runtime/test/specs/depthwise_conv_2d_quant8.mod.py
@@ -1,12 +1,12 @@
 model = Model()
-i1 = Input("op1", "TENSOR_QUANT8_ASYMM", "0.0f, 127.5f, {1, 2, 2, 2}")
-f1 = Input("op2", "TENSOR_QUANT8_ASYMM", "0.0f, 127.5f, {1, 2, 2, 2}")
-b1 = Input("op3", "TENSOR_INT32", "0.0f, 63.75f, {2}")
+i1 = Input("op1", "TENSOR_QUANT8_ASYMM", "{1, 2, 2, 2}, 0.5f, 0")
+f1 = Input("op2", "TENSOR_QUANT8_ASYMM", "{1, 2, 2, 2}, 0.5f, 0")
+b1 = Input("op3", "TENSOR_INT32", "{2}, 0.25f, 0")
 pad0 = Int32Scalar("pad0", 0)
 act = Int32Scalar("act", 0)
 stride = Int32Scalar("stride", 1)
 cm = Int32Scalar("channelMultiplier", 1)
-output = Output("op4", "TENSOR_QUANT8_ASYMM", "0.0f, 255.0f, {2}")
+output = Output("op4", "TENSOR_QUANT8_ASYMM", "{2}, 1.f, 0")
 
 model = model.Operation("DEPTHWISE_CONV_2D",
                         i1, f1, b1,
diff --git a/nn/runtime/test/specs/dequantize.mod.py b/nn/runtime/test/specs/dequantize.mod.py
index 6e491e3..bef7a32 100644
--- a/nn/runtime/test/specs/dequantize.mod.py
+++ b/nn/runtime/test/specs/dequantize.mod.py
@@ -1,6 +1,6 @@
 # model
 model = Model()
-i1 = Input("op1",  "TENSOR_QUANT8_ASYMM", "0.0f, 255.0f, {1, 2, 2, 1}")
+i1 = Input("op1",  "TENSOR_QUANT8_ASYMM", "{1, 2, 2, 1}, 1.f, 0")
 i2 = Output("op2", "TENSOR_FLOAT32", "{1, 2, 2, 1}")
 model = model.Operation("DEQUANTIZE", i1).To(i2)
 
diff --git a/nn/runtime/test/specs/fully_connected_quant8.mod.py b/nn/runtime/test/specs/fully_connected_quant8.mod.py
index 1dc108c..3713bd3 100644
--- a/nn/runtime/test/specs/fully_connected_quant8.mod.py
+++ b/nn/runtime/test/specs/fully_connected_quant8.mod.py
@@ -1,10 +1,10 @@
 # model
 
 model = Model()
-in0 = Input("op1", "TENSOR_QUANT8_ASYMM", "0.0f, 127.5f, {3}")
-weights = Input("op2", "TENSOR_QUANT8_ASYMM", "0.0f, 127.5f, {1, 1}")
-bias = Input("b0", "TENSOR_QUANT8_ASYMM", "0.0f, 63.75f, {1}")
-out0 = Output("op3", "TENSOR_QUANT8_ASYMM", "0.0f, 255.0f,{3}")
+in0 = Input("op1", "TENSOR_QUANT8_ASYMM", "{3}, 0.5f, 0")
+weights = Input("op2", "TENSOR_QUANT8_ASYMM", "{1, 1}, 0.5f, 0")
+bias = Input("b0", "TENSOR_QUANT8_ASYMM", "{1}, 0.25f, 0")
+out0 = Output("op3", "TENSOR_QUANT8_ASYMM", "{3}, 1.f, 0")
 act = Int32Scalar("act", 0)
 model = model.Operation("FULLY_CONNECTED", in0, weights, bias, act).To(out0)
 
diff --git a/nn/runtime/test/specs/hashtable_lookup_quant8.mod.py b/nn/runtime/test/specs/hashtable_lookup_quant8.mod.py
index c5778aa..9666026 100644
--- a/nn/runtime/test/specs/hashtable_lookup_quant8.mod.py
+++ b/nn/runtime/test/specs/hashtable_lookup_quant8.mod.py
@@ -29,8 +29,8 @@
 # lookup, key, and hits are all int32 stored as float32.
 lookup = Input("lookup", "TENSOR_FLOAT32", "{%d}" % (lookups))
 key = Input("key", "TENSOR_FLOAT32", "{%d}" % (keys))
-value = Input("value", "TENSOR_QUANT8_ASYMM", "0.0f, 127.5f, {%d, %d}" % (rows, features))
-output = Output("output", "TENSOR_QUANT8_ASYMM", "0.0f, 127.5f, {%d, %d}" % (lookups, features))
+value = Input("value", "TENSOR_QUANT8_ASYMM", "{%d, %d}, 0.5f, 0" % (rows, features))
+output = Output("output", "TENSOR_QUANT8_ASYMM", "{%d, %d}, 0.5f, 0" % (lookups, features))
 hits = Output("hits", "TENSOR_FLOAT32", "{%d}" % (lookups))
 model = model.Operation("HASHTABLE_LOOKUP", lookup, key, value).To(output)
 
diff --git a/nn/runtime/test/specs/max_pool_quant8.mod.py b/nn/runtime/test/specs/max_pool_quant8.mod.py
index 922dc77..b2edd06 100644
--- a/nn/runtime/test/specs/max_pool_quant8.mod.py
+++ b/nn/runtime/test/specs/max_pool_quant8.mod.py
@@ -1,10 +1,10 @@
 # model
 model = Model()
-i1 = Input("op1", "TENSOR_QUANT8_ASYMM", "0.0f, 127.5f, {1, 2, 2, 1}") # input 0
+i1 = Input("op1", "TENSOR_QUANT8_ASYMM", "{1, 2, 2, 1}, 0.5f, 0") # input 0
 cons1 = Int32Scalar("cons1", 1)
 pad0 = Int32Scalar("pad0", 0)
 act = Int32Scalar("act", 0)
-i3 = Output("op3", "TENSOR_QUANT8_ASYMM", "0.0f, 127.5f, {1, 2, 2, 1}") # output 0
+i3 = Output("op3", "TENSOR_QUANT8_ASYMM", "{1, 2, 2, 1}, 0.5f, 0") # output 0
 model = model.Operation("MAX_POOL_2D", i1, pad0, pad0, pad0, pad0, cons1, cons1, cons1, cons1, act).To(i3)
 # Example 1. Input in operand 0,
 input0 = {i1: # input 0
diff --git a/nn/runtime/test/specs/relu1_quant8.mod.py b/nn/runtime/test/specs/relu1_quant8.mod.py
index fccee7d..31a1a71 100644
--- a/nn/runtime/test/specs/relu1_quant8.mod.py
+++ b/nn/runtime/test/specs/relu1_quant8.mod.py
@@ -1,7 +1,7 @@
 # model
 model = Model()
-i1 = Input("op1", "TENSOR_QUANT8_ASYMM", "0.0f, 127.5f, {1, 2, 2, 1}") # input 0
-o = Output("op2", "TENSOR_QUANT8_ASYMM", "0.0f, 127.5f, {1, 2, 2, 1}") # output 0
+i1 = Input("op1", "TENSOR_QUANT8_ASYMM", "{1, 2, 2, 1}, 0.5f, 0") # input 0
+o = Output("op2", "TENSOR_QUANT8_ASYMM", "{1, 2, 2, 1}, 0.5f, 0") # output 0
 model = model.Operation("RELU1", i1).To(o)
 
 # Example 1. Input in operand 0,
diff --git a/nn/runtime/test/specs/relu6_quant8.mod.py b/nn/runtime/test/specs/relu6_quant8.mod.py
index 26a6f09..386d73b 100644
--- a/nn/runtime/test/specs/relu6_quant8.mod.py
+++ b/nn/runtime/test/specs/relu6_quant8.mod.py
@@ -1,7 +1,7 @@
 # model
 model = Model()
-i1 = Input("op1", "TENSOR_QUANT8_ASYMM", "0.0f, 127.5f, {1, 2, 2, 1}") # input 0
-i2 = Output("op2", "TENSOR_QUANT8_ASYMM", "0.0f, 127.5f, {1, 2, 2, 1}") # output 0
+i1 = Input("op1", "TENSOR_QUANT8_ASYMM", "{1, 2, 2, 1}, 0.5f, 0") # input 0
+i2 = Output("op2", "TENSOR_QUANT8_ASYMM", "{1, 2, 2, 1}, 0.5f, 0") # output 0
 model = model.Operation("RELU6", i1).To(i2)
 
 # Example 1. Input in operand 0,
diff --git a/nn/runtime/test/specs/relu_quant8.mod.py b/nn/runtime/test/specs/relu_quant8.mod.py
index da718b3..05337b6 100644
--- a/nn/runtime/test/specs/relu_quant8.mod.py
+++ b/nn/runtime/test/specs/relu_quant8.mod.py
@@ -1,9 +1,9 @@
 # model
 model = Model()
 # input 0
-i1 = Input("op1", "TENSOR_QUANT8_ASYMM", "-127.5f, 127.5f, {1, 2, 2, 1}")
+i1 = Input("op1", "TENSOR_QUANT8_ASYMM", "{1, 2, 2, 1}, 1.f, 128")
 # output 0
-o = Output("op2", "TENSOR_QUANT8_ASYMM", "-127.5f, 127.5f, {1, 2, 2, 1}")
+o = Output("op2", "TENSOR_QUANT8_ASYMM", "{1, 2, 2, 1}, 1.f, 128")
 model = model.Operation("RELU", i1).To(o)
 
 # Example 1. Input in operand 0,
diff --git a/nn/runtime/test/specs/reshape_quant8.mod.py b/nn/runtime/test/specs/reshape_quant8.mod.py
index 043b86e..f1f6814 100644
--- a/nn/runtime/test/specs/reshape_quant8.mod.py
+++ b/nn/runtime/test/specs/reshape_quant8.mod.py
@@ -1,9 +1,9 @@
 # model
 model = Model()
 # a line of 3 pixels, 3 components/pixel
-i1 = Input("op1", "TENSOR_QUANT8_ASYMM", "0.0f, 255.0f, {1, 1, 3, 3}")
+i1 = Input("op1", "TENSOR_QUANT8_ASYMM", "{1, 1, 3, 3}, 1.f, 0")
 i2 = Input("op2", "TENSOR_INT32", "{1}")
-i3 = Output("op3", "TENSOR_QUANT8_ASYMM", "0.0f, 255.0f, {9}")
+i3 = Output("op3", "TENSOR_QUANT8_ASYMM", "{9}, 1.f, 0")
 model = model.Operation("RESHAPE", i1, i2).To(i3)
 
 # Example 1. Input in operand 0,
diff --git a/nn/runtime/test/specs/sigmoid_quant8.mod.py b/nn/runtime/test/specs/sigmoid_quant8.mod.py
index b26f43c..ebb5af2 100644
--- a/nn/runtime/test/specs/sigmoid_quant8.mod.py
+++ b/nn/runtime/test/specs/sigmoid_quant8.mod.py
@@ -1,8 +1,8 @@
 # model
 model = Model()
 
-i1 = Input("op1", "TENSOR_QUANT8_ASYMM", "0.0f, 127.0f, {1, 2, 2, 1}")
-i3 = Output("op3", "TENSOR_QUANT8_ASYMM", "0.0f, 1.0f, {1, 2, 2, 1}")
+i1 = Input("op1", "TENSOR_QUANT8_ASYMM", "{1, 2, 2, 1}, 0.5f, 0")
+i3 = Output("op3", "TENSOR_QUANT8_ASYMM", "{1, 2, 2, 1}, 1.f / 256, 0")
 model = model.Operation("LOGISTIC", i1).To(i3)
 
 # Example 1. Input in operand 0,
diff --git a/nn/runtime/test/specs/softmax_quant8_1.mod.py b/nn/runtime/test/specs/softmax_quant8_1.mod.py
index 8fa0738..fcf426f 100644
--- a/nn/runtime/test/specs/softmax_quant8_1.mod.py
+++ b/nn/runtime/test/specs/softmax_quant8_1.mod.py
@@ -1,9 +1,9 @@
 # model
 model = Model()
 
-i1 = Input("input", "TENSOR_QUANT8_ASYMM", "0.0f, 127.5f, {1, 4}") # batch = 1, depth = 1
+i1 = Input("input", "TENSOR_QUANT8_ASYMM", "{1, 4}, 0.5f, 0") # batch = 1, depth = 1
 beta = Float32Scalar("beta", 0.00001) # close to 0
-output = Output("output", "TENSOR_QUANT8_ASYMM", "0.0f, 1.0f, {1, 4}")
+output = Output("output", "TENSOR_QUANT8_ASYMM", "{1, 4}, 1.f / 256, 0")
 
 # model 1
 model = model.Operation("SOFTMAX", i1, beta).To(output)
diff --git a/nn/runtime/test/specs/softmax_quant8_2.mod.py b/nn/runtime/test/specs/softmax_quant8_2.mod.py
index f12eec5..40f7356 100644
--- a/nn/runtime/test/specs/softmax_quant8_2.mod.py
+++ b/nn/runtime/test/specs/softmax_quant8_2.mod.py
@@ -1,9 +1,9 @@
 # model
 model = Model()
 
-i1 = Input("input", "TENSOR_QUANT8_ASYMM", "0.0f, 127.5f, {2, 5}") # batch = 2, depth = 5
+i1 = Input("input", "TENSOR_QUANT8_ASYMM", "{2, 5}, 0.5f, 0") # batch = 2, depth = 5
 beta = Float32Scalar("beta", 1.)
-output = Output("output", "TENSOR_QUANT8_ASYMM", "0.0f, 1.0f, {2, 5}")
+output = Output("output", "TENSOR_QUANT8_ASYMM", "{2, 5}, 1.f / 256, 0")
 
 # model 1
 model = model.Operation("SOFTMAX", i1, beta).To(output)
diff --git a/nn/runtime/test/specs/space_to_depth_quant8_1.mod.py b/nn/runtime/test/specs/space_to_depth_quant8_1.mod.py
index e814240..c0d56af 100644
--- a/nn/runtime/test/specs/space_to_depth_quant8_1.mod.py
+++ b/nn/runtime/test/specs/space_to_depth_quant8_1.mod.py
@@ -1,5 +1,5 @@
 model = Model()
-i1 = Input("input", "TENSOR_QUANT8_ASYMM", "0.0f, 127.5f, {1, 2, 2, 2}")
+i1 = Input("input", "TENSOR_QUANT8_ASYMM", "{1, 2, 2, 2}, 0.5f, 0")
 block = Int32Scalar("radius", 2)
 output = Output("output", "TENSOR_QUANT8_ASYMM", "{1, 1, 1, 8}")