Merge "Consolidate Manager's prepareModel methods"
diff --git a/nn/common/operations/DepthwiseConv2D.cpp b/nn/common/operations/DepthwiseConv2D.cpp
index 84fc4cb..f620533 100644
--- a/nn/common/operations/DepthwiseConv2D.cpp
+++ b/nn/common/operations/DepthwiseConv2D.cpp
@@ -16,6 +16,9 @@
#define LOG_TAG "Operations"
+#include <algorithm>
+#include <vector>
+
#include "CpuOperationUtils.h"
#include "Operations.h"
@@ -83,11 +86,11 @@
0 /*width_offset*/, 0 /*height_offset*/},
.stride_width = static_cast<int16>(strideWidth),
.stride_height = static_cast<int16>(strideHeight),
+ .dilation_width_factor = static_cast<int16>(dilationWidthFactor),
+ .dilation_height_factor = static_cast<int16>(dilationHeightFactor),
.depth_multiplier = static_cast<int16>(depthMultiplier),
.float_activation_min = output_activation_min,
.float_activation_max = output_activation_max,
- .dilation_width_factor = static_cast<int16>(dilationWidthFactor),
- .dilation_height_factor = static_cast<int16>(dilationHeightFactor),
};
NNTRACE_COMP_SWITCH("optimized_ops::DepthwiseConv");
tflite::optimized_ops::DepthwiseConv(
@@ -128,16 +131,16 @@
0 /*width_offset*/, 0 /*height_offset*/},
.stride_width = static_cast<int16>(strideWidth),
.stride_height = static_cast<int16>(strideHeight),
- .depth_multiplier = static_cast<int16>(depthMultiplier),
- .quantized_activation_min = output_activation_min,
- .quantized_activation_max = output_activation_max,
.dilation_width_factor = static_cast<int16>(dilationWidthFactor),
.dilation_height_factor = static_cast<int16>(dilationHeightFactor),
+ .depth_multiplier = static_cast<int16>(depthMultiplier),
.input_offset = -inputShape.offset,
.weights_offset = -filterShape.offset,
.output_offset = outputShape.offset,
- .output_shift = -output_shift,
.output_multiplier = output_multiplier,
+ .output_shift = -output_shift,
+ .quantized_activation_min = output_activation_min,
+ .quantized_activation_max = output_activation_max,
};
NNTRACE_COMP_SWITCH("optimized_ops::DepthwiseConv");
tflite::reference_ops::DepthwiseConv(params, convertShapeToTflshape(inputShape), inputData,
diff --git a/nn/common/operations/Pooling.cpp b/nn/common/operations/Pooling.cpp
index 5492a32..8e31aec 100644
--- a/nn/common/operations/Pooling.cpp
+++ b/nn/common/operations/Pooling.cpp
@@ -16,6 +16,8 @@
#define LOG_TAG "Operations"
+#include <vector>
+
#include "CpuOperationUtils.h"
#include "HalInterfaces.h"
#include "OperationResolver.h"
@@ -100,14 +102,15 @@
tflite::PoolParams toTfliteParam(const Shape& output) const {
tflite::PoolParams params = {
+ .padding_values = {.width = static_cast<int16_t>(padding_left),
+ .height = static_cast<int16_t>(padding_top),
+ .width_offset = 0,
+ .height_offset = 0},
.stride_height = stride_height,
.stride_width = stride_width,
.filter_height = filter_height,
.filter_width = filter_width,
- .padding_values = {.height = static_cast<int16_t>(padding_top),
- .width = static_cast<int16_t>(padding_left),
- .width_offset = 0,
- .height_offset = 0}};
+ };
if (output.type == OperandType::TENSOR_QUANT8_ASYMM) {
int32_t output_activation_min = 0;
int32_t output_activation_max = 0;
diff --git a/nn/runtime/ExecutionBuilder.cpp b/nn/runtime/ExecutionBuilder.cpp
index 561e912..97e847b 100644
--- a/nn/runtime/ExecutionBuilder.cpp
+++ b/nn/runtime/ExecutionBuilder.cpp
@@ -299,79 +299,62 @@
}
// Attempt synchronous execution of full model on CPU.
-// Ensure that executionCallback->notify() is called.
// TODO: How should we handle timing in this case?
// For Q this is irrelevant: We only support timing in conjunction
// with an explicit device list; and we do not support CPU fallback
// with an explicit device list. See CompilationBuilder::mExplicitDeviceList.
-static void cpuFallbackFull(ExecutionBuilder* executionBuilder,
- const sp<ExecutionCallback>& executionCallback) {
+static int cpuFallbackFull(ExecutionBuilder* executionBuilder,
+ sp<ExecutionCallback>* fallbackCallback) {
CHECK(executionBuilder != nullptr);
- CHECK(executionCallback != nullptr);
+ CHECK(fallbackCallback != nullptr);
+ *fallbackCallback = nullptr;
+
NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "cpuFallbackFull");
VLOG(EXECUTION) << "cpuFallbackFull";
+
+ // Get fallback executor.
StepExecutor executor(executionBuilder, executionBuilder->getModel(),
DeviceManager::getCpuDevice(), /*preparedModel=*/nullptr);
executor.mapInputsAndOutputsTrivially();
- sp<ExecutionCallback> fallbackCallback;
- int n = executor.startComputeOnCpuFallback(&fallbackCallback);
- if (n != ANEURALNETWORKS_NO_ERROR) {
- executionCallback->notify(convertResultCodeToErrorStatus(n), {}, kNoTiming);
- return;
- }
- fallbackCallback->wait();
- executionCallback->notify(fallbackCallback->getStatus(), fallbackCallback->getOutputShapes(),
- fallbackCallback->getTiming());
+
+ // Attempt fallback execution.
+ NN_RETURN_IF_ERROR(executor.startComputeOnCpuFallback(fallbackCallback));
+ CHECK(*fallbackCallback != nullptr);
+ (*fallbackCallback)->wait();
+ return ANEURALNETWORKS_NO_ERROR;
}
// Attempt synchronous execution on CPU.
-// (1) First, attempt to execute this step on CPU. If successful,
-// return true. (Do not call executionCallback->notify().)
-// (2) If unsuccessful, and the ExecutionPlan is compound, attempt to execute the
-// full model on CPU, ensure that executionCallback->notify() is called, and return
-// false.
+// fallbackExecutor is non-null if and only if ANEURALNETWORKS_NO_ERROR is returned.
+// fallbackCallback is non-null if and only if ANEURALNETWORKS_NO_ERROR is returned.
// TODO: How should we handle timing in this case?
// For Q this is irrelevant: We only support timing in conjunction
// with an explicit device list; and we do not support CPU fallback
// with an explicit device list. See CompilationBuilder::mExplicitDeviceList.
-static bool cpuFallbackPartial(ExecutionBuilder* executionBuilder, const ExecutionPlan* plan,
- std::shared_ptr<ExecutionPlan::Controller> controller,
- const sp<ExecutionCallback>& executionCallback,
- std::vector<OutputShape>* outputShapes) {
- CHECK(executionBuilder != nullptr);
+static int cpuFallbackPartial(const ExecutionPlan* plan,
+ std::shared_ptr<ExecutionPlan::Controller> controller,
+ std::shared_ptr<StepExecutor>* fallbackExecutor,
+ sp<ExecutionCallback>* fallbackCallback) {
CHECK(plan != nullptr);
- CHECK(executionCallback != nullptr);
- CHECK(outputShapes != nullptr);
+ CHECK(fallbackExecutor != nullptr);
+ *fallbackExecutor = nullptr;
+ CHECK(fallbackCallback != nullptr);
+ *fallbackCallback = nullptr;
+
NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "cpuFallbackPartial");
VLOG(EXECUTION) << "cpuFallbackPartial";
+
+ // Get fallback executor.
std::shared_ptr<StepExecutor> executor;
- int n = plan->fallback(controller, &executor);
- if (n != ANEURALNETWORKS_NO_ERROR || executor->isCpu()) {
- cpuFallbackFull(executionBuilder, executionCallback);
- return false;
- }
- sp<ExecutionCallback> fallbackCallback;
- if (executor->startComputeOnCpuFallback(&fallbackCallback) != ANEURALNETWORKS_NO_ERROR) {
- cpuFallbackFull(executionBuilder, executionCallback);
- return false;
- }
- fallbackCallback->wait();
- ErrorStatus status = fallbackCallback->getStatus();
- const auto& stepOutputShapes = fallbackCallback->getOutputShapes();
- if (!executor->updateOutputShapes(stepOutputShapes, outputShapes)) {
- status = ErrorStatus::GENERAL_FAILURE;
- }
- if (status != ErrorStatus::NONE) {
- // Do not fallback twice if the ExecutionPlan is simple.
- // OUTPUT_INSUFFICIENT_SIZE is not recoverable
- if (plan->isSimple() || status == ErrorStatus::OUTPUT_INSUFFICIENT_SIZE) {
- executionCallback->notify(status, *outputShapes, kNoTiming);
- } else {
- cpuFallbackFull(executionBuilder, executionCallback);
- }
- return false;
- }
- return true;
+ NN_RETURN_IF_ERROR(plan->fallback(controller, &executor));
+ CHECK(executor != nullptr);
+
+ // Attempt fallback execution.
+ NN_RETURN_IF_ERROR(executor->startComputeOnCpuFallback(fallbackCallback));
+ CHECK(*fallbackCallback != nullptr);
+ (*fallbackCallback)->wait();
+ *fallbackExecutor = executor;
+ return ANEURALNETWORKS_NO_ERROR;
}
static void asyncStartComputePartitioned(ExecutionBuilder* executionBuilder,
@@ -387,74 +370,142 @@
// Disallow fallback when the ExecutionPlan is simple on CPU.
allowFallback &= !plan->isSimpleCpu();
executionBuilder->initializeOutputShapes(&outputShapes);
+
while (true) {
- std::shared_ptr<StepExecutor> executor;
VLOG(EXECUTION) << "looking for next StepExecutor";
- std::shared_ptr<ExecutionBurstController> burstController = nullptr;
+
+ // Get the current step of the execution.
+ std::shared_ptr<StepExecutor> executor;
+ std::shared_ptr<ExecutionBurstController> burstController;
int n = plan->next(controller, &executor, &burstController);
if (n != ANEURALNETWORKS_NO_ERROR) {
- if (allowFallback) {
- cpuFallbackFull(executionBuilder, executionCallback);
- } else {
- executionCallback->notify(convertResultCodeToErrorStatus(n), {}, kNoTiming);
- }
+ if (allowFallback) break;
+ executionCallback->notify(convertResultCodeToErrorStatus(n), {}, kNoTiming);
return;
}
+
+ // If the code reached the end of the plan without error, then return
+ // with no error.
if (executor == nullptr) {
executionCallback->notify(ErrorStatus::NONE, outputShapes, timing);
return;
}
+ const bool executorIsCpu = executor->isCpu();
+ // Attempt to execute a single step of the execution.
sp<ExecutionCallback> stepCallback;
n = executor->startCompute(&stepCallback, burstController);
- if (n != ANEURALNETWORKS_NO_ERROR) {
- if (allowFallback) {
- if (cpuFallbackPartial(executionBuilder, plan, controller, executionCallback,
- &outputShapes)) {
- // Successfully executed one step on CPU.
- continue;
- } else {
- // Either successfully executed entire plan on
- // CPU, or tried and failed to do so.
- return;
- }
- } else {
- executionCallback->notify(convertResultCodeToErrorStatus(n), {}, kNoTiming);
- return;
+
+ // Immediately end execution if there was an error and fallback is not
+ // allowed.
+ if (n != ANEURALNETWORKS_NO_ERROR && !allowFallback) {
+ executionCallback->notify(convertResultCodeToErrorStatus(n), {}, kNoTiming);
+ return;
+ }
+
+ // If execution successfully launched, process the execution.
+ if (n == ANEURALNETWORKS_NO_ERROR) {
+ stepCallback->wait();
+ ErrorStatus status = stepCallback->getStatus();
+ const auto& stepOutputShapes = stepCallback->getOutputShapes();
+
+ // Update global outputs.
+ if (!executor->updateOutputShapes(stepOutputShapes, &outputShapes)) {
+ status = ErrorStatus::GENERAL_FAILURE;
}
- }
- stepCallback->wait();
- ErrorStatus status = stepCallback->getStatus();
- const auto& stepOutputShapes = stepCallback->getOutputShapes();
- if (!executor->updateOutputShapes(stepOutputShapes, &outputShapes)) {
- status = ErrorStatus::GENERAL_FAILURE;
- }
- if (status == ErrorStatus::NONE) {
- // We only support collection of timing information in the case of a
- // single step, so it's safe to just keep track of the last step's
- // timing information.
- timing = stepCallback->getTiming();
- } else {
- // OUTPUT_INSUFFICIENT_SIZE is not recoverable
- if (allowFallback && status != ErrorStatus::OUTPUT_INSUFFICIENT_SIZE) {
- if (cpuFallbackPartial(executionBuilder, plan, controller, executionCallback,
- &outputShapes)) {
- // Successfully executed one step on CPU.
- continue;
- } else {
- // Either successfully executed entire plan on
- // CPU, or tried and failed to do so.
- return;
- }
- } else if (status == ErrorStatus::OUTPUT_INSUFFICIENT_SIZE) {
+
+ // If execution was successful, continue to next step.
+ if (status == ErrorStatus::NONE) {
+ // We only support collection of timing information in the case of a
+ // single step, so it's safe to just keep track of the last step's
+ // timing information.
+ timing = stepCallback->getTiming();
+ continue;
+ }
+
+ // OUTPUT_INSUFFICIENT_SIZE is not recoverable, so end execution.
+ if (status == ErrorStatus::OUTPUT_INSUFFICIENT_SIZE) {
executionCallback->notify(status, outputShapes, kNoTiming);
return;
- } else {
+ }
+
+ // If fallback is not allowed and there was an error, end execution.
+ if (!allowFallback) {
executionCallback->notify(status, {}, kNoTiming);
return;
}
+
+ // Propagate error to fallback path.
+ n = convertErrorStatusToResultCode(status);
}
+
+ // If CPU execution was already attempted, either:
+ // (1) perform a full fallback if the plan is not simple, or
+ // (2) return from the function with an error
+ if (executorIsCpu) {
+ if (!plan->isSimple()) break;
+ executionCallback->notify(convertResultCodeToErrorStatus(n), {}, kNoTiming);
+ return;
+ }
+
+ // If the code reaches this point, attempt a partial fallback to CPU.
+ CHECK(allowFallback);
+ std::shared_ptr<StepExecutor> fallbackExecutor;
+ sp<ExecutionCallback> fallbackCallback;
+ n = cpuFallbackPartial(plan, controller, &fallbackExecutor, &fallbackCallback);
+
+ // Immediately fall back to full CPU execution if there was an error with
+ // the partial CPU fallback.
+ if (n != ANEURALNETWORKS_NO_ERROR) {
+ break;
+ }
+
+ // Get fallback execution results.
+ ErrorStatus fallbackStatus = fallbackCallback->getStatus();
+ const auto& fallbackOutputShapes = fallbackCallback->getOutputShapes();
+
+ // Update global outputs.
+ if (!fallbackExecutor->updateOutputShapes(fallbackOutputShapes, &outputShapes)) {
+ fallbackStatus = ErrorStatus::GENERAL_FAILURE;
+ }
+
+ // If execution was successful, continue to next step.
+ if (fallbackStatus == ErrorStatus::NONE) {
+ // We only support collection of timing information in the case of a
+ // single step, so it's safe to just keep track of the last step's
+ // timing information.
+ timing = fallbackCallback->getTiming();
+ continue;
+ }
+
+ // OUTPUT_INSUFFICIENT_SIZE is not recoverable, so end execution.
+ if (fallbackStatus == ErrorStatus::OUTPUT_INSUFFICIENT_SIZE) {
+ executionCallback->notify(fallbackStatus, outputShapes, kNoTiming);
+ return;
+ }
+
+ // Do not fall back twice if the ExecutionPlan is simple.
+ if (plan->isSimple()) {
+ executionCallback->notify(fallbackStatus, {}, kNoTiming);
+ return;
+ }
+
+ // If the code reaches this point, then there was an error with the
+ // fallback. In this case, attempt full fallback.
+ break;
}
+
+ // If the code has reached this point, a potentially recoverable error
+ // occurred during the step executions. Attempt a full execution
+ // fallback on the CPU instead.
+ sp<ExecutionCallback> fallbackCallback;
+ int n = cpuFallbackFull(executionBuilder, &fallbackCallback);
+ if (n != ANEURALNETWORKS_NO_ERROR) {
+ executionCallback->notify(convertResultCodeToErrorStatus(n), {}, kNoTiming);
+ return;
+ }
+ executionCallback->notify(fallbackCallback->getStatus(), fallbackCallback->getOutputShapes(),
+ fallbackCallback->getTiming());
}
int ExecutionBuilder::compute(sp<ExecutionCallback>* synchronizationCallback,
diff --git a/nn/runtime/test/fuzzing/TestRandomGraph.cpp b/nn/runtime/test/fuzzing/TestRandomGraph.cpp
index 9cf4dea..5d85d11 100644
--- a/nn/runtime/test/fuzzing/TestRandomGraph.cpp
+++ b/nn/runtime/test/fuzzing/TestRandomGraph.cpp
@@ -17,6 +17,9 @@
#include <gtest/gtest.h>
#include <algorithm>
+#include <map>
+#include <memory>
+#include <set>
#include <string>
#include "TestNeuralNetworksWrapper.h"
@@ -396,38 +399,128 @@
// Most of these operations fall into categories of reshape or selection, e.g. RESHAPE, GATHER.
// Additionally, operations with only logical or comparison arithmetic also use this criteria, e.g.
// EQUAL, ARGMAX, TOPK_V2.
-const AccuracyCriteria kStrictCriteria = {
- .float32 = {.atol = 1e-6f, .rtol = 1e-6f, .bias = 1e-7f, .mse = 1e-10f},
- .float16 = {.atol = 1e-3f, .rtol = 1e-3f, .bias = 1e-4f, .mse = 1e-8f},
- .int32 = {.atol = 1},
- .quant8Asymm = {.atol = 1, .bias = 0.1f, .mse = 0.1f},
- .quant8Symm = {.atol = 1, .bias = 0.1f, .mse = 0.1f},
- .quant16Asymm = {.atol = 1, .bias = 0.1f, .mse = 0.1f},
- .quant16Symm = {.atol = 1, .bias = 0.1f, .mse = 0.1f}};
+const AccuracyCriteria kStrictCriteria = {.float32 =
+ {
+ .bias = 1e-7f,
+ .mse = 1e-10f,
+ .atol = 1e-6f,
+ .rtol = 1e-6f,
+ },
+ .float16 =
+ {
+ .bias = 1e-4f,
+ .mse = 1e-8f,
+ .atol = 1e-3f,
+ .rtol = 1e-3f,
+ },
+ .int32 = {.atol = 1},
+ .quant8Asymm =
+ {
+ .bias = 0.1f,
+ .mse = 0.1f,
+ .atol = 1,
+ },
+ .quant8Symm =
+ {
+ .bias = 0.1f,
+ .mse = 0.1f,
+ .atol = 1,
+ },
+ .quant16Asymm =
+ {
+ .bias = 0.1f,
+ .mse = 0.1f,
+ .atol = 1,
+ },
+ .quant16Symm = {
+ .bias = 0.1f,
+ .mse = 0.1f,
+ .atol = 1,
+ }};
// This is for operations that only do simple and single computation on buffer values, such as
// addition, multiplication, or requantization. Most of these operations fall into categories of
// broadcast or elementwise, e.g ADD, FLOOR.
-const AccuracyCriteria kMediumCriteria = {
- .float32 = {.atol = 1e-5f, .rtol = 1e-5f, .bias = 1e-6f, .mse = 1e-8f},
- .float16 = {.atol = 1e-2f, .rtol = 1e-2f, .bias = 1e-3f, .mse = 1e-6f},
- .int32 = {.atol = 1},
- .quant8Asymm = {.atol = 2, .bias = 0.5f, .mse = 0.5f},
- .quant8Symm = {.atol = 2, .bias = 0.5f, .mse = 0.5f},
- .quant16Asymm = {.atol = 2, .bias = 0.5f, .mse = 0.5f},
- .quant16Symm = {.atol = 2, .bias = 0.5f, .mse = 0.5f}};
+const AccuracyCriteria kMediumCriteria = {.float32 =
+ {
+ .bias = 1e-6f,
+ .mse = 1e-8f,
+ .atol = 1e-5f,
+ .rtol = 1e-5f,
+ },
+ .float16 =
+ {
+ .bias = 1e-3f,
+ .mse = 1e-6f,
+ .atol = 1e-2f,
+ .rtol = 1e-2f,
+ },
+ .int32 = {.atol = 1},
+ .quant8Asymm =
+ {
+ .bias = 0.5f,
+ .mse = 0.5f,
+ .atol = 2,
+ },
+ .quant8Symm =
+ {
+ .bias = 0.5f,
+ .mse = 0.5f,
+ .atol = 2,
+ },
+ .quant16Asymm =
+ {
+ .bias = 0.5f,
+ .mse = 0.5f,
+ .atol = 2,
+ },
+ .quant16Symm = {
+ .bias = 0.5f,
+ .mse = 0.5f,
+ .atol = 2,
+ }};
// This is for operations that involve sophisticated computations on buffer values, either a single
// but complex transformation, e.g. LOGISTIC, or multiple transformations with accumulated errors,
// e.g. CONV_2D, REDUCE_*.
-const AccuracyCriteria kRelaxedCriteria = {
- .float32 = {.atol = 1e-3f, .rtol = 1e-3f, .bias = 2e-5f, .mse = 1e-7f},
- .float16 = {.atol = 1.0f, .rtol = 1.0f, .bias = 5e-3f, .mse = 1e-4f},
- .int32 = {.atol = 1},
- .quant8Asymm = {.atol = 10, .bias = 1.5, .mse = 1.5},
- .quant8Symm = {.atol = 10, .bias = 1.5, .mse = 1.5},
- .quant16Asymm = {.atol = 10, .bias = 1.5, .mse = 1.5},
- .quant16Symm = {.atol = 10, .bias = 1.5, .mse = 1.5}};
+const AccuracyCriteria kRelaxedCriteria = {.float32 =
+ {
+ .bias = 2e-5f,
+ .mse = 1e-7f,
+ .atol = 1e-3f,
+ .rtol = 1e-3f,
+ },
+ .float16 =
+ {
+ .bias = 5e-3f,
+ .mse = 1e-4f,
+ .atol = 1.0f,
+ .rtol = 1.0f,
+ },
+ .int32 = {.atol = 1},
+ .quant8Asymm =
+ {
+ .bias = 1.5,
+ .mse = 1.5,
+ .atol = 10,
+ },
+ .quant8Symm =
+ {
+ .bias = 1.5,
+ .mse = 1.5,
+ .atol = 10,
+ },
+ .quant16Asymm =
+ {
+ .bias = 1.5,
+ .mse = 1.5,
+ .atol = 10,
+ },
+ .quant16Symm = {
+ .bias = 1.5,
+ .mse = 1.5,
+ .atol = 10,
+ }};
/*-- NNAPI 1.0 Operations ---------------------------------------------------*/
@@ -572,23 +665,83 @@
TEST_SINGLE_OPERATION(ROI_POOLING, V1_2, kRelaxedCriteria);
TEST_SINGLE_OPERATION(HEATMAP_MAX_KEYPOINT, V1_2, kRelaxedCriteria);
-const AccuracyCriteria kSmallGraphCriteria = {
- .float32 = {.atol = 1e-2f, .rtol = 1e-2f, .bias = 2e-5f, .mse = 1e-7f},
- .float16 = {.atol = 1.0f, .rtol = 1.0f, .bias = 5e-3f, .mse = 1e-4f},
- .int32 = {.atol = 1},
- .quant8Asymm = {.atol = 12, .bias = 2, .mse = 2},
- .quant8Symm = {.atol = 12, .bias = 2, .mse = 2},
- .quant16Asymm = {.atol = 12, .bias = 2, .mse = 2},
- .quant16Symm = {.atol = 12, .bias = 2, .mse = 2}};
+const AccuracyCriteria kSmallGraphCriteria = {.float32 =
+ {
+ .bias = 2e-5f,
+ .mse = 1e-7f,
+ .atol = 1e-2f,
+ .rtol = 1e-2f,
+ },
+ .float16 =
+ {
+ .bias = 5e-3f,
+ .mse = 1e-4f,
+ .atol = 1.0f,
+ .rtol = 1.0f,
+ },
+ .int32 = {.atol = 1},
+ .quant8Asymm =
+ {
+ .bias = 2,
+ .mse = 2,
+ .atol = 12,
+ },
+ .quant8Symm =
+ {
+ .bias = 2,
+ .mse = 2,
+ .atol = 12,
+ },
+ .quant16Asymm =
+ {
+ .bias = 2,
+ .mse = 2,
+ .atol = 12,
+ },
+ .quant16Symm = {
+ .bias = 2,
+ .mse = 2,
+ .atol = 12,
+ }};
-const AccuracyCriteria kLargeGraphCriteria = {
- .float32 = {.atol = 1e-1f, .rtol = 1e-1f, .bias = 1e-2f, .mse = 1e-4f},
- .float16 = {.atol = 1.0f, .rtol = 1.0f, .bias = 1e-1f, .mse = 5e-2f},
- .int32 = {.atol = 1},
- .quant8Asymm = {.atol = 12, .bias = 2, .mse = 2},
- .quant8Symm = {.atol = 12, .bias = 2, .mse = 2},
- .quant16Asymm = {.atol = 12, .bias = 2, .mse = 2},
- .quant16Symm = {.atol = 12, .bias = 2, .mse = 2}};
+const AccuracyCriteria kLargeGraphCriteria = {.float32 =
+ {
+ .bias = 1e-2f,
+ .mse = 1e-4f,
+ .atol = 1e-1f,
+ .rtol = 1e-1f,
+ },
+ .float16 =
+ {
+ .bias = 1e-1f,
+ .mse = 5e-2f,
+ .atol = 1.0f,
+ .rtol = 1.0f,
+ },
+ .int32 = {.atol = 1},
+ .quant8Asymm =
+ {
+ .bias = 2,
+ .mse = 2,
+ .atol = 12,
+ },
+ .quant8Symm =
+ {
+ .bias = 2,
+ .mse = 2,
+ .atol = 12,
+ },
+ .quant16Asymm =
+ {
+ .bias = 2,
+ .mse = 2,
+ .atol = 12,
+ },
+ .quant16Symm = {
+ .bias = 2,
+ .mse = 2,
+ .atol = 12,
+ }};
// Due to the limitation of the random graph generator, graphs generated with mixed-type or
// mixed-rank operations are likely to result in a disconnected network. Thus, we filter the