NNAPI Burst -- runtime and CTS
The NNAPI is introducing the notion of an "Execution Burst" object (or
more simply a "Burst" object), which is similar to an
ANeuralNetworksExecution but is intended to be reused across multiple
executions and has lower IPC overhead. It achieves this lower overhead
by replacing HIDL HwBinder calls with FMQ (Fast Message Queue) messages.

This CL implements the NDK burst functions, implements the burst path
through the partitioner/scheduler, and adds CTS tests that exercise the
burst object.
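
For reference, a rough usage sketch of the burst object from an
application's point of view, assuming the NDK entry points land with the
names ANeuralNetworksBurst_create, ANeuralNetworksExecution_burstCompute,
and ANeuralNetworksBurst_free; error handling and operand setup are
elided:

    #include <android/NeuralNetworks.h>

    void runInferences(ANeuralNetworksCompilation* compilation, int count) {
        // One burst object is created up front and reused for every
        // execution, so the runtime can keep a single FMQ channel open
        // instead of issuing a new HwBinder call per execution.
        ANeuralNetworksBurst* burst = nullptr;
        ANeuralNetworksBurst_create(compilation, &burst);

        for (int i = 0; i < count; ++i) {
            ANeuralNetworksExecution* execution = nullptr;
            ANeuralNetworksExecution_create(compilation, &execution);
            // ... set the execution's inputs and outputs here ...
            ANeuralNetworksExecution_burstCompute(execution, burst);
            ANeuralNetworksExecution_free(execution);
        }

        ANeuralNetworksBurst_free(burst);
    }
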
Bug: 119570067
Test: mma
Test: NeuralNetworksTest_static
Change-Id: I1d2414f454910ad3ba4b2af728ab95ef8b609c9c
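
As context for the ExecutionBurstServer change in the diff below:
getMemories() keeps a slot-to-memory cache so the server only asks the
client (via the mCallback->getMemories HIDL call) for memories it has not
seen before. A simplified standalone sketch of that caching logic follows;
it is not the literal code in the diff, fetchFromClient is a hypothetical
stand-in for the HIDL callback, and error handling is omitted:

    #include <cstddef>
    #include <cstdint>
    #include <map>
    #include <set>
    #include <vector>

    #include <hidl/HidlSupport.h>  // android::hardware::hidl_memory

    using android::hardware::hidl_memory;

    // Hypothetical stand-in for the mCallback->getMemories HIDL call; a real
    // implementation would retrieve the client's shared memory regions.
    std::vector<hidl_memory> fetchFromClient(const std::vector<int32_t>& slots) {
        return std::vector<hidl_memory>(slots.size());
    }

    class SlotMemoryCache {
       public:
        std::vector<hidl_memory> getMemories(const std::vector<int32_t>& slots) {
            // Collect the unique slots that have not been seen before.
            std::set<int32_t> unknownSlots;
            for (int32_t slot : slots) {
                if (mSlotToMemoryCache.count(slot) == 0) {
                    unknownSlots.insert(slot);
                }
            }

            // Ask the client only for the unknown slots, then cache them.
            if (!unknownSlots.empty()) {
                const std::vector<int32_t> request(unknownSlots.begin(),
                                                   unknownSlots.end());
                const std::vector<hidl_memory> returned = fetchFromClient(request);
                for (size_t i = 0; i < request.size(); ++i) {
                    mSlotToMemoryCache[request[i]] = returned[i];
                }
            }

            // Serve the full request from the cache.
            std::vector<hidl_memory> memories(slots.size());
            for (size_t i = 0; i < slots.size(); ++i) {
                memories[i] = mSlotToMemoryCache[slots[i]];
            }
            return memories;
        }

       private:
        std::map<int32_t, hidl_memory> mSlotToMemoryCache;
    };
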
diff --git a/nn/common/ExecutionBurstServer.cpp b/nn/common/ExecutionBurstServer.cpp
index 64a4ee2..a9af004 100644
--- a/nn/common/ExecutionBurstServer.cpp
+++ b/nn/common/ExecutionBurstServer.cpp
@@ -14,9 +14,13 @@
* limitations under the License.
*/
+#define LOG_TAG "ExecutionBurstServer"
+
#include "ExecutionBurstServer.h"
#include <android-base/logging.h>
+#include <set>
+#include <string>
namespace android {
namespace nn {
@@ -27,31 +31,37 @@
std::lock_guard<std::mutex> guard(mMutex);
// find unique unknown slots
- std::vector<int32_t> unknownSlots = slots;
- std::sort(unknownSlots.begin(), unknownSlots.end());
- auto last = std::unique(unknownSlots.begin(), unknownSlots.end());
- unknownSlots.erase(last, unknownSlots.end());
+ std::set<int32_t> setOfUnknownSlots;
+ for (int32_t slot : slots) {
+ if (mSlotToMemoryCache.find(slot) == mSlotToMemoryCache.end()) {
+ setOfUnknownSlots.insert(slot);
+ }
+ }
+ const std::vector<int32_t> unknownSlots(setOfUnknownSlots.begin(), setOfUnknownSlots.end());
// retrieve unknown slots
- ErrorStatus errorStatus = ErrorStatus::GENERAL_FAILURE;
- std::vector<hidl_memory> returnedMemories;
- Return<void> ret = mCallback->getMemories(
- unknownSlots, [&errorStatus, &returnedMemories](ErrorStatus status,
- const hidl_vec<hidl_memory>& memories) {
- errorStatus = status;
- if (status == ErrorStatus::NONE) {
- returnedMemories = memories;
- }
- });
+ if (!unknownSlots.empty()) {
+        LOG(VERBOSE) << "ExecutionBurstServer requesting unknown memories from the client";
+ ErrorStatus errorStatus = ErrorStatus::GENERAL_FAILURE;
+ std::vector<hidl_memory> returnedMemories;
+ Return<void> ret = mCallback->getMemories(
+ unknownSlots, [&errorStatus, &returnedMemories](
+ ErrorStatus status, const hidl_vec<hidl_memory>& memories) {
+ errorStatus = status;
+ if (status == ErrorStatus::NONE) {
+ returnedMemories = memories;
+ }
+ });
- if (!ret.isOk() || errorStatus != ErrorStatus::NONE) {
- LOG(ERROR) << "Error retrieving memories";
- return {};
- }
+ if (!ret.isOk() || errorStatus != ErrorStatus::NONE) {
+ LOG(ERROR) << "Error retrieving memories";
+ return {};
+ }
- // add memories to unknown slots
- for (size_t i = 0; i < unknownSlots.size(); ++i) {
- mSlotToMemoryCache[unknownSlots[i]] = returnedMemories[i];
+ // add memories to unknown slots
+ for (size_t i = 0; i < unknownSlots.size(); ++i) {
+ mSlotToMemoryCache[unknownSlots[i]] = returnedMemories[i];
+ }
}
// get all slots
@@ -59,6 +69,7 @@
for (size_t i = 0; i < slots.size(); ++i) {
memories[i] = mSlotToMemoryCache[slots[i]];
}
+
return memories;
}
@@ -85,9 +96,13 @@
mTeardown = true;
// force unblock
+    // By default, the ExecutionBurstServer is blocked waiting on a request
+    // packet. If the client process destroys its burst object, the server
+    // would otherwise remain waiting on the futex (when mBlocking is true).
+    // This force-unblock wakes up any thread still waiting on the futex.
if (mBlocking) {
- // TODO: look for a different/better way to signal/notify the futex to wake
- // up any thread waiting on it
+ // TODO: look for a different/better way to signal/notify the futex to
+ // wake up any thread waiting on it
FmqRequestDatum datum;
datum.packetInformation({/*.packetSize=*/0, /*.numberOfInputOperands=*/0,
/*.numberOfOutputOperands=*/0, /*.numberOfPools=*/0});
@@ -117,7 +132,13 @@
return {};
}
- // wait for request packet and read first element of result packet
+ // wait for request packet and read first element of request packet
+ // TODO: have a more elegant way to wait for data, and read it all at once.
+ // For example, EventFlag can be used to directly wait on the futex, and all
+ // the data can be read at once with a non-blocking call to
+ // MessageQueue::read. For further optimization, MessageQueue::beginRead and
+ // MessageQueue::commitRead can be used to avoid an extra copy of the
+ // metadata.
FmqRequestDatum datum;
bool success = false;
if (mBlocking) {
@@ -374,6 +395,10 @@
ErrorStatus errorStatus = ErrorStatus::GENERAL_FAILURE;
std::vector<OutputShape> outputShapes;
Timing returnedTiming;
+    // This call to IPreparedModel::executeSynchronously occurs entirely
+    // within the same process, so the Return<> errors are ignored via .isOk().
+    // TODO: verify it is safe to always call isOk() here, or whether there
+    // is any benefit to checking for potential errors.
mPreparedModel
->executeSynchronously(request, measure,
[&errorStatus, &outputShapes, &returnedTiming](