NNAPI Burst -- runtime utility

The NNAPI is introducing the notion of an "Execution Burst" object (or
more simply a "Burst" object), which is similar to an
ANeuralNetworksExecution, but is intended to be reused across multiple
executions and has lower IPC overheads. It achieves this low IPC
overhead by replacing HIDL HwBinder calls with FMQ messages.

The Burst utility objects are exposed in two flavors:
1) A Controller object which is able to launch an execution and later
   get the result. This will be used by the NN runtime, and is currently
   used in the native tests in a sibling CL.
2) An automated Server object which--when created with a reference to an
   IPreparedModel--will automatically receive a request to execute, call
   the model's synchronous execution method, and forward the result to
   an output channel.

To incorporate these changes, the service must implement
IPreparedModel::configureExecutionBurst. SampleDriver.cpp contains a
functional reference implementation.

Bug: 119570067
Test: mma
Test: VtsHalNeuralNetworksV1_2TargetTest
Change-Id: Ic7082f94c9a20f674c863af68ef106ba60f27d7f
diff --git a/nn/common/Android.bp b/nn/common/Android.bp
index e9c5188..7fb53ab 100644
--- a/nn/common/Android.bp
+++ b/nn/common/Android.bp
@@ -53,6 +53,8 @@
     export_include_dirs: ["include"],
     srcs: [
         "Utils.cpp",
+        "ExecutionBurstController.cpp",
+        "ExecutionBurstServer.cpp",
     ],
     header_libs: [
         "libneuralnetworks_headers",
@@ -64,6 +66,7 @@
         "libhidltransport",
         "libhidlmemory",
         "libnativewindow",
+        "libfmq",
         "android.hardware.neuralnetworks@1.0",
         "android.hardware.neuralnetworks@1.1",
         "android.hardware.neuralnetworks@1.2",
@@ -98,6 +101,8 @@
     ],
     srcs: [
         "CpuExecutor.cpp",
+        "ExecutionBurstController.cpp",
+        "ExecutionBurstServer.cpp",
         "GraphDump.cpp",
         "IndexedShapeWrapper.cpp",
         "OperationsUtils.cpp",
@@ -135,10 +140,12 @@
     ],
     shared_libs: [
         "libbase",
+        "libcutils",
         "libhidlbase",
         "libhidltransport",
         "libhidlmemory",
         "libnativewindow",
+        "libfmq",
         "libtextclassifier_hash",
         "liblog",
         "libutils",
diff --git a/nn/common/ExecutionBurstController.cpp b/nn/common/ExecutionBurstController.cpp
new file mode 100644
index 0000000..32231d3
--- /dev/null
+++ b/nn/common/ExecutionBurstController.cpp
@@ -0,0 +1,402 @@
+/*
+ * Copyright (C) 2019 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ExecutionBurstController.h"
+
+#include <android-base/logging.h>
+
+namespace android {
+namespace nn {
+namespace {
+constexpr Timing invalidTiming = {UINT64_MAX, UINT64_MAX};
+}  // anonymous namespace
+
+Return<void> ExecutionBurstCallback::getMemories(const hidl_vec<int32_t>& slots,
+                                                 getMemories_cb cb) {
+    std::lock_guard<std::mutex> guard(mMutex);
+
+    // get all memories
+    hidl_vec<hidl_memory> memories(slots.size());
+    for (size_t i = 0; i < slots.size(); ++i) {
+        // if memory is available, return it; otherwise return error
+        auto iter = mSlotToMemoryCache.find(slots[i]);
+        if (iter == mSlotToMemoryCache.end()) {
+            cb(ErrorStatus::INVALID_ARGUMENT, {});
+            return Void();
+        }
+        memories[i] = iter->second;
+    }
+
+    // return successful
+    cb(ErrorStatus::NONE, std::move(memories));
+    return Void();
+}
+
+std::vector<int32_t> ExecutionBurstCallback::getSlots(const hidl_vec<hidl_memory>& memories,
+                                                      const std::vector<intptr_t>& keys) {
+    std::lock_guard<std::mutex> guard(mMutex);
+
+    // retrieve (or bind) all slots corresponding to memories
+    std::vector<int32_t> slots;
+    slots.reserve(memories.size());
+    for (size_t i = 0; i < memories.size(); ++i) {
+        slots.push_back(getSlotLocked(memories[i], keys[i]));
+    }
+    return slots;
+}
+
+std::pair<bool, int32_t> ExecutionBurstCallback::freeMemory(intptr_t key) {
+    std::lock_guard<std::mutex> guard(mMutex);
+
+    auto iter = mMemoryIdToSlotCache.find(key);
+    if (iter != mMemoryIdToSlotCache.end()) {
+        const int32_t slot = iter->second;
+        mMemoryIdToSlotCache.erase(key);
+        mSlotToMemoryCache.erase(slot);
+        return {true, slot};
+    } else {
+        return {false, 0};
+    }
+}
+
+int32_t ExecutionBurstCallback::getSlotLocked(const hidl_memory& memory, intptr_t key) {
+    auto iter = mMemoryIdToSlotCache.find(key);
+    if (iter == mMemoryIdToSlotCache.end()) {
+        const int32_t slot = mNextSlot;
+        mNextSlot = (mNextSlot + 1) % (1 << 30);
+        mMemoryIdToSlotCache[key] = slot;
+        mSlotToMemoryCache[slot] = memory;
+        return slot;
+    } else {
+        const int32_t slot = iter->second;
+        return slot;
+    }
+}
+
+ExecutionBurstController::ExecutionBurstController(
+        std::unique_ptr<FmqRequestChannel> fmqRequestChannel,
+        std::unique_ptr<FmqResultChannel> fmqResultChannel, const sp<IBurstContext>& burstContext,
+        const sp<IPreparedModel>& preparedModel, const sp<ExecutionBurstCallback>& callback,
+        bool blocking)
+    : mFmqRequestChannel(std::move(fmqRequestChannel)),
+      mFmqResultChannel(std::move(fmqResultChannel)),
+      mBurstContext(burstContext),
+      mPreparedModel(preparedModel),
+      mMemoryCache(callback),
+      mUsesFutex(blocking) {}
+
+bool ExecutionBurstController::sendPacket(const std::vector<FmqRequestDatum>& packet) {
+    if (mUsesFutex) {
+        return mFmqRequestChannel->writeBlocking(packet.data(), packet.size());
+    } else {
+        return mFmqRequestChannel->write(packet.data(), packet.size());
+    }
+}
+
+// Waits for the next result packet and returns all of its datums, header first.
+// Returns an empty vector if the packet is ill-formed or cannot be read.
+std::vector<FmqResultDatum> ExecutionBurstController::getPacketBlocking() {
+    using discriminator = FmqResultDatum::hidl_discriminator;
+    // wait for result packet and read first element of result packet
+    FmqResultDatum datum;
+    bool success = false;
+    if (mUsesFutex) {
+        success = mFmqResultChannel->readBlocking(&datum, 1);
+    } else {
+        // TODO: better handle the case where the service crashes after
+        // receiving the Request but before returning the result.
+        while (!mFmqResultChannel->read(&datum, 1)) {
+        }
+        success = true;  // the polling loop only exits once a datum was read
+    }
+
+    // validate packet information
+    if (!success || datum.getDiscriminator() != discriminator::packetInformation) {
+        LOG(ERROR) << "FMQ Result packet ill-formed";
+        return {};
+    }
+
+    // size includes the header just read; zero would break front()/size() - 1
+    const size_t count = datum.packetInformation().packetSize;
+    if (count == 0) {
+        LOG(ERROR) << "FMQ Result packet ill-formed";
+        return {};
+    }
+
+    // retrieve remaining elements (already available, so no blocking wait)
+    std::vector<FmqResultDatum> packet(count);
+    packet.front() = datum;
+    if (!mFmqResultChannel->read(packet.data() + 1, packet.size() - 1)) {
+        return {};
+    }
+    return packet;
+}
+
+// serialize a request into a packet
+std::vector<FmqRequestDatum> ExecutionBurstController::serialize(
+        const Request& request, MeasureTiming measure, const std::vector<intptr_t>& memoryIds) {
+    // count how many elements need to be sent for a request
+    size_t count = 2 + request.inputs.size() + request.outputs.size() + request.pools.size();
+    for (const auto& input : request.inputs) {
+        count += input.dimensions.size();
+    }
+    for (const auto& output : request.outputs) {
+        count += output.dimensions.size();
+    }
+
+    // create buffer to temporarily store elements
+    std::vector<FmqRequestDatum> data;
+    data.reserve(count);
+
+    // package packetInfo
+    {
+        FmqRequestDatum datum;
+        datum.packetInformation(
+                {/*.packetSize=*/static_cast<uint32_t>(count),
+                 /*.numberOfInputOperands=*/static_cast<uint32_t>(request.inputs.size()),
+                 /*.numberOfOutputOperands=*/static_cast<uint32_t>(request.outputs.size()),
+                 /*.numberOfPools=*/static_cast<uint32_t>(request.pools.size())});
+        data.push_back(datum);
+    }
+
+    // package input data
+    for (const auto& input : request.inputs) {
+        // package operand information
+        FmqRequestDatum datum;
+        datum.inputOperandInformation(
+                {/*.hasNoValue=*/input.hasNoValue,
+                 /*.location=*/input.location,
+                 /*.numberOfDimensions=*/static_cast<uint32_t>(input.dimensions.size())});
+        data.push_back(datum);
+
+        // package operand dimensions
+        for (uint32_t dimension : input.dimensions) {
+            FmqRequestDatum datum;
+            datum.inputOperandDimensionValue(dimension);
+            data.push_back(datum);
+        }
+    }
+
+    // package output data
+    for (const auto& output : request.outputs) {
+        // package operand information
+        FmqRequestDatum datum;
+        datum.outputOperandInformation(
+                {/*.hasNoValue=*/output.hasNoValue,
+                 /*.location=*/output.location,
+                 /*.numberOfDimensions=*/static_cast<uint32_t>(output.dimensions.size())});
+        data.push_back(datum);
+
+        // package operand dimensions
+        for (uint32_t dimension : output.dimensions) {
+            FmqRequestDatum datum;
+            datum.outputOperandDimensionValue(dimension);
+            data.push_back(datum);
+        }
+    }
+
+    // package pool identifier
+    const std::vector<int32_t> slots = mMemoryCache->getSlots(request.pools, memoryIds);
+    for (int32_t slot : slots) {
+        FmqRequestDatum datum;
+        datum.poolIdentifier(slot);
+        data.push_back(datum);
+    }
+
+    // package measureTiming
+    {
+        FmqRequestDatum datum;
+        datum.measureTiming(measure);
+        data.push_back(datum);
+    }
+
+    // return packet
+    return data;
+}
+
+// deserialize a packet into the result
+// Returns {GENERAL_FAILURE, {}, invalidTiming} if the packet runs out of datums,
+// holds a datum of an unexpected kind, or disagrees with its declared packetSize.
+std::tuple<ErrorStatus, std::vector<OutputShape>, Timing> ExecutionBurstController::deserialize(
+        const std::vector<FmqResultDatum>& data) {
+    using discriminator = FmqResultDatum::hidl_discriminator;
+
+    std::vector<OutputShape> outputShapes;
+    size_t index = 0;
+
+    // true if data[index] exists and has the expected kind; the range check stops
+    // counts taken from the packet from driving reads past the end of `data`
+    const auto nextIs = [&data, &index](discriminator expected) {
+        return index < data.size() && data[index].getDiscriminator() == expected;
+    };
+
+    // validate packet information
+    if (!nextIs(discriminator::packetInformation)) {
+        LOG(ERROR) << "FMQ Result packet ill-formed";
+        return {ErrorStatus::GENERAL_FAILURE, {}, invalidTiming};
+    }
+
+    // unpackage packet information
+    const FmqResultDatum::PacketInformation& packetInfo = data[index].packetInformation();
+    index++;
+    const uint32_t packetSize = packetInfo.packetSize;
+    const ErrorStatus errorStatus = packetInfo.errorStatus;
+    const uint32_t numberOfOperands = packetInfo.numberOfOperands;
+
+    // unpackage operands
+    for (size_t operand = 0; operand < numberOfOperands; ++operand) {
+        // validate operand information
+        if (!nextIs(discriminator::operandInformation)) {
+            LOG(ERROR) << "FMQ Result packet ill-formed";
+            return {ErrorStatus::GENERAL_FAILURE, {}, invalidTiming};
+        }
+
+        // unpackage operand information
+        const FmqResultDatum::OperandInformation& operandInfo = data[index].operandInformation();
+        index++;
+        const bool isSufficient = operandInfo.isSufficient;
+        const uint32_t numberOfDimensions = operandInfo.numberOfDimensions;
+
+        // unpackage operand dimensions; no reserve(), the count is not validated yet
+        std::vector<uint32_t> dimensions;
+        for (size_t i = 0; i < numberOfDimensions; ++i) {
+            // validate dimension
+            if (!nextIs(discriminator::operandDimensionValue)) {
+                LOG(ERROR) << "FMQ Result packet ill-formed";
+                return {ErrorStatus::GENERAL_FAILURE, {}, invalidTiming};
+            }
+            // unpackage and store dimension
+            dimensions.push_back(data[index].operandDimensionValue());
+            index++;
+        }
+        // store result
+        outputShapes.push_back({/*.dimensions=*/dimensions, /*.isSufficient=*/isSufficient});
+    }
+
+    // validate execution timing
+    if (!nextIs(discriminator::executionTiming)) {
+        LOG(ERROR) << "FMQ Result packet ill-formed";
+        return {ErrorStatus::GENERAL_FAILURE, {}, invalidTiming};
+    }
+
+    // unpackage execution timing
+    const Timing timing = data[index].executionTiming();
+    index++;
+
+    // validate packet information
+    if (index != packetSize) {
+        LOG(ERROR) << "FMQ Result packet ill-formed";
+        return {ErrorStatus::GENERAL_FAILURE, {}, invalidTiming};
+    }
+    return std::make_tuple(errorStatus, std::move(outputShapes), timing);
+}
+
+std::tuple<ErrorStatus, std::vector<OutputShape>, Timing> ExecutionBurstController::compute(
+        const Request& request, MeasureTiming measure, const std::vector<intptr_t>& memoryIds) {
+    // serialize request
+    std::vector<FmqRequestDatum> requestData = serialize(request, measure, memoryIds);
+
+    // TODO: handle the case where the serialization exceeds
+    // kExecutionBurstChannelLength
+
+    // send request packet
+    bool success = sendPacket(requestData);
+    if (!success) {
+        LOG(ERROR) << "Error sending FMQ packet";
+        return {ErrorStatus::GENERAL_FAILURE, {}, invalidTiming};
+    }
+
+    // get result packet
+    const std::vector<FmqResultDatum> resultData = getPacketBlocking();
+    if (resultData.empty()) {
+        LOG(ERROR) << "Error retrieving FMQ packet";
+        return {ErrorStatus::GENERAL_FAILURE, {}, invalidTiming};
+    }
+
+    // deserialize result
+    return deserialize(resultData);
+}
+
+void ExecutionBurstController::freeMemory(intptr_t key) {
+    bool valid;
+    int32_t slot;
+    std::tie(valid, slot) = mMemoryCache->freeMemory(key);
+    if (valid) {
+        mBurstContext->freeMemory(slot).isOk();
+    }
+}
+
+// Creates the FMQs and memory callback, registers them with the service via
+// IPreparedModel::configureExecutionBurst, and wraps the returned IBurstContext
+// in an ExecutionBurstController. Returns nullptr on any failure.
+std::unique_ptr<ExecutionBurstController> createExecutionBurstController(
+        const sp<IPreparedModel>& preparedModel, bool blocking) {
+    // check inputs
+    if (preparedModel == nullptr) {
+        LOG(ERROR) << "createExecutionBurstController passed a nullptr";
+        return nullptr;
+    }
+    // create callback object
+    sp<ExecutionBurstCallback> callback = new ExecutionBurstCallback();
+    if (callback == nullptr) {
+        LOG(ERROR) << "createExecutionBurstController failed to create callback";
+        return nullptr;
+    }
+    // create FMQ objects
+    std::unique_ptr<FmqRequestChannel> fmqRequestChannel{new (std::nothrow) FmqRequestChannel(
+            kExecutionBurstChannelLength, /*confEventFlag=*/blocking)};
+    std::unique_ptr<FmqResultChannel> fmqResultChannel{new (std::nothrow) FmqResultChannel(
+            kExecutionBurstChannelLength, /*confEventFlag=*/blocking)};
+
+    // check FMQ objects
+    if (!fmqRequestChannel || !fmqResultChannel || !fmqRequestChannel->isValid() ||
+        !fmqResultChannel->isValid()) {
+        LOG(ERROR) << "createExecutionBurstController failed to create FastMessageQueue";
+        return nullptr;
+    }
+
+    const FmqRequestDescriptor& fmqRequestDescriptor = *fmqRequestChannel->getDesc();
+    const FmqResultDescriptor& fmqResultDescriptor = *fmqResultChannel->getDesc();
+
+    // configure burst; errorStatus starts as a failure so that it is still
+    // meaningful if the transport fails before the callback is invoked
+    ErrorStatus errorStatus = ErrorStatus::GENERAL_FAILURE;
+    sp<IBurstContext> burstContext;
+    Return<void> ret = preparedModel->configureExecutionBurst(
+            callback, fmqRequestDescriptor, fmqResultDescriptor,
+            [&errorStatus, &burstContext](ErrorStatus status, const sp<IBurstContext>& context) {
+                errorStatus = status;
+                burstContext = context;
+            });
+
+    // check burst
+    if (!ret.isOk() || errorStatus != ErrorStatus::NONE) {
+        LOG(ERROR) << "IPreparedModel::configureExecutionBurst failed with "
+                   << toString(errorStatus);
+        return nullptr;
+    }
+    if (burstContext == nullptr) {
+        LOG(ERROR) << "IPreparedModel::configureExecutionBurst returned nullptr for burst";
+        return nullptr;
+    }
+    // make and return controller
+    return std::make_unique<ExecutionBurstController>(std::move(fmqRequestChannel),
+                                                      std::move(fmqResultChannel), burstContext,
+                                                      preparedModel, callback, blocking);
+}
+
+}  // namespace nn
+}  // namespace android
diff --git a/nn/common/ExecutionBurstServer.cpp b/nn/common/ExecutionBurstServer.cpp
new file mode 100644
index 0000000..64a4ee2
--- /dev/null
+++ b/nn/common/ExecutionBurstServer.cpp
@@ -0,0 +1,424 @@
+/*
+ * Copyright (C) 2019 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ExecutionBurstServer.h"
+
+#include <android-base/logging.h>
+
+namespace android {
+namespace nn {
+
+BurstMemoryCache::BurstMemoryCache(const sp<IBurstCallback>& callback) : mCallback(callback) {}
+
+// Fetches the hidl_memory of each slot via the IBurstCallback, caches them, and
+// returns them in `slots` order; returns {} on callback failure or a wrong count.
+hidl_vec<hidl_memory> BurstMemoryCache::getMemories(const std::vector<int32_t>& slots) {
+    std::lock_guard<std::mutex> guard(mMutex);
+
+    // find unique slots
+    std::vector<int32_t> unknownSlots = slots;
+    std::sort(unknownSlots.begin(), unknownSlots.end());
+    auto last = std::unique(unknownSlots.begin(), unknownSlots.end());
+    unknownSlots.erase(last, unknownSlots.end());
+    // retrieve the memories for those slots
+    ErrorStatus errorStatus = ErrorStatus::GENERAL_FAILURE;
+    std::vector<hidl_memory> returnedMemories;
+    Return<void> ret = mCallback->getMemories(
+            unknownSlots, [&errorStatus, &returnedMemories](ErrorStatus status,
+                                                            const hidl_vec<hidl_memory>& memories) {
+                errorStatus = status;
+                if (status == ErrorStatus::NONE) {
+                    returnedMemories = memories;
+                }
+            });
+    // the size check keeps the indexing below within returnedMemories
+    if (!ret.isOk() || errorStatus != ErrorStatus::NONE ||
+        returnedMemories.size() != unknownSlots.size()) {
+        LOG(ERROR) << "Error retrieving memories";
+        return {};
+    }
+    // add memories to unknown slots
+    for (size_t i = 0; i < unknownSlots.size(); ++i) {
+        mSlotToMemoryCache[unknownSlots[i]] = returnedMemories[i];
+    }
+    // get all slots
+    hidl_vec<hidl_memory> memories(slots.size());
+    for (size_t i = 0; i < slots.size(); ++i) {
+        memories[i] = mSlotToMemoryCache[slots[i]];
+    }
+    return memories;
+}
+
+void BurstMemoryCache::freeMemory(int32_t slot) {
+    std::lock_guard<std::mutex> guard(mMutex);
+    mSlotToMemoryCache.erase(slot);
+}
+
+ExecutionBurstServer::ExecutionBurstServer(const sp<IBurstCallback>& callback,
+                                           std::unique_ptr<FmqRequestChannel> requestChannel,
+                                           std::unique_ptr<FmqResultChannel> resultChannel,
+                                           IPreparedModel* preparedModel)
+    : mMemoryCache(callback),
+      mFmqRequestChannel(std::move(requestChannel)),
+      mFmqResultChannel(std::move(resultChannel)),
+      mPreparedModel(preparedModel),
+      mBlocking(mFmqRequestChannel->getEventFlagWord() != nullptr) {
+    // TODO: highly document the threading behavior of this class
+    mWorker = std::async(std::launch::async, [this] { task(); });
+}
+
+ExecutionBurstServer::~ExecutionBurstServer() {
+    // set teardown flag
+    mTeardown = true;
+
+    // force unblock
+    if (mBlocking) {
+        // TODO: look for a different/better way to signal/notify the futex to wake
+        // up any thread waiting on it
+        FmqRequestDatum datum;
+        datum.packetInformation({/*.packetSize=*/0, /*.numberOfInputOperands=*/0,
+                                 /*.numberOfOutputOperands=*/0, /*.numberOfPools=*/0});
+        mFmqRequestChannel->writeBlocking(&datum, 1);
+    }
+
+    // wait for task thread to end
+    mWorker.wait();
+}
+
+bool ExecutionBurstServer::sendPacket(const std::vector<FmqResultDatum>& packet) {
+    if (mTeardown) {
+        return false;
+    }
+
+    if (mBlocking) {
+        return mFmqResultChannel->writeBlocking(packet.data(), packet.size());
+    } else {
+        return mFmqResultChannel->write(packet.data(), packet.size());
+    }
+}
+
+// Waits for the next request packet and returns all of its datums, header first.
+// Returns an empty vector on teardown or if the packet is ill-formed or unreadable.
+std::vector<FmqRequestDatum> ExecutionBurstServer::getPacketBlocking() {
+    using discriminator = FmqRequestDatum::hidl_discriminator;
+
+    if (mTeardown) {
+        return {};
+    }
+
+    // wait for request packet and read first element of request packet
+    FmqRequestDatum datum;
+    bool success = false;
+    if (mBlocking) {
+        success = mFmqRequestChannel->readBlocking(&datum, 1);
+    } else {
+        while ((success = !mTeardown.load(std::memory_order_relaxed)) &&
+               !mFmqRequestChannel->read(&datum, 1)) {
+        }
+    }
+
+    // terminate loop
+    if (mTeardown) {
+        return {};
+    }
+
+    // validate packet information
+    if (!success || datum.getDiscriminator() != discriminator::packetInformation) {
+        LOG(ERROR) << "FMQ Request packet ill-formed";
+        return {};
+    }
+
+    // size includes the header just read; zero would break front()/size() - 1
+    const size_t count = datum.packetInformation().packetSize;
+    if (count == 0) {
+        LOG(ERROR) << "FMQ Request packet ill-formed";
+        return {};
+    }
+
+    // retrieve remaining elements (already available, so no blocking wait)
+    std::vector<FmqRequestDatum> packet(count);
+    packet.front() = datum;
+    if (!mFmqRequestChannel->read(packet.data() + 1, packet.size() - 1)) {
+        return {};
+    }
+    return packet;
+}
+
+// deserialize request
+// Returns {{}, MeasureTiming::NO} if the packet runs out of datums, holds a
+// datum of an unexpected kind, or disagrees with its declared packetSize.
+std::pair<Request, MeasureTiming> ExecutionBurstServer::deserialize(
+        const std::vector<FmqRequestDatum>& data) {
+    using discriminator = FmqRequestDatum::hidl_discriminator;
+
+    size_t index = 0;
+
+    // true if data[index] exists and has the expected kind; the range check stops
+    // counts taken from the packet from driving reads past the end of `data`
+    const auto nextIs = [&data, &index](discriminator expected) {
+        return index < data.size() && data[index].getDiscriminator() == expected;
+    };
+
+    // validate packet information
+    if (!nextIs(discriminator::packetInformation)) {
+        LOG(ERROR) << "FMQ Request packet ill-formed";
+        return {{}, MeasureTiming::NO};
+    }
+
+    // unpackage packet information
+    const FmqRequestDatum::PacketInformation& packetInfo = data[index].packetInformation();
+    index++;
+    const uint32_t packetSize = packetInfo.packetSize;
+    const uint32_t numberOfInputOperands = packetInfo.numberOfInputOperands;
+    const uint32_t numberOfOutputOperands = packetInfo.numberOfOutputOperands;
+    const uint32_t numberOfPools = packetInfo.numberOfPools;
+
+    // NOTE: the counts above are not validated yet, so the vectors below grow as
+    // datums are confirmed instead of being reserved up front
+    // unpackage input operands
+    std::vector<RequestArgument> inputs;
+    for (size_t operand = 0; operand < numberOfInputOperands; ++operand) {
+        // validate input operand information
+        if (!nextIs(discriminator::inputOperandInformation)) {
+            LOG(ERROR) << "FMQ Request packet ill-formed";
+            return {{}, MeasureTiming::NO};
+        }
+
+        // unpackage operand information
+        const FmqRequestDatum::OperandInformation& operandInfo =
+                data[index].inputOperandInformation();
+        index++;
+        const bool hasNoValue = operandInfo.hasNoValue;
+        const DataLocation location = operandInfo.location;
+        const uint32_t numberOfDimensions = operandInfo.numberOfDimensions;
+
+        // unpackage operand dimensions
+        std::vector<uint32_t> dimensions;
+        for (size_t i = 0; i < numberOfDimensions; ++i) {
+            // validate dimension
+            if (!nextIs(discriminator::inputOperandDimensionValue)) {
+                LOG(ERROR) << "FMQ Request packet ill-formed";
+                return {{}, MeasureTiming::NO};
+            }
+
+            // unpackage dimension
+            const uint32_t dimension = data[index].inputOperandDimensionValue();
+            index++;
+
+            // store result
+            dimensions.push_back(dimension);
+        }
+
+        // store result
+        inputs.push_back(
+                {/*.hasNoValue=*/hasNoValue, /*.location=*/location, /*.dimensions=*/dimensions});
+    }
+
+    // unpackage output operands
+    std::vector<RequestArgument> outputs;
+    for (size_t operand = 0; operand < numberOfOutputOperands; ++operand) {
+        // validate output operand information
+        if (!nextIs(discriminator::outputOperandInformation)) {
+            LOG(ERROR) << "FMQ Request packet ill-formed";
+            return {{}, MeasureTiming::NO};
+        }
+
+        // unpackage operand information
+        const FmqRequestDatum::OperandInformation& operandInfo =
+                data[index].outputOperandInformation();
+        index++;
+        const bool hasNoValue = operandInfo.hasNoValue;
+        const DataLocation location = operandInfo.location;
+        const uint32_t numberOfDimensions = operandInfo.numberOfDimensions;
+
+        // unpackage operand dimensions
+        std::vector<uint32_t> dimensions;
+        for (size_t i = 0; i < numberOfDimensions; ++i) {
+            // validate dimension
+            if (!nextIs(discriminator::outputOperandDimensionValue)) {
+                LOG(ERROR) << "FMQ Request packet ill-formed";
+                return {{}, MeasureTiming::NO};
+            }
+
+            // unpackage dimension
+            const uint32_t dimension = data[index].outputOperandDimensionValue();
+            index++;
+
+            // store result
+            dimensions.push_back(dimension);
+        }
+
+        // store result
+        outputs.push_back(
+                {/*.hasNoValue=*/hasNoValue, /*.location=*/location, /*.dimensions=*/dimensions});
+    }
+
+    // unpackage pools
+    std::vector<int32_t> slots;
+    for (size_t pool = 0; pool < numberOfPools; ++pool) {
+        // validate pool identifier
+        if (!nextIs(discriminator::poolIdentifier)) {
+            LOG(ERROR) << "FMQ Request packet ill-formed";
+            return {{}, MeasureTiming::NO};
+        }
+
+        // unpackage and store pool identifier
+        slots.push_back(data[index].poolIdentifier());
+        index++;
+    }
+    hidl_vec<hidl_memory> pools = mMemoryCache.getMemories(slots);
+
+    // validate measureTiming
+    if (!nextIs(discriminator::measureTiming)) {
+        LOG(ERROR) << "FMQ Request packet ill-formed";
+        return {{}, MeasureTiming::NO};
+    }
+
+    // unpackage measureTiming
+    const MeasureTiming measure = data[index].measureTiming();
+    index++;
+
+    // validate packet information
+    if (index != packetSize) {
+        LOG(ERROR) << "FMQ Request packet ill-formed";
+        return {{}, MeasureTiming::NO};
+    }
+    // return request
+    return {{/*.inputs=*/inputs, /*.outputs=*/outputs, /*.pools=*/std::move(pools)}, measure};
+}
+
+// serialize result
+std::vector<FmqResultDatum> ExecutionBurstServer::serialize(
+        ErrorStatus errorStatus, const std::vector<OutputShape>& outputShapes, Timing timing) {
+    // count how many elements need to be sent for a result
+    size_t count = 2 + outputShapes.size();
+    for (const auto& outputShape : outputShapes) {
+        count += outputShape.dimensions.size();
+    }
+
+    // create buffer to temporarily store elements
+    std::vector<FmqResultDatum> data;
+    data.reserve(count);
+
+    // package packetInfo
+    {
+        FmqResultDatum datum;
+        datum.packetInformation({/*.packetSize=*/static_cast<uint32_t>(count),
+                                 /*.errorStatus=*/errorStatus,
+                                 /*.numberOfOperands=*/static_cast<uint32_t>(outputShapes.size())});
+        data.push_back(datum);
+    }
+
+    // package output shape data
+    for (const auto& operand : outputShapes) {
+        // package operand information
+        FmqResultDatum datum;
+        datum.operandInformation(
+                {/*.isSufficient=*/operand.isSufficient,
+                 /*.numberOfDimensions=*/static_cast<uint32_t>(operand.dimensions.size())});
+        data.push_back(datum);
+
+        // package operand dimensions
+        for (uint32_t dimension : operand.dimensions) {
+            FmqResultDatum datum;
+            datum.operandDimensionValue(dimension);
+            data.push_back(datum);
+        }
+    }
+
+    // package executionTiming
+    {
+        FmqResultDatum datum;
+        datum.executionTiming(timing);
+        data.push_back(datum);
+    }
+
+    // return result
+    return data;
+}
+
+Return<void> ExecutionBurstServer::freeMemory(int32_t slot) {
+    mMemoryCache.freeMemory(slot);
+    return Void();
+}
+
+// Worker loop: until teardown, receives a request packet, executes it with
+// IPreparedModel::executeSynchronously, and sends back the serialized result.
+void ExecutionBurstServer::task() {
+    while (!mTeardown) {
+        // receive request
+        const std::vector<FmqRequestDatum> requestData = getPacketBlocking();
+        if (mTeardown) {
+            return;
+        }
+
+        // failure defaults, sent back unless the execution overwrites them
+        ErrorStatus errorStatus = ErrorStatus::GENERAL_FAILURE;
+        std::vector<OutputShape> outputShapes;
+        Timing returnedTiming = {UINT64_MAX, UINT64_MAX};
+
+        // an empty packet means no request was read: skip execution, report failure
+        if (!requestData.empty()) {
+            Request request;
+            MeasureTiming measure;
+            std::tie(request, measure) = deserialize(requestData);
+            mPreparedModel
+                    ->executeSynchronously(
+                            request, measure,
+                            [&errorStatus, &outputShapes, &returnedTiming](
+                                    ErrorStatus status, const hidl_vec<OutputShape>& shapes,
+                                    Timing timing) {
+                                errorStatus = status;
+                                outputShapes = shapes;
+                                returnedTiming = timing;
+                            })
+                    .isOk();
+        }
+        // return result
+        sendPacket(serialize(errorStatus, outputShapes, returnedTiming));
+    }
+}
+
+sp<IBurstContext> createBurstContext(const sp<IBurstCallback>& callback,
+                                     const MQDescriptorSync<FmqRequestDatum>& requestChannel,
+                                     const MQDescriptorSync<FmqResultDatum>& resultChannel,
+                                     IPreparedModel* preparedModel) {
+    // check inputs
+    if (callback == nullptr || preparedModel == nullptr) {
+        LOG(ERROR) << "createExecutionBurstServer passed a nullptr";
+        return nullptr;
+    }
+
+    // create FMQ objects
+    std::unique_ptr<FmqRequestChannel> fmqRequestChannel{new (std::nothrow)
+                                                                 FmqRequestChannel(requestChannel)};
+    std::unique_ptr<FmqResultChannel> fmqResultChannel{new (std::nothrow)
+                                                               FmqResultChannel(resultChannel)};
+
+    // check FMQ objects
+    if (!fmqRequestChannel || !fmqResultChannel || !fmqRequestChannel->isValid() ||
+        !fmqResultChannel->isValid()) {
+        LOG(ERROR) << "createExecutionBurstServer failed to create FastMessageQueue";
+        return nullptr;
+    }
+
+    // make and return context
+    return new ExecutionBurstServer(callback, std::move(fmqRequestChannel),
+                                    std::move(fmqResultChannel), preparedModel);
+}
+
+}  // namespace nn
+}  // namespace android
diff --git a/nn/common/include/ExecutionBurstController.h b/nn/common/include/ExecutionBurstController.h
new file mode 100644
index 0000000..bf36470
--- /dev/null
+++ b/nn/common/include/ExecutionBurstController.h
@@ -0,0 +1,143 @@
+/*
+ * Copyright (C) 2019 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ANDROID_ML_NN_RUNTIME_EXECUTION_BURST_CONTROLLER_H
+#define ANDROID_ML_NN_RUNTIME_EXECUTION_BURST_CONTROLLER_H
+
+#include <android-base/macros.h>
+#include <fmq/MessageQueue.h>
+#include <hidl/MQDescriptor.h>
+#include <atomic>
+#include <map>
+#include <memory>
+#include <mutex>
+#include <tuple>
+#include "HalInterfaces.h"
+
+namespace android {
+namespace nn {
+
+using ::android::hardware::kSynchronizedReadWrite;  // FMQ flavor used by both channel aliases
+using ::android::hardware::MessageQueue;
+using ::android::hardware::MQDescriptorSync;
+using FmqRequestChannel = MessageQueue<FmqRequestDatum, kSynchronizedReadWrite>;
+using FmqResultChannel = MessageQueue<FmqResultDatum, kSynchronizedReadWrite>;
+using FmqRequestDescriptor = MQDescriptorSync<FmqRequestDatum>;  // describes a request queue
+using FmqResultDescriptor = MQDescriptorSync<FmqResultDatum>;  // describes a result queue
+
+/**
+ * Number of elements in the FMQ. NOTE(review): presumably used for both channels -- confirm.
+ */
+constexpr const size_t kExecutionBurstChannelLength = 1024;
+
+/**
+ * NN runtime burst callback object and memory cache.
+ *
+ * ExecutionBurstCallback associates a hidl_memory object with a slot number to
+ * be passed across FMQ. The ExecutionBurstServer can use this callback to
+ * retrieve the hidl_memory corresponding to a slot via HIDL.
+ *
+ * Whenever a hidl_memory object is copied, it will duplicate the underlying
+ * file descriptor. Because the NN runtime currently copies the hidl_memory on
+ * each execution, it is difficult to associate hidl_memory objects with
+ * previously cached hidl_memory objects. For this reason, callers of this class
+ * must pair each hidl_memory object with an associated key. For efficiency, if
+ * two hidl_memory objects represent the same underlying buffer, they must use
+ * the same key.
+ */
+class ExecutionBurstCallback : public IBurstCallback {
+    DISALLOW_COPY_AND_ASSIGN(ExecutionBurstCallback);
+
+   public:
+    ExecutionBurstCallback() = default;
+    // HIDL entry point: hands the memories for the requested slots to cb.
+    Return<void> getMemories(const hidl_vec<int32_t>& slots, getMemories_cb cb) override;
+
+    std::vector<int32_t> getSlots(const hidl_vec<hidl_memory>& memories,
+                                  const std::vector<intptr_t>& keys);
+    int32_t getSlot(const hidl_memory& memory, intptr_t key);  // slot paired with key
+    std::pair<bool, int32_t> freeMemory(intptr_t key);  // presumably {found, slot} -- confirm
+
+   private:
+    int32_t getSlotLocked(const hidl_memory& memory, intptr_t key);  // presumably needs mMutex
+
+    std::mutex mMutex;  // presumably guards the members below -- TODO confirm
+    int32_t mNextSlot = 0;  // NOTE(review): looks like the next unused slot number -- confirm
+    std::map<intptr_t, int32_t> mMemoryIdToSlotCache;  // key -> slot
+    std::map<int32_t, hidl_memory> mSlotToMemoryCache;  // slot -> memory
+};
+
+/**
+ * NN runtime burst object: launches an execution over FMQ and retrieves its result.
+ *
+ * TODO: provide high-level description of class
+ */
+class ExecutionBurstController {
+    DISALLOW_IMPLICIT_CONSTRUCTORS(ExecutionBurstController);
+
+   public:
+    ExecutionBurstController(std::unique_ptr<FmqRequestChannel> fmqRequestChannel,
+                             std::unique_ptr<FmqResultChannel> fmqResultChannel,
+                             const sp<IBurstContext>& burstContext,
+                             const sp<IPreparedModel>& preparedModel,
+                             const sp<ExecutionBurstCallback>& callback, bool blocking);
+
+    /** Execute a request on a model.
+     * @param request Arguments to be executed on a model.
+     * @param measure Timing-measurement setting for this execution.
+     * @param memoryIds Keys paired with the request's memories (NOTE(review): confirm).
+     * @return status, output shapes, and timing of the execution.
+     */
+    std::tuple<ErrorStatus, std::vector<OutputShape>, Timing> compute(
+            const Request& request, MeasureTiming measure, const std::vector<intptr_t>& memoryIds);
+
+    /**
+     * Propagate a user's freeing of memory to the service.
+     *
+     * @param key Key corresponding to the memory object.
+     */
+    void freeMemory(intptr_t key);
+
+   private:
+    std::vector<FmqResultDatum> getPacketBlocking();  // result side: yields result data
+    bool sendPacket(const std::vector<FmqRequestDatum>& packet);  // request side
+    std::vector<FmqRequestDatum> serialize(const Request& request, MeasureTiming measure,
+                                           const std::vector<intptr_t>& memoryIds);
+    std::tuple<ErrorStatus, std::vector<OutputShape>, Timing> deserialize(
+            const std::vector<FmqResultDatum>& data);
+
+    const std::unique_ptr<FmqRequestChannel> mFmqRequestChannel;  // owned request queue
+    const std::unique_ptr<FmqResultChannel> mFmqResultChannel;  // owned result queue
+    const sp<IBurstContext> mBurstContext;
+    const sp<IPreparedModel> mPreparedModel;
+    const sp<ExecutionBurstCallback> mMemoryCache;  // key/slot memory cache
+    const bool mUsesFutex;  // NOTE(review): presumably set from ctor arg `blocking` -- confirm
+};
+
+/**
+ * Creates a burst controller on a prepared model.
+ *
+ * @param preparedModel Model prepared for execution to execute on.
+ * @param blocking 'true' if the FMQ should block until data is available.
+ * @return Execution burst controller object. NOTE(review): presumably null on failure -- confirm.
+ */
+std::unique_ptr<ExecutionBurstController> createExecutionBurstController(
+        const sp<IPreparedModel>& preparedModel, bool blocking);
+
+}  // namespace nn
+}  // namespace android
+
+#endif  // ANDROID_ML_NN_RUNTIME_EXECUTION_BURST_CONTROLLER_H
diff --git a/nn/common/include/ExecutionBurstServer.h b/nn/common/include/ExecutionBurstServer.h
new file mode 100644
index 0000000..13dfaaf
--- /dev/null
+++ b/nn/common/include/ExecutionBurstServer.h
@@ -0,0 +1,120 @@
+/*
+ * Copyright (C) 2019 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ANDROID_ML_NN_COMMON_EXECUTION_BURST_SERVER_H
+#define ANDROID_ML_NN_COMMON_EXECUTION_BURST_SERVER_H
+
+#include <android-base/macros.h>
+#include <fmq/MessageQueue.h>
+#include <hidl/MQDescriptor.h>
+#include <atomic>
+#include <future>
+#include <map>
+#include <set>
+#include "HalInterfaces.h"
+
+namespace android {
+namespace nn {
+
+using ::android::hardware::kSynchronizedReadWrite;  // FMQ flavor used by both channel aliases
+using ::android::hardware::MessageQueue;
+using ::android::hardware::MQDescriptorSync;  // same aliases as ExecutionBurstController.h
+using FmqRequestChannel = MessageQueue<FmqRequestDatum, kSynchronizedReadWrite>;
+using FmqResultChannel = MessageQueue<FmqResultDatum, kSynchronizedReadWrite>;
+using FmqRequestDescriptor = MQDescriptorSync<FmqRequestDatum>;  // describes a request queue
+using FmqResultDescriptor = MQDescriptorSync<FmqResultDatum>;  // describes a result queue
+
+/** Server-side cache from slot number to hidl_memory. NOTE(review): presumably asks the
+ *  IBurstCallback for slots it has not seen yet -- confirm against the definition. */
+class BurstMemoryCache {
+    DISALLOW_IMPLICIT_CONSTRUCTORS(BurstMemoryCache);
+
+   public:
+    explicit BurstMemoryCache(const sp<IBurstCallback>& callback);  // no implicit conversion
+
+    hidl_vec<hidl_memory> getMemories(const std::vector<int32_t>& slots);  // one slot per entry
+    void freeMemory(int32_t slot);
+
+   private:
+    std::mutex mMutex;  // presumably guards mSlotToMemoryCache -- TODO confirm
+    const sp<IBurstCallback> mCallback;  // callback supplied at construction
+    std::map<int32_t, hidl_memory> mSlotToMemoryCache;  // slot -> memory
+};
+
+/**
+ * NN server burst object: receives requests over FMQ, executes them, and returns the results.
+ */
+class ExecutionBurstServer : public IBurstContext {
+    DISALLOW_IMPLICIT_CONSTRUCTORS(ExecutionBurstServer);
+
+   public:
+    ExecutionBurstServer(const sp<IBurstCallback>& callback,
+                         std::unique_ptr<FmqRequestChannel> requestChannel,
+                         std::unique_ptr<FmqResultChannel> resultChannel,
+                         IPreparedModel* preparedModel);
+    ~ExecutionBurstServer();  // NOTE(review): presumably stops mWorker via mTeardown -- confirm
+
+    Return<void> freeMemory(int32_t slot) override;  // IBurstContext method
+
+   private:
+    bool sendPacket(const std::vector<FmqResultDatum>& packet);  // result side
+    std::vector<FmqRequestDatum> getPacketBlocking();  // request side: yields request data
+    std::vector<FmqResultDatum> serialize(ErrorStatus errorStatus,
+                                          const std::vector<OutputShape>& outputShapes,
+                                          Timing timing);
+    std::pair<Request, MeasureTiming> deserialize(const std::vector<FmqRequestDatum>& data);
+    void task();  // NOTE(review): presumably the worker loop run via mWorker -- confirm
+
+    BurstMemoryCache mMemoryCache;  // slot -> hidl_memory cache
+    std::atomic<bool> mTeardown{false};  // NOTE(review): presumably asks task() to exit -- confirm
+    std::future<void> mWorker;
+    const std::unique_ptr<FmqRequestChannel> mFmqRequestChannel;  // owned request queue
+    const std::unique_ptr<FmqResultChannel> mFmqResultChannel;  // owned result queue
+    IPreparedModel* mPreparedModel;  // raw pointer; NOTE(review): presumably non-owning -- confirm
+    const bool mBlocking;
+};
+
+/**
+ * Create automated context to manage FMQ-based executions.
+ *
+ * This function is intended to be used by a service to automatically:
+ * 1) Receive data from a provided FMQ
+ * 2) Execute a model with the given information
+ * 3) Send the result to the created FMQ
+ *
+ * @param callback Callback used to retrieve memories corresponding to
+ *                 unrecognized slots.
+ * @param requestChannel Input FMQ channel through which the client passes the
+ *                       request to the service.
+ * @param resultChannel Output FMQ channel from which the client can retrieve
+ *                      the result of the execution.
+ * @param preparedModel PreparedModel that the burst object was created from.
+ *                      This will be used to synchronously perform the
+ *                      execution.
+ * @return IBurstContext Handle to the burst context, or nullptr on failure.
+ */
+::android::sp<::android::hardware::neuralnetworks::V1_2::IBurstContext> createBurstContext(
+        const sp<::android::hardware::neuralnetworks::V1_2::IBurstCallback>& callback,
+        const ::android::hardware::MQDescriptorSync<
+                ::android::hardware::neuralnetworks::V1_2::FmqRequestDatum>& requestChannel,
+        const ::android::hardware::MQDescriptorSync<
+                ::android::hardware::neuralnetworks::V1_2::FmqResultDatum>& resultChannel,
+        ::android::hardware::neuralnetworks::V1_2::IPreparedModel* preparedModel);
+
+}  // namespace nn
+}  // namespace android
+
+#endif  // ANDROID_ML_NN_COMMON_EXECUTION_BURST_SERVER_H
diff --git a/nn/common/include/HalInterfaces.h b/nn/common/include/HalInterfaces.h
index 0cf6e53..240ed8c 100644
--- a/nn/common/include/HalInterfaces.h
+++ b/nn/common/include/HalInterfaces.h
@@ -33,6 +33,7 @@
 #include <android/hidl/memory/1.0/IMemory.h>
 #include <hidlmemory/mapping.h>
 
+using ::android::sp;
 using ::android::hardware::hidl_memory;
 using ::android::hardware::hidl_string;
 using ::android::hardware::hidl_vec;
@@ -49,6 +50,10 @@
 using ::android::hardware::neuralnetworks::V1_1::Capabilities;
 using ::android::hardware::neuralnetworks::V1_1::ExecutionPreference;
 using ::android::hardware::neuralnetworks::V1_2::DeviceType;
+using ::android::hardware::neuralnetworks::V1_2::FmqRequestDatum;
+using ::android::hardware::neuralnetworks::V1_2::FmqResultDatum;
+using ::android::hardware::neuralnetworks::V1_2::IBurstCallback;
+using ::android::hardware::neuralnetworks::V1_2::IBurstContext;
 using ::android::hardware::neuralnetworks::V1_2::IDevice;
 using ::android::hardware::neuralnetworks::V1_2::IExecutionCallback;
 using ::android::hardware::neuralnetworks::V1_2::IPreparedModel;
diff --git a/nn/driver/sample/SampleDriver.cpp b/nn/driver/sample/SampleDriver.cpp
index 9baf439..3192ed4 100644
--- a/nn/driver/sample/SampleDriver.cpp
+++ b/nn/driver/sample/SampleDriver.cpp
@@ -19,6 +19,7 @@
 #include "SampleDriver.h"
 
 #include "CpuExecutor.h"
+#include "ExecutionBurstServer.h"
 #include "HalInterfaces.h"
 #include "Tracing.h"
 #include "ValidateHal.h"
@@ -308,15 +309,22 @@
 }
 
 Return<void> SamplePreparedModel::configureExecutionBurst(
-        const sp<V1_2::IBurstCallback>& /*callback*/,
-        const MQDescriptorSync<V1_2::FmqRequestDatum>& /*requestChannel*/,
-        const MQDescriptorSync<V1_2::FmqResultDatum>& /*resultChannel*/,
+        const sp<V1_2::IBurstCallback>& callback,
+        const MQDescriptorSync<V1_2::FmqRequestDatum>& requestChannel,
+        const MQDescriptorSync<V1_2::FmqResultDatum>& resultChannel,
         configureExecutionBurst_cb cb) {
     NNTRACE_FULL(NNTRACE_LAYER_DRIVER, NNTRACE_PHASE_EXECUTION,
                  "SampleDriver::configureExecutionBurst");
 
-    // TODO in subsequent CL
-    cb(ErrorStatus::NONE, nullptr);
+    const sp<V1_2::IBurstContext> burst =
+            createBurstContext(callback, requestChannel, resultChannel, this);
+    // NOTE(review): `this` is handed over as a raw IPreparedModel* -- confirm lifetime
+    if (burst == nullptr) {  // creation failed: report the error with a null context
+        cb(ErrorStatus::GENERAL_FAILURE, {});
+    } else {  // success: pass the new burst context to the caller
+        cb(ErrorStatus::NONE, burst);
+    }
+
     return Void();
 }
 
diff --git a/nn/runtime/test/TestExecution.cpp b/nn/runtime/test/TestExecution.cpp
index 3ee7e33..8d93efd 100644
--- a/nn/runtime/test/TestExecution.cpp
+++ b/nn/runtime/test/TestExecution.cpp
@@ -111,7 +111,7 @@
             return mPreparedModelV1_2->configureExecutionBurst(callback, requestChannel,
                                                                resultChannel, cb);
         } else {
-            cb(ErrorStatus::DEVICE_UNAVAILABLE, nullptr);
+            cb(mErrorStatus, nullptr);
             return Void();
         }
     }