Support message CRCs in the transport protocol.

This adds CRC-16 checksums for the call_application() function. The
command from the master includes and additional packet containing the
CRC which the slave can verify. The status from the slave after an app
has processed the command included the CRC of the reply data and the
status message itself.

The protocol is backward compatible by detecting the legacy protocol
from the status response and ignoring any CRCs.

In the event of a CRC failure when reading the status or response data,
the read operation is retried 3 times. In the event of a CRC failure of
the request, the transaction is restarted a maximum of 3 times.

Bug: 66104849
Change-Id: Id8c8e1ce0425af66a3a41e59cf7bb3843a4bf740
diff --git a/libnos_transport/Android.bp b/libnos_transport/Android.bp
index 9e4703e..bb8dd4f 100644
--- a/libnos_transport/Android.bp
+++ b/libnos_transport/Android.bp
@@ -18,6 +18,7 @@
     name: "libnos_transport",
     srcs: [
         "transport.c",
+        "crc16.c",
     ],
     defaults: ["nos_cc_host_supported_defaults"],
     cflags: [
diff --git a/libnos_transport/BUILD b/libnos_transport/BUILD
index 79e253d..f101166 100644
--- a/libnos_transport/BUILD
+++ b/libnos_transport/BUILD
@@ -2,6 +2,8 @@
     name = "libnos_transport",
     srcs = [
         "transport.c",
+        "crc16.c",
+        "crc16.h",
     ],
     hdrs = [
         "include/nos/transport.h",
diff --git a/libnos_transport/crc16.c b/libnos_transport/crc16.c
new file mode 100644
index 0000000..a6c3c5a
--- /dev/null
+++ b/libnos_transport/crc16.c
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "crc16.h"
+
+static const uint16_t crc16_table[256] = {
+  0x0000, 0xC0C1, 0xC181, 0x0140, 0xC301, 0x03C0, 0x0280, 0xC241,
+  0xC601, 0x06C0, 0x0780, 0xC741, 0x0500, 0xC5C1, 0xC481, 0x0440,
+  0xCC01, 0x0CC0, 0x0D80, 0xCD41, 0x0F00, 0xCFC1, 0xCE81, 0x0E40,
+  0x0A00, 0xCAC1, 0xCB81, 0x0B40, 0xC901, 0x09C0, 0x0880, 0xC841,
+  0xD801, 0x18C0, 0x1980, 0xD941, 0x1B00, 0xDBC1, 0xDA81, 0x1A40,
+  0x1E00, 0xDEC1, 0xDF81, 0x1F40, 0xDD01, 0x1DC0, 0x1C80, 0xDC41,
+  0x1400, 0xD4C1, 0xD581, 0x1540, 0xD701, 0x17C0, 0x1680, 0xD641,
+  0xD201, 0x12C0, 0x1380, 0xD341, 0x1100, 0xD1C1, 0xD081, 0x1040,
+  0xF001, 0x30C0, 0x3180, 0xF141, 0x3300, 0xF3C1, 0xF281, 0x3240,
+  0x3600, 0xF6C1, 0xF781, 0x3740, 0xF501, 0x35C0, 0x3480, 0xF441,
+  0x3C00, 0xFCC1, 0xFD81, 0x3D40, 0xFF01, 0x3FC0, 0x3E80, 0xFE41,
+  0xFA01, 0x3AC0, 0x3B80, 0xFB41, 0x3900, 0xF9C1, 0xF881, 0x3840,
+  0x2800, 0xE8C1, 0xE981, 0x2940, 0xEB01, 0x2BC0, 0x2A80, 0xEA41,
+  0xEE01, 0x2EC0, 0x2F80, 0xEF41, 0x2D00, 0xEDC1, 0xEC81, 0x2C40,
+  0xE401, 0x24C0, 0x2580, 0xE541, 0x2700, 0xE7C1, 0xE681, 0x2640,
+  0x2200, 0xE2C1, 0xE381, 0x2340, 0xE101, 0x21C0, 0x2080, 0xE041,
+  0xA001, 0x60C0, 0x6180, 0xA141, 0x6300, 0xA3C1, 0xA281, 0x6240,
+  0x6600, 0xA6C1, 0xA781, 0x6740, 0xA501, 0x65C0, 0x6480, 0xA441,
+  0x6C00, 0xACC1, 0xAD81, 0x6D40, 0xAF01, 0x6FC0, 0x6E80, 0xAE41,
+  0xAA01, 0x6AC0, 0x6B80, 0xAB41, 0x6900, 0xA9C1, 0xA881, 0x6840,
+  0x7800, 0xB8C1, 0xB981, 0x7940, 0xBB01, 0x7BC0, 0x7A80, 0xBA41,
+  0xBE01, 0x7EC0, 0x7F80, 0xBF41, 0x7D00, 0xBDC1, 0xBC81, 0x7C40,
+  0xB401, 0x74C0, 0x7580, 0xB541, 0x7700, 0xB7C1, 0xB681, 0x7640,
+  0x7200, 0xB2C1, 0xB381, 0x7340, 0xB101, 0x71C0, 0x7080, 0xB041,
+  0x5000, 0x90C1, 0x9181, 0x5140, 0x9301, 0x53C0, 0x5280, 0x9241,
+  0x9601, 0x56C0, 0x5780, 0x9741, 0x5500, 0x95C1, 0x9481, 0x5440,
+  0x9C01, 0x5CC0, 0x5D80, 0x9D41, 0x5F00, 0x9FC1, 0x9E81, 0x5E40,
+  0x5A00, 0x9AC1, 0x9B81, 0x5B40, 0x9901, 0x59C0, 0x5880, 0x9841,
+  0x8801, 0x48C0, 0x4980, 0x8941, 0x4B00, 0x8BC1, 0x8A81, 0x4A40,
+  0x4E00, 0x8EC1, 0x8F81, 0x4F40, 0x8D01, 0x4DC0, 0x4C80, 0x8C41,
+  0x4400, 0x84C1, 0x8581, 0x4540, 0x8701, 0x47C0, 0x4680, 0x8641,
+  0x8201, 0x42C0, 0x4380, 0x8341, 0x4100, 0x81C1, 0x8081, 0x4040
+};
+
+uint16_t crc16_update(const void *buf, uint32_t len, uint16_t crc) {
+  uint32_t i;
+  const uint8_t *bytes = buf;
+  for (i = 0; i < len; ++i) {
+    crc = crc16_table[((crc >> 8) ^ *bytes++) & 0xFF] ^ (crc << 8);
+  }
+  return crc;
+}
+
+uint16_t crc16(const void *buf, uint32_t len) {
+  return crc16_update(buf, len, 0);
+}
diff --git a/libnos_transport/crc16.h b/libnos_transport/crc16.h
new file mode 100644
index 0000000..5b9838b
--- /dev/null
+++ b/libnos_transport/crc16.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef NOS_TRANSPORT_CRC16_H
+#define NOS_TRANSPORT_CRC16_H
+
+#include <stdint.h>
+
+/**
+ * Calculate the CRC-16 of the data.
+ *
+ * This uses the CRC-16-IBM variant: 0x8005 (x^16 + x^15 + x^2 + 1)
+ */
+uint16_t crc16(const void *buf, uint32_t len);
+
+/**
+ * Calculates the CRC-16 of the data to extend the given crc.
+ *
+ * CRC is a linear function so crc16( x | y ) == crc16(x, crc16(y)).
+ *
+ * This uses the CRC-16-IBM variant: 0x8005 (x^16 + x^15 + x^2 + 1)
+ */
+uint16_t crc16_update(const void *buf, uint32_t len, uint16_t crc);
+
+#endif /* NOS_TRANSPORT_CRC16_H */
diff --git a/libnos_transport/transport.c b/libnos_transport/transport.c
index 6f09be0..289b1a6 100644
--- a/libnos_transport/transport.c
+++ b/libnos_transport/transport.c
@@ -18,6 +18,7 @@
 
 #include <errno.h>
 #include <stdarg.h>
+#include <stdbool.h>
 #include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
@@ -25,6 +26,8 @@
 
 #include <application.h>
 
+#include "crc16.h"
+
 /* Note: evaluates expressions multiple times */
 #define MIN(a, b) (((a) < (b)) ? (a) : (b))
 
@@ -60,8 +63,24 @@
 #define RETRY_COUNT 25
 #define RETRY_WAIT_TIME_US 5000
 
+/* In case of CRC error, try to retransmit */
+#define CRC_RETRY_COUNT 3
+
+struct transport_context {
+  const struct nos_device *dev;
+  uint8_t app_id;
+  uint16_t params;
+  const uint8_t *args;
+  uint32_t arg_len;
+  uint8_t *reply;
+  uint32_t *reply_len;
+};
+
+/*
+ * Read a datagram from the device, correctly handling retries.
+ */
 static int nos_device_read(const struct nos_device *dev, uint32_t command,
-                           uint8_t *buf, uint32_t len) {
+                           void *buf, uint32_t len) {
   int retries = RETRY_COUNT;
   while (retries--) {
     int err = dev->ops.read(dev->ctx, command, buf, len);
@@ -83,8 +102,11 @@
   return ETIMEDOUT;
 }
 
+/*
+ * Write a datagram to the device, correctly handling retries.
+ */
 static int nos_device_write(const struct nos_device *dev, uint32_t command,
-                            uint8_t *buf, uint32_t len) {
+                            const void *buf, uint32_t len) {
   int retries = RETRY_COUNT;
   while (retries--) {
     int err = dev->ops.write(dev->ctx, command, buf, len);
@@ -106,174 +128,316 @@
   return ETIMEDOUT;
 }
 
-/* status is non-zero on error */
-static int get_status(const struct nos_device *dev,
-                      uint8_t app_id, uint32_t *status, uint16_t *ulen)
-{
-  uint8_t buf[6];
-  uint32_t command = CMD_ID(app_id) | CMD_IS_READ | CMD_TRANSPORT;
+#define GET_STATUS_OK              0
+#define GET_STATUS_ERROR_IO       -1
+#define GET_STATUS_ERROR_PROTOCOL -2
 
-  if (0 != nos_device_read(dev, command, buf, sizeof(buf))) {
-    NLOGE("Failed to read device status");
-    return -1;
+/*
+ * Get the status regardless of protocol version. This means some fields might
+ * not be set meaningfully so the caller must check the version before accessing
+ * them.
+ *
+ * Returns non-zero on error.
+ */
+static int get_status(const struct transport_context *ctx,
+                      struct transport_status *out) {
+  int retries = CRC_RETRY_COUNT;
+  while (retries--) {
+    /* Get the status from the device */
+    union {
+      struct transport_legacy_status legacy;
+      struct transport_status current;
+    } status;
+    const uint32_t command = CMD_ID(ctx->app_id) | CMD_IS_READ | CMD_TRANSPORT;
+    if (0 != nos_device_read(ctx->dev, command, &status, sizeof(status))) {
+      NLOGE("Failed to read device status");
+      return GET_STATUS_ERROR_IO;
+    }
+
+    /*
+     * Identify the legacy protocol. This could have been caused by a bit error
+     * but which would result in the wrong status and reply_len being used. If
+     * it is the legacy protocol, transform it to V1.
+     */
+    const bool is_legacy = (status.current.magic != TRANSPORT_STATUS_MAGIC);
+    /* TODO: deprecate the legacy protocol when it is safe to do so */
+    if (is_legacy) {
+      out->version = TRANSPORT_LEGACY;
+      out->status = status.legacy.status;
+      out->reply_len = status.legacy.reply_len;
+      return GET_STATUS_OK;
+    }
+
+    /* Check the CRC, if it fails we will retry */
+    const uint16_t their_crc = status.current.crc;
+    status.current.crc = 0;
+    const uint16_t our_crc = crc16(&status, sizeof(status));
+    if (their_crc != our_crc) {
+      NLOGE("Status CRC mismatch: theirs=%04x ours=%04x", their_crc, our_crc);
+      continue;
+    }
+
+    /* Check this is a version we recognise */
+    if (status.current.version != TRANSPORT_V1) {
+      NLOGE("Don't recognise transport version: %d", status.current.version);
+      return GET_STATUS_ERROR_PROTOCOL;
+    }
+
+    /* It all looks good */
+    *out = status.current;
+    return GET_STATUS_OK;
   }
 
-  /* The read operation is successful */
-  *status = *(uint32_t *)buf;
-  *ulen = *(uint16_t *)(buf + 4);
-  return 0;
+  NLOGE("Unable to get valid checksum on status");
+  return GET_STATUS_ERROR_PROTOCOL;
 }
 
-static int clear_status(const struct nos_device *dev, uint8_t app_id)
-{
-  uint32_t command = CMD_ID(app_id) | CMD_TRANSPORT;
-
-  if (0 != nos_device_write(dev, command, 0, 0)) {
+static int clear_status(const struct transport_context *ctx) {
+  const uint32_t command = CMD_ID(ctx->app_id) | CMD_TRANSPORT;
+  if (nos_device_write(ctx->dev, command, 0, 0) != 0) {
     NLOGE("Failed to clear device status");
     return -1;
   }
-
   return 0;
 }
 
-uint32_t nos_call_application(const struct nos_device *dev,
-                              uint8_t app_id, uint16_t params,
-                              const uint8_t *args, uint32_t arg_len,
-                              uint8_t *reply, uint32_t *reply_len)
-{
-  uint32_t command;
-  uint8_t buf[MAX_DEVICE_TRANSFER];
-  uint32_t status;
-  uint16_t ulen;
-  uint32_t poll_count = 0;
+/*
+ * Ensure that the app is in an idle state ready to handle the transaction.
+ */
+static uint32_t make_ready(const struct transport_context *ctx) {
+  struct transport_status status;
+  int ret = get_status(ctx, &status);
 
-  if (get_status(dev, app_id, &status, &ulen) != 0) {
+  if (ret == GET_STATUS_OK) {
+    NLOGD("Inspection status=0x%08x reply_len=%d protocol=%s",
+          status.status, status.reply_len,
+          status.version == TRANSPORT_LEGACY ? "legacy" : "current");
+    /* If it's already idle then we're ready to proceed */
+    if (status.status == APP_STATUS_IDLE) {
+      return APP_SUCCESS;
+    }
+    /* Continue to try again after clearing state */
+  } else if (ret != GET_STATUS_ERROR_PROTOCOL) {
+    NLOGE("Failed to inspect device");
     return APP_ERROR_IO;
   }
-  NLOGV("%d: query status 0x%08x  ulen 0x%04x", __LINE__, status, ulen);
 
-  /* It's not idle, but we're the only ones telling it what to do, so it
-   * should be. */
-  if (status != APP_STATUS_IDLE) {
-
-    /* Try clearing the status */
-    NLOGV("clearing previous status");
-    if (clear_status(dev, app_id) != 0) {
-      return APP_ERROR_IO;
-    }
-
-    /* Check again */
-    if (get_status(dev, app_id, &status, &ulen) != 0) {
-      return APP_ERROR_IO;
-    }
-    NLOGV("%d: query status 0x%08x  ulen 0x%04x",__LINE__, status, ulen);
-
-    /* It's ignoring us and is still not ready, so it's broken */
-    if (status != APP_STATUS_IDLE) {
-      NLOGE("Device is not responding");
-      return APP_ERROR_IO;
-    }
+  /* Try clearing the status */
+  NLOGD("Clearing previous status");
+  if (clear_status(ctx) != 0) {
+    NLOGD("Failed to force idle status");
+    return APP_ERROR_IO;
   }
 
-  /* Send args data */
-  command = CMD_ID(app_id) | CMD_TRANSPORT | CMD_IS_DATA;
+  /* Check again */
+  if (get_status(ctx, &status) != GET_STATUS_OK) {
+    NLOGE("Failed to get cleared status");
+    return APP_ERROR_IO;
+  }
+  NLOGD("Cleared status=0x%08x reply_len=%d", status.status, status.reply_len);
+
+  /* It's ignoring us and is still not ready, so it's broken */
+  if (status.status != APP_STATUS_IDLE) {
+    NLOGE("Device is not responding");
+    return APP_ERROR_IO;
+  }
+
+  return APP_SUCCESS;
+}
+
+/*
+ * Split request into datagrams and send command to have app process it.
+ */
+static uint32_t send_command(const struct transport_context *ctx) {
+  const uint8_t *args = ctx->args;
+  uint16_t arg_len = ctx->arg_len;
+
+  NLOGV("Send command data (%d bytes)", arg_len);
+  uint32_t command = CMD_ID(ctx->app_id) | CMD_IS_DATA | CMD_TRANSPORT;
+  /* TODO: don't need to send empty packet in non-legacy protocol */
   do {
     /*
-     * We can't send more than the device can take. For
-     * Citadel using the TPM Wait protocol on SPS, this is
-     * a constant. For other buses, it may not be.
-     *
-     * For each Write, Citadel requires that we send the length of
-     * what we're about to send in the params field.
+     * We can't send more per datagram than the device can accept. For Citadel
+     * using the TPM Wait protocol on SPS, this is a constant. For other buses
+     * it may not be, but this is what we support here. Due to peculiarities of
+     * Citadel's SPS hardware, our protocol requires that we specify the length
+     * of what we're about to send in the params field of each Write.
      */
-    ulen = MIN(arg_len, MAX_DEVICE_TRANSFER);
+    const uint16_t ulen = MIN(arg_len, MAX_DEVICE_TRANSFER);
     CMD_SET_PARAM(command, ulen);
-    if (args && ulen)
-      memcpy(buf, args, ulen);
 
-    NLOGV("Write command 0x%08x, bytes 0x%x", command, ulen);
-
-    if (0 != nos_device_write(dev, command, buf, ulen)) {
+    NLOGD("Write command 0x%08x, bytes %d", command, ulen);
+    if (nos_device_write(ctx->dev, command, args, ulen) != 0) {
       NLOGE("Failed to send datagram to device");
       return APP_ERROR_IO;
     }
 
-    /* Additional data needs the additional flag set */
+    /* Any further Writes needed to send all the args must set the MORE bit */
     command |= CMD_MORE_TO_COME;
-
-    if (args)
-      args += ulen;
-    if (arg_len)
-      arg_len -= ulen;
+    args += ulen;
+    arg_len -= ulen;
   } while (arg_len);
 
-  /* See if we had any errors while sending the args */
-  if (get_status(dev, app_id, &status, &ulen) != 0) {
+  /* Finally, send the "go" command */
+  command = CMD_ID(ctx->app_id) | CMD_PARAM(ctx->params);
+
+  /*
+   * The outgoing crc covers:
+   *
+   *   1. the (16-bit) length of args
+   *   2. the args buffer (if any)
+   *   3. the (16-bit) reply_len_hint
+   *   4. the (32-bit) "go" command
+   */
+  struct transport_command_info command_info = {
+    .version = TRANSPORT_V1,
+    .reply_len_hint = *ctx->reply_len,
+  };
+  arg_len = ctx->arg_len;
+  command_info.crc = crc16(&arg_len, sizeof(arg_len));
+  command_info.crc = crc16_update(ctx->args, ctx->arg_len, command_info.crc);
+  command_info.crc = crc16_update(&command_info.reply_len_hint,
+                                  sizeof(command_info.reply_len_hint),
+                                  command_info.crc);
+  command_info.crc = crc16_update(&command, sizeof(command), command_info.crc);
+
+  /* Tell the app to handle the request while also sending the command_info
+   * which will be ignored by the legacy protocol. */
+  NLOGD("Write command 0x%08x, crc %04x...", command, command_info.crc);
+  if (0 != nos_device_write(ctx->dev, command, &command_info, sizeof(command_info))) {
+    NLOGE("Failed to send command datagram to device");
     return APP_ERROR_IO;
   }
-  NLOGV("%d: query status 0x%08x  ulen 0x%04x", __LINE__, status, ulen);
-  if (status & APP_STATUS_DONE)
-    /* Yep, problems. It should still be idle. */
-    goto reply;
 
-  /* Now tell the app to do whatever */
-  command = CMD_ID(app_id) | CMD_PARAM(params);
-  NLOGV("Write command 0x%08x", command);
-  if (0 != nos_device_write(dev, command, 0, 0)) {
-      NLOGE("Failed to send command datagram to device");
-      return APP_ERROR_IO;
-  }
+  return APP_SUCCESS;
+}
 
-  /* Poll the app status until it's done */
+/*
+ * Keep polling until the app says it is done.
+ */
+uint32_t poll_until_done(const struct transport_context *ctx,
+                         struct transport_status *status) {
+  uint32_t poll_count = 0;
+  NLOGV("Poll the app status until it's done");
   do {
-    if (get_status(dev, app_id, &status, &ulen) != 0) {
+    if (get_status(ctx, status) != GET_STATUS_OK) {
       return APP_ERROR_IO;
     }
-    NLOGD("%d:  poll status 0x%08x  ulen 0x%04x", __LINE__, status, ulen);
     poll_count++;
-  } while (!(status & APP_STATUS_DONE));
-  NLOGV("polled %d times, status 0x%08x  ulen 0x%04x", poll_count,
-        status, ulen);
+    NLOGD("%d:  poll=%d status=0x%08x reply_len=%d", __LINE__,
+          poll_count, status->status, status->reply_len);
+  } while (!(status->status & APP_STATUS_DONE));
 
-reply:
-  /* Read any result only if there's a place with room to put it */
-  if (reply && reply_len && *reply_len) {
-    uint16_t left = MIN(*reply_len, ulen);
-    uint16_t gimme, got;
+  NLOGV("status=0x%08x reply_len=%d...", status->status, status->reply_len);
+  return APP_STATUS_CODE(status->status);
+}
 
-    command = CMD_ID(app_id) | CMD_IS_READ |
-      CMD_TRANSPORT | CMD_IS_DATA;
+/*
+ * Reconstruct the reply data from datagram stream.
+ */
+uint32_t receive_reply(const struct transport_context *ctx,
+                       const struct transport_status *status) {
+  int retries = CRC_RETRY_COUNT;
+  while (retries--) {
+    NLOGV("Read the reply data (%d bytes)", status->reply_len);
 
-    got = 0 ;
+    uint32_t command = CMD_ID(ctx->app_id) | CMD_IS_READ | CMD_TRANSPORT | CMD_IS_DATA;
+    uint8_t *reply = ctx->reply;
+    uint16_t left = MIN(*ctx->reply_len, status->reply_len);
+    uint16_t got = 0;
+    uint16_t crc = 0;
     while (left) {
-
-      /*
-       * We can't read more than the device can send. For
-       * Citadel using the TPM Wait protocol on SPS, this is
-       * a constant. For other buses, it may not be.
-       */
-      gimme = MIN(left, MAX_DEVICE_TRANSFER);
-      NLOGV("Read command 0x%08x, bytes 0x%x", command, gimme);
-      if (0 != nos_device_read(dev, command, buf, gimme)) {
+      /* We can't read more per datagram than the device can send */
+      const uint16_t gimme = MIN(left, MAX_DEVICE_TRANSFER);
+      NLOGD("Read command=0x%08x, bytes=%d", command, gimme);
+      if (nos_device_read(ctx->dev, command, reply, gimme) != 0) {
         NLOGE("Failed to receive datagram from device");
         return APP_ERROR_IO;
       }
 
-      memcpy(reply, buf, gimme);
+      /* Any further Reads should set the MORE bit. This only works if Nugget
+       * OS sends back CRCs, but that's the only time we'd retry anyway. */
+      command |= CMD_MORE_TO_COME;
+
+      crc = crc16_update(reply, gimme, crc);
       reply += gimme;
       left -= gimme;
       got += gimme;
     }
     /* got it all */
-    *reply_len = got;
+    *ctx->reply_len = got;
+
+    /* Legacy protocol doesn't support CRC so hopefully it's ok */
+    if (status->version == TRANSPORT_LEGACY) return APP_SUCCESS;
+
+    if (crc == status->reply_crc) return APP_SUCCESS;
+    NLOGE("Reply CRC mismatch: theirs=%04x ours=%04x", status->reply_crc, crc);
   }
 
-  /* Clear the reply manually for the next caller */
-  command = CMD_ID(app_id) | CMD_TRANSPORT;
-  if (0 != nos_device_write(dev, command, 0, 0)) {
-    NLOGE("Failed to clear the reply");
+  NLOGE("Unable to get valid checksum on reply data");
+  return APP_ERROR_IO;
+}
+
+/*
+ * Driver for the master of the transport protocol.
+ */
+uint32_t nos_call_application(const struct nos_device *dev,
+                              uint8_t app_id, uint16_t params,
+                              const uint8_t *args, uint32_t arg_len,
+                              uint8_t *reply, uint32_t *reply_len)
+{
+  uint32_t res;
+  const struct transport_context ctx = {
+    .dev = dev,
+    .app_id = app_id,
+    .params = params,
+    .args = args,
+    .arg_len = arg_len,
+    .reply = reply,
+    .reply_len = reply_len,
+  };
+
+  if ((ctx.arg_len && !ctx.args) ||
+      (!ctx.reply_len) ||
+      (*ctx.reply_len && !ctx.reply)) {
+    NLOGE("Invalid args to %s()", __func__);
     return APP_ERROR_IO;
   }
 
-  return APP_STATUS_CODE(status);
+  NLOGV("Calling app %d with params 0x%04x", app_id, params);
+
+  struct transport_status status;
+  int retries = CRC_RETRY_COUNT;
+  do {
+    /* Wake up and wait for Citadel to be ready */
+    res = make_ready(&ctx);
+    if (res) return res;
+
+    /* Tell the app what to do */
+    res = send_command(&ctx);
+    if (res) return res;
+
+    /* Wait until the app has finished */
+    res = poll_until_done(&ctx, &status);
+    if (res == APP_SUCCESS) break;
+    if (res != APP_ERROR_CHECKSUM) return res;
+    NLOGD("Request checksum error: %d", retries);
+  } while (--retries);
+  if (retries == 0) return APP_ERROR_IO;
+
+  /* Get the reply, but only if the app produced data and the caller wants it */
+  if (ctx.reply && ctx.reply_len && *ctx.reply_len && status.reply_len) {
+    res = receive_reply(&ctx, &status);
+    if (res) return res;
+  } else {
+    *reply_len = 0;
+  }
+
+  NLOGV("Clear the reply manually for the next caller");
+  /* This should work, but isn't completely fatal if it doesn't because the
+   * next call will try again. */
+  (void)clear_status(&ctx);
+
+  NLOGV("%s returning 0x%x", __func__, APP_STATUS_CODE(status.status));
+  return APP_STATUS_CODE(status.status);
 }
diff --git a/nugget/include/application.h b/nugget/include/application.h
index 45824b3..d36ca99 100644
--- a/nugget/include/application.h
+++ b/nugget/include/application.h
@@ -199,20 +199,70 @@
  * then performs the requested operation and transititions to a "done" state.
  * The Master will retrieve the application status and any reply data from
  * Nugget OS, after which the application is ready to handle the next command.
- *
+ */
+
+/*
+ * The master can detect whether the slave uses the legacy protocol by
+ * inspecting the status response which will be exactly 6-bytes. The new
+ * protocol has a larger status and the magic value will be incorrect if
+ * it is a legacy status.
+ */
+
+#define TRANSPORT_LEGACY    0x00000000
+#define TRANSPORT_V1        0x00000001
+
+/* Command information for the transport protocol. */
+struct transport_command_info {
+  uint32_t version;          /* version of the protocol */
+  /* Version specific fields may follow */
+  uint16_t reply_len_hint;   /* max that the master will read */
+  uint16_t crc;              /* CRC of some command field */
+} __packed;
+
+/* Status response from the legacy transport protocol. It can be detected by
+ * its length as newer version of the protocol adds more information. */
+struct transport_legacy_status {
+  uint32_t status;         /* status of the app */
+  uint16_t reply_len;      /* length of available response data */
+} __packed;
+
+/* The magic straddles the end of the legacy status so the last 2 bytes cannot
+ * equal 0xDFDF as that is what will be returned when there is no data. */
+#define TRANSPORT_STATUS_MAGIC 0x157A7051
+
+struct transport_status {
+  uint32_t version;        /* version of the protocol */
+  uint32_t magic;          /* differentiate from the legacy protocol */
+  /* Version specific fields may follow */
+  uint32_t status;         /* status of the app */
+  uint16_t reply_len;      /* length of available response data */
+  uint16_t reply_crc;      /* CRC of the reply data */
+  uint16_t crc;            /* CRC of this struct with crc set to 0 */
+} __packed;
+
+/* Pre-calculated CRCs for different status responses set by in the interrupt
+ * context where the CRC would otherwise not be calculated. */
+#define STATUS_CRC_FOR_IDLE              0xd780
+#define STATUS_CRC_FOR_ERROR_IO          0x5d80
+#define STATUS_CRC_FOR_ERROR_TOO_MUCH    0x5680
+
+/*
  * Applications that wish to use this transport API will need to declare a
  * private struct app_transport which Nugget OS can use to maintain the state:
  */
-
 struct app_transport {
-  uint32_t command;                           /* from master */
-  volatile uint32_t status;                   /* current application status */
-  uint8_t *request, *response;                /* input/output data buffer */
-  uint16_t max_request_len, max_response_len; /* data buffer sizes */
-  uint16_t request_len, response_len;         /* current buffer count */
-  uint16_t request_idx, response_idx;         /* used internally */
   void (*done_fn)(struct app_transport *);    /* optional cleanup function */
   /* Note: Any done_fn() is called in interrupt context. Be quick. */
+  uint8_t *request, *response;                /* input/output data buffer */
+  uint16_t max_request_len, max_response_len; /* data buffer sizes */
+  /* The following are used for the incoming command. */
+  uint32_t command;                           /* from master */
+  struct transport_command_info command_info; /* info about the command */
+  uint16_t request_len;                       /* command data buffer size */
+  uint16_t request_idx;                       /* current index into request */
+  uint16_t response_idx;                      /* current index into response */
+  struct transport_status status[2];          /* current transport_status */
+  volatile uint8_t status_idx;                /* index of active status */
 };
 
 /*
@@ -224,18 +274,24 @@
  */
 #define __TRANSPORT_ALIGNED__ __attribute__((aligned(8)))
 
-/* For debugging if needed */
-extern void dump_transport_state(const struct app_transport *s);
-
 /*
  * The application will need to provide a write_to_app_fn_t function that will
  * be invoked when a new request is ready to be processed. All command and data
  * parameters will already be present in the app's struct app_transport, so it
  * just needs to awaken the application task to do the work.
  *
+ * When awakened, the application task must check that there were no errors in
+ * the transmission of the request by calling this function. If it returns
+ * true, the task should go back to sleep until the next request arrives.
+ */
+int request_is_invalid(struct app_transport *s);
+/*
  * When processing is finished, the app should call the app_reply() function to
- * return its status code and specify length of any data it has placed into the
- * response buffer, and then it can go back to sleep until its next invocation.
+ * return its status code and specify the length of any data it has placed into
+ * the response buffer, and then it can go back to sleep until its next
+ * invocation. CAUTION: The Master polls for app completion independently, so
+ * it may immediately begin retrieving the results as soon as this function
+ * is called *without* waiting for the Nugget OS app to go to sleep.
  */
 void app_reply(struct app_transport *st, uint32_t status, uint16_t reply_len);
 
@@ -248,6 +304,7 @@
   APP_ERROR_TOO_MUCH,        /* caller sent too much data */
   APP_ERROR_IO,              /* problem sending or receiving data */
   APP_ERROR_RPC,             /* problem during RPC communication */
+  APP_ERROR_CHECKSUM,        /* checksum failed, only used within protocol */
   /* more? */
 
   APP_SPECIFIC_ERROR = 0x20, /* "should be enough for anybody" */
@@ -314,6 +371,7 @@
 
 /* Command flags used internally by Transport API messages */
 #define CMD_TRANSPORT       0x40000000    /* 1=Transport API message */
+/* When CMD_TRANSPORT is set, the following bits have meaning */
 #define CMD_IS_DATA         0x20000000    /* 1=data msg 0=status msg */
 #define CMD_MORE_TO_COME    0x10000000    /* 1=continued 0=new */