external/boringssl: Sync to 7c5728649affe20e2952b11a0aeaf0e7b114aad9.

This includes the following changes:

https://boringssl.googlesource.com/boringssl/+log/68f37b7a3f451aa1ca8c93669c024d01f6270ae8..7c5728649affe20e2952b11a0aeaf0e7b114aad9

This also removes sha256-armv4.S from libcrypto_sources_no_clang, since clang
can assemble it now. The other files in that list still need to stay there.

Note that this pulls in a fix for a wpa_supplicant regression introduced in
c895d6b1c580258e72e1ed3fcc86d38970ded9e1.

Test: make checkbuild
Test: cts-tradefed run cts -m CtsLibcoreTestCases -a arm64-v8a
Test: cts-tradefed run cts -m CtsLibcoreOkHttpTestCases -a arm64-v8a

Change-Id: Ife1d9ea1c87a0b7b1814b8e3590d6f1eaf721629
diff --git a/src/crypto/bio/bio_test.cc b/src/crypto/bio/bio_test.cc
index 01b93f6..4ae6c6e 100644
--- a/src/crypto/bio/bio_test.cc
+++ b/src/crypto/bio/bio_test.cc
@@ -135,152 +135,6 @@
   return true;
 }
 
-
-// BioReadZeroCopyWrapper is a wrapper around the zero-copy APIs to make
-// testing easier.
-static size_t BioReadZeroCopyWrapper(BIO *bio, uint8_t *data, size_t len) {
-  uint8_t *read_buf;
-  size_t read_buf_offset;
-  size_t available_bytes;
-  size_t len_read = 0;
-
-  do {
-    if (!BIO_zero_copy_get_read_buf(bio, &read_buf, &read_buf_offset,
-                                    &available_bytes)) {
-      return 0;
-    }
-
-    available_bytes = std::min(available_bytes, len - len_read);
-    memmove(data + len_read, read_buf + read_buf_offset, available_bytes);
-
-    BIO_zero_copy_get_read_buf_done(bio, available_bytes);
-
-    len_read += available_bytes;
-  } while (len - len_read > 0 && available_bytes > 0);
-
-  return len_read;
-}
-
-// BioWriteZeroCopyWrapper is a wrapper around the zero-copy APIs to make
-// testing easier.
-static size_t BioWriteZeroCopyWrapper(BIO *bio, const uint8_t *data,
-                                      size_t len) {
-  uint8_t *write_buf;
-  size_t write_buf_offset;
-  size_t available_bytes;
-  size_t len_written = 0;
-
-  do {
-    if (!BIO_zero_copy_get_write_buf(bio, &write_buf, &write_buf_offset,
-                                     &available_bytes)) {
-      return 0;
-    }
-
-    available_bytes = std::min(available_bytes, len - len_written);
-    memmove(write_buf + write_buf_offset, data + len_written, available_bytes);
-
-    BIO_zero_copy_get_write_buf_done(bio, available_bytes);
-
-    len_written += available_bytes;
-  } while (len - len_written > 0 && available_bytes > 0);
-
-  return len_written;
-}
-
-static bool TestZeroCopyBioPairs() {
-  // Test read and write, especially triggering the ring buffer wrap-around.
-  uint8_t bio1_application_send_buffer[1024];
-  uint8_t bio2_application_recv_buffer[1024];
-
-  const size_t kLengths[] = {254, 255, 256, 257, 510, 511, 512, 513};
-
-  // These trigger ring buffer wrap around.
-  const size_t kPartialLengths[] = {0, 1, 2, 3, 128, 255, 256, 257, 511, 512};
-
-  static const size_t kBufferSize = 512;
-
-  srand(1);
-  for (size_t i = 0; i < sizeof(bio1_application_send_buffer); i++) {
-    bio1_application_send_buffer[i] = rand() & 255;
-  }
-
-  // Transfer bytes from bio1_application_send_buffer to
-  // bio2_application_recv_buffer in various ways.
-  for (size_t i = 0; i < OPENSSL_ARRAY_SIZE(kLengths); i++) {
-    for (size_t j = 0; j < OPENSSL_ARRAY_SIZE(kPartialLengths); j++) {
-      size_t total_write = 0;
-      size_t total_read = 0;
-
-      BIO *bio1, *bio2;
-      if (!BIO_new_bio_pair(&bio1, kBufferSize, &bio2, kBufferSize)) {
-        return false;
-      }
-      bssl::UniquePtr<BIO> bio1_scoper(bio1);
-      bssl::UniquePtr<BIO> bio2_scoper(bio2);
-
-      total_write += BioWriteZeroCopyWrapper(
-          bio1, bio1_application_send_buffer, kLengths[i]);
-
-      // This tests interleaved read/write calls. Do a read between zero copy
-      // write calls.
-      uint8_t *write_buf;
-      size_t write_buf_offset;
-      size_t available_bytes;
-      if (!BIO_zero_copy_get_write_buf(bio1, &write_buf, &write_buf_offset,
-                                       &available_bytes)) {
-        return false;
-      }
-
-      // Free kPartialLengths[j] bytes in the beginning of bio1 write buffer.
-      // This enables ring buffer wrap around for the next write.
-      total_read += BIO_read(bio2, bio2_application_recv_buffer + total_read,
-                             kPartialLengths[j]);
-
-      size_t interleaved_write_len = std::min(kPartialLengths[j],
-                                              available_bytes);
-
-      // Write the data for the interleaved write call. If the buffer becomes
-      // empty after a read, the write offset is normally set to 0. Check that
-      // this does not happen for interleaved read/write and that
-      // |write_buf_offset| is still valid.
-      memcpy(write_buf + write_buf_offset,
-             bio1_application_send_buffer + total_write, interleaved_write_len);
-      if (BIO_zero_copy_get_write_buf_done(bio1, interleaved_write_len)) {
-        total_write += interleaved_write_len;
-      }
-
-      // Do another write in case |write_buf_offset| was wrapped.
-      total_write += BioWriteZeroCopyWrapper(
-          bio1, bio1_application_send_buffer + total_write,
-          kPartialLengths[j] - interleaved_write_len);
-
-      // Drain the rest.
-      size_t bytes_left = BIO_pending(bio2);
-      total_read += BioReadZeroCopyWrapper(
-          bio2, bio2_application_recv_buffer + total_read, bytes_left);
-
-      if (total_read != total_write) {
-        fprintf(stderr, "Lengths not equal in round (%u, %u)\n", (unsigned)i,
-                (unsigned)j);
-        return false;
-      }
-      if (total_read > kLengths[i] + kPartialLengths[j]) {
-        fprintf(stderr, "Bad lengths in round (%u, %u)\n", (unsigned)i,
-                (unsigned)j);
-        return false;
-      }
-      if (memcmp(bio1_application_send_buffer, bio2_application_recv_buffer,
-                 total_read) != 0) {
-        fprintf(stderr, "Buffers not equal in round (%u, %u)\n", (unsigned)i,
-                (unsigned)j);
-        return false;
-      }
-    }
-  }
-
-  return true;
-}
-
 static bool TestPrintf() {
   // Test a short output, a very long one, and various sizes around
   // 256 (the size of the buffer) to ensure edge cases are correct.
@@ -409,7 +263,137 @@
   return true;
 }
 
-int main(void) {
+static bool TestPair() {
+  // Run through the tests twice, swapping |bio1| and |bio2|, for symmetry.
+  for (int i = 0; i < 2; i++) {
+    BIO *bio1, *bio2;
+    if (!BIO_new_bio_pair(&bio1, 10, &bio2, 10)) {
+      return false;
+    }
+    bssl::UniquePtr<BIO> free_bio1(bio1), free_bio2(bio2);
+
+    if (i == 1) {
+      std::swap(bio1, bio2);
+    }
+
+    // Check initial states.
+    if (BIO_ctrl_get_write_guarantee(bio1) != 10 ||
+        BIO_ctrl_get_read_request(bio1) != 0) {
+      return false;
+    }
+
+    // Data written in one end may be read out the other.
+    char buf[20];
+    if (BIO_write(bio1, "12345", 5) != 5 ||
+        BIO_ctrl_get_write_guarantee(bio1) != 5 ||
+        BIO_read(bio2, buf, sizeof(buf)) != 5 ||
+        memcmp(buf, "12345", 5) != 0 ||
+        BIO_ctrl_get_write_guarantee(bio1) != 10) {
+      return false;
+    }
+
+    // Attempting to write more than 10 bytes will write partially.
+    if (BIO_write(bio1, "1234567890___", 13) != 10 ||
+        BIO_ctrl_get_write_guarantee(bio1) != 0 ||
+        BIO_write(bio1, "z", 1) != -1 ||
+        !BIO_should_write(bio1) ||
+        BIO_read(bio2, buf, sizeof(buf)) != 10 ||
+        memcmp(buf, "1234567890", 10) != 0 ||
+        BIO_ctrl_get_write_guarantee(bio1) != 10) {
+      return false;
+    }
+
+    // Unsuccessful reads update the read request.
+    if (BIO_read(bio2, buf, 5) != -1 ||
+        !BIO_should_read(bio2) ||
+        BIO_ctrl_get_read_request(bio1) != 5) {
+      return false;
+    }
+
+    // The read request is clamped to the size of the buffer.
+    if (BIO_read(bio2, buf, 20) != -1 ||
+        !BIO_should_read(bio2) ||
+        BIO_ctrl_get_read_request(bio1) != 10) {
+      return false;
+    }
+
+    // Data may be written and read in chunks.
+    if (BIO_write(bio1, "12345", 5) != 5 ||
+        BIO_ctrl_get_write_guarantee(bio1) != 5 ||
+        BIO_write(bio1, "67890___", 8) != 5 ||
+        BIO_ctrl_get_write_guarantee(bio1) != 0 ||
+        BIO_read(bio2, buf, 3) != 3 ||
+        memcmp(buf, "123", 3) != 0 ||
+        BIO_ctrl_get_write_guarantee(bio1) != 3 ||
+        BIO_read(bio2, buf, sizeof(buf)) != 7 ||
+        memcmp(buf, "4567890", 7) != 0 ||
+        BIO_ctrl_get_write_guarantee(bio1) != 10) {
+      return false;
+    }
+
+    // Successful reads reset the read request.
+    if (BIO_ctrl_get_read_request(bio1) != 0) {
+      return false;
+    }
+
+    // Test writes and reads starting in the middle of the ring buffer and
+    // wrapping to front.
+    if (BIO_write(bio1, "abcdefgh", 8) != 8 ||
+        BIO_ctrl_get_write_guarantee(bio1) != 2 ||
+        BIO_read(bio2, buf, 3) != 3 ||
+        memcmp(buf, "abc", 3) != 0 ||
+        BIO_ctrl_get_write_guarantee(bio1) != 5 ||
+        BIO_write(bio1, "ijklm___", 8) != 5 ||
+        BIO_ctrl_get_write_guarantee(bio1) != 0 ||
+        BIO_read(bio2, buf, sizeof(buf)) != 10 ||
+        memcmp(buf, "defghijklm", 10) != 0 ||
+        BIO_ctrl_get_write_guarantee(bio1) != 10) {
+      return false;
+    }
+
+    // Data may flow from both ends in parallel.
+    if (BIO_write(bio1, "12345", 5) != 5 ||
+        BIO_write(bio2, "67890", 5) != 5 ||
+        BIO_read(bio2, buf, sizeof(buf)) != 5 ||
+        memcmp(buf, "12345", 5) != 0 ||
+        BIO_read(bio1, buf, sizeof(buf)) != 5 ||
+        memcmp(buf, "67890", 5) != 0) {
+      return false;
+    }
+
+    // Closing the write end causes an EOF on the read half, after draining.
+    if (BIO_write(bio1, "12345", 5) != 5 ||
+        !BIO_shutdown_wr(bio1) ||
+        BIO_read(bio2, buf, sizeof(buf)) != 5 ||
+        memcmp(buf, "12345", 5) != 0 ||
+        BIO_read(bio2, buf, sizeof(buf)) != 0) {
+      return false;
+    }
+
+    // A closed write end may not be written to.
+    if (BIO_ctrl_get_write_guarantee(bio1) != 0 ||
+        BIO_write(bio1, "_____", 5) != -1) {
+      return false;
+    }
+
+    uint32_t err = ERR_get_error();
+    if (ERR_GET_LIB(err) != ERR_LIB_BIO ||
+        ERR_GET_REASON(err) != BIO_R_BROKEN_PIPE) {
+      return false;
+    }
+
+    // The other end is still functional.
+    if (BIO_write(bio2, "12345", 5) != 5 ||
+        BIO_read(bio1, buf, sizeof(buf)) != 5 ||
+        memcmp(buf, "12345", 5) != 0) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+int main() {
   CRYPTO_library_init();
 
 #if defined(OPENSSL_WINDOWS)
@@ -429,8 +413,8 @@
 
   if (!TestSocketConnect() ||
       !TestPrintf() ||
-      !TestZeroCopyBioPairs() ||
-      !TestASN1()) {
+      !TestASN1() ||
+      !TestPair()) {
     return 1;
   }
 
diff --git a/src/crypto/bio/pair.c b/src/crypto/bio/pair.c
index 2da2d20..df36343 100644
--- a/src/crypto/bio/pair.c
+++ b/src/crypto/bio/pair.c
@@ -72,12 +72,6 @@
   size_t offset; /* valid iff buf != NULL; 0 if len == 0 */
   size_t size;
   uint8_t *buf; /* "size" elements (if != NULL) */
-  char buf_externally_allocated; /* true iff buf was externally allocated. */
-
-  char zero_copy_read_lock;  /* true iff a zero copy read operation
-                              * is in progress. */
-  char zero_copy_write_lock; /* true iff a zero copy write operation
-                              * is in progress. */
 
   size_t request; /* valid iff peer != NULL; 0 if len != 0,
                    * otherwise set by peer to number of bytes
@@ -145,263 +139,12 @@
     bio_destroy_pair(bio);
   }
 
-  if (!b->buf_externally_allocated) {
-    OPENSSL_free(b->buf);
-  }
-
+  OPENSSL_free(b->buf);
   OPENSSL_free(b);
 
   return 1;
 }
 
-static size_t bio_zero_copy_get_read_buf(struct bio_bio_st* peer_b,
-                                         uint8_t** out_read_buf,
-                                         size_t* out_buf_offset) {
-  size_t max_available;
-  if (peer_b->len > peer_b->size - peer_b->offset) {
-    /* Only the first half of the ring buffer can be read. */
-    max_available = peer_b->size - peer_b->offset;
-  } else {
-    max_available = peer_b->len;
-  }
-
-  *out_read_buf = peer_b->buf;
-  *out_buf_offset = peer_b->offset;
-  return max_available;
-}
-
-int BIO_zero_copy_get_read_buf(BIO* bio, uint8_t** out_read_buf,
-                               size_t* out_buf_offset,
-                               size_t* out_available_bytes) {
-  struct bio_bio_st* b;
-  struct bio_bio_st* peer_b;
-  size_t max_available;
-  *out_available_bytes = 0;
-
-  BIO_clear_retry_flags(bio);
-
-  if (!bio->init) {
-    OPENSSL_PUT_ERROR(BIO, BIO_R_UNINITIALIZED);
-    return 0;
-  }
-
-  b = bio->ptr;
-
-  if (!b || !b->peer) {
-    OPENSSL_PUT_ERROR(BIO, BIO_R_UNSUPPORTED_METHOD);
-    return 0;
-  }
-
-  peer_b = b->peer->ptr;
-  if (!peer_b || !peer_b->peer || peer_b->peer->ptr != b) {
-    OPENSSL_PUT_ERROR(BIO, BIO_R_UNSUPPORTED_METHOD);
-    return 0;
-  }
-
-  if (peer_b->zero_copy_read_lock) {
-    OPENSSL_PUT_ERROR(BIO, BIO_R_INVALID_ARGUMENT);
-    return 0;
-  }
-
-  peer_b->request = 0;  /* Is not used by zero-copy API. */
-
-  max_available =
-      bio_zero_copy_get_read_buf(peer_b, out_read_buf, out_buf_offset);
-
-  assert(peer_b->buf != NULL);
-  if (max_available > 0) {
-    peer_b->zero_copy_read_lock = 1;
-  }
-
-  *out_available_bytes = max_available;
-  return 1;
-}
-
-int BIO_zero_copy_get_read_buf_done(BIO* bio, size_t bytes_read) {
-  struct bio_bio_st* b;
-  struct bio_bio_st* peer_b;
-  size_t max_available;
-  size_t dummy_read_offset;
-  uint8_t* dummy_read_buf;
-
-  assert(BIO_get_retry_flags(bio) == 0);
-
-  if (!bio->init) {
-    OPENSSL_PUT_ERROR(BIO, BIO_R_UNINITIALIZED);
-    return 0;
-  }
-
-  b = bio->ptr;
-
-  if (!b || !b->peer) {
-    OPENSSL_PUT_ERROR(BIO, BIO_R_UNSUPPORTED_METHOD);
-    return 0;
-  }
-
-  peer_b = b->peer->ptr;
-  if (!peer_b || !peer_b->peer || peer_b->peer->ptr != b) {
-    OPENSSL_PUT_ERROR(BIO, BIO_R_UNSUPPORTED_METHOD);
-    return 0;
-  }
-
-  if (!peer_b->zero_copy_read_lock) {
-    OPENSSL_PUT_ERROR(BIO, BIO_R_INVALID_ARGUMENT);
-    return 0;
-  }
-
-  max_available =
-      bio_zero_copy_get_read_buf(peer_b, &dummy_read_buf, &dummy_read_offset);
-  if (bytes_read > max_available) {
-    OPENSSL_PUT_ERROR(BIO, BIO_R_INVALID_ARGUMENT);
-    return 0;
-  }
-
-  assert(peer_b->len >= bytes_read);
-  peer_b->len -= bytes_read;
-  assert(peer_b->offset + bytes_read <= peer_b->size);
-
-  /* Move read offset. If zero_copy_write_lock == 1 we must advance the
-   * offset even if buffer becomes empty, to make sure
-   * write_offset = (offset + len) mod size does not change. */
-  if (peer_b->offset + bytes_read == peer_b->size ||
-      (!peer_b->zero_copy_write_lock && peer_b->len == 0)) {
-    peer_b->offset = 0;
-  } else {
-    peer_b->offset += bytes_read;
-  }
-
-  bio->num_read += bytes_read;
-  peer_b->zero_copy_read_lock = 0;
-  return 1;
-}
-
-static size_t bio_zero_copy_get_write_buf(struct bio_bio_st* b,
-                                          uint8_t** out_write_buf,
-                                          size_t* out_buf_offset) {
-  size_t write_offset;
-  size_t max_available;
-
-  assert(b->len <= b->size);
-
-  write_offset = b->offset + b->len;
-
-  if (write_offset >= b->size) {
-    /* Only the first half of the ring buffer can be written to. */
-    write_offset -= b->size;
-    /* write up to the start of the ring buffer. */
-    max_available = b->offset - write_offset;
-  } else {
-    /* write up to the end the buffer. */
-    max_available = b->size - write_offset;
-  }
-
-  *out_write_buf = b->buf;
-  *out_buf_offset = write_offset;
-  return max_available;
-}
-
-int BIO_zero_copy_get_write_buf(BIO* bio, uint8_t** out_write_buf,
-                                size_t* out_buf_offset,
-                                size_t* out_available_bytes) {
-  struct bio_bio_st* b;
-  struct bio_bio_st* peer_b;
-  size_t max_available;
-
-  *out_available_bytes = 0;
-  BIO_clear_retry_flags(bio);
-
-  if (!bio->init) {
-    OPENSSL_PUT_ERROR(BIO, BIO_R_UNINITIALIZED);
-    return 0;
-  }
-
-  b = bio->ptr;
-
-  if (!b || !b->buf || !b->peer) {
-    OPENSSL_PUT_ERROR(BIO, BIO_R_UNSUPPORTED_METHOD);
-    return 0;
-  }
-  peer_b = b->peer->ptr;
-  if (!peer_b || !peer_b->peer || peer_b->peer->ptr != b) {
-    OPENSSL_PUT_ERROR(BIO, BIO_R_UNSUPPORTED_METHOD);
-    return 0;
-  }
-
-  assert(b->buf != NULL);
-
-  if (b->zero_copy_write_lock) {
-    OPENSSL_PUT_ERROR(BIO, BIO_R_INVALID_ARGUMENT);
-    return 0;
-  }
-
-  b->request = 0;
-  if (b->closed) {
-    /* Bio is already closed. */
-    OPENSSL_PUT_ERROR(BIO, BIO_R_BROKEN_PIPE);
-    return 0;
-  }
-
-  max_available = bio_zero_copy_get_write_buf(b, out_write_buf, out_buf_offset);
-
-  if (max_available > 0) {
-    b->zero_copy_write_lock = 1;
-  }
-
-  *out_available_bytes = max_available;
-  return 1;
-}
-
-int BIO_zero_copy_get_write_buf_done(BIO* bio, size_t bytes_written) {
-  struct bio_bio_st* b;
-  struct bio_bio_st* peer_b;
-
-  size_t rest;
-  size_t dummy_write_offset;
-  uint8_t* dummy_write_buf;
-
-  if (!bio->init) {
-    OPENSSL_PUT_ERROR(BIO, BIO_R_UNINITIALIZED);
-    return 0;
-  }
-
-  b = bio->ptr;
-
-  if (!b || !b->buf || !b->peer) {
-    OPENSSL_PUT_ERROR(BIO, BIO_R_UNSUPPORTED_METHOD);
-    return 0;
-  }
-  peer_b = b->peer->ptr;
-  if (!peer_b || !peer_b->peer || peer_b->peer->ptr != b) {
-    OPENSSL_PUT_ERROR(BIO, BIO_R_UNSUPPORTED_METHOD);
-    return 0;
-  }
-
-  b->request = 0;
-  if (b->closed) {
-    /* BIO is already closed. */
-    OPENSSL_PUT_ERROR(BIO, BIO_R_BROKEN_PIPE);
-    return 0;
-  }
-
-  if (!b->zero_copy_write_lock) {
-    OPENSSL_PUT_ERROR(BIO, BIO_R_INVALID_ARGUMENT);
-    return 0;
-  }
-
-  rest = bio_zero_copy_get_write_buf(b, &dummy_write_buf, &dummy_write_offset);
-
-  if (bytes_written > rest) {
-    OPENSSL_PUT_ERROR(BIO, BIO_R_INVALID_ARGUMENT);
-    return 0;
-  }
-
-  bio->num_write += bytes_written;
-  /* Move write offset. */
-  b->len += bytes_written;
-  b->zero_copy_write_lock = 0;
-  return 1;
-}
-
 static int bio_read(BIO *bio, char *buf, int size_) {
   size_t size = size_;
   size_t rest;
@@ -422,7 +165,7 @@
 
   peer_b->request = 0; /* will be set in "retry_read" situation */
 
-  if (buf == NULL || size == 0 || peer_b->zero_copy_read_lock) {
+  if (buf == NULL || size == 0) {
     return 0;
   }
 
@@ -467,10 +210,7 @@
     memcpy(buf, peer_b->buf + peer_b->offset, chunk);
 
     peer_b->len -= chunk;
-    /* If zero_copy_write_lock == 1 we must advance the offset even if buffer
-     * becomes empty, to make sure write_offset = (offset + len) % size
-     * does not change. */
-    if (peer_b->len || peer_b->zero_copy_write_lock) {
+    if (peer_b->len) {
       peer_b->offset += chunk;
       assert(peer_b->offset <= peer_b->size);
       if (peer_b->offset == peer_b->size) {
@@ -504,10 +244,6 @@
   assert(b->peer != NULL);
   assert(b->buf != NULL);
 
-  if (b->zero_copy_write_lock) {
-    return 0;
-  }
-
   b->request = 0;
   if (b->closed) {
     /* we already closed */
@@ -564,9 +300,8 @@
   return num;
 }
 
-static int bio_make_pair(BIO* bio1, BIO* bio2,
-                         size_t writebuf1_len, uint8_t* ext_writebuf1,
-                         size_t writebuf2_len, uint8_t* ext_writebuf2) {
+static int bio_make_pair(BIO *bio1, BIO *bio2, size_t writebuf1_len,
+                         size_t writebuf2_len) {
   struct bio_bio_st *b1, *b2;
 
   assert(bio1 != NULL);
@@ -580,23 +315,14 @@
     return 0;
   }
 
-  assert(b1->buf_externally_allocated == 0);
-  assert(b2->buf_externally_allocated == 0);
-
   if (b1->buf == NULL) {
     if (writebuf1_len) {
       b1->size = writebuf1_len;
     }
-    if (!ext_writebuf1) {
-      b1->buf_externally_allocated = 0;
-      b1->buf = OPENSSL_malloc(b1->size);
-      if (b1->buf == NULL) {
-        OPENSSL_PUT_ERROR(BIO, ERR_R_MALLOC_FAILURE);
-        return 0;
-      }
-    } else {
-      b1->buf = ext_writebuf1;
-      b1->buf_externally_allocated = 1;
+    b1->buf = OPENSSL_malloc(b1->size);
+    if (b1->buf == NULL) {
+      OPENSSL_PUT_ERROR(BIO, ERR_R_MALLOC_FAILURE);
+      return 0;
     }
     b1->len = 0;
     b1->offset = 0;
@@ -606,16 +332,10 @@
     if (writebuf2_len) {
       b2->size = writebuf2_len;
     }
-    if (!ext_writebuf2) {
-      b2->buf_externally_allocated = 0;
-      b2->buf = OPENSSL_malloc(b2->size);
-      if (b2->buf == NULL) {
-        OPENSSL_PUT_ERROR(BIO, ERR_R_MALLOC_FAILURE);
-        return 0;
-      }
-    } else {
-      b2->buf = ext_writebuf2;
-      b2->buf_externally_allocated = 1;
+    b2->buf = OPENSSL_malloc(b2->size);
+    if (b2->buf == NULL) {
+      OPENSSL_PUT_ERROR(BIO, ERR_R_MALLOC_FAILURE);
+      return 0;
     }
     b2->len = 0;
     b2->offset = 0;
@@ -624,13 +344,9 @@
   b1->peer = bio2;
   b1->closed = 0;
   b1->request = 0;
-  b1->zero_copy_read_lock = 0;
-  b1->zero_copy_write_lock = 0;
   b2->peer = bio1;
   b2->closed = 0;
   b2->request = 0;
-  b2->zero_copy_read_lock = 0;
-  b2->zero_copy_write_lock = 0;
 
   bio1->init = 1;
   bio2->init = 1;
@@ -744,50 +460,22 @@
 
 static const BIO_METHOD *bio_s_bio(void) { return &methods_biop; }
 
-int BIO_new_bio_pair(BIO** bio1_p, size_t writebuf1,
-                     BIO** bio2_p, size_t writebuf2) {
-  return BIO_new_bio_pair_external_buf(bio1_p, writebuf1, NULL, bio2_p,
-                                       writebuf2, NULL);
-}
-
-int BIO_new_bio_pair_external_buf(BIO** bio1_p, size_t writebuf1_len,
-                                  uint8_t* ext_writebuf1,
-                                  BIO** bio2_p, size_t writebuf2_len,
-                                  uint8_t* ext_writebuf2) {
-  BIO *bio1 = NULL, *bio2 = NULL;
-  int ret = 0;
-
-  /* External buffers must have sizes greater than 0. */
-  if ((ext_writebuf1 && !writebuf1_len) || (ext_writebuf2 && !writebuf2_len)) {
-    goto err;
-  }
-
-  bio1 = BIO_new(bio_s_bio());
-  if (bio1 == NULL) {
-    goto err;
-  }
-  bio2 = BIO_new(bio_s_bio());
-  if (bio2 == NULL) {
-    goto err;
-  }
-
-  if (!bio_make_pair(bio1, bio2, writebuf1_len, ext_writebuf1, writebuf2_len,
-                     ext_writebuf2)) {
-    goto err;
-  }
-  ret = 1;
-
-err:
-  if (ret == 0) {
+int BIO_new_bio_pair(BIO** bio1_p, size_t writebuf1_len,
+                     BIO** bio2_p, size_t writebuf2_len) {
+  BIO *bio1 = BIO_new(bio_s_bio());
+  BIO *bio2 = BIO_new(bio_s_bio());
+  if (bio1 == NULL || bio2 == NULL ||
+      !bio_make_pair(bio1, bio2, writebuf1_len, writebuf2_len)) {
     BIO_free(bio1);
-    bio1 = NULL;
     BIO_free(bio2);
-    bio2 = NULL;
+    *bio1_p = NULL;
+    *bio2_p = NULL;
+    return 0;
   }
 
   *bio1_p = bio1;
   *bio2_p = bio2;
-  return ret;
+  return 1;
 }
 
 size_t BIO_ctrl_get_read_request(BIO *bio) {
diff --git a/src/crypto/bn/bn_test.cc b/src/crypto/bn/bn_test.cc
index 672d83f..044af5f 100644
--- a/src/crypto/bn/bn_test.cc
+++ b/src/crypto/bn/bn_test.cc
@@ -568,21 +568,25 @@
   bssl::UniquePtr<BIGNUM> a = GetBIGNUM(t, "A");
   bssl::UniquePtr<BIGNUM> p = GetBIGNUM(t, "P");
   bssl::UniquePtr<BIGNUM> mod_sqrt = GetBIGNUM(t, "ModSqrt");
-  if (!a || !p || !mod_sqrt) {
+  bssl::UniquePtr<BIGNUM> mod_sqrt2(BN_new());
+  if (!a || !p || !mod_sqrt || !mod_sqrt2 ||
+      // There are two possible answers.
+      !BN_sub(mod_sqrt2.get(), p.get(), mod_sqrt.get())) {
     return false;
   }
 
+  // -0 is 0, not P.
+  if (BN_is_zero(mod_sqrt.get())) {
+    BN_zero(mod_sqrt2.get());
+  }
+
   bssl::UniquePtr<BIGNUM> ret(BN_new());
-  bssl::UniquePtr<BIGNUM> ret2(BN_new());
   if (!ret ||
-      !ret2 ||
-      !BN_mod_sqrt(ret.get(), a.get(), p.get(), ctx) ||
-      // There are two possible answers.
-      !BN_sub(ret2.get(), p.get(), ret.get())) {
+      !BN_mod_sqrt(ret.get(), a.get(), p.get(), ctx)) {
     return false;
   }
 
-  if (BN_cmp(ret2.get(), mod_sqrt.get()) != 0 &&
+  if (BN_cmp(ret.get(), mod_sqrt2.get()) != 0 &&
       !ExpectBIGNUMsEqual(t, "sqrt(A) (mod P)", mod_sqrt.get(), ret.get())) {
     return false;
   }
@@ -590,6 +594,29 @@
   return true;
 }
 
+static bool TestNotModSquare(FileTest *t, BN_CTX *ctx) {
+  bssl::UniquePtr<BIGNUM> not_mod_square = GetBIGNUM(t, "NotModSquare");
+  bssl::UniquePtr<BIGNUM> p = GetBIGNUM(t, "P");
+  bssl::UniquePtr<BIGNUM> ret(BN_new());
+  if (!not_mod_square || !p || !ret) {
+    return false;
+  }
+
+  if (BN_mod_sqrt(ret.get(), not_mod_square.get(), p.get(), ctx)) {
+    t->PrintLine("BN_mod_sqrt unexpectedly succeeded.");
+    return false;
+  }
+
+  uint32_t err = ERR_peek_error();
+  if (ERR_GET_LIB(err) == ERR_LIB_BN &&
+      ERR_GET_REASON(err) == BN_R_NOT_A_SQUARE) {
+    ERR_clear_error();
+    return true;
+  }
+
+  return false;
+}
+
 static bool TestModInv(FileTest *t, BN_CTX *ctx) {
   bssl::UniquePtr<BIGNUM> a = GetBIGNUM(t, "A");
   bssl::UniquePtr<BIGNUM> m = GetBIGNUM(t, "M");
@@ -634,6 +661,7 @@
     {"ModExp", TestModExp},
     {"Exp", TestExp},
     {"ModSqrt", TestModSqrt},
+    {"NotModSquare", TestNotModSquare},
     {"ModInv", TestModInv},
 };
 
diff --git a/src/crypto/bn/bn_tests.txt b/src/crypto/bn/bn_tests.txt
index 692a642..ec89b8e 100644
--- a/src/crypto/bn/bn_tests.txt
+++ b/src/crypto/bn/bn_tests.txt
@@ -10737,6 +10737,28 @@
 A = 2eee37cf06228a387788188e650bc6d8a2ff402931443f69156a29155eca07dcb45f3aac238d92943c0c25c896098716baa433f25bd696a142f5a69d5d937e81
 P = 9df9d6cc20b8540411af4e5357ef2b0353cb1f2ab5ffc3e246b41c32f71e951f
 
+
+# NotModSquare tests.
+#
+# These test vectors are such that NotModSquare is not a square modulo P.
+
+NotModSquare = 03
+P = 07
+
+NotModSquare = 05
+P = 07
+
+NotModSquare = 06
+P = 07
+
+NotModSquare = 9df9d6cc20b8540411af4e5357ef2b0353cb1f2ab5ffc3e246b41c32f71e951e
+P = 9df9d6cc20b8540411af4e5357ef2b0353cb1f2ab5ffc3e246b41c32f71e951f
+
+
+# ModInv tests.
+#
+# These test vectors satisfy ModInv * A = 1 (mod M) and 0 <= ModInv < M.
+
 ModInv = 00
 A = 00
 M = 01
diff --git a/src/crypto/bn/sqrt.c b/src/crypto/bn/sqrt.c
index e3a7b9a..fb962a9 100644
--- a/src/crypto/bn/sqrt.c
+++ b/src/crypto/bn/sqrt.c
@@ -456,7 +456,9 @@
   }
 
   /* We estimate that the square root of an n-bit number is 2^{n/2}. */
-  BN_lshift(estimate, BN_value_one(), BN_num_bits(in)/2);
+  if (!BN_lshift(estimate, BN_value_one(), BN_num_bits(in)/2)) {
+    goto err;
+  }
 
   /* This is Newton's method for finding a root of the equation |estimate|^2 -
    * |in| = 0. */
diff --git a/src/crypto/bytestring/bytestring_test.cc b/src/crypto/bytestring/bytestring_test.cc
index 563c6b0..6ce071a 100644
--- a/src/crypto/bytestring/bytestring_test.cc
+++ b/src/crypto/bytestring/bytestring_test.cc
@@ -28,7 +28,6 @@
 #include "internal.h"
 #include "../internal.h"
 
-namespace bssl {
 
 static bool TestSkip() {
   static const uint8_t kData[] = {1, 2, 3};
@@ -317,7 +316,7 @@
 }
 
 static bool TestCBBFixed() {
-  ScopedCBB cbb;
+  bssl::ScopedCBB cbb;
   uint8_t buf[1];
   uint8_t *out_buf;
   size_t out_size;
@@ -401,7 +400,7 @@
 }
 
 static bool TestCBBDiscardChild() {
-  ScopedCBB cbb;
+  bssl::ScopedCBB cbb;
   CBB contents, inner_contents, inner_inner_contents;
 
   if (!CBB_init(cbb.get(), 0) ||
@@ -804,7 +803,7 @@
   uint8_t buf[10];
   uint8_t *ptr;
   size_t len;
-  ScopedCBB cbb;
+  bssl::ScopedCBB cbb;
   if (!CBB_init_fixed(cbb.get(), buf, sizeof(buf)) ||
       // Too large.
       CBB_reserve(cbb.get(), &ptr, 11)) {
@@ -827,7 +826,7 @@
 
 static bool TestStickyError() {
   // Write an input that exceeds the limit for its length prefix.
-  ScopedCBB cbb;
+  bssl::ScopedCBB cbb;
   CBB child;
   static const uint8_t kZeros[256] = {0};
   if (!CBB_init(cbb.get(), 0) ||
@@ -890,7 +889,7 @@
   return true;
 }
 
-static int Main() {
+int main() {
   CRYPTO_library_init();
 
   if (!TestSkip() ||
@@ -918,9 +917,3 @@
   printf("PASS\n");
   return 0;
 }
-
-}  // namespace bssl
-
-int main() {
-  return bssl::Main();
-}
diff --git a/src/crypto/cipher/aead_test.cc b/src/crypto/cipher/aead_test.cc
index b33a36d..313f041 100644
--- a/src/crypto/cipher/aead_test.cc
+++ b/src/crypto/cipher/aead_test.cc
@@ -23,7 +23,15 @@
 
 #include "../test/file_test.h"
 
-namespace bssl {
+
+#if defined(OPENSSL_SMALL)
+const EVP_AEAD* EVP_aead_aes_128_gcm_siv(void) {
+  return nullptr;
+}
+const EVP_AEAD* EVP_aead_aes_256_gcm_siv(void) {
+  return nullptr;
+}
+#endif
 
 // This program tests an AEAD against a series of test vectors from a file,
 // using the FileTest format. As an example, here's a valid test case:
@@ -48,7 +56,7 @@
     return false;
   }
 
-  ScopedEVP_AEAD_CTX ctx;
+  bssl::ScopedEVP_AEAD_CTX ctx;
   if (!EVP_AEAD_CTX_init_with_direction(ctx.get(), aead, key.data(), key.size(),
                                         tag.size(), evp_aead_seal)) {
     t->PrintLine("Failed to init AEAD.");
@@ -198,7 +206,7 @@
   const size_t max_overhead = EVP_AEAD_max_overhead(aead);
 
   std::vector<uint8_t> key(key_len, 'a');
-  ScopedEVP_AEAD_CTX ctx;
+  bssl::ScopedEVP_AEAD_CTX ctx;
   if (!EVP_AEAD_CTX_init(ctx.get(), aead, key.data(), key_len,
                          EVP_AEAD_DEFAULT_TAG_LENGTH, nullptr)) {
     return false;
@@ -302,6 +310,8 @@
 static const struct KnownAEAD kAEADs[] = {
   { "aes-128-gcm", EVP_aead_aes_128_gcm, false },
   { "aes-256-gcm", EVP_aead_aes_256_gcm, false },
+  { "aes-128-gcm-siv", EVP_aead_aes_128_gcm_siv, false },
+  { "aes-256-gcm-siv", EVP_aead_aes_256_gcm_siv, false },
   { "chacha20-poly1305", EVP_aead_chacha20_poly1305, false },
   { "chacha20-poly1305-old", EVP_aead_chacha20_poly1305_old, false },
   { "aes-128-cbc-sha1-tls", EVP_aead_aes_128_cbc_sha1_tls, true },
@@ -321,7 +331,7 @@
   { "", NULL, false },
 };
 
-static int Main(int argc, char **argv) {
+int main(int argc, char **argv) {
   CRYPTO_library_init();
 
   if (argc != 3) {
@@ -342,6 +352,11 @@
   }
 
   const EVP_AEAD *const aead = known_aead->func();
+  if (aead == NULL) {
+    // AEAD is not compiled in this configuration.
+    printf("PASS\n");
+    return 0;
+  }
 
   if (!TestCleanupAfterInitFailure(aead)) {
     return 1;
@@ -354,9 +369,3 @@
 
   return FileTestMain(TestAEAD, const_cast<EVP_AEAD*>(aead), argv[2]);
 }
-
-}  // namespace bssl
-
-int main(int argc, char **argv) {
-  return bssl::Main(argc, argv);
-}
diff --git a/src/crypto/cipher/cipher_test.cc b/src/crypto/cipher/cipher_test.cc
index cb42fc5..09802c2 100644
--- a/src/crypto/cipher/cipher_test.cc
+++ b/src/crypto/cipher/cipher_test.cc
@@ -63,7 +63,6 @@
 
 #include "../test/file_test.h"
 
-namespace bssl {
 
 static const EVP_CIPHER *GetCipher(const std::string &name) {
   if (name == "DES-CBC") {
@@ -127,7 +126,7 @@
 
   bool is_aead = EVP_CIPHER_mode(cipher) == EVP_CIPH_GCM_MODE;
 
-  ScopedEVP_CIPHER_CTX ctx;
+  bssl::ScopedEVP_CIPHER_CTX ctx;
   if (!EVP_CipherInit_ex(ctx.get(), cipher, nullptr, nullptr, nullptr,
                          encrypt ? 1 : 0)) {
     return false;
@@ -284,7 +283,7 @@
   return true;
 }
 
-static int Main(int argc, char **argv) {
+int main(int argc, char **argv) {
   CRYPTO_library_init();
 
   if (argc != 2) {
@@ -294,9 +293,3 @@
 
   return FileTestMain(TestCipher, nullptr, argv[1]);
 }
-
-}  // namespace bssl
-
-int main(int argc, char **argv) {
-  return bssl::Main(argc, argv);
-}
diff --git a/src/crypto/cipher/e_aes.c b/src/crypto/cipher/e_aes.c
index 9225d6a..f99022f 100644
--- a/src/crypto/cipher/e_aes.c
+++ b/src/crypto/cipher/e_aes.c
@@ -1446,6 +1446,305 @@
   return &aead_aes_256_ctr_hmac_sha256;
 }
 
+#if !defined(OPENSSL_SMALL)
+
+#define EVP_AEAD_AES_GCM_SIV_TAG_LEN 16
+
+struct aead_aes_gcm_siv_ctx { /* state stored in |ctx->aead_state| by init */
+  union {
+    double align;
+    AES_KEY ks; /* schedule for the caller-supplied key */
+  } ks;
+  block128_f kgk_block; /* block function matching the schedule in |ks| */
+  unsigned is_256:1;    /* 1 iff init was given a 32-byte key */
+};
+
+static int aead_aes_gcm_siv_init(EVP_AEAD_CTX *ctx, const uint8_t *key,
+                                 size_t key_len, size_t tag_len) {
+  /* Set up the per-context AES-GCM-SIV state in |ctx->aead_state| from |key|.
+   * Returns one on success and zero on a bad key length, an unsupported tag
+   * length or allocation failure. */
+  const size_t bits = key_len * 8;
+  if (bits != 128 && bits != 256) {
+    OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_BAD_KEY_LENGTH);
+    return 0; /* EVP_AEAD_CTX_init should catch this. */
+  }
+
+  /* The default tag length means the full tag; nothing shorter or longer is
+   * supported. */
+  if (tag_len != EVP_AEAD_DEFAULT_TAG_LENGTH &&
+      tag_len != EVP_AEAD_AES_GCM_SIV_TAG_LEN) {
+    OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_TAG_TOO_LARGE);
+    return 0;
+  }
+
+  struct aead_aes_gcm_siv_ctx *state = OPENSSL_malloc(sizeof(*state));
+  if (state == NULL) {
+    return 0;
+  }
+  memset(state, 0, sizeof(*state));
+  AES_KEY *const schedule = &state->ks.ks;
+
+  if (aesni_capable()) {
+    aesni_set_encrypt_key(key, bits, schedule);
+    state->kgk_block = (block128_f)aesni_encrypt;
+  } else if (hwaes_capable()) {
+    aes_hw_set_encrypt_key(key, bits, schedule);
+    state->kgk_block = (block128_f)aes_hw_encrypt;
+  } else if (vpaes_capable()) {
+    vpaes_set_encrypt_key(key, bits, schedule);
+    state->kgk_block = (block128_f)vpaes_encrypt;
+  } else {
+    AES_set_encrypt_key(key, bits, schedule);
+    state->kgk_block = (block128_f)AES_encrypt;
+  }
+
+  state->is_256 = (key_len == 32);
+  ctx->aead_state = state;
+  return 1;
+}
+
+static void aead_aes_gcm_siv_cleanup(EVP_AEAD_CTX *ctx) {
+  struct aead_aes_gcm_siv_ctx *state = ctx->aead_state;
+  OPENSSL_cleanse(state, sizeof(*state)); /* wipe the key schedule first */
+  OPENSSL_free(state);
+}
+
+/* gcm_siv_crypt encrypts (or decrypts—it's the same thing) |in_len| bytes from
+ * |in| to |out|, using the block function |enc_block| with |key| in counter
+ * mode, starting at |initial_counter|. This differs from the traditional
+ * counter mode code in that the counter is handled little-endian, only the
+ * first four bytes are incremented and the GCM-SIV tweak (setting the top bit
+ * of the final byte) is applied. The |in| and |out| pointers may be equal but
+ * otherwise must not alias. */
+static void gcm_siv_crypt(uint8_t *out, const uint8_t *in, size_t in_len,
+                          const uint8_t initial_counter[AES_BLOCK_SIZE],
+                          block128_f enc_block, const AES_KEY *key) {
+  union {
+    uint32_t w[4];
+    uint8_t c[16];
+  } counter;
+  /* The caller's |initial_counter| is const; all changes go to this copy. */
+  memcpy(counter.c, initial_counter, AES_BLOCK_SIZE);
+  counter.c[15] |= 0x80;
+
+  for (size_t done = 0; done < in_len;) {
+    uint8_t keystream[AES_BLOCK_SIZE];
+    enc_block(counter.c, keystream, key);
+    counter.w[0]++; /* NOTE(review): host byte order; assumes LE - confirm */
+    /* The last block may be short: use only |todo| keystream bytes. */
+    size_t todo = AES_BLOCK_SIZE;
+    if (in_len - done < todo) {
+      todo = in_len - done;
+    }
+
+    for (size_t i = 0; i < todo; i++) {
+      out[done + i] = keystream[i] ^ in[done + i];
+    }
+
+    done += todo;
+  }
+}
+
+/* gcm_siv_polyval evaluates POLYVAL at |auth_key| over the AD, then the
+ * plaintext, then a block of their bit lengths. |out_tag| gets the result. */
+static void gcm_siv_polyval(uint8_t out_tag[16], const uint8_t *in,
+                            size_t in_len, const uint8_t *ad, size_t ad_len,
+                            const uint8_t auth_key[16]) {
+  struct polyval_ctx polyval_ctx;
+  CRYPTO_POLYVAL_init(&polyval_ctx, auth_key);
+  /* Absorb the whole 16-byte blocks of the AD directly. */
+  CRYPTO_POLYVAL_update_blocks(&polyval_ctx, ad, ad_len & ~15);
+  /* Any trailing partial block is zero-padded into |scratch| first. */
+  uint8_t scratch[16];
+  if (ad_len & 15) {
+    memset(scratch, 0, sizeof(scratch));
+    memcpy(scratch, &ad[ad_len & ~15], ad_len & 15);
+    CRYPTO_POLYVAL_update_blocks(&polyval_ctx, scratch, sizeof(scratch));
+  }
+  /* The plaintext is absorbed the same way as the AD. */
+  CRYPTO_POLYVAL_update_blocks(&polyval_ctx, in, in_len & ~15);
+  if (in_len & 15) {
+    memset(scratch, 0, sizeof(scratch));
+    memcpy(scratch, &in[in_len & ~15], in_len & 15);
+    CRYPTO_POLYVAL_update_blocks(&polyval_ctx, scratch, sizeof(scratch));
+  }
+  /* The last block holds the AD and plaintext lengths in bits. */
+  union {
+    uint8_t c[16];
+    struct {
+      uint64_t ad;
+      uint64_t in;
+    } bitlens;
+  } length_block;
+  /* NOTE(review): stored in host byte order; little-endian assumed? */
+  length_block.bitlens.ad = ad_len * 8;
+  length_block.bitlens.in = in_len * 8;
+  CRYPTO_POLYVAL_update_blocks(&polyval_ctx, length_block.c,
+                               sizeof(length_block));
+
+  CRYPTO_POLYVAL_finish(&polyval_ctx, out_tag);
+  out_tag[15] &= 0x7f; /* clear the top bit of the last byte */
+}
+
+/* gcm_siv_record_keys contains the keys used for a specific GCM-SIV record. */
+struct gcm_siv_record_keys {
+  uint8_t auth_key[16]; /* passed to gcm_siv_polyval as the POLYVAL key */
+  union {
+    double align;
+    AES_KEY ks; /* schedule for the per-record encryption key */
+  } enc_key;
+  block128_f enc_block; /* block function filled in by aes_ctr_set_key */
+};
+
+/* gcm_siv_keys derives the per-record keys for |nonce| by chaining the
+ * context's block function, starting from the nonce, and stores them in
+ * |*out_keys|. |nonce| must point to 16 readable bytes. */
+static void gcm_siv_keys(
+    const struct aead_aes_gcm_siv_ctx *gcm_siv_ctx,
+    struct gcm_siv_record_keys *out_keys,
+    const uint8_t nonce[EVP_AEAD_AES_GCM_SIV_TAG_LEN]) {
+  const AES_KEY *const master = &gcm_siv_ctx->ks.ks;
+  const block128_f derive = gcm_siv_ctx->kgk_block;
+  uint8_t material[32];
+  size_t material_len = 16;
+  derive(nonce, out_keys->auth_key, master);
+  if (!gcm_siv_ctx->is_256) {
+    derive(out_keys->auth_key, material, master);
+  } else {
+    material_len = 32;
+    derive(out_keys->auth_key, material + 16, master);
+    derive(material + 16, material, master);
+  }
+  aes_ctr_set_key(&out_keys->enc_key.ks, NULL, &out_keys->enc_block, material,
+                  material_len);
+}
+
+static int aead_aes_gcm_siv_seal(const EVP_AEAD_CTX *ctx, uint8_t *out,
+                                 size_t *out_len, size_t max_out_len,
+                                 const uint8_t *nonce, size_t nonce_len,
+                                 const uint8_t *in, size_t in_len,
+                                 const uint8_t *ad, size_t ad_len) {
+  const struct aead_aes_gcm_siv_ctx *gcm_siv_ctx = ctx->aead_state;
+  const uint64_t in_len_64 = in_len;
+  const uint64_t ad_len_64 = ad_len;
+  /* Seal |in| as ciphertext || tag. First reject oversized lengths. */
+  if (in_len + EVP_AEAD_AES_GCM_SIV_TAG_LEN < in_len ||
+      in_len_64 > (UINT64_C(1) << 36) ||
+      ad_len_64 >= (UINT64_C(1) << 61)) {
+    OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_TOO_LARGE);
+    return 0;
+  }
+  /* |out| must have room for the ciphertext plus the tag. */
+  if (max_out_len < in_len + EVP_AEAD_AES_GCM_SIV_TAG_LEN) {
+    OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_BUFFER_TOO_SMALL);
+    return 0;
+  }
+  /* Only nonces of exactly one AES block are accepted. */
+  if (nonce_len != AES_BLOCK_SIZE) {
+    OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_UNSUPPORTED_NONCE_SIZE);
+    return 0;
+  }
+
+  struct gcm_siv_record_keys keys;
+  gcm_siv_keys(gcm_siv_ctx, &keys, nonce);
+  /* The tag is the POLYVAL result encrypted as a single block. */
+  uint8_t tag[16];
+  gcm_siv_polyval(tag, in, in_len, ad, ad_len, keys.auth_key);
+  keys.enc_block(tag, tag, &keys.enc_key.ks);
+  /* The tag also serves as the initial counter block for encryption. */
+  gcm_siv_crypt(out, in, in_len, tag, keys.enc_block, &keys.enc_key.ks);
+
+  memcpy(&out[in_len], tag, EVP_AEAD_AES_GCM_SIV_TAG_LEN);
+  *out_len = in_len + EVP_AEAD_AES_GCM_SIV_TAG_LEN;
+
+  return 1;
+}
+
+static int aead_aes_gcm_siv_open(const EVP_AEAD_CTX *ctx, uint8_t *out,
+                                 size_t *out_len, size_t max_out_len,
+                                 const uint8_t *nonce, size_t nonce_len,
+                                 const uint8_t *in, size_t in_len,
+                                 const uint8_t *ad, size_t ad_len) {
+  /* Opens |in| (ciphertext followed by a 16-byte tag) into |out|. Returns one
+   * and sets |*out_len| on success, or zero on error. */
+  if ((uint64_t)ad_len >= (UINT64_C(1) << 61)) {
+    OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_TOO_LARGE);
+    return 0;
+  }
+  if (in_len < EVP_AEAD_AES_GCM_SIV_TAG_LEN ||
+      (uint64_t)in_len > (UINT64_C(1) << 36) + AES_BLOCK_SIZE) {
+    OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_BAD_DECRYPT);
+    return 0;
+  }
+  const struct aead_aes_gcm_siv_ctx *gcm_siv_ctx = ctx->aead_state;
+  const size_t plaintext_len = in_len - EVP_AEAD_AES_GCM_SIV_TAG_LEN;
+  if (max_out_len < plaintext_len) {
+    OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_BUFFER_TOO_SMALL);
+    return 0;
+  }
+  /* gcm_siv_keys reads a full block from |nonce|, so reject any other length
+   * (as seal does) rather than read past the end of a short nonce. */
+  if (nonce_len != AES_BLOCK_SIZE) {
+    OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_UNSUPPORTED_NONCE_SIZE);
+    return 0;
+  }
+
+  struct gcm_siv_record_keys keys;
+  gcm_siv_keys(gcm_siv_ctx, &keys, nonce);
+  /* Decrypt first: the tag is computed over the plaintext. */
+  gcm_siv_crypt(out, in, plaintext_len, &in[plaintext_len], keys.enc_block,
+                &keys.enc_key.ks);
+  uint8_t expected_tag[EVP_AEAD_AES_GCM_SIV_TAG_LEN];
+  gcm_siv_polyval(expected_tag, out, plaintext_len, ad, ad_len, keys.auth_key);
+  keys.enc_block(expected_tag, expected_tag, &keys.enc_key.ks);
+  if (CRYPTO_memcmp(expected_tag, &in[plaintext_len], sizeof(expected_tag)) !=
+      0) {
+    OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_BAD_DECRYPT);
+    return 0;
+  }
+  *out_len = plaintext_len;
+  return 1;
+}
+
+static const EVP_AEAD aead_aes_128_gcm_siv = {
+    16,                           /* key length */
+    AES_BLOCK_SIZE,               /* nonce length */
+    EVP_AEAD_AES_GCM_SIV_TAG_LEN, /* overhead */
+    EVP_AEAD_AES_GCM_SIV_TAG_LEN, /* max tag length */
+    /* The 128- and 256-bit variants share one set of callbacks. */
+    aead_aes_gcm_siv_init,
+    NULL /* init_with_direction */,
+    aead_aes_gcm_siv_cleanup,
+    aead_aes_gcm_siv_seal,
+    aead_aes_gcm_siv_open,
+    NULL /* get_iv */,
+};
+
+static const EVP_AEAD aead_aes_256_gcm_siv = {
+    32,                           /* key length */
+    AES_BLOCK_SIZE,               /* nonce length */
+    EVP_AEAD_AES_GCM_SIV_TAG_LEN, /* overhead */
+    EVP_AEAD_AES_GCM_SIV_TAG_LEN, /* max tag length */
+    /* The 128- and 256-bit variants share one set of callbacks. */
+    aead_aes_gcm_siv_init,
+    NULL /* init_with_direction */,
+    aead_aes_gcm_siv_cleanup,
+    aead_aes_gcm_siv_seal,
+    aead_aes_gcm_siv_open,
+    NULL /* get_iv */,
+};
+
+const EVP_AEAD *EVP_aead_aes_128_gcm_siv(void) {
+  return &aead_aes_128_gcm_siv; /* static table; 16-byte key */
+}
+
+const EVP_AEAD *EVP_aead_aes_256_gcm_siv(void) {
+  return &aead_aes_256_gcm_siv; /* static table; 32-byte key */
+}
+
+#endif  /* !OPENSSL_SMALL */
+
 int EVP_has_aes_hardware(void) {
 #if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
   return aesni_capable() && crypto_gcm_clmul_enabled();
diff --git a/src/crypto/cipher/test/aes_128_gcm_siv_tests.txt b/src/crypto/cipher/test/aes_128_gcm_siv_tests.txt
new file mode 100644
index 0000000..a929b59
--- /dev/null
+++ b/src/crypto/cipher/test/aes_128_gcm_siv_tests.txt
@@ -0,0 +1,236 @@
+# This is the example from
+# https://tools.ietf.org/html/draft-irtf-cfrg-gcmsiv-02#section-8
+
+KEY: ee8e1ed9ff2540ae8f2ba9f50bc2f27c
+NONCE: 752abad3e0afb5f434dc4310f71f3d21
+IN: "Hello world"
+AD: "example"
+CT: 810649724764545b3625ff
+TAG: 010a10f4942710781d2948ac0192572f
+
+# Test vectors from
+# https://tools.ietf.org/html/draft-irtf-cfrg-gcmsiv-02#appendix-B
+
+KEY: 01000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 
+AD: 
+CT: 
+TAG: cb52de357fad226ae428d0ed5a575496
+
+KEY: 01000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 0100000000000000
+AD: 
+CT: 7e139f58002d68ee
+TAG: 715835541f2136f03b6dc80ae0a8ac46
+
+KEY: 01000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 010000000000000000000000
+AD: 
+CT: 4a87f0cd26e5d5086e90da02
+TAG: 4dff905e48d512e9c34ae8f3be66ec43
+
+KEY: 01000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 01000000000000000000000000000000
+AD: 
+CT: 048ca58c46d2368ce00132389f40b511
+TAG: 971da9aa385283522c4f67a9aedb37e5
+
+KEY: 01000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 0100000000000000000000000000000002000000000000000000000000000000
+AD: 
+CT: e1cf1cf545d2743ec005b26bd2c836ac1a4233d646c195ffa401f28063127baa
+TAG: 1071338b8c2930d3ec4c17cecbefa4b4
+
+KEY: 01000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 010000000000000000000000000000000200000000000000000000000000000003000000000000000000000000000000
+AD: 
+CT: 2e7e6881a02d57b877794b2fbfbfef5484f1cf74f4ad53a751b2582c0e698466bd9a49dcab53806d8e31d864c4632d00
+TAG: 04b1b8a9c1630ff028b14d2e57bca429
+
+KEY: 01000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 01000000000000000000000000000000020000000000000000000000000000000300000000000000000000000000000004000000000000000000000000000000
+AD: 
+CT: 0ac5be860726209d9218de3e9d533743e1efe1595bc58f93f00e9bb9a7558dc1e1b14a9c0d49eb5064c7efa79842f9c7cfdd77614709f0b545d3227498e774d5
+TAG: 860b73a1ed8a5b9acd925c3f3f49c5c5
+
+KEY: 01000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 0200000000000000
+AD: 01
+CT: 4919e29e9890e452
+TAG: 1433a5c0284c911163888dbd128e6874
+
+KEY: 01000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 020000000000000000000000
+AD: 01
+CT: db55d6da719fe0473538294e
+TAG: 5a8ab948ccd205a70c78e8fdf954693b
+
+KEY: 01000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 02000000000000000000000000000000
+AD: 01
+CT: aea3c54272abc1b58ed34a536743f4da
+TAG: da10d98bfe23784cfdfd0af97b6d5b78
+
+KEY: 01000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 0200000000000000000000000000000003000000000000000000000000000000
+AD: 01
+CT: aa694c0cfe148100cb5c6e27a77a7ff7b4233d6af251d9faa3d84f7c0d1113f1
+TAG: 778c5b68356a1a6a6f3c14a8f96c35ca
+
+KEY: 01000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 020000000000000000000000000000000300000000000000000000000000000004000000000000000000000000000000
+AD: 01
+CT: 9ac909928bcde79c2afa885df9c035c85a9eab136f6f6ea11034456bd306ea3c5dd542f706fffe538b5f139fa9dc622e
+TAG: 26c0c0d146d38787ca0fcbc3f911577a
+
+KEY: 01000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 02000000000000000000000000000000030000000000000000000000000000000400000000000000000000000000000005000000000000000000000000000000
+AD: 01
+CT: c56be9d61ecf6a31a6289cddc9b91aaf84cdb53a3913b825d6eb5e157906dfb0a308c6b0b095d6fd1a5b761ca7fa0e39ca92f38ae206eec844c0c4ab0c1c165e
+TAG: a60986309b99431a35dd8c5ebeef8375
+
+KEY: 01000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 02000000
+AD: 010000000000000000000000
+CT: 47995b96
+TAG: 16b668094202cadde992e0c16205793c
+
+KEY: 01000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 0300000000000000000000000000000004000000
+AD: 010000000000000000000000000000000200
+CT: 8fe25de75089e9f849150e57ab7f7810981cd319
+TAG: 89ca91ebc560709432fe9496746404cc
+
+KEY: 01000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 030000000000000000000000000000000400
+AD: 0100000000000000000000000000000002000000
+CT: b26d43ae158316ac37f41579ccf1d461274e
+TAG: 13b7c01d08dd6969d51d1bf0fbbdc4d2
+
+KEY: 01000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 
+AD: 
+CT: 
+TAG: cb52de357fad226ae428d0ed5a575496
+
+KEY: 01000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 0100000000000000
+AD: 
+CT: 7e139f58002d68ee
+TAG: 715835541f2136f03b6dc80ae0a8ac46
+
+KEY: 01000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 010000000000000000000000
+AD: 
+CT: 4a87f0cd26e5d5086e90da02
+TAG: 4dff905e48d512e9c34ae8f3be66ec43
+
+KEY: 01000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 01000000000000000000000000000000
+AD: 
+CT: 048ca58c46d2368ce00132389f40b511
+TAG: 971da9aa385283522c4f67a9aedb37e5
+
+KEY: 01000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 0100000000000000000000000000000002000000000000000000000000000000
+AD: 
+CT: e1cf1cf545d2743ec005b26bd2c836ac1a4233d646c195ffa401f28063127baa
+TAG: 1071338b8c2930d3ec4c17cecbefa4b4
+
+KEY: 01000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 010000000000000000000000000000000200000000000000000000000000000003000000000000000000000000000000
+AD: 
+CT: 2e7e6881a02d57b877794b2fbfbfef5484f1cf74f4ad53a751b2582c0e698466bd9a49dcab53806d8e31d864c4632d00
+TAG: 04b1b8a9c1630ff028b14d2e57bca429
+
+KEY: 01000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 01000000000000000000000000000000020000000000000000000000000000000300000000000000000000000000000004000000000000000000000000000000
+AD: 
+CT: 0ac5be860726209d9218de3e9d533743e1efe1595bc58f93f00e9bb9a7558dc1e1b14a9c0d49eb5064c7efa79842f9c7cfdd77614709f0b545d3227498e774d5
+TAG: 860b73a1ed8a5b9acd925c3f3f49c5c5
+
+KEY: 01000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 0200000000000000
+AD: 01
+CT: 4919e29e9890e452
+TAG: 1433a5c0284c911163888dbd128e6874
+
+KEY: 01000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 020000000000000000000000
+AD: 01
+CT: db55d6da719fe0473538294e
+TAG: 5a8ab948ccd205a70c78e8fdf954693b
+
+KEY: 01000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 02000000000000000000000000000000
+AD: 01
+CT: aea3c54272abc1b58ed34a536743f4da
+TAG: da10d98bfe23784cfdfd0af97b6d5b78
+
+KEY: 01000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 0200000000000000000000000000000003000000000000000000000000000000
+AD: 01
+CT: aa694c0cfe148100cb5c6e27a77a7ff7b4233d6af251d9faa3d84f7c0d1113f1
+TAG: 778c5b68356a1a6a6f3c14a8f96c35ca
+
+KEY: 01000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 020000000000000000000000000000000300000000000000000000000000000004000000000000000000000000000000
+AD: 01
+CT: 9ac909928bcde79c2afa885df9c035c85a9eab136f6f6ea11034456bd306ea3c5dd542f706fffe538b5f139fa9dc622e
+TAG: 26c0c0d146d38787ca0fcbc3f911577a
+
+KEY: 01000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 02000000000000000000000000000000030000000000000000000000000000000400000000000000000000000000000005000000000000000000000000000000
+AD: 01
+CT: c56be9d61ecf6a31a6289cddc9b91aaf84cdb53a3913b825d6eb5e157906dfb0a308c6b0b095d6fd1a5b761ca7fa0e39ca92f38ae206eec844c0c4ab0c1c165e
+TAG: a60986309b99431a35dd8c5ebeef8375
+
+KEY: 01000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 02000000
+AD: 010000000000000000000000
+CT: 47995b96
+TAG: 16b668094202cadde992e0c16205793c
+
+KEY: 01000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 0300000000000000000000000000000004000000
+AD: 010000000000000000000000000000000200
+CT: 8fe25de75089e9f849150e57ab7f7810981cd319
+TAG: 89ca91ebc560709432fe9496746404cc
+
+KEY: 01000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 030000000000000000000000000000000400
+AD: 0100000000000000000000000000000002000000
+CT: b26d43ae158316ac37f41579ccf1d461274e
+TAG: 13b7c01d08dd6969d51d1bf0fbbdc4d2
diff --git a/src/crypto/cipher/test/aes_256_gcm_siv_tests.txt b/src/crypto/cipher/test/aes_256_gcm_siv_tests.txt
new file mode 100644
index 0000000..cd38e23
--- /dev/null
+++ b/src/crypto/cipher/test/aes_256_gcm_siv_tests.txt
@@ -0,0 +1,226 @@
+# Test vectors from
+# https://tools.ietf.org/html/draft-irtf-cfrg-gcmsiv-02#appendix-B
+
+KEY: 0100000000000000000000000000000000000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 
+AD: 
+CT: 
+TAG: eb7ccf36eeff369241379c87cc08e4f0
+
+KEY: 0100000000000000000000000000000000000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 0100000000000000
+AD: 
+CT: ab3f382a6f0fb4c3
+TAG: a0a69e07b73281f5cdfd034f646cfa08
+
+KEY: 0100000000000000000000000000000000000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 010000000000000000000000
+AD: 
+CT: be8d81f033ca23b953da2197
+TAG: cdf3ba70da9c7cbd45f5140ba0cca9f1
+
+KEY: 0100000000000000000000000000000000000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 01000000000000000000000000000000
+AD: 
+CT: 46e05b7116dbe27aaeffe99892194072
+TAG: be19d78991c62130cf97f628c37c3eaa
+
+KEY: 0100000000000000000000000000000000000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 0100000000000000000000000000000002000000000000000000000000000000
+AD: 
+CT: 23ddbe9ef342b03003f56d6b4a2e8aff035c7d7cfd705e1ab4502904254bb67a
+TAG: 16c5944034050657af7c0fec7efbc40f
+
+KEY: 0100000000000000000000000000000000000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 010000000000000000000000000000000200000000000000000000000000000003000000000000000000000000000000
+AD: 
+CT: b104c8945f280e75b52c05c45a63d1872c7f0552b1501968d9913d71207d0433f978f1a3eecdf782016b77e8c9d3ff53
+TAG: abedb4841c20f3b05e61e0fd1fcaf3d0
+
+KEY: 0100000000000000000000000000000000000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 01000000000000000000000000000000020000000000000000000000000000000300000000000000000000000000000004000000000000000000000000000000
+AD: 
+CT: e3f2bd14f4c80c9cea4c90c81f0e4d7eedb87eb19a7c0cf5a5a95cd3e441a71083b1191d115e9a9ff008b93feeb5a86d012a3e0adb89de2d1e3225479022292f
+TAG: 3ced67f5e03bb476a738c1343926dc19
+
+KEY: 0100000000000000000000000000000000000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 0200000000000000
+AD: 01
+CT: 4dca2c16c3b0413c
+TAG: ac9b952c76a6f8b5df315f88126daa1c
+
+KEY: 0100000000000000000000000000000000000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 020000000000000000000000
+AD: 01
+CT: ee0ca9068b5b85dfe115a660
+TAG: 756d6155927271077d790a05390ecb71
+
+KEY: 0100000000000000000000000000000000000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 02000000000000000000000000000000
+AD: 01
+CT: 590edb785c0cb89d19f031fa7e7d4f91
+TAG: ac2c8f711c86dbecc8c7b663c5fbc1ea
+
+KEY: 0100000000000000000000000000000000000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 0200000000000000000000000000000003000000000000000000000000000000
+AD: 01
+CT: dcf2024f5f98d463b82a8673c47dd82159748cac8bcc7c76b8cfa26029cb333c
+TAG: a9b406643e190e602fb104fbb842a1ac
+
+KEY: 0100000000000000000000000000000000000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 020000000000000000000000000000000300000000000000000000000000000004000000000000000000000000000000
+AD: 01
+CT: 79216506b1ddadfe16366e4ec886d10dc9400b995259f74c0091f9b5a6add5680a612130f6c31ab833aa76d9b2be86de
+TAG: 3ddfe9ad2c350980942638d3f954ac6d
+
+KEY: 0100000000000000000000000000000000000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 02000000000000000000000000000000030000000000000000000000000000000400000000000000000000000000000005000000000000000000000000000000
+AD: 01
+CT: 9535eb67240c49f30a0de5a90670813fa615e71fcb4c522ca79d9a33459a22f8c6a56d650bf0b15eecdd706e7689cf6510a281724613fea76b5366b40574b1b9
+TAG: abcb59ee31d25ee8889b70d7c36f9a41
+
+KEY: 0100000000000000000000000000000000000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 02000000
+AD: 010000000000000000000000
+CT: 9611baa0
+TAG: 53daf2bc5916f7a6750f2432068dabee
+
+KEY: 0100000000000000000000000000000000000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 0300000000000000000000000000000004000000
+AD: 010000000000000000000000000000000200
+CT: 78e3a1b54daa6547f775f30c38a45e887aea5c87
+TAG: f65187d8c28adba364d659b627b16431
+
+KEY: 0100000000000000000000000000000000000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 030000000000000000000000000000000400
+AD: 0100000000000000000000000000000002000000
+CT: c6d3d28704bf20067d62e1a3872d40dda44b
+TAG: 6ac0135a4379dbc67967ff55fd4d1f2f
+
+KEY: 0100000000000000000000000000000000000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 
+AD: 
+CT: 
+TAG: eb7ccf36eeff369241379c87cc08e4f0
+
+KEY: 0100000000000000000000000000000000000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 0100000000000000
+AD: 
+CT: ab3f382a6f0fb4c3
+TAG: a0a69e07b73281f5cdfd034f646cfa08
+
+KEY: 0100000000000000000000000000000000000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 010000000000000000000000
+AD: 
+CT: be8d81f033ca23b953da2197
+TAG: cdf3ba70da9c7cbd45f5140ba0cca9f1
+
+KEY: 0100000000000000000000000000000000000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 01000000000000000000000000000000
+AD: 
+CT: 46e05b7116dbe27aaeffe99892194072
+TAG: be19d78991c62130cf97f628c37c3eaa
+
+KEY: 0100000000000000000000000000000000000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 0100000000000000000000000000000002000000000000000000000000000000
+AD: 
+CT: 23ddbe9ef342b03003f56d6b4a2e8aff035c7d7cfd705e1ab4502904254bb67a
+TAG: 16c5944034050657af7c0fec7efbc40f
+
+KEY: 0100000000000000000000000000000000000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 010000000000000000000000000000000200000000000000000000000000000003000000000000000000000000000000
+AD: 
+CT: b104c8945f280e75b52c05c45a63d1872c7f0552b1501968d9913d71207d0433f978f1a3eecdf782016b77e8c9d3ff53
+TAG: abedb4841c20f3b05e61e0fd1fcaf3d0
+
+KEY: 0100000000000000000000000000000000000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 01000000000000000000000000000000020000000000000000000000000000000300000000000000000000000000000004000000000000000000000000000000
+AD: 
+CT: e3f2bd14f4c80c9cea4c90c81f0e4d7eedb87eb19a7c0cf5a5a95cd3e441a71083b1191d115e9a9ff008b93feeb5a86d012a3e0adb89de2d1e3225479022292f
+TAG: 3ced67f5e03bb476a738c1343926dc19
+
+KEY: 0100000000000000000000000000000000000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 0200000000000000
+AD: 01
+CT: 4dca2c16c3b0413c
+TAG: ac9b952c76a6f8b5df315f88126daa1c
+
+KEY: 0100000000000000000000000000000000000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 020000000000000000000000
+AD: 01
+CT: ee0ca9068b5b85dfe115a660
+TAG: 756d6155927271077d790a05390ecb71
+
+KEY: 0100000000000000000000000000000000000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 02000000000000000000000000000000
+AD: 01
+CT: 590edb785c0cb89d19f031fa7e7d4f91
+TAG: ac2c8f711c86dbecc8c7b663c5fbc1ea
+
+KEY: 0100000000000000000000000000000000000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 0200000000000000000000000000000003000000000000000000000000000000
+AD: 01
+CT: dcf2024f5f98d463b82a8673c47dd82159748cac8bcc7c76b8cfa26029cb333c
+TAG: a9b406643e190e602fb104fbb842a1ac
+
+KEY: 0100000000000000000000000000000000000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 020000000000000000000000000000000300000000000000000000000000000004000000000000000000000000000000
+AD: 01
+CT: 79216506b1ddadfe16366e4ec886d10dc9400b995259f74c0091f9b5a6add5680a612130f6c31ab833aa76d9b2be86de
+TAG: 3ddfe9ad2c350980942638d3f954ac6d
+
+KEY: 0100000000000000000000000000000000000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 02000000000000000000000000000000030000000000000000000000000000000400000000000000000000000000000005000000000000000000000000000000
+AD: 01
+CT: 9535eb67240c49f30a0de5a90670813fa615e71fcb4c522ca79d9a33459a22f8c6a56d650bf0b15eecdd706e7689cf6510a281724613fea76b5366b40574b1b9
+TAG: abcb59ee31d25ee8889b70d7c36f9a41
+
+KEY: 0100000000000000000000000000000000000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 02000000
+AD: 010000000000000000000000
+CT: 9611baa0
+TAG: 53daf2bc5916f7a6750f2432068dabee
+
+KEY: 0100000000000000000000000000000000000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 0300000000000000000000000000000004000000
+AD: 010000000000000000000000000000000200
+CT: 78e3a1b54daa6547f775f30c38a45e887aea5c87
+TAG: f65187d8c28adba364d659b627b16431
+
+KEY: 0100000000000000000000000000000000000000000000000000000000000000
+NONCE: 03000000000000000000000000000000
+IN: 030000000000000000000000000000000400
+AD: 0100000000000000000000000000000002000000
+CT: c6d3d28704bf20067d62e1a3872d40dda44b
+TAG: 6ac0135a4379dbc67967ff55fd4d1f2f
diff --git a/src/crypto/cipher/tls_cbc.c b/src/crypto/cipher/tls_cbc.c
index dd6ab8c..eb56604 100644
--- a/src/crypto/cipher/tls_cbc.c
+++ b/src/crypto/cipher/tls_cbc.c
@@ -133,107 +133,32 @@
   /* mac_end is the index of |in| just after the end of the MAC. */
   unsigned mac_end = in_len;
   unsigned mac_start = mac_end - md_size;
-  /* scan_start contains the number of bytes that we can ignore because
-   * the MAC's position can only vary by 255 bytes. */
-  unsigned scan_start = 0;
-  unsigned i, j;
-  unsigned rotate_offset;
 
   assert(orig_len >= in_len);
   assert(in_len >= md_size);
   assert(md_size <= EVP_MAX_MD_SIZE);
 
+  /* scan_start contains the number of bytes that we can ignore because
+   * the MAC's position can only vary by 255 bytes. */
+  unsigned scan_start = 0;
   /* This information is public so it's safe to branch based on it. */
   if (orig_len > md_size + 255 + 1) {
     scan_start = orig_len - (md_size + 255 + 1);
   }
 
-  /* Ideally the next statement would be:
-   *
-   *   rotate_offset = (mac_start - scan_start) % md_size;
-   *
-   * However, division is not a constant-time operation (at least on Intel
-   * chips). Thus we enumerate the possible values of md_size and handle each
-   * separately. The value of |md_size| is public information (it's determined
-   * by the cipher suite in the ServerHello) so our timing can vary based on
-   * its value. */
-
-  rotate_offset = mac_start - scan_start;
-  /* rotate_offset can be, at most, 255 (bytes of padding) + 1 (padding length)
-   * + md_size = 256 + 48 (since SHA-384 is the largest hash) = 304. */
-  assert(rotate_offset <= 304);
-
-  /* Below is an SMT-LIB2 verification that the Barrett reductions below are
-   * correct within this range:
-   *
-   * (define-fun barrett (
-   *     (x (_ BitVec 32))
-   *     (mul (_ BitVec 32))
-   *     (shift (_ BitVec 32))
-   *     (divisor (_ BitVec 32)) ) (_ BitVec 32)
-   *   (let ((q (bvsub x (bvmul divisor (bvlshr (bvmul x mul) shift))) ))
-   *     (ite (bvuge q divisor)
-   *       (bvsub q divisor)
-   *       q)))
-   *
-   * (declare-fun x () (_ BitVec 32))
-   *
-   * (assert (or
-   *   (let (
-   *     (divisor (_ bv20 32))
-   *     (mul (_ bv25 32))
-   *     (shift (_ bv9 32))
-   *     (limit (_ bv853 32)))
-   *
-   *     (and (bvule x limit) (not (= (bvurem x divisor)
-   *                                  (barrett x mul shift divisor)))))
-   *
-   *   (let (
-   *     (divisor (_ bv48 32))
-   *     (mul (_ bv10 32))
-   *     (shift (_ bv9 32))
-   *     (limit (_ bv768 32)))
-   *
-   *     (and (bvule x limit) (not (= (bvurem x divisor)
-   *                                  (barrett x mul shift divisor)))))
-   * ))
-   *
-   * (check-sat)
-   * (get-model)
-   */
-
-  if (md_size == 16) {
-    rotate_offset &= 15;
-  } else if (md_size == 20) {
-    /* 1/20 is approximated as 25/512 and then Barrett reduction is used.
-     * Analytically, this is correct for 0 <= rotate_offset <= 853. */
-    unsigned q = (rotate_offset * 25) >> 9;
-    rotate_offset -= q * 20;
-    rotate_offset -=
-        constant_time_select(constant_time_ge(rotate_offset, 20), 20, 0);
-  } else if (md_size == 32) {
-    rotate_offset &= 31;
-  } else if (md_size == 48) {
-    /* 1/48 is approximated as 10/512 and then Barrett reduction is used.
-     * Analytically, this is correct for 0 <= rotate_offset <= 768. */
-    unsigned q = (rotate_offset * 10) >> 9;
-    rotate_offset -= q * 48;
-    rotate_offset -=
-        constant_time_select(constant_time_ge(rotate_offset, 48), 48, 0);
-  } else {
-    /* This should be impossible therefore this path doesn't run in constant
-     * time. */
-    assert(0);
-    rotate_offset = rotate_offset % md_size;
-  }
-
+  unsigned rotate_offset = 0;
+  uint8_t mac_started = 0;
   memset(rotated_mac, 0, md_size);
-  for (i = scan_start, j = 0; i < orig_len; i++) {
-    uint8_t mac_started = constant_time_ge_8(i, mac_start);
+  for (unsigned i = scan_start, j = 0; i < orig_len; i++, j++) {
+    if (j >= md_size) {
+      j -= md_size;
+    }
+    unsigned is_mac_start = constant_time_eq(i, mac_start);
+    mac_started |= is_mac_start;
     uint8_t mac_ended = constant_time_ge_8(i, mac_end);
-    uint8_t b = in[i];
-    rotated_mac[j++] |= b & mac_started & ~mac_ended;
-    j &= constant_time_lt(j, md_size);
+    rotated_mac[j] |= in[i] & mac_started & ~mac_ended;
+    /* Save the offset that |mac_start| is mapped to. */
+    rotate_offset |= j & is_mac_start;
   }
 
   /* Now rotate the MAC. We rotate in log(md_size) steps, one for each bit
@@ -243,7 +168,7 @@
     /* Rotate by |offset| iff the corresponding bit is set in
      * |rotate_offset|, placing the result in |rotated_mac_tmp|. */
     const uint8_t skip_rotate = (rotate_offset & 1) - 1;
-    for (i = 0, j = offset; i < md_size; i++, j++) {
+    for (unsigned i = 0, j = offset; i < md_size; i++, j++) {
       if (j >= md_size) {
         j -= md_size;
       }
diff --git a/src/crypto/dh/dh_test.cc b/src/crypto/dh/dh_test.cc
index 9a3d780..99bb945 100644
--- a/src/crypto/dh/dh_test.cc
+++ b/src/crypto/dh/dh_test.cc
@@ -68,7 +68,6 @@
 #include <openssl/err.h>
 #include <openssl/mem.h>
 
-namespace bssl {
 
 static bool RunBasicTests();
 static bool RunRFC5114Tests();
@@ -76,7 +75,7 @@
 static bool TestASN1();
 static bool TestRFC3526();
 
-static int Main() {
+int main() {
   CRYPTO_library_init();
 
   if (!RunBasicTests() ||
@@ -568,7 +567,7 @@
     return false;
   }
 
-  ScopedCBB cbb;
+  bssl::ScopedCBB cbb;
   uint8_t *der;
   size_t der_len;
   if (!CBB_init(cbb.get(), 0) ||
@@ -661,9 +660,3 @@
 
   return true;
 }
-
-}  // namespace bssl
-
-int main() {
-  return bssl::Main();
-}
diff --git a/src/crypto/digest/digest_test.cc b/src/crypto/digest/digest_test.cc
index ecf0308..0d3f16e 100644
--- a/src/crypto/digest/digest_test.cc
+++ b/src/crypto/digest/digest_test.cc
@@ -28,8 +28,6 @@
 #include "../internal.h"
 
 
-namespace bssl {
-
 struct MD {
   // name is the name of the digest.
   const char* name;
@@ -161,7 +159,7 @@
 }
 
 static int TestDigest(const TestVector *test) {
-  ScopedEVP_MD_CTX ctx;
+  bssl::ScopedEVP_MD_CTX ctx;
 
   // Test the input provided.
   if (!EVP_DigestInit_ex(ctx.get(), test->md.func(), NULL)) {
@@ -246,7 +244,7 @@
   return true;
 }
 
-static int Main() {
+int main() {
   CRYPTO_library_init();
 
   for (size_t i = 0; i < OPENSSL_ARRAY_SIZE(kTestVectors); i++) {
@@ -263,9 +261,3 @@
   printf("PASS\n");
   return 0;
 }
-
-}  // namespace bssl
-
-int main() {
-  return bssl::Main();
-}
diff --git a/src/crypto/ec/oct.c b/src/crypto/ec/oct.c
index bf1957c..4e8272d 100644
--- a/src/crypto/ec/oct.c
+++ b/src/crypto/ec/oct.c
@@ -284,7 +284,7 @@
                                              EC_POINT *point, const BIGNUM *x,
                                              int y_bit, BN_CTX *ctx) {
   if (BN_is_negative(x) || BN_cmp(x, &group->field) >= 0) {
-    OPENSSL_PUT_ERROR(EC, EC_R_INVALID_COMPRESSION_BIT);
+    OPENSSL_PUT_ERROR(EC, EC_R_INVALID_COMPRESSED_POINT);
     return 0;
   }
 
@@ -381,19 +381,7 @@
 
   if (y_bit != BN_is_odd(y)) {
     if (BN_is_zero(y)) {
-      int kron;
-
-      kron = BN_kronecker(x, &group->field, ctx);
-      if (kron == -2) {
-        goto err;
-      }
-
-      if (kron == 1) {
-        OPENSSL_PUT_ERROR(EC, EC_R_INVALID_COMPRESSION_BIT);
-      } else {
-        /* BN_mod_sqrt() should have cought this error (not a square) */
-        OPENSSL_PUT_ERROR(EC, EC_R_INVALID_COMPRESSED_POINT);
-      }
+      OPENSSL_PUT_ERROR(EC, EC_R_INVALID_COMPRESSION_BIT);
       goto err;
     }
     if (!BN_usub(y, &group->field, y)) {
diff --git a/src/crypto/ec/wnaf.c b/src/crypto/ec/wnaf.c
index ba2257c..1594354 100644
--- a/src/crypto/ec/wnaf.c
+++ b/src/crypto/ec/wnaf.c
@@ -90,10 +90,10 @@
  * with the exception that the most significant digit may be only
  * w-1 zeros away from that next non-zero digit.
  */
-static signed char *compute_wNAF(const BIGNUM *scalar, int w, size_t *ret_len) {
+static int8_t *compute_wNAF(const BIGNUM *scalar, int w, size_t *ret_len) {
   int window_val;
   int ok = 0;
-  signed char *r = NULL;
+  int8_t *r = NULL;
   int sign = 1;
   int bit, next_bit, mask;
   size_t len = 0, j;
@@ -109,9 +109,8 @@
     return r;
   }
 
-  if (w <= 0 || w > 7) /* 'signed char' can represent integers with absolute
-                          values less than 2^7 */
-  {
+  /* 'int8_t' can represent integers with absolute values less than 2^7. */
+  if (w <= 0 || w > 7) {
     OPENSSL_PUT_ERROR(EC, ERR_R_INTERNAL_ERROR);
     goto err;
   }
@@ -129,20 +128,18 @@
   }
 
   len = BN_num_bits(scalar);
-  r = OPENSSL_malloc(
-      len +
-      1); /* modified wNAF may be one digit longer than binary representation
-           * (*ret_len will be set to the actual length, i.e. at most
-           * BN_num_bits(scalar) + 1) */
+  /* The modified wNAF may be one digit longer than the binary representation
+   * (*ret_len will be set to the actual length, i.e. at most
+   * BN_num_bits(scalar) + 1). */
+  r = OPENSSL_malloc(len + 1);
   if (r == NULL) {
     OPENSSL_PUT_ERROR(EC, ERR_R_MALLOC_FAILURE);
     goto err;
   }
   window_val = scalar->d[0] & mask;
   j = 0;
-  while ((window_val != 0) ||
-         (j + w + 1 < len)) /* if j+w+1 >= len, window_val will not increase */
-  {
+  /* If j+w+1 >= len, window_val will not increase. */
+  while (window_val != 0 || j + w + 1 < len) {
     int digit = 0;
 
     /* 0 <= window_val <= 2^(w+1) */
@@ -174,9 +171,8 @@
 
       window_val -= digit;
 
-      /* now window_val is 0 or 2^(w+1) in standard wNAF generation;
-       * for modified window NAFs, it may also be 2^w
-       */
+      /* Now window_val is 0 or 2^(w+1) in standard wNAF generation;
+       * for modified window NAFs, it may also be 2^w. */
       if (window_val != 0 && window_val != next_bit && window_val != bit) {
         OPENSSL_PUT_ERROR(EC, ERR_R_INTERNAL_ERROR);
         goto err;
@@ -217,12 +213,29 @@
  *       sometimes smaller windows will give better performance
  *       (thus the boundaries should be increased)
  */
-#define EC_window_bits_for_scalar_size(b)                                      \
-  ((size_t)((b) >= 2000 ? 6 : (b) >= 800 ? 5 : (b) >= 300                      \
-                                                   ? 4                         \
-                                                   : (b) >= 70 ? 3 : (b) >= 20 \
-                                                                         ? 2   \
-                                                                         : 1))
+static size_t window_bits_for_scalar_size(size_t b) {
+  if (b >= 2000) {
+    return 6;
+  }
+
+  if (b >= 800) {
+    return 5;
+  }
+
+  if (b >= 300) {
+    return 4;
+  }
+
+  if (b >= 70) {
+    return 3;
+  }
+
+  if (b >= 20) {
+    return 2;
+  }
+
+  return 1;
+}
 
 int ec_wNAF_mul(const EC_GROUP *group, EC_POINT *r, const BIGNUM *g_scalar,
                 const EC_POINT *p, const BIGNUM *p_scalar, BN_CTX *ctx) {
@@ -235,7 +248,7 @@
   int r_is_inverted = 0;
   int r_is_at_infinity = 1;
   size_t *wsize = NULL;      /* individual window sizes */
-  signed char **wNAF = NULL; /* individual wNAFs */
+  int8_t **wNAF = NULL; /* individual wNAFs */
   size_t *wNAF_len = NULL;
   size_t max_len = 0;
   size_t num_val;
@@ -294,7 +307,7 @@
     size_t bits;
 
     bits = i < num ? BN_num_bits(scalars[i]) : BN_num_bits(g_scalar);
-    wsize[i] = EC_window_bits_for_scalar_size(bits);
+    wsize[i] = window_bits_for_scalar_size(bits);
     num_val += (size_t)1 << (wsize[i] - 1);
     wNAF[i + 1] = NULL; /* make sure we always have a pivot */
     wNAF[i] =
@@ -364,7 +377,7 @@
     }
   }
 
-#if 1 /* optional; EC_window_bits_for_scalar_size assumes we do this step */
+#if 1 /* optional; window_bits_for_scalar_size assumes we do this step */
   if (!EC_POINTs_make_affine(group, num_val, val, ctx)) {
     goto err;
   }
@@ -429,7 +442,7 @@
   OPENSSL_free(wsize);
   OPENSSL_free(wNAF_len);
   if (wNAF != NULL) {
-    signed char **w;
+    int8_t **w;
 
     for (w = wNAF; *w != NULL; w++) {
       OPENSSL_free(*w);
diff --git a/src/crypto/err/ssl.errordata b/src/crypto/err/ssl.errordata
index b50f9ab..e9b2066 100644
--- a/src/crypto/err/ssl.errordata
+++ b/src/crypto/err/ssl.errordata
@@ -109,6 +109,7 @@
 SSL,193,PEER_ERROR_UNSUPPORTED_CERTIFICATE_TYPE
 SSL,267,PRE_SHARED_KEY_MUST_BE_LAST
 SSL,194,PROTOCOL_IS_SHUTDOWN
+SSL,271,PSK_IDENTITY_BINDER_COUNT_MISMATCH
 SSL,195,PSK_IDENTITY_NOT_FOUND
 SSL,196,PSK_NO_CLIENT_CB
 SSL,197,PSK_NO_SERVER_CB
@@ -170,6 +171,7 @@
 SSL,219,TOO_MANY_EMPTY_FRAGMENTS
 SSL,260,TOO_MANY_KEY_UPDATES
 SSL,220,TOO_MANY_WARNING_ALERTS
+SSL,270,TOO_MUCH_SKIPPED_EARLY_DATA
 SSL,221,UNABLE_TO_FIND_ECDH_PARAMETERS
 SSL,222,UNEXPECTED_EXTENSION
 SSL,223,UNEXPECTED_MESSAGE
diff --git a/src/crypto/evp/evp_extra_test.cc b/src/crypto/evp/evp_extra_test.cc
index 755fa83..4d41760 100644
--- a/src/crypto/evp/evp_extra_test.cc
+++ b/src/crypto/evp/evp_extra_test.cc
@@ -27,7 +27,6 @@
 #include <openssl/pkcs8.h>
 #include <openssl/rsa.h>
 
-namespace bssl {
 
 // kExampleRSAKeyDER is an RSA private key in ASN.1, DER format. Of course, you
 // should never use this key anywhere but in an example.
@@ -371,7 +370,7 @@
 
 static bool TestEVP_DigestSignInit(void) {
   bssl::UniquePtr<EVP_PKEY> pkey = LoadExampleRSAKey();
-  ScopedEVP_MD_CTX md_ctx;
+  bssl::ScopedEVP_MD_CTX md_ctx;
   if (!pkey ||
       !EVP_DigestSignInit(md_ctx.get(), NULL, EVP_sha256(), NULL, pkey.get()) ||
       !EVP_DigestSignUpdate(md_ctx.get(), kMsg, sizeof(kMsg))) {
@@ -409,7 +408,7 @@
 
 static bool TestEVP_DigestVerifyInit(void) {
   bssl::UniquePtr<EVP_PKEY> pkey = LoadExampleRSAKey();
-  ScopedEVP_MD_CTX md_ctx;
+  bssl::ScopedEVP_MD_CTX md_ctx;
   if (!pkey ||
       !EVP_DigestVerifyInit(md_ctx.get(), NULL, EVP_sha256(), NULL,
                             pkey.get()) ||
@@ -591,7 +590,7 @@
   if (!empty) {
     return false;
   }
-  ScopedCBB cbb;
+  bssl::ScopedCBB cbb;
   if (EVP_marshal_public_key(cbb.get(), empty.get())) {
     fprintf(stderr, "Marshalled empty public key.\n");
     return false;
@@ -670,7 +669,7 @@
   return true;
 }
 
-static int Main(void) {
+int main() {
   CRYPTO_library_init();
 
   if (!TestEVP_DigestSignInit()) {
@@ -718,9 +717,3 @@
   printf("PASS\n");
   return 0;
 }
-
-}  // namespace bssl
-
-int main() {
-  return bssl::Main();
-}
diff --git a/src/crypto/evp/evp_test.cc b/src/crypto/evp/evp_test.cc
index 68b869a..bfaa38a 100644
--- a/src/crypto/evp/evp_test.cc
+++ b/src/crypto/evp/evp_test.cc
@@ -75,7 +75,6 @@
 
 #include "../test/file_test.h"
 
-namespace bssl {
 
 // evp_test dispatches between multiple test types. PrivateKey tests take a key
 // name parameter and single block, decode it as a PEM private key, and save it
@@ -141,7 +140,7 @@
   }
 
   // The key must re-encode correctly.
-  ScopedCBB cbb;
+  bssl::ScopedCBB cbb;
   uint8_t *der;
   size_t der_len;
   if (!CBB_init(cbb.get(), 0) ||
@@ -253,7 +252,7 @@
   return true;
 }
 
-static int Main(int argc, char *argv[]) {
+int main(int argc, char *argv[]) {
   CRYPTO_library_init();
   if (argc != 2) {
     fprintf(stderr, "%s <test file.txt>\n", argv[0]);
@@ -263,9 +262,3 @@
   KeyMap map;
   return FileTestMain(TestEVP, &map, argv[1]);
 }
-
-}  // namespace bssl
-
-int main(int argc, char *argv[]) {
-  return bssl::Main(argc, argv);
-}
diff --git a/src/crypto/hmac/hmac_test.cc b/src/crypto/hmac/hmac_test.cc
index 60a9581..7b216e2 100644
--- a/src/crypto/hmac/hmac_test.cc
+++ b/src/crypto/hmac/hmac_test.cc
@@ -67,7 +67,6 @@
 
 #include "../test/file_test.h"
 
-namespace bssl {
 
 static const EVP_MD *GetDigest(const std::string &name) {
   if (name == "MD5") {
@@ -117,7 +116,7 @@
   }
 
   // Test using HMAC_CTX.
-  ScopedHMAC_CTX ctx;
+  bssl::ScopedHMAC_CTX ctx;
   if (!HMAC_Init_ex(ctx.get(), key.data(), key.size(), digest, nullptr) ||
       !HMAC_Update(ctx.get(), input.data(), input.size()) ||
       !HMAC_Final(ctx.get(), mac.get(), &mac_len) ||
@@ -158,7 +157,7 @@
   return true;
 }
 
-static int Main(int argc, char *argv[]) {
+int main(int argc, char *argv[]) {
   CRYPTO_library_init();
 
   if (argc != 2) {
@@ -168,9 +167,3 @@
 
   return FileTestMain(TestHMAC, nullptr, argv[1]);
 }
-
-}  // namespace bssl
-
-int main(int argc, char **argv) {
-  return bssl::Main(argc, argv);
-}
diff --git a/src/crypto/modes/CMakeLists.txt b/src/crypto/modes/CMakeLists.txt
index 17faa15..dc9e504 100644
--- a/src/crypto/modes/CMakeLists.txt
+++ b/src/crypto/modes/CMakeLists.txt
@@ -48,10 +48,11 @@
   OBJECT
 
   cbc.c
-  ctr.c
-  ofb.c
   cfb.c
+  ctr.c
   gcm.c
+  ofb.c
+  polyval.c
 
   ${MODES_ARCH_SOURCES}
 )
diff --git a/src/crypto/modes/cbc.c b/src/crypto/modes/cbc.c
index e41f2b4..6e9fe24 100644
--- a/src/crypto/modes/cbc.c
+++ b/src/crypto/modes/cbc.c
@@ -52,10 +52,6 @@
 #include "internal.h"
 
 
-#ifndef STRICT_ALIGNMENT
-#  define STRICT_ALIGNMENT 0
-#endif
-
 void CRYPTO_cbc128_encrypt(const uint8_t *in, uint8_t *out, size_t len,
                            const void *key, uint8_t ivec[16],
                            block128_f block) {
diff --git a/src/crypto/modes/gcm.c b/src/crypto/modes/gcm.c
index eb63aa0..3b793e8 100644
--- a/src/crypto/modes/gcm.c
+++ b/src/crypto/modes/gcm.c
@@ -65,14 +65,6 @@
 #define GHASH_ASM
 #endif
 
-#if defined(BSWAP4) && STRICT_ALIGNMENT == 1
-/* redefine, because alignment is ensured */
-#undef GETU32
-#define GETU32(p) BSWAP4(*(const uint32_t *)(p))
-#undef PUTU32
-#define PUTU32(p, v) *(uint32_t *)(p) = BSWAP4(v)
-#endif
-
 #define PACK(s) ((size_t)(s) << (sizeof(size_t) * 8 - 16))
 #define REDUCE1BIT(V)                                                 \
   do {                                                                \
@@ -121,27 +113,10 @@
   Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;
 
 #if defined(GHASH_ASM) && defined(OPENSSL_ARM)
-  /* ARM assembler expects specific dword order in Htable. */
-  {
-    int j;
-    const union {
-      long one;
-      char little;
-    } is_endian = {1};
-
-    if (is_endian.little) {
-      for (j = 0; j < 16; ++j) {
-        V = Htable[j];
-        Htable[j].hi = V.lo;
-        Htable[j].lo = V.hi;
-      }
-    } else {
-      for (j = 0; j < 16; ++j) {
-        V = Htable[j];
-        Htable[j].hi = V.lo << 32 | V.lo >> 32;
-        Htable[j].lo = V.hi << 32 | V.hi >> 32;
-      }
-    }
+  for (int j = 0; j < 16; ++j) {
+    V = Htable[j];
+    Htable[j].hi = V.lo;
+    Htable[j].lo = V.hi;
   }
 #endif
 }
@@ -157,10 +132,6 @@
   u128 Z;
   int cnt = 15;
   size_t rem, nlo, nhi;
-  const union {
-    long one;
-    char little;
-  } is_endian = {1};
 
   nlo = ((const uint8_t *)Xi)[15];
   nhi = nlo >> 4;
@@ -203,26 +174,8 @@
     Z.lo ^= Htable[nlo].lo;
   }
 
-  if (is_endian.little) {
-#ifdef BSWAP8
-    Xi[0] = BSWAP8(Z.hi);
-    Xi[1] = BSWAP8(Z.lo);
-#else
-    uint8_t *p = (uint8_t *)Xi;
-    uint32_t v;
-    v = (uint32_t)(Z.hi >> 32);
-    PUTU32(p, v);
-    v = (uint32_t)(Z.hi);
-    PUTU32(p + 4, v);
-    v = (uint32_t)(Z.lo >> 32);
-    PUTU32(p + 8, v);
-    v = (uint32_t)(Z.lo);
-    PUTU32(p + 12, v);
-#endif
-  } else {
-    Xi[0] = Z.hi;
-    Xi[1] = Z.lo;
-  }
+  Xi[0] = CRYPTO_bswap8(Z.hi);
+  Xi[1] = CRYPTO_bswap8(Z.lo);
 }
 
 /* Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
@@ -230,15 +183,11 @@
  * performance improvement, at least not on x86[_64]. It's here
  * mostly as reference and a placeholder for possible future
  * non-trivial optimization[s]... */
-static void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
-                           size_t len) {
+static void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16],
+                           const uint8_t *inp, size_t len) {
   u128 Z;
   int cnt;
   size_t rem, nlo, nhi;
-  const union {
-    long one;
-    char little;
-  } is_endian = {1};
 
   do {
     cnt = 15;
@@ -285,26 +234,8 @@
       Z.lo ^= Htable[nlo].lo;
     }
 
-    if (is_endian.little) {
-#ifdef BSWAP8
-      Xi[0] = BSWAP8(Z.hi);
-      Xi[1] = BSWAP8(Z.lo);
-#else
-      uint8_t *p = (uint8_t *)Xi;
-      uint32_t v;
-      v = (uint32_t)(Z.hi >> 32);
-      PUTU32(p, v);
-      v = (uint32_t)(Z.hi);
-      PUTU32(p + 4, v);
-      v = (uint32_t)(Z.lo >> 32);
-      PUTU32(p + 8, v);
-      v = (uint32_t)(Z.lo);
-      PUTU32(p + 12, v);
-#endif
-    } else {
-      Xi[0] = Z.hi;
-      Xi[1] = Z.lo;
-    }
+    Xi[0] = CRYPTO_bswap8(Z.hi);
+    Xi[1] = CRYPTO_bswap8(Z.lo);
   } while (inp += 16, len -= 16);
 }
 #else /* GHASH_ASM */
@@ -425,96 +356,88 @@
 #endif
 #endif
 
-void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, const void *key,
-                        block128_f block) {
-  const union {
-    long one;
-    char little;
-  } is_endian = {1};
+void CRYPTO_ghash_init(gmult_func *out_mult, ghash_func *out_hash,
+                       u128 out_table[16], const uint8_t *gcm_key) {
+  union {
+    uint64_t u[2];
+    uint8_t c[16];
+  } H;
 
-  memset(ctx, 0, sizeof(*ctx));
-  ctx->block = block;
+  memcpy(H.c, gcm_key, 16);
 
-  (*block)(ctx->H.c, ctx->H.c, key);
-
-  if (is_endian.little) {
-/* H is stored in host byte order */
-#ifdef BSWAP8
-    ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
-    ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
-#else
-    uint8_t *p = ctx->H.c;
-    uint64_t hi, lo;
-    hi = (uint64_t)GETU32(p) << 32 | GETU32(p + 4);
-    lo = (uint64_t)GETU32(p + 8) << 32 | GETU32(p + 12);
-    ctx->H.u[0] = hi;
-    ctx->H.u[1] = lo;
-#endif
-  }
+  /* H is stored in host byte order */
+  H.u[0] = CRYPTO_bswap8(H.u[0]);
+  H.u[1] = CRYPTO_bswap8(H.u[1]);
 
 #if defined(GHASH_ASM_X86_OR_64)
   if (crypto_gcm_clmul_enabled()) {
     if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
-      gcm_init_avx(ctx->Htable, ctx->H.u);
-      ctx->gmult = gcm_gmult_avx;
-      ctx->ghash = gcm_ghash_avx;
-    } else {
-      gcm_init_clmul(ctx->Htable, ctx->H.u);
-      ctx->gmult = gcm_gmult_clmul;
-      ctx->ghash = gcm_ghash_clmul;
+      gcm_init_avx(out_table, H.u);
+      *out_mult = gcm_gmult_avx;
+      *out_hash = gcm_ghash_avx;
+      return;
     }
+
+    gcm_init_clmul(out_table, H.u);
+    *out_mult = gcm_gmult_clmul;
+    *out_hash = gcm_ghash_clmul;
     return;
   }
-  gcm_init_4bit(ctx->Htable, ctx->H.u);
 #if defined(GHASH_ASM_X86) /* x86 only */
   if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */
-    ctx->gmult = gcm_gmult_4bit_mmx;
-    ctx->ghash = gcm_ghash_4bit_mmx;
-  } else {
-    ctx->gmult = gcm_gmult_4bit_x86;
-    ctx->ghash = gcm_ghash_4bit_x86;
+    gcm_init_4bit(out_table, H.u);
+    *out_mult = gcm_gmult_4bit_mmx;
+    *out_hash = gcm_ghash_4bit_mmx;
+    return;
   }
-#else
-  ctx->gmult = gcm_gmult_4bit;
-  ctx->ghash = gcm_ghash_4bit;
 #endif
 #elif defined(GHASH_ASM_ARM)
   if (pmull_capable()) {
-    gcm_init_v8(ctx->Htable, ctx->H.u);
-    ctx->gmult = gcm_gmult_v8;
-    ctx->ghash = gcm_ghash_v8;
-  } else if (neon_capable()) {
-    gcm_init_neon(ctx->Htable,ctx->H.u);
-    ctx->gmult = gcm_gmult_neon;
-    ctx->ghash = gcm_ghash_neon;
-  } else {
-    gcm_init_4bit(ctx->Htable, ctx->H.u);
-    ctx->gmult = gcm_gmult_4bit;
-    ctx->ghash = gcm_ghash_4bit;
+    gcm_init_v8(out_table, H.u);
+    *out_mult = gcm_gmult_v8;
+    *out_hash = gcm_ghash_v8;
+    return;
+  }
+
+  if (neon_capable()) {
+    gcm_init_neon(out_table, H.u);
+    *out_mult = gcm_gmult_neon;
+    *out_hash = gcm_ghash_neon;
+    return;
   }
 #elif defined(GHASH_ASM_PPC64LE)
   if (CRYPTO_is_PPC64LE_vcrypto_capable()) {
-    gcm_init_p8(ctx->Htable, ctx->H.u);
-    ctx->gmult = gcm_gmult_p8;
-    ctx->ghash = gcm_ghash_p8;
-  } else {
-    gcm_init_4bit(ctx->Htable, ctx->H.u);
-    ctx->gmult = gcm_gmult_4bit;
-    ctx->ghash = gcm_ghash_4bit;
+    gcm_init_p8(out_table, H.u);
+    *out_mult = gcm_gmult_p8;
+    *out_hash = gcm_ghash_p8;
+    return;
   }
-#else
-  gcm_init_4bit(ctx->Htable, ctx->H.u);
-  ctx->gmult = gcm_gmult_4bit;
-  ctx->ghash = gcm_ghash_4bit;
 #endif
+
+  gcm_init_4bit(out_table, H.u);
+#if defined(GHASH_ASM_X86)
+  *out_mult = gcm_gmult_4bit_x86;
+  *out_hash = gcm_ghash_4bit_x86;
+#else
+  *out_mult = gcm_gmult_4bit;
+  *out_hash = gcm_ghash_4bit;
+#endif
+}
+
+void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, const void *aes_key,
+                        block128_f block) {
+  memset(ctx, 0, sizeof(*ctx));
+  ctx->block = block;
+
+  uint8_t gcm_key[16];
+  memset(gcm_key, 0, sizeof(gcm_key));
+  (*block)(gcm_key, gcm_key, aes_key);
+
+  CRYPTO_ghash_init(&ctx->gmult, &ctx->ghash, ctx->Htable, gcm_key);
 }
 
 void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const void *key,
                          const uint8_t *iv, size_t len) {
-  const union {
-    long one;
-    char little;
-  } is_endian = {1};
   unsigned int ctr;
 #ifdef GCM_FUNCREF_4BIT
   void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
@@ -551,39 +474,15 @@
       GCM_MUL(ctx, Yi);
     }
     len0 <<= 3;
-    if (is_endian.little) {
-#ifdef BSWAP8
-      ctx->Yi.u[1] ^= BSWAP8(len0);
-#else
-      ctx->Yi.c[8] ^= (uint8_t)(len0 >> 56);
-      ctx->Yi.c[9] ^= (uint8_t)(len0 >> 48);
-      ctx->Yi.c[10] ^= (uint8_t)(len0 >> 40);
-      ctx->Yi.c[11] ^= (uint8_t)(len0 >> 32);
-      ctx->Yi.c[12] ^= (uint8_t)(len0 >> 24);
-      ctx->Yi.c[13] ^= (uint8_t)(len0 >> 16);
-      ctx->Yi.c[14] ^= (uint8_t)(len0 >> 8);
-      ctx->Yi.c[15] ^= (uint8_t)(len0);
-#endif
-    } else {
-      ctx->Yi.u[1] ^= len0;
-    }
+    ctx->Yi.u[1] ^= CRYPTO_bswap8(len0);
 
     GCM_MUL(ctx, Yi);
-
-    if (is_endian.little) {
-      ctr = GETU32(ctx->Yi.c + 12);
-    } else {
-      ctr = ctx->Yi.d[3];
-    }
+    ctr = GETU32_aligned(ctx->Yi.c + 12);
   }
 
   (*ctx->block)(ctx->Yi.c, ctx->EK0.c, key);
   ++ctr;
-  if (is_endian.little) {
-    PUTU32(ctx->Yi.c + 12, ctr);
-  } else {
-    ctx->Yi.d[3] = ctr;
-  }
+  PUTU32_aligned(ctx->Yi.c + 12, ctr);
 }
 
 int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const uint8_t *aad, size_t len) {
@@ -656,10 +555,6 @@
 int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, const void *key,
                           const unsigned char *in, unsigned char *out,
                           size_t len) {
-  const union {
-    long one;
-    char little;
-  } is_endian = {1};
   unsigned int n, ctr;
   uint64_t mlen = ctx->len.u[1];
   block128_f block = ctx->block;
@@ -684,11 +579,7 @@
     ctx->ares = 0;
   }
 
-  if (is_endian.little) {
-    ctr = GETU32(ctx->Yi.c + 12);
-  } else {
-    ctr = ctx->Yi.d[3];
-  }
+  ctr = GETU32_aligned(ctx->Yi.c + 12);
 
   n = ctx->mres;
   if (n) {
@@ -709,11 +600,7 @@
       if (n == 0) {
         (*block)(ctx->Yi.c, ctx->EKi.c, key);
         ++ctr;
-        if (is_endian.little) {
-          PUTU32(ctx->Yi.c + 12, ctr);
-        } else {
-          ctx->Yi.d[3] = ctr;
-        }
+        PUTU32_aligned(ctx->Yi.c + 12, ctr);
       }
       ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
       n = (n + 1) % 16;
@@ -735,11 +622,7 @@
 
       (*block)(ctx->Yi.c, ctx->EKi.c, key);
       ++ctr;
-      if (is_endian.little) {
-        PUTU32(ctx->Yi.c + 12, ctr);
-      } else {
-        ctx->Yi.d[3] = ctr;
-      }
+      PUTU32_aligned(ctx->Yi.c + 12, ctr);
       for (size_t i = 0; i < 16 / sizeof(size_t); ++i) {
         out_t[i] = in_t[i] ^ ctx->EKi.t[i];
       }
@@ -758,11 +641,7 @@
 
       (*block)(ctx->Yi.c, ctx->EKi.c, key);
       ++ctr;
-      if (is_endian.little) {
-        PUTU32(ctx->Yi.c + 12, ctr);
-      } else {
-        ctx->Yi.d[3] = ctr;
-      }
+      PUTU32_aligned(ctx->Yi.c + 12, ctr);
       for (size_t i = 0; i < 16 / sizeof(size_t); ++i) {
         out_t[i] = in_t[i] ^ ctx->EKi.t[i];
       }
@@ -779,11 +658,7 @@
 
     (*block)(ctx->Yi.c, ctx->EKi.c, key);
     ++ctr;
-    if (is_endian.little) {
-      PUTU32(ctx->Yi.c + 12, ctr);
-    } else {
-      ctx->Yi.d[3] = ctr;
-    }
+    PUTU32_aligned(ctx->Yi.c + 12, ctr);
     for (size_t i = 0; i < 16 / sizeof(size_t); ++i) {
       ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
     }
@@ -796,11 +671,7 @@
   if (len) {
     (*block)(ctx->Yi.c, ctx->EKi.c, key);
     ++ctr;
-    if (is_endian.little) {
-      PUTU32(ctx->Yi.c + 12, ctr);
-    } else {
-      ctx->Yi.d[3] = ctr;
-    }
+    PUTU32_aligned(ctx->Yi.c + 12, ctr);
     while (len--) {
       ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
       ++n;
@@ -814,10 +685,6 @@
 int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, const void *key,
                           const unsigned char *in, unsigned char *out,
                           size_t len) {
-  const union {
-    long one;
-    char little;
-  } is_endian = {1};
   unsigned int n, ctr;
   uint64_t mlen = ctx->len.u[1];
   block128_f block = ctx->block;
@@ -842,11 +709,7 @@
     ctx->ares = 0;
   }
 
-  if (is_endian.little) {
-    ctr = GETU32(ctx->Yi.c + 12);
-  } else {
-    ctr = ctx->Yi.d[3];
-  }
+  ctr = GETU32_aligned(ctx->Yi.c + 12);
 
   n = ctx->mres;
   if (n) {
@@ -870,11 +733,7 @@
       if (n == 0) {
         (*block)(ctx->Yi.c, ctx->EKi.c, key);
         ++ctr;
-        if (is_endian.little) {
-          PUTU32(ctx->Yi.c + 12, ctr);
-        } else {
-          ctx->Yi.d[3] = ctr;
-        }
+        PUTU32_aligned(ctx->Yi.c + 12, ctr);
       }
       c = in[i];
       out[i] = c ^ ctx->EKi.c[n];
@@ -899,11 +758,7 @@
 
       (*block)(ctx->Yi.c, ctx->EKi.c, key);
       ++ctr;
-      if (is_endian.little) {
-        PUTU32(ctx->Yi.c + 12, ctr);
-      } else {
-        ctx->Yi.d[3] = ctr;
-      }
+      PUTU32_aligned(ctx->Yi.c + 12, ctr);
       for (size_t i = 0; i < 16 / sizeof(size_t); ++i) {
         out_t[i] = in_t[i] ^ ctx->EKi.t[i];
       }
@@ -922,11 +777,7 @@
 
       (*block)(ctx->Yi.c, ctx->EKi.c, key);
       ++ctr;
-      if (is_endian.little) {
-        PUTU32(ctx->Yi.c + 12, ctr);
-      } else {
-        ctx->Yi.d[3] = ctr;
-      }
+      PUTU32_aligned(ctx->Yi.c + 12, ctr);
       for (size_t i = 0; i < 16 / sizeof(size_t); ++i) {
         out_t[i] = in_t[i] ^ ctx->EKi.t[i];
       }
@@ -942,11 +793,7 @@
 
     (*block)(ctx->Yi.c, ctx->EKi.c, key);
     ++ctr;
-    if (is_endian.little) {
-      PUTU32(ctx->Yi.c + 12, ctr);
-    } else {
-      ctx->Yi.d[3] = ctr;
-    }
+    PUTU32_aligned(ctx->Yi.c + 12, ctr);
     for (size_t i = 0; i < 16 / sizeof(size_t); ++i) {
       size_t c = in_t[i];
       out_t[i] = c ^ ctx->EKi.t[i];
@@ -961,11 +808,7 @@
   if (len) {
     (*block)(ctx->Yi.c, ctx->EKi.c, key);
     ++ctr;
-    if (is_endian.little) {
-      PUTU32(ctx->Yi.c + 12, ctr);
-    } else {
-      ctx->Yi.d[3] = ctr;
-    }
+    PUTU32_aligned(ctx->Yi.c + 12, ctr);
     while (len--) {
       uint8_t c = in[n];
       ctx->Xi.c[n] ^= c;
@@ -981,10 +824,6 @@
 int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx, const void *key,
                                 const uint8_t *in, uint8_t *out, size_t len,
                                 ctr128_f stream) {
-  const union {
-    long one;
-    char little;
-  } is_endian = {1};
   unsigned int n, ctr;
   uint64_t mlen = ctx->len.u[1];
 #ifdef GCM_FUNCREF_4BIT
@@ -1034,21 +873,13 @@
   }
 #endif
 
-  if (is_endian.little) {
-    ctr = GETU32(ctx->Yi.c + 12);
-  } else {
-    ctr = ctx->Yi.d[3];
-  }
+  ctr = GETU32_aligned(ctx->Yi.c + 12);
 
 #if defined(GHASH)
   while (len >= GHASH_CHUNK) {
     (*stream)(in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
     ctr += GHASH_CHUNK / 16;
-    if (is_endian.little) {
-      PUTU32(ctx->Yi.c + 12, ctr);
-    } else {
-      ctx->Yi.d[3] = ctr;
-    }
+    PUTU32_aligned(ctx->Yi.c + 12, ctr);
     GHASH(ctx, out, GHASH_CHUNK);
     out += GHASH_CHUNK;
     in += GHASH_CHUNK;
@@ -1061,11 +892,7 @@
 
     (*stream)(in, out, j, key, ctx->Yi.c);
     ctr += (unsigned int)j;
-    if (is_endian.little) {
-      PUTU32(ctx->Yi.c + 12, ctr);
-    } else {
-      ctx->Yi.d[3] = ctr;
-    }
+    PUTU32_aligned(ctx->Yi.c + 12, ctr);
     in += i;
     len -= i;
 #if defined(GHASH)
@@ -1084,11 +911,7 @@
   if (len) {
     (*ctx->block)(ctx->Yi.c, ctx->EKi.c, key);
     ++ctr;
-    if (is_endian.little) {
-      PUTU32(ctx->Yi.c + 12, ctr);
-    } else {
-      ctx->Yi.d[3] = ctr;
-    }
+    PUTU32_aligned(ctx->Yi.c + 12, ctr);
     while (len--) {
       ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
       ++n;
@@ -1102,10 +925,6 @@
 int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx, const void *key,
                                 const uint8_t *in, uint8_t *out, size_t len,
                                 ctr128_f stream) {
-  const union {
-    long one;
-    char little;
-  } is_endian = {1};
   unsigned int n, ctr;
   uint64_t mlen = ctx->len.u[1];
 #ifdef GCM_FUNCREF_4BIT
@@ -1157,22 +976,14 @@
   }
 #endif
 
-  if (is_endian.little) {
-    ctr = GETU32(ctx->Yi.c + 12);
-  } else {
-    ctr = ctx->Yi.d[3];
-  }
+  ctr = GETU32_aligned(ctx->Yi.c + 12);
 
 #if defined(GHASH)
   while (len >= GHASH_CHUNK) {
     GHASH(ctx, in, GHASH_CHUNK);
     (*stream)(in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
     ctr += GHASH_CHUNK / 16;
-    if (is_endian.little) {
-      PUTU32(ctx->Yi.c + 12, ctr);
-    } else {
-      ctx->Yi.d[3] = ctr;
-    }
+    PUTU32_aligned(ctx->Yi.c + 12, ctr);
     out += GHASH_CHUNK;
     in += GHASH_CHUNK;
     len -= GHASH_CHUNK;
@@ -1198,11 +1009,7 @@
 #endif
     (*stream)(in, out, j, key, ctx->Yi.c);
     ctr += (unsigned int)j;
-    if (is_endian.little) {
-      PUTU32(ctx->Yi.c + 12, ctr);
-    } else {
-      ctx->Yi.d[3] = ctr;
-    }
+    PUTU32_aligned(ctx->Yi.c + 12, ctr);
     out += i;
     in += i;
     len -= i;
@@ -1210,11 +1017,7 @@
   if (len) {
     (*ctx->block)(ctx->Yi.c, ctx->EKi.c, key);
     ++ctr;
-    if (is_endian.little) {
-      PUTU32(ctx->Yi.c + 12, ctr);
-    } else {
-      ctx->Yi.d[3] = ctr;
-    }
+    PUTU32_aligned(ctx->Yi.c + 12, ctr);
     while (len--) {
       uint8_t c = in[n];
       ctx->Xi.c[n] ^= c;
@@ -1228,10 +1031,6 @@
 }
 
 int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const uint8_t *tag, size_t len) {
-  const union {
-    long one;
-    char little;
-  } is_endian = {1};
   uint64_t alen = ctx->len.u[0] << 3;
   uint64_t clen = ctx->len.u[1] << 3;
 #ifdef GCM_FUNCREF_4BIT
@@ -1242,20 +1041,8 @@
     GCM_MUL(ctx, Xi);
   }
 
-  if (is_endian.little) {
-#ifdef BSWAP8
-    alen = BSWAP8(alen);
-    clen = BSWAP8(clen);
-#else
-    uint8_t *p = ctx->len.c;
-
-    ctx->len.u[0] = alen;
-    ctx->len.u[1] = clen;
-
-    alen = (uint64_t)GETU32(p) << 32 | GETU32(p + 4);
-    clen = (uint64_t)GETU32(p + 8) << 32 | GETU32(p + 12);
-#endif
-  }
+  alen = CRYPTO_bswap8(alen);
+  clen = CRYPTO_bswap8(clen);
 
   ctx->Xi.u[0] ^= alen;
   ctx->Xi.u[1] ^= clen;
diff --git a/src/crypto/modes/gcm_test.cc b/src/crypto/modes/gcm_test.cc
index 51d966e..8baf20e 100644
--- a/src/crypto/modes/gcm_test.cc
+++ b/src/crypto/modes/gcm_test.cc
@@ -46,6 +46,13 @@
  * OF THE POSSIBILITY OF SUCH DAMAGE.
  * ==================================================================== */
 
+/* Per C99, various stdint.h and inttypes.h macros (the latter used by
+ * internal.h) are unavailable in C++ unless some macros are defined. C++11
+ * overruled this decision, but older Android NDKs still require them. */
+#if !defined(__STDC_CONSTANT_MACROS)
+#define __STDC_CONSTANT_MACROS
+#endif
+
 #include <stdio.h>
 #include <string.h>
 
@@ -388,12 +395,22 @@
   return ret;
 }
 
+static bool TestByteSwap() {
+  return CRYPTO_bswap4(0x01020304) == 0x04030201 &&
+         CRYPTO_bswap8(UINT64_C(0x0102030405060708)) ==
+             UINT64_C(0x0807060504030201);
+}
+
 int main(void) {
   int ret = 0;
   unsigned i;
 
   CRYPTO_library_init();
 
+  if (!TestByteSwap()) {
+    ret = 1;
+  }
+
   for (i = 0; i < sizeof(test_cases) / sizeof(struct test_case); i++) {
     if (!run_test_case(i, &test_cases[i])) {
       ret = 1;
diff --git a/src/crypto/modes/internal.h b/src/crypto/modes/internal.h
index 430d040..a53da04 100644
--- a/src/crypto/modes/internal.h
+++ b/src/crypto/modes/internal.h
@@ -51,6 +51,8 @@
 
 #include <openssl/base.h>
 
+#include <string.h>
+
 #if defined(__cplusplus)
 extern "C" {
 #endif
@@ -64,90 +66,58 @@
 #define STRICT_ALIGNMENT 0
 #endif
 
-#if !defined(PEDANTIC) && !defined(OPENSSL_NO_ASM)
 #if defined(__GNUC__) && __GNUC__ >= 2
-#if defined(OPENSSL_X86_64)
-#define BSWAP8(x)                 \
-  ({                              \
-    uint64_t ret = (x);           \
-    asm("bswapq %0" : "+r"(ret)); \
-    ret;                          \
-  })
-#define BSWAP4(x)                 \
-  ({                              \
-    uint32_t ret = (x);           \
-    asm("bswapl %0" : "+r"(ret)); \
-    ret;                          \
-  })
-#elif defined(OPENSSL_X86)
-#define BSWAP8(x)                                     \
-  ({                                                  \
-    uint32_t lo = (uint64_t)(x) >> 32, hi = (x);      \
-    asm("bswapl %0; bswapl %1" : "+r"(hi), "+r"(lo)); \
-    (uint64_t) hi << 32 | lo;                         \
-  })
-#define BSWAP4(x)                 \
-  ({                              \
-    uint32_t ret = (x);           \
-    asm("bswapl %0" : "+r"(ret)); \
-    ret;                          \
-  })
-#elif defined(OPENSSL_AARCH64)
-#define BSWAP8(x)                          \
-  ({                                       \
-    uint64_t ret;                          \
-    asm("rev %0,%1" : "=r"(ret) : "r"(x)); \
-    ret;                                   \
-  })
-#define BSWAP4(x)                            \
-  ({                                         \
-    uint32_t ret;                            \
-    asm("rev %w0,%w1" : "=r"(ret) : "r"(x)); \
-    ret;                                     \
-  })
-#elif defined(OPENSSL_ARM) && !defined(STRICT_ALIGNMENT)
-#define BSWAP8(x)                                     \
-  ({                                                  \
-    uint32_t lo = (uint64_t)(x) >> 32, hi = (x);      \
-    asm("rev %0,%0; rev %1,%1" : "+r"(hi), "+r"(lo)); \
-    (uint64_t) hi << 32 | lo;                         \
-  })
-#define BSWAP4(x)                                      \
-  ({                                                   \
-    uint32_t ret;                                      \
-    asm("rev %0,%1" : "=r"(ret) : "r"((uint32_t)(x))); \
-    ret;                                               \
-  })
-#endif
+static inline uint32_t CRYPTO_bswap4(uint32_t x) {
+  return __builtin_bswap32(x);
+}
+
+static inline uint64_t CRYPTO_bswap8(uint64_t x) {
+  return __builtin_bswap64(x);
+}
 #elif defined(_MSC_VER)
-#if _MSC_VER >= 1300
 OPENSSL_MSVC_PRAGMA(warning(push, 3))
 #include <intrin.h>
 OPENSSL_MSVC_PRAGMA(warning(pop))
 #pragma intrinsic(_byteswap_uint64, _byteswap_ulong)
-#define BSWAP8(x) _byteswap_uint64((uint64_t)(x))
-#define BSWAP4(x) _byteswap_ulong((uint32_t)(x))
-#elif defined(OPENSSL_X86)
-__inline uint32_t _bswap4(uint32_t val) {
-  _asm mov eax, val
-  _asm bswap eax
+static inline uint32_t CRYPTO_bswap4(uint32_t x) {
+  return _byteswap_ulong(x);
 }
-#define BSWAP4(x) _bswap4(x)
-#endif
-#endif
-#endif
 
-#if defined(BSWAP4) && !defined(STRICT_ALIGNMENT)
-#define GETU32(p) BSWAP4(*(const uint32_t *)(p))
-#define PUTU32(p, v) *(uint32_t *)(p) = BSWAP4(v)
+static inline uint64_t CRYPTO_bswap8(uint64_t x) {
+  return _byteswap_uint64(x);
+}
 #else
-#define GETU32(p) \
-  ((uint32_t)(p)[0] << 24 | (uint32_t)(p)[1] << 16 | (uint32_t)(p)[2] << 8 | (uint32_t)(p)[3])
-#define PUTU32(p, v)                                   \
-  ((p)[0] = (uint8_t)((v) >> 24), (p)[1] = (uint8_t)((v) >> 16), \
-   (p)[2] = (uint8_t)((v) >> 8), (p)[3] = (uint8_t)(v))
+static inline uint32_t CRYPTO_bswap4(uint32_t x) {
+  /* Portable fallback: exchange the 16-bit halves, then the bytes in each. */
+  const uint32_t halves = (x << 16) | (x >> 16);
+  return ((halves & 0x00ff00ff) << 8) | ((halves & 0xff00ff00) >> 8);
+}
+
+static inline uint64_t CRYPTO_bswap8(uint64_t x) {
+  return ((uint64_t)CRYPTO_bswap4((uint32_t)x) << 32) | CRYPTO_bswap4(x >> 32);
+}
 #endif
 
+/* GETU32/PUTU32 read/write a byte-swapped 32-bit word; any alignment is OK. */
+static inline uint32_t GETU32(const void *in) {
+  uint32_t raw;
+  memcpy(&raw, in, sizeof(raw));
+  return CRYPTO_bswap4(raw);
+}
+
+static inline void PUTU32(void *out, uint32_t v) {
+  const uint32_t swapped = CRYPTO_bswap4(v);
+  memcpy(out, &swapped, sizeof(swapped));
+}
+
+/* The _aligned variants dereference a |uint32_t| pointer directly instead. */
+static inline uint32_t GETU32_aligned(const void *in) {
+  return CRYPTO_bswap4(*(const uint32_t *)(const char *)in);
+}
+
+static inline void PUTU32_aligned(void *in, uint32_t v) {
+  *(uint32_t *)(char *)in = CRYPTO_bswap4(v);
+}
 
 /* block128_f is the type of a 128-bit, block cipher. */
 typedef void (*block128_f)(const uint8_t in[16], uint8_t out[16],
@@ -156,6 +126,16 @@
 /* GCM definitions */
 typedef struct { uint64_t hi,lo; } u128;
 
+/* gmult_func multiplies |Xi| by the GCM key and writes the result back to
+ * |Xi|. */
+typedef void (*gmult_func)(uint64_t Xi[2], const u128 Htable[16]);
+
+/* ghash_func repeatedly multiplies |Xi| by the GCM key and adds in blocks from
+ * |inp|. The result is written back to |Xi| and the |len| argument must be a
+ * multiple of 16. */
+typedef void (*ghash_func)(uint64_t Xi[2], const u128 Htable[16],
+                           const uint8_t *inp, size_t len);
+
 /* This differs from upstream's |gcm128_context| in that it does not have the
  * |key| pointer, in order to make it |memcpy|-friendly. Rather the key is
  * passed into each call that needs it. */
@@ -166,14 +146,11 @@
     uint32_t d[4];
     uint8_t c[16];
     size_t t[16 / sizeof(size_t)];
-  } Yi, EKi, EK0, len, Xi, H;
+  } Yi, EKi, EK0, len, Xi;
 
-  /* Relative position of Xi, H and pre-computed Htable is used in some
-   * assembler modules, i.e. don't change the order! */
   u128 Htable[16];
-  void (*gmult)(uint64_t Xi[2], const u128 Htable[16]);
-  void (*ghash)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
-                size_t len);
+  gmult_func gmult;
+  ghash_func ghash;
 
   unsigned int mres, ares;
   block128_f block;
@@ -212,6 +189,12 @@
                                  uint8_t ecount_buf[16], unsigned *num,
                                  ctr128_f ctr);
 
+#if !defined(OPENSSL_NO_ASM) && \
+    (defined(OPENSSL_X86) || defined(OPENSSL_X86_64))
+void aesni_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out, size_t blocks,
+                                const void *key, const uint8_t *ivec);
+#endif
+
 
 /* GCM.
  *
@@ -222,6 +205,12 @@
 
 typedef struct gcm128_context GCM128_CONTEXT;
 
+/* CRYPTO_ghash_init writes a precomputed table of powers of |gcm_key| to
+ * |out_table| and sets |*out_mult| and |*out_hash| to (potentially hardware
+ * accelerated) functions for performing operations in the GHASH field. */
+void CRYPTO_ghash_init(gmult_func *out_mult, ghash_func *out_hash,
+                       u128 out_table[16], const uint8_t *gcm_key);
+
 /* CRYPTO_gcm128_init initialises |ctx| to use |block| (typically AES) with
  * the given key. */
 OPENSSL_EXPORT void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, const void *key,
@@ -345,11 +334,36 @@
                                    block128_f block);
 
 
-#if !defined(OPENSSL_NO_ASM) && \
-    (defined(OPENSSL_X86) || defined(OPENSSL_X86_64))
-void aesni_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out, size_t blocks,
-                                const void *key, const uint8_t *ivec);
-#endif
+/* POLYVAL.
+ *
+ * POLYVAL is a polynomial authenticator that operates over a field very
+ * similar to the one that GHASH uses. See
+ * https://tools.ietf.org/html/draft-irtf-cfrg-gcmsiv-02#section-3. */
+
+typedef union {
+  uint64_t u[2];
+  uint8_t c[16];
+} polyval_block;
+
+struct polyval_ctx {
+  polyval_block S;
+  u128 Htable[16];
+  gmult_func gmult;
+  ghash_func ghash;
+};
+
+/* CRYPTO_POLYVAL_init initialises |ctx| using |key|. */
+void CRYPTO_POLYVAL_init(struct polyval_ctx *ctx, const uint8_t key[16]);
+
+/* CRYPTO_POLYVAL_update_blocks updates the accumulator in |ctx| given the
+ * blocks from |in|. Only a whole number of blocks can be processed so |in_len|
+ * must be a multiple of 16. */
+void CRYPTO_POLYVAL_update_blocks(struct polyval_ctx *ctx, const uint8_t *in,
+                                  size_t in_len);
+
+/* CRYPTO_POLYVAL_finish writes the accumulator from |ctx| to |out|. */
+void CRYPTO_POLYVAL_finish(const struct polyval_ctx *ctx, uint8_t out[16]);
+
 
 #if defined(__cplusplus)
 } /* extern C */
diff --git a/src/crypto/modes/polyval.c b/src/crypto/modes/polyval.c
new file mode 100644
index 0000000..c5121a1
--- /dev/null
+++ b/src/crypto/modes/polyval.c
@@ -0,0 +1,93 @@
+/* Copyright (c) 2016, Google Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
+
+#include <openssl/base.h>
+
+#if !defined(OPENSSL_SMALL)
+
+#include <assert.h>
+#include <string.h>
+
+#include "internal.h"
+#include "../internal.h"
+
+
+/* byte_reverse reverses all 16 bytes of |b->c| in place. */
+static void byte_reverse(polyval_block *b) {
+  const uint64_t first = b->u[0], second = b->u[1];
+  b->u[0] = CRYPTO_bswap8(second);
+  b->u[1] = CRYPTO_bswap8(first);
+}
+
+/* reverse_and_mulX_ghash interprets the bytes |b->c| as a reversed element of
+ * the GHASH field, multiplies that by 'x' and serialises the result back into
+ * |b|, but with GHASH's backwards bit ordering. */
+static void reverse_and_mulX_ghash(polyval_block *b) {
+  uint64_t hi = b->u[0];
+  uint64_t lo = b->u[1];
+  const unsigned carry = constant_time_eq(hi & 1, 1); /* bit shifted out next */
+  hi >>= 1;
+  hi |= lo << 63; /* bit 0 of |lo| becomes bit 63 of |hi| */
+  lo >>= 1;
+  lo ^= ((uint64_t) constant_time_select(carry, 0xe1, 0)) << 56;
+  /* Store the two halves exchanged, each with its bytes swapped. */
+  b->u[0] = CRYPTO_bswap8(lo);
+  b->u[1] = CRYPTO_bswap8(hi);
+}
+
+/* POLYVAL(H, X_1, ..., X_n) =
+ * ByteReverse(GHASH(mulX_GHASH(ByteReverse(H)), ByteReverse(X_1), ...,
+ * ByteReverse(X_n))).
+ *
+ * See https://tools.ietf.org/html/draft-irtf-cfrg-gcmsiv-02#appendix-A. */
+
+void CRYPTO_POLYVAL_init(struct polyval_ctx *ctx, const uint8_t key[16]) {
+  /* GHASH is keyed with a transformed copy of |key|, never |key| itself. */
+  polyval_block ghash_key;
+  memcpy(ghash_key.c, key, sizeof(ghash_key.c));
+  reverse_and_mulX_ghash(&ghash_key);
+  CRYPTO_ghash_init(&ctx->gmult, &ctx->ghash, ctx->Htable, ghash_key.c);
+  memset(&ctx->S, 0, sizeof(ctx->S)); /* the accumulator starts at zero */
+}
+
+void CRYPTO_POLYVAL_update_blocks(struct polyval_ctx *ctx, const uint8_t *in,
+                                  size_t in_len) {
+  assert((in_len & 15) == 0);
+  polyval_block reversed[32]; /* scratch for one byte-reversed chunk */
+
+  while (in_len > 0) {
+    const size_t todo = in_len < sizeof(reversed) ? in_len : sizeof(reversed);
+    memcpy(reversed, in, todo);
+    /* Step past the chunk just copied; without this every pass would re-hash
+     * the first chunk of the input. */
+    in += todo;
+    in_len -= todo;
+
+    const size_t blocks = todo / sizeof(polyval_block);
+    for (size_t i = 0; i < blocks; i++) {
+      byte_reverse(&reversed[i]);
+    }
+
+    ctx->ghash(ctx->S.u, ctx->Htable, (const uint8_t *) reversed, todo);
+  }
+}
+
+void CRYPTO_POLYVAL_finish(const struct polyval_ctx *ctx, uint8_t out[16]) {
+  polyval_block result = ctx->S; /* work on a copy; |ctx| is left untouched */
+  byte_reverse(&result);
+  memcpy(out, result.c, sizeof(result));
+}
+
+
+#endif  /* !OPENSSL_SMALL */
diff --git a/src/crypto/rand/urandom.c b/src/crypto/rand/urandom.c
index 2572625..17d194c 100644
--- a/src/crypto/rand/urandom.c
+++ b/src/crypto/rand/urandom.c
@@ -21,6 +21,7 @@
 #include <assert.h>
 #include <errno.h>
 #include <fcntl.h>
+#include <stdio.h>
 #include <string.h>
 #include <unistd.h>
 
@@ -87,12 +88,16 @@
 /* requested_lock is used to protect the |*_requested| variables. */
 static struct CRYPTO_STATIC_MUTEX requested_lock = CRYPTO_STATIC_MUTEX_INIT;
 
-/* urandom_fd_requested is set by |RAND_set_urandom_fd|.  It's protected by
+/* The following constants are magic values of |urandom_fd|. */
+static const int kUnset = -2;
+static const int kHaveGetrandom = -3;
+
+/* urandom_fd_requested is set by |RAND_set_urandom_fd|. It's protected by
  * |requested_lock|. */
-static int urandom_fd_requested = -2;
+static int urandom_fd_requested = -2 /* kUnset */;
 
 /* urandom_fd is a file descriptor to /dev/urandom. It's protected by |once|. */
-static int urandom_fd = -2;
+static int urandom_fd = -2 /* kUnset */;
 
 /* urandom_buffering_requested is set by |RAND_enable_fork_unsafe_buffering|.
  * It's protected by |requested_lock|. */
@@ -115,12 +120,31 @@
   CRYPTO_STATIC_MUTEX_unlock_read(&requested_lock);
 
 #if defined(USE_SYS_getrandom)
-  /* Initial test of getrandom to find any unexpected behavior. */
   uint8_t dummy;
-  syscall(SYS_getrandom, &dummy, sizeof(dummy), GRND_NONBLOCK);
-#endif
+  long getrandom_ret =
+      syscall(SYS_getrandom, &dummy, sizeof(dummy), GRND_NONBLOCK);
 
-  if (fd == -2) {
+  if (getrandom_ret == 1) {
+    urandom_fd = kHaveGetrandom;
+    return;
+  } else if (getrandom_ret == -1 && errno == EAGAIN) {
+    fprintf(stderr,
+            "getrandom indicates that the entropy pool has not been "
+            "initialized. Rather than continue with poor entropy, this process "
+            "will block until entropy is available.\n");
+    do {
+      getrandom_ret =
+          syscall(SYS_getrandom, &dummy, sizeof(dummy), 0 /* no flags */);
+    } while (getrandom_ret == -1 && errno == EINTR);
+
+    if (getrandom_ret == 1) {
+      urandom_fd = kHaveGetrandom;
+      return;
+    }
+  }
+#endif  /* USE_SYS_getrandom */
+
+  if (fd == kUnset) {
     do {
       fd = open("/dev/urandom", O_RDONLY);
     } while (fd == -1 && errno == EINTR);
@@ -156,7 +180,9 @@
   CRYPTO_STATIC_MUTEX_unlock_write(&requested_lock);
 
   CRYPTO_once(&once, init_once);
-  if (urandom_fd != fd) {
+  if (urandom_fd == kHaveGetrandom) {
+    close(fd);
+  } else if (urandom_fd != fd) {
     abort();  // Already initialized.
   }
 }
@@ -168,7 +194,7 @@
       abort();
     }
   } else {
-    fd = -2;
+    fd = kUnset;
   }
 
   CRYPTO_STATIC_MUTEX_lock_write(&requested_lock);
@@ -177,7 +203,15 @@
   CRYPTO_STATIC_MUTEX_unlock_write(&requested_lock);
 
   CRYPTO_once(&once, init_once);
-  if (urandom_buffering != 1 || (fd >= 0 && urandom_fd != fd)) {
+  if (urandom_buffering != 1) {
+    abort();  // Already initialized
+  }
+
+  if (urandom_fd == kHaveGetrandom) {
+    if (fd >= 0) {
+      close(fd);
+    }
+  } else if (urandom_fd != fd) {
     abort();  // Already initialized.
   }
 }
@@ -209,9 +243,19 @@
   ssize_t r;
 
   while (len > 0) {
-    do {
-      r = read(urandom_fd, out, len);
-    } while (r == -1 && errno == EINTR);
+    if (urandom_fd == kHaveGetrandom) {
+#if defined(USE_SYS_getrandom)
+      do {
+        r = syscall(SYS_getrandom, out, len, 0 /* no flags */);
+      } while (r == -1 && errno == EINTR);
+#else
+      abort();
+#endif
+    } else {
+      do {
+        r = read(urandom_fd, out, len);
+      } while (r == -1 && errno == EINTR);
+    }
 
     if (r <= 0) {
       return 0;
diff --git a/src/crypto/sha/asm/sha256-armv4.pl b/src/crypto/sha/asm/sha256-armv4.pl
index e1be226..bac7ce8 100644
--- a/src/crypto/sha/asm/sha256-armv4.pl
+++ b/src/crypto/sha/asm/sha256-armv4.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License").  You may not use
+# this file except in compliance with the License.  You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
 
 # ====================================================================
 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@@ -175,16 +182,11 @@
 #endif
 
 .text
-#if __ARM_ARCH__<7
-.code	32
-#else
+#if defined(__thumb2__)
 .syntax unified
-# if defined(__thumb2__) && !defined(__APPLE__)
-#  define adrl adr
 .thumb
-# else
+#else
 .code   32
-# endif
 #endif
 
 .type	K256,%object
@@ -218,10 +220,10 @@
 .type	sha256_block_data_order,%function
 sha256_block_data_order:
 .Lsha256_block_data_order:
-#if __ARM_ARCH__<7
+#if __ARM_ARCH__<7 && !defined(__thumb2__)
 	sub	r3,pc,#8		@ sha256_block_data_order
 #else
-	adr	r3,sha256_block_data_order
+	adr	r3,.Lsha256_block_data_order
 #endif
 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
 	ldr	r12,.LOPENSSL_armcap
@@ -473,13 +475,14 @@
 
 .global	sha256_block_data_order_neon
 .type	sha256_block_data_order_neon,%function
-.align	4
+.align	5
+.skip	16
 sha256_block_data_order_neon:
 .LNEON:
 	stmdb	sp!,{r4-r12,lr}
 
 	sub	$H,sp,#16*4+16
-	adrl	$Ktbl,K256
+	adr	$Ktbl,K256
 	bic	$H,$H,#15		@ align for 128-bit stores
 	mov	$t2,sp
 	mov	sp,$H			@ alloca
@@ -599,7 +602,7 @@
 $code.=<<___;
 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
 
-# if defined(__thumb2__) && !defined(__APPLE__)
+# if defined(__thumb2__)
 #  define INST(a,b,c,d)	.byte	c,d|0xc,a,b
 # else
 #  define INST(a,b,c,d)	.byte	a,b,c,d
@@ -610,16 +613,11 @@
 sha256_block_data_order_armv8:
 .LARMv8:
 	vld1.32	{$ABCD,$EFGH},[$ctx]
-# ifdef	__APPLE__
 	sub	$Ktbl,$Ktbl,#256+32
-# elif	defined(__thumb2__)
-	adr	$Ktbl,.LARMv8
-	sub	$Ktbl,$Ktbl,#.LARMv8-K256
-# else
-	adrl	$Ktbl,K256
-# endif
 	add	$len,$inp,$len,lsl#6	@ len to point at the end of inp
+	b	.Loop_v8
 
+.align	4
 .Loop_v8:
 	vld1.8		{@MSG[0]-@MSG[1]},[$inp]!
 	vld1.8		{@MSG[2]-@MSG[3]},[$inp]!
diff --git a/src/crypto/x509/x509_test.cc b/src/crypto/x509/x509_test.cc
index c39d98d..0c25754 100644
--- a/src/crypto/x509/x509_test.cc
+++ b/src/crypto/x509/x509_test.cc
@@ -25,7 +25,6 @@
 #include <openssl/pool.h>
 #include <openssl/x509.h>
 
-namespace bssl {
 
 static const char kCrossSigningRootPEM[] =
     "-----BEGIN CERTIFICATE-----\n"
@@ -724,7 +723,7 @@
   }
 
   // Test PKCS#1 v1.5.
-  ScopedEVP_MD_CTX md_ctx;
+  bssl::ScopedEVP_MD_CTX md_ctx;
   if (!EVP_DigestSignInit(md_ctx.get(), NULL, EVP_sha256(), NULL, pkey.get()) ||
       !SignatureRoundTrips(md_ctx.get(), pkey.get())) {
     fprintf(stderr, "RSA PKCS#1 with SHA-256 failed\n");
@@ -941,7 +940,51 @@
   return true;
 }
 
-static int Main() {
+static bool TestFailedParseFromBuffer() {
+  // Case 1: five arbitrary bytes must fail to parse.
+  static const uint8_t kJunk[] = {1, 2, 3, 4, 5};
+  bssl::UniquePtr<CRYPTO_BUFFER> junk_buf(
+      CRYPTO_BUFFER_new(kJunk, sizeof(kJunk), nullptr));
+  if (!junk_buf) {
+    return false;
+  }
+
+  bssl::UniquePtr<X509> junk_cert(X509_parse_from_buffer(junk_buf.get()));
+  if (junk_cert) {
+    fprintf(stderr, "Nonsense somehow parsed.\n");
+    return false;
+  }
+  ERR_clear_error();
+
+  // Case 2: the DER of |kRootCAPEM| plus one trailing zero byte must also fail.
+  size_t der_len;
+  bssl::UniquePtr<uint8_t> der;
+  if (!PEMToDER(&der, &der_len, kRootCAPEM)) {
+    return false;
+  }
+
+  const size_t padded_len = der_len + 1;
+  std::unique_ptr<uint8_t[]> padded(new uint8_t[padded_len]);
+  memcpy(padded.get(), der.get(), der_len);
+  padded[der_len] = 0;
+
+  bssl::UniquePtr<CRYPTO_BUFFER> padded_buf(
+      CRYPTO_BUFFER_new(padded.get(), padded_len, nullptr));
+  if (!padded_buf) {
+    return false;
+  }
+
+  bssl::UniquePtr<X509> padded_cert(X509_parse_from_buffer(padded_buf.get()));
+  if (padded_cert) {
+    fprintf(stderr, "Parsed buffer with trailing byte.\n");
+    return false;
+  }
+  ERR_clear_error();
+
+  return true;
+}
+
+int main() {
   CRYPTO_library_init();
 
   if (!TestVerify() ||
@@ -952,16 +995,11 @@
       !TestFromBuffer() ||
       !TestFromBufferTrailingData() ||
       !TestFromBufferModified() ||
-      !TestFromBufferReused()) {
+      !TestFromBufferReused() ||
+      !TestFailedParseFromBuffer()) {
     return 1;
   }
 
   printf("PASS\n");
   return 0;
 }
-
-}  // namespace bssl
-
-int main() {
-  return bssl::Main();
-}
diff --git a/src/crypto/x509/x_x509.c b/src/crypto/x509/x_x509.c
index 845d4b2..d3cd5b0 100644
--- a/src/crypto/x509/x_x509.c
+++ b/src/crypto/x509/x_x509.c
@@ -106,6 +106,7 @@
         ret->crldp = NULL;
         ret->buf = NULL;
         CRYPTO_new_ex_data(&ret->ex_data);
+        CRYPTO_MUTEX_init(&ret->lock);
         break;
 
     case ASN1_OP_D2I_PRE:
@@ -120,6 +121,7 @@
         break;
 
     case ASN1_OP_FREE_POST:
+        CRYPTO_MUTEX_cleanup(&ret->lock);
         CRYPTO_free_ex_data(&g_ex_data_class, ret, &ret->ex_data);
         X509_CERT_AUX_free(ret->aux);
         ASN1_OCTET_STRING_free(ret->skid);
@@ -129,9 +131,7 @@
         GENERAL_NAMES_free(ret->altname);
         NAME_CONSTRAINTS_free(ret->nc);
         CRYPTO_BUFFER_free(ret->buf);
-
-        if (ret->name != NULL)
-            OPENSSL_free(ret->name);
+        OPENSSL_free(ret->name);
         break;
 
     }
@@ -162,8 +162,8 @@
   X509 *x509p = x509;
   X509 *ret = d2i_X509(&x509p, &inp, CRYPTO_BUFFER_len(buf));
   if (ret == NULL ||
-      (inp - CRYPTO_BUFFER_data(buf)) != (ptrdiff_t) CRYPTO_BUFFER_len(buf)) {
-    X509_free(x509);
+      inp - CRYPTO_BUFFER_data(buf) != (ptrdiff_t)CRYPTO_BUFFER_len(buf)) {
+    X509_free(x509p);
     return NULL;
   }
   assert(x509p == x509);
diff --git a/src/crypto/x509v3/v3_purp.c b/src/crypto/x509v3/v3_purp.c
index 9152444..324de85 100644
--- a/src/crypto/x509v3/v3_purp.c
+++ b/src/crypto/x509v3/v3_purp.c
@@ -146,9 +146,7 @@
 {
     int idx;
     const X509_PURPOSE *pt;
-    if (!(x->ex_flags & EXFLAG_SET)) {
-        x509v3_cache_extensions(x);
-    }
+    x509v3_cache_extensions(x);
     if (id == -1)
         return 1;
     idx = X509_PURPOSE_get_by_id(id);
@@ -407,16 +405,6 @@
         setup_dp(x, sk_DIST_POINT_value(x->crldp, i));
 }
 
-/*
- * g_x509_cache_extensions_lock is used to protect against concurrent calls
- * to |x509v3_cache_extensions|. Ideally this would be done with a
- * |CRYPTO_once_t| in the |X509| structure, but |CRYPTO_once_t| isn't public.
- * Note: it's not entirely clear whether this lock is needed. Not all paths to
- * this function took a lock in OpenSSL.
- */
-static struct CRYPTO_STATIC_MUTEX g_x509_cache_extensions_lock =
-    CRYPTO_STATIC_MUTEX_INIT;
-
 static void x509v3_cache_extensions(X509 *x)
 {
     BASIC_CONSTRAINTS *bs;
@@ -428,10 +416,17 @@
     size_t i;
     int j;
 
-    CRYPTO_STATIC_MUTEX_lock_write(&g_x509_cache_extensions_lock);
+    CRYPTO_MUTEX_lock_read(&x->lock);
+    const int is_set = x->ex_flags & EXFLAG_SET;
+    CRYPTO_MUTEX_unlock_read(&x->lock);
 
+    if (is_set) {
+        return;
+    }
+
+    CRYPTO_MUTEX_lock_write(&x->lock);
     if (x->ex_flags & EXFLAG_SET) {
-        CRYPTO_STATIC_MUTEX_unlock_write(&g_x509_cache_extensions_lock);
+        CRYPTO_MUTEX_unlock_write(&x->lock);
         return;
     }
 
@@ -564,7 +559,7 @@
     }
     x->ex_flags |= EXFLAG_SET;
 
-    CRYPTO_STATIC_MUTEX_unlock_write(&g_x509_cache_extensions_lock);
+    CRYPTO_MUTEX_unlock_write(&x->lock);
 }
 
 /*
@@ -604,10 +599,7 @@
 
 int X509_check_ca(X509 *x)
 {
-    if (!(x->ex_flags & EXFLAG_SET)) {
-        x509v3_cache_extensions(x);
-    }
-
+    x509v3_cache_extensions(x);
     return check_ca(x);
 }