Add GoogleKeymaster. Very incomplete.

Change-Id: I53542c7132bd1a04afee93f3247b88ed7ed0bedc
diff --git a/Makefile b/Makefile
index aa4e23b..4065b5a 100644
--- a/Makefile
+++ b/Makefile
@@ -11,10 +11,14 @@
 CXXFLAGS=-Wall -Werror -Wno-unused -Winit-self -Wpointer-arith	-Wunused-parameter \
 	-Wmissing-declarations -std=c++0x -fprofile-arcs -ftest-coverage \
 	-Wno-deprecated-declarations -fno-exceptions
-LDLIBS=-lpthread
+LDLIBS=-lcrypto -lpthread
 
 CPPSRCS=authorization_set.cpp \
 	authorization_set_test.cpp \
+	google_keymaster.cpp \
+	google_keymaster_test.cpp \
+	google_keymaster_messages.cpp \
+	google_keymaster_messages_test.cpp \
 	serializable.cpp
 CCSRCS=$(BASE)/external/gtest/src/gtest-all.cc
 CSRCS=ocb.c
@@ -24,7 +28,9 @@
 
 LINK.o=$(LINK.cc)
 
-BINARIES=authorization_set_test
+BINARIES=authorization_set_test \
+	google_keymaster_test \
+	google_keymaster_messages_test
 
 .PHONY: coverage valgrind clean run
 
@@ -49,7 +55,6 @@
 #UNINIT_OPTS=--track-origins=yes
 UNINIT_OPTS=--undef-value-errors=no
 
-
 VALGRIND_OPTS=--leak-check=full \
 	--show-reachable=yes \
 	--vgdb=full \
@@ -67,7 +72,21 @@
 	serializable.o \
 	$(BASE)/external/gtest/src/gtest-all.o
 
-$(BASE)/external/gtest/src/gtest-all.o: CXXFLAGS=-Wall
+google_keymaster_test: google_keymaster_test.o \
+	google_keymaster.o \
+	google_keymaster_messages.o \
+	authorization_set.o \
+	serializable.o \
+	ocb.o \
+	$(BASE)/external/gtest/src/gtest-all.o
+
+google_keymaster_messages_test: google_keymaster_messages_test.o \
+	google_keymaster_messages.o \
+	authorization_set.o \
+	serializable.o \
+	$(BASE)/external/gtest/src/gtest-all.o
+
+$(BASE)/external/gtest/src/gtest-all.o: CXXFLAGS:=$(subst -Wmissing-declarations,,$(CXXFLAGS))
 
 clean:
 	rm -f $(OBJS) $(DEPS) $(BINARIES) $(BINARIES:=.run) $(BINARIES:=.valgrind) \
diff --git a/ae.h b/ae.h
new file mode 100644
index 0000000..864d349
--- /dev/null
+++ b/ae.h
@@ -0,0 +1,164 @@
+/* ---------------------------------------------------------------------------
+ *
+ * AEAD API 0.12 - 23-MAY-2012
+ *
+ * This file gives an interface appropriate for many authenticated
+ * encryption with associated data (AEAD) implementations. It does not try
+ * to accommodate all possible options or limitations that an implementation
+ * might have -- you should consult the documentation of your chosen
+ * implementation to find things like RFC 5116 constants, alignment
+ * requirements, whether the incremental interface is supported, etc.
+ *
+ * This file is in the public domain. It is provided "as is", without
+ * warranty of any kind. Use at your own risk.
+ *
+ * Comments are welcome: Ted Krovetz <ted@krovetz>.
+ *
+ * ------------------------------------------------------------------------ */
+
+#ifndef _AE_H_
+#define _AE_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* --------------------------------------------------------------------------
+ *
+ * Constants
+ *
+ * ----------------------------------------------------------------------- */
+
+/* Return status codes: Negative return values indicate an error occurred.
+ * For full explanations of error values, consult the implementation's
+ * documentation.                                                          */
+#define AE_SUCCESS (0)        /* Indicates successful completion of call  */
+#define AE_INVALID (-1)       /* Indicates bad tag during decryption      */
+#define AE_NOT_SUPPORTED (-2) /* Indicates unsupported option requested   */
+
+/* Flags: When data can be processed "incrementally", these flags are used
+ * to indicate whether the submitted data is the last or not.               */
+#define AE_FINALIZE (1) /* This is the last of data                  */
+#define AE_PENDING (0)  /* More data of is coming                    */
+
+/* --------------------------------------------------------------------------
+ *
+ * AEAD opaque structure definition
+ *
+ * ----------------------------------------------------------------------- */
+
+typedef struct _ae_ctx ae_ctx;
+
+/* --------------------------------------------------------------------------
+ *
+ * Data Structure Routines
+ *
+ * ----------------------------------------------------------------------- */
+
+ae_ctx* ae_allocate(void* misc); /* Allocate ae_ctx, set optional ptr   */
+void ae_free(ae_ctx* ctx);       /* Deallocate ae_ctx struct            */
+int ae_clear(ae_ctx* ctx);       /* Undo initialization                 */
+int ae_ctx_sizeof(void);         /* Return sizeof(ae_ctx)               */
+/* ae_allocate() allocates an ae_ctx structure, but does not initialize it.
+ * ae_free() deallocates an ae_ctx structure, but does not zero it.
+ * ae_clear() zeroes sensitive values associated with an ae_ctx structure
+ * and deallocates any auxiliary structures allocated during ae_init().
+ * ae_ctx_sizeof() returns sizeof(ae_ctx), to aid in any static allocations.
+ */
+
+/* --------------------------------------------------------------------------
+ *
+ * AEAD Routines
+ *
+ * ----------------------------------------------------------------------- */
+
+int ae_init(ae_ctx* ctx, const void* key, int key_len, int nonce_len, int tag_len);
+/* --------------------------------------------------------------------------
+ *
+ * Initialize an ae_ctx context structure.
+ *
+ * Parameters:
+ *  ctx       - Pointer to an ae_ctx structure to be initialized
+ *  key       - Pointer to user-supplied key
+ *  key_len   - Length of key supplied, in bytes
+ *  nonce_len - Length of nonces to be used for this key, in bytes
+ *  tag_len   - Length of tags to be produced for this key, in bytes
+ *
+ * Returns:
+ *  AE_SUCCESS       - Success. Ctx ready for use.
+ *  AE_NOT_SUPPORTED - An unsupported length was supplied. Ctx is untouched.
+ *  Otherwise        - Error. Check implementation documentation for codes.
+ *
+ * ----------------------------------------------------------------------- */
+
+int ae_encrypt(ae_ctx* ctx, const void* nonce, const void* pt, int pt_len, const void* ad,
+               int ad_len, void* ct, void* tag, int final);
+/* --------------------------------------------------------------------------
+ *
+ * Encrypt plaintext; provide for authentication of ciphertext/associated data.
+ *
+ * Parameters:
+ *  ctx    - Pointer to an ae_ctx structure initialized by ae_init.
+ *  nonce  - Pointer to a nonce_len (defined in ae_init) byte nonce.
+ *  pt     - Pointer to plaintext bytes to be encrypted.
+ *  pt_len - number of bytes pointed to by pt.
+ *  ad     - Pointer to associated data.
+ *  ad_len - number of bytes pointed to by ad.
+ *  ct     - Pointer to buffer to receive ciphertext encryption.
+ *  tag    - Pointer to receive authentication tag; or NULL
+ *           if tag is to be bundled into the ciphertext.
+ *  final  - Non-zero if this call completes the plaintext being encrypted.
+ *
+ * If nonce!=NULL then a message is being initiated. If final!=0
+ * then a message is being finalized. If final==0 or nonce==NULL
+ * then the incremental interface is being used. If nonce!=NULL and
+ * ad_len<0, then use same ad as last message.
+ *
+ * Returns:
+ *  non-negative     - Number of bytes written to ct.
+ *  AE_NOT_SUPPORTED - Usage mode unsupported (eg, incremental and/or sticky).
+ *  Otherwise        - Error. Check implementation documentation for codes.
+ *
+ * ----------------------------------------------------------------------- */
+
+int ae_decrypt(ae_ctx* ctx, const void* nonce, const void* ct, int ct_len, const void* ad,
+               int ad_len, void* pt, const void* tag, int final);
+/* --------------------------------------------------------------------------
+ *
+ * Decrypt ciphertext; provide authenticity of plaintext and associated data.
+ *
+ * Parameters:
+ *  ctx    - Pointer to an ae_ctx structure initialized by ae_init.
+ *  nonce  - Pointer to a nonce_len (defined in ae_init) byte nonce.
+ *  ct     - Pointer to ciphertext bytes to be decrypted.
+ *  ct_len - number of bytes pointed to by ct.
+ *  ad     - Pointer to associated data.
+ *  ad_len - number of bytes pointed to by ad.
+ *  pt     - Pointer to buffer to receive plaintext decryption.
+ *  tag    - Pointer to tag_len (defined in ae_init) bytes; or NULL
+ *           if tag is bundled into the ciphertext. Non-NULL tag is only
+ *           read when final is non-zero.
+ *  final  - Non-zero if this call completes the ciphertext being decrypted.
+ *
+ * If nonce!=NULL then "ct" points to the start of a ciphertext. If final!=0
+ * then "in" points to the final piece of ciphertext. If final==0 or nonce==
+ * NULL then the incremental interface is being used. If nonce!=NULL and
+ * ad_len<0, then use same ad as last message.
+ *
+ * Returns:
+ *  non-negative     - Number of bytes written to pt.
+ *  AE_INVALID       - Authentication failure.
+ *  AE_NOT_SUPPORTED - Usage mode unsupported (eg, incremental and/or sticky).
+ *  Otherwise        - Error. Check implementation documentation for codes.
+ *
+ * NOTE !!! NOTE !!! -- The ciphertext should be assumed possibly inauthentic
+ *                      until it has been completely written and it is
+ *                      verified that this routine did not return AE_INVALID.
+ *
+ * ----------------------------------------------------------------------- */
+
+#ifdef __cplusplus
+} /* closing brace for extern "C" */
+#endif
+
+#endif /* _AE_H_ */
diff --git a/google_keymaster.cpp b/google_keymaster.cpp
new file mode 100644
index 0000000..45ad3d4
--- /dev/null
+++ b/google_keymaster.cpp
@@ -0,0 +1,452 @@
+/*
+ * Copyright 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <assert.h>
+#include <string.h>
+
+#include <openssl/rsa.h>
+#include <openssl/evp.h>
+#include <openssl/err.h>
+#include <openssl/sha.h>
+
+#include <UniquePtr.h>
+
+#include "google_keymaster.h"
+#include "google_keymaster_utils.h"
+#include "ae.h"
+
+// We need placement new, but we don't want to pull in any standard C++ libs at the moment.
+// Luckily, it's trivial to just implement it.
+inline void* operator new(size_t /* size */, void* here) { return here; }
+
+namespace keymaster {
+
+const int NONCE_LENGTH = 12;
+const int TAG_LENGTH = 128 / 8;
+#define REQUIRED_ALIGNMENT_FOR_AES_OCB 16
+
+GoogleKeymaster::GoogleKeymaster() {}
+
+GoogleKeymaster::~GoogleKeymaster() {}
+
+const int RSA_DEFAULT_KEY_SIZE = 2048;
+const int RSA_DEFAULT_EXPONENT = 65537;
+
+#define CHECK_ERR(err)                                                                             \
+    if ((err) != OK)                                                                               \
+        return err;
+
+struct BIGNUM_Delete {
+    void operator()(BIGNUM* p) const { BN_free(p); }
+};
+typedef UniquePtr<BIGNUM, BIGNUM_Delete> Unique_BIGNUM;
+
+struct RSA_Delete {
+    void operator()(RSA* p) const { RSA_free(p); }
+};
+typedef UniquePtr<RSA, RSA_Delete> Unique_RSA;
+
+struct EVP_PKEY_Delete {
+    void operator()(EVP_PKEY* p) const { EVP_PKEY_free(p); }
+};
+typedef UniquePtr<EVP_PKEY, EVP_PKEY_Delete> Unique_EVP_PKEY;
+
+struct AE_CTX_Delete {
+    void operator()(ae_ctx* ctx) const { ae_free(ctx); }
+};
+typedef UniquePtr<ae_ctx, AE_CTX_Delete> Unique_ae_ctx;
+
+struct ByteArray_Delete {
+    void operator()(void* p) const { delete[] reinterpret_cast<uint8_t*>(p); }
+};
+
+// Context buffer used for AES OCB encryptions.
+uint8_t aes_ocb_ctx_buf[896];
+
+/**
+ * Many OpenSSL APIs take ownership of an argument on success but don't free the argument on
+ * failure. This means we need to tell our scoped pointers when we've transferred ownership, without
+ * triggering a warning by not using the result of release().
+ */
+template <typename T, typename Delete_T>
+inline void release_because_ownership_transferred(UniquePtr<T, Delete_T>& p) {
+    T* val __attribute__((unused)) = p.release();
+}
+
+keymaster_algorithm_t supported_algorithms[] = {
+    KM_ALGORITHM_RSA,
+};
+
+template <typename T>
+bool check_supported(keymaster_algorithm_t algorithm, SupportedResponse<T>* response) {
+    if (!array_contains(supported_algorithms, algorithm)) {
+        response->error = KM_ERROR_UNSUPPORTED_ALGORITHM;
+        return false;
+    }
+    return true;
+}
+
+void
+GoogleKeymaster::SupportedAlgorithms(SupportedResponse<keymaster_algorithm_t>* response) const {
+    if (response == NULL)
+        return;
+    response->SetResults(supported_algorithms);
+}
+
+void
+GoogleKeymaster::SupportedBlockModes(keymaster_algorithm_t algorithm,
+                                     SupportedResponse<keymaster_block_mode_t>* response) const {
+    if (response == NULL || !check_supported(algorithm, response))
+        return;
+    response->error = KM_ERROR_OK;
+}
+
+keymaster_padding_t rsa_supported_padding[] = {KM_PAD_NONE};
+
+void
+GoogleKeymaster::SupportedPaddingModes(keymaster_algorithm_t algorithm,
+                                       SupportedResponse<keymaster_padding_t>* response) const {
+    if (response == NULL || !check_supported(algorithm, response))
+        return;
+
+    response->error = KM_ERROR_OK;
+    switch (algorithm) {
+    case KM_ALGORITHM_RSA:
+        response->SetResults(rsa_supported_padding);
+        break;
+    default:
+        response->results_length = 0;
+        break;
+    }
+}
+
+keymaster_digest_t rsa_supported_digests[] = {KM_DIGEST_NONE};
+void GoogleKeymaster::SupportedDigests(keymaster_algorithm_t algorithm,
+                                       SupportedResponse<keymaster_digest_t>* response) const {
+    if (response == NULL || !check_supported(algorithm, response))
+        return;
+
+    response->error = KM_ERROR_OK;
+    switch (algorithm) {
+    case KM_ALGORITHM_RSA:
+        response->SetResults(rsa_supported_digests);
+        break;
+    default:
+        response->results_length = 0;
+        break;
+    }
+}
+
+keymaster_key_format_t rsa_supported_import_formats[] = {KM_KEY_FORMAT_PKCS8};
+void
+GoogleKeymaster::SupportedImportFormats(keymaster_algorithm_t algorithm,
+                                        SupportedResponse<keymaster_key_format_t>* response) const {
+    if (response == NULL || !check_supported(algorithm, response))
+        return;
+
+    response->error = KM_ERROR_OK;
+    switch (algorithm) {
+    case KM_ALGORITHM_RSA:
+        response->SetResults(rsa_supported_import_formats);
+        break;
+    default:
+        response->results_length = 0;
+        break;
+    }
+}
+
+keymaster_key_format_t rsa_supported_export_formats[] = {KM_KEY_FORMAT_X509};
+void
+GoogleKeymaster::SupportedExportFormats(keymaster_algorithm_t algorithm,
+                                        SupportedResponse<keymaster_key_format_t>* response) const {
+    if (response == NULL || !check_supported(algorithm, response))
+        return;
+
+    response->error = KM_ERROR_OK;
+    switch (algorithm) {
+    case KM_ALGORITHM_RSA:
+        response->SetResults(rsa_supported_export_formats);
+        break;
+    default:
+        response->results_length = 0;
+        break;
+    }
+}
+
+template <typename Message>
+void store_bignum(Message* message, void (Message::*set)(const void* value, size_t size),
+                  BIGNUM* bignum) {
+    size_t bufsize = BN_num_bytes(bignum);
+    UniquePtr<uint8_t[]> buf(new uint8_t[bufsize]);
+    int bytes_written = BN_bn2bin(bignum, buf.get());
+    (message->*set)(buf.get(), bytes_written);
+}
+
+class Eraser {
+  public:
+    Eraser(uint8_t* buf, size_t size) : buf_(buf), size_(size) {}
+    ~Eraser() {
+        while (size_-- > 0)
+            *buf_++ = 0;
+    }
+
+  private:
+    uint8_t* buf_;
+    size_t size_;
+};
+
+void GoogleKeymaster::GenerateKey(const GenerateKeyRequest& request,
+                                  GenerateKeyResponse* response) {
+    if (response == NULL)
+        return;
+    response->error = KM_ERROR_OK;
+
+    if (!CopyAuthorizations(request.key_description, response))
+        return;
+
+    keymaster_algorithm_t algorithm;
+    if (!request.key_description.GetTagValue(TAG_ALGORITHM, &algorithm)) {
+        response->error = KM_ERROR_UNSUPPORTED_ALGORITHM;
+        return;
+    }
+    switch (algorithm) {
+    case KM_ALGORITHM_RSA:
+        if (!GenerateRsa(request.key_description, response))
+            return;
+        break;
+    default:
+        response->error = KM_ERROR_UNSUPPORTED_ALGORITHM;
+        return;
+    }
+}
+
+class KeyBlob {
+  public:
+    static KeyBlob* AllocAndInit(GenerateKeyResponse* response, size_t key_len) {
+        size_t blob_length = get_size(response->enforced, response->unenforced, key_len);
+        KeyBlob* blob(reinterpret_cast<KeyBlob*>(new uint8_t[blob_length]));
+        return new (blob) KeyBlob(response->enforced, response->unenforced, key_len);
+    }
+
+    inline size_t length() {
+        return get_size(enforced_length(), unenforced_length(), key_length());
+    }
+    inline uint8_t* nonce() { return nonce_; }
+    inline size_t nonce_length() { return NONCE_LENGTH; }
+    inline uint8_t* key_data() { return key_data_; }
+    inline size_t key_length() { return key_length_; }
+    inline size_t key_data_length() { return key_length_ + TAG_LENGTH; }
+    inline uint8_t* enforced() {
+        return key_data_ + key_length_ + TAG_LENGTH + padding(key_length_ + TAG_LENGTH);
+    }
+    inline size_t enforced_length() { return enforced_length_; }
+    inline uint32_t* enforced_length_copy() {
+        return reinterpret_cast<uint32_t*>(enforced() + enforced_length());
+    }
+    inline uint8_t* unenforced() { return enforced() + enforced_length_ + sizeof(uint32_t); }
+    inline size_t unenforced_length() { return unenforced_length_; }
+    inline uint8_t* end() { return unenforced() + unenforced_length_; }
+    inline uint8_t* auth_data() { return enforced(); }
+    inline size_t auth_data_length() { return end() - enforced(); }
+
+  private:
+    KeyBlob(AuthorizationSet& enforced_set, AuthorizationSet& unenforced_set, size_t key_len)
+        : enforced_length_(enforced_set.SerializedSize()),
+          unenforced_length_(unenforced_set.SerializedSize()), key_length_(key_len) {
+        enforced_set.Serialize(enforced());
+        unenforced_set.Serialize(unenforced());
+    }
+
+    uint32_t enforced_length_;
+    uint32_t unenforced_length_;
+    uint32_t key_length_;
+    uint8_t nonce_[NONCE_LENGTH];
+    uint8_t key_data_[] __attribute__((aligned(REQUIRED_ALIGNMENT_FOR_AES_OCB)));
+    // Actual structure will also include:
+    //    uint8_t enforced[] at key_data + key_length
+    //    uint32_t enforced_length at key_data + key_length + enforced_length
+    //    uint8_t unenforced[] at key_data + key_length + enforced_length.
+
+    static size_t get_size(AuthorizationSet& enforced_set, AuthorizationSet& unenforced_set,
+                           size_t key_len) {
+        return get_size(enforced_set.SerializedSize(), unenforced_set.SerializedSize(), key_len);
+    }
+
+    static size_t get_size(size_t enforced_len, size_t unenforced_len, size_t key_len) {
+        size_t pad_len = padding(key_len + TAG_LENGTH);
+        return sizeof(KeyBlob) +   // includes lengths and nonce
+               key_len +           // key in key_data_
+               TAG_LENGTH +        // authentication tag in key_data_
+               pad_len +           // padding to align authorization data
+               enforced_len +      // enforced authorization data
+               sizeof(uint32_t) +  // size of enforced authorization data.  This is also in
+                                   // enforced_length_ but it's duplicated here to ensure that it's
+                                   // included in the OCB-authenticated data, to enforce the
+                                   // boundary between enforced and unenforced authorizations.
+               unenforced_len;     // size of unenforced authorization data.
+    }
+
+    /**
+     * Return the number of padding bytes needed to round up to the next alignment boundary.
+     * boundary.
+     */
+    static size_t padding(size_t size) {
+        return REQUIRED_ALIGNMENT_FOR_AES_OCB - (size % REQUIRED_ALIGNMENT_FOR_AES_OCB);
+    }
+};
+
+keymaster_error_t GoogleKeymaster::WrapKey(uint8_t* key_data, size_t key_length, KeyBlob* blob) {
+    assert(ae_ctx_sizeof() == (int)array_size(aes_ocb_ctx_buf));
+    Eraser ctx_eraser(aes_ocb_ctx_buf, array_size(aes_ocb_ctx_buf));
+    ae_ctx* ctx = reinterpret_cast<ae_ctx*>(aes_ocb_ctx_buf);
+    int ae_err = ae_init(ctx, MasterKey(), MasterKeyLength(), blob->nonce_length(), TAG_LENGTH);
+    if (ae_err != AE_SUCCESS) {
+        return KM_ERROR_UNKNOWN_ERROR;
+    }
+
+    GetNonce(blob->nonce(), blob->nonce_length());
+    ae_err = ae_encrypt(ctx, blob->nonce(), key_data, key_length, blob->auth_data(),
+                        blob->auth_data_length(), blob->key_data(), NULL, 1 /* final */);
+    if (ae_err < 0) {
+        return KM_ERROR_UNKNOWN_ERROR;
+    }
+    assert(ae_err == (int)key_length + TAG_LENGTH);
+    return KM_ERROR_OK;
+}
+
+bool GoogleKeymaster::CreateKeyBlob(GenerateKeyResponse* response, uint8_t* key_bytes,
+                                    size_t key_length) {
+    UniquePtr<KeyBlob, ByteArray_Delete> blob(KeyBlob::AllocAndInit(response, key_length));
+    if (blob.get() == NULL) {
+        response->error = KM_ERROR_MEMORY_ALLOCATION_FAILED;
+        return false;
+    }
+
+    keymaster_error_t err = WrapKey(key_bytes, key_length, blob.get());
+    if (err != KM_ERROR_OK) {
+        response->error = err;
+        return false;
+    }
+
+    response->key_blob.key_material_size = blob->length();
+    response->key_blob.key_material = reinterpret_cast<uint8_t*>(blob.release());
+
+    return true;
+}
+
+bool GoogleKeymaster::GenerateRsa(const AuthorizationSet& key_auths,
+                                  GenerateKeyResponse* response) {
+    uint64_t public_exponent = RSA_DEFAULT_EXPONENT;
+    if (!key_auths.GetTagValue(TAG_RSA_PUBLIC_EXPONENT, &public_exponent))
+        AddAuthorization(Authorization(TAG_RSA_PUBLIC_EXPONENT, public_exponent), response);
+
+    uint32_t key_size = RSA_DEFAULT_KEY_SIZE;
+    if (!key_auths.GetTagValue(TAG_KEY_SIZE, &key_size))
+        AddAuthorization(Authorization(TAG_KEY_SIZE, key_size), response);
+
+    Unique_BIGNUM exponent(BN_new());
+    Unique_RSA rsa_key(RSA_new());
+    Unique_EVP_PKEY pkey(EVP_PKEY_new());
+    if (rsa_key.get() == NULL || pkey.get() == NULL) {
+        response->error = KM_ERROR_MEMORY_ALLOCATION_FAILED;
+        return false;
+    }
+
+    if (!BN_set_word(exponent.get(), public_exponent) ||
+        !RSA_generate_key_ex(rsa_key.get(), key_size, exponent.get(), NULL /* callback */)) {
+        response->error = KM_ERROR_UNKNOWN_ERROR;
+        return false;
+    }
+
+    if (!EVP_PKEY_assign_RSA(pkey.get(), rsa_key.get())) {
+        response->error = KM_ERROR_UNKNOWN_ERROR;
+        return false;
+    } else {
+        release_because_ownership_transferred(rsa_key);
+    }
+
+    int der_length = i2d_PrivateKey(pkey.get(), NULL);
+    if (der_length <= 0) {
+        response->error = KM_ERROR_UNKNOWN_ERROR;
+        return false;
+    }
+    UniquePtr<uint8_t[]> der_data(new uint8_t[der_length]);
+    if (der_data.get() == NULL) {
+        response->error = KM_ERROR_MEMORY_ALLOCATION_FAILED;
+        return false;
+    }
+
+    uint8_t* tmp = der_data.get();
+    i2d_PrivateKey(pkey.get(), &tmp);
+
+    return CreateKeyBlob(response, der_data.get(), der_length);
+}
+
+static keymaster_error_t CheckAuthorizationSet(const AuthorizationSet& set) {
+    switch (set.is_valid()) {
+    case AuthorizationSet::OK_FULL:
+    case AuthorizationSet::OK_GROWABLE:
+        return KM_ERROR_OK;
+    case AuthorizationSet::ALLOCATION_FAILURE:
+        return KM_ERROR_MEMORY_ALLOCATION_FAILED;
+    case AuthorizationSet::BOUNDS_CHECKING_FAILURE:
+    case AuthorizationSet::MALFORMED_DATA:
+        return KM_ERROR_UNKNOWN_ERROR;
+    }
+    return KM_ERROR_OK;
+}
+
+bool GoogleKeymaster::CopyAuthorizations(const AuthorizationSet& key_description,
+                                         GenerateKeyResponse* response) {
+    for (size_t i = 0; i < key_description.size(); ++i) {
+        switch (key_description[i].tag) {
+        case KM_TAG_ROOT_OF_TRUST:
+        case KM_TAG_CREATION_DATETIME:
+        case KM_TAG_ORIGIN:
+            response->error = KM_ERROR_INVALID_TAG;
+            return false;
+        case KM_TAG_ROLLBACK_RESISTANT:
+            response->error = KM_ERROR_UNSUPPORTED_TAG;
+            return false;
+        default:
+            AddAuthorization(key_description[i], response);
+            break;
+        }
+    }
+
+    AddAuthorization(Authorization(TAG_CREATION_DATETIME, java_time(time(NULL))), response);
+    AddAuthorization(Authorization(TAG_ORIGIN, origin()), response);
+    AddAuthorization(Authorization(TAG_ROOT_OF_TRUST, "SW", 2), response);
+
+    response->error = CheckAuthorizationSet(response->enforced);
+    if (response->error != KM_ERROR_OK)
+        return false;
+    response->error = CheckAuthorizationSet(response->unenforced);
+    if (response->error != KM_ERROR_OK)
+        return false;
+
+    return true;
+}
+
+void GoogleKeymaster::AddAuthorization(const keymaster_key_param_t& auth,
+                                       GenerateKeyResponse* response) {
+    if (is_enforced(auth.tag))
+        response->enforced.push_back(auth);
+    else
+        response->unenforced.push_back(auth);
+}
+
+}  // namespace keymaster
diff --git a/google_keymaster.h b/google_keymaster.h
new file mode 100644
index 0000000..41861be
--- /dev/null
+++ b/google_keymaster.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef EXTERNAL_KEYMASTER_GOOGLE_KEYMASTER_H_
+#define EXTERNAL_KEYMASTER_GOOGLE_KEYMASTER_H_
+
+#include "authorization_set.h"
+#include "google_keymaster_messages.h"
+
+namespace keymaster {
+
+struct KeyBlob;
+
+/**
+ * OpenSSL-based Keymaster backing implementation, for use as a pure software implmentation
+ * (softkeymaster) and in a trusted execution environment (TEE), like ARM TrustZone.  This class
+ * doesn't actually implement the Keymaster HAL interface, instead it implements an alternative API
+ * which is similar to and based upon the HAL, but uses protobufs rather than simple C types.  The
+ * use of protobufs is primarily for TEEs which have a restricted, stream-based communication
+ * mechanism between the secure and non-secure worlds.  Protobuf's serialization allows arbitrarily
+ * complex data structures to be efficiently and reliably unmarshaled and remarshaled.
+ *
+ * For non-secure, pure software implementation there is a HAL translation layer that converts the
+ * HAL's parameters to and from protobuf message representations, which are then passed in to this
+ * API.
+ *
+ * For secure implementation there is another HAL translation layer that wraps the protobuf
+ * parameters into RPC messages, then serializes them to the TEE. In the TEE implementation there's
+ * another component which deserializes the messages, extracts the relevant parameters and calls
+ * this API.
+ */
+class GoogleKeymaster {
+  public:
+    GoogleKeymaster();
+    virtual ~GoogleKeymaster();
+
+    void SupportedAlgorithms(SupportedResponse<keymaster_algorithm_t>* response) const;
+    void SupportedBlockModes(keymaster_algorithm_t algorithm,
+                             SupportedResponse<keymaster_block_mode_t>* response) const;
+    void SupportedPaddingModes(keymaster_algorithm_t algorithm,
+                               SupportedResponse<keymaster_padding_t>* response) const;
+    void SupportedDigests(keymaster_algorithm_t algorithm,
+                          SupportedResponse<keymaster_digest_t>* response) const;
+    void SupportedImportFormats(keymaster_algorithm_t algorithm,
+                                SupportedResponse<keymaster_key_format_t>* response) const;
+    void SupportedExportFormats(keymaster_algorithm_t algorithm,
+                                SupportedResponse<keymaster_key_format_t>* response) const;
+
+    // virtual keymaster_error_t AddRngEntropy(AddEntropyRequest& /* request */);
+    void GenerateKey(const GenerateKeyRequest& request, GenerateKeyResponse* response);
+    void GetKeyCharacteristics(const GetKeyCharacteristicsRequest& request,
+                               GetKeyCharacteristicsResponse* response);
+
+    // void Rescope(const RescopeRequest& request, RescopeResponse* response);
+    // void ImportKey(const ImportKeyRequest& request, ImportKeyResponse* response);
+    // void ExportKey(const ExportKeyRequest& request, ExportKeyResponse* response);
+    // void BeginOperation(const BeginOperationRequest& request, BeginOperationResponse* response);
+    // void UpdateOperation(const UpdateOperationRequest& request, UpdateOperationResponse*
+    // response);
+    // void FinishOperation(const FinishOperationRequest& request, FinishOperationResponse*
+    // response);
+    // void AbortOperation(const AbortOperationRequest& request);
+
+    virtual bool is_enforced(keymaster_tag_t tag) = 0;
+
+    virtual keymaster_key_origin_t origin() = 0;
+
+  private:
+    virtual uint8_t* MasterKey() = 0;
+    virtual size_t MasterKeyLength() = 0;
+    virtual void GetNonce(uint8_t* nonce, size_t length) = 0;
+
+    bool CreateKeyBlob(GenerateKeyResponse* response, uint8_t* key_material, size_t key_length);
+
+    bool CopyAuthorizations(const AuthorizationSet& key_description, GenerateKeyResponse* response);
+    void AddAuthorization(const keymaster_key_param_t& auth, GenerateKeyResponse* response);
+    bool GenerateRsa(const AuthorizationSet& key_auths, GenerateKeyResponse* response);
+    keymaster_error_t WrapKey(uint8_t* key_data, size_t key_length, KeyBlob* blob);
+};
+
+}  // namespace keymaster
+
+#endif  //  EXTERNAL_KEYMASTER_GOOGLE_KEYMASTER_H_
diff --git a/google_keymaster_messages.cpp b/google_keymaster_messages.cpp
new file mode 100644
index 0000000..a8730b6
--- /dev/null
+++ b/google_keymaster_messages.cpp
@@ -0,0 +1,80 @@
+/*
+ * Copyright 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "google_keymaster_messages.h"
+
+namespace keymaster {
+
+GenerateKeyResponse::~GenerateKeyResponse() { delete[] key_blob.key_material; }
+
+size_t GenerateKeyResponse::SerializedSize() const {
+    if (error == KM_ERROR_OK) {
+        return sizeof(int32_t) /* error */ + sizeof(uint32_t) /* key size */ +
+               key_blob.key_material_size + sizeof(uint32_t) /* enforced size */ +
+               enforced.SerializedSize() + sizeof(uint32_t) /* unenforced size */ +
+               unenforced.SerializedSize();
+    } else {
+        return sizeof(error);
+    }
+}
+
+uint8_t* GenerateKeyResponse::Serialize(uint8_t* buf) const {
+    buf = append_to_buf(buf, static_cast<int32_t>(error));
+    if (error == KM_ERROR_OK) {
+        buf = append_size_and_data_to_buf(buf, key_blob.key_material, key_blob.key_material_size);
+        buf = append_to_buf(buf, static_cast<uint32_t>(enforced.SerializedSize()));
+        buf = enforced.Serialize(buf);
+        buf = append_to_buf(buf, static_cast<uint32_t>(unenforced.SerializedSize()));
+        buf = unenforced.Serialize(buf);
+    };
+    return buf;
+}
+
+bool GenerateKeyResponse::DeserializeToCopy(const uint8_t** buf, const uint8_t* end) {
+    delete[] key_blob.key_material;
+
+    if (!copy_from_buf(buf, end, &error))
+        return false;
+
+    if (end == *buf)
+        // Nothing but an error
+        return true;
+
+    uint32_t key_material_size;
+    if (!copy_from_buf(buf, end, &key_material_size) || end - *buf < key_material_size)
+        return false;
+
+    key_blob.key_material = new uint8_t[key_material_size];
+    key_blob.key_material_size = key_material_size;
+
+    if (key_blob.key_material == NULL ||
+        !copy_from_buf(buf, end, key_blob.key_material, key_blob.key_material_size))
+        return false;
+
+    uint32_t enforced_size;
+    if (!copy_from_buf(buf, end, &enforced_size) || end - *buf < enforced_size ||
+        !enforced.DeserializeToCopy(buf, *buf + enforced_size))
+        return false;
+
+    uint32_t unenforced_size;
+    if (!copy_from_buf(buf, end, &unenforced_size) || end - *buf < unenforced_size ||
+        !unenforced.DeserializeToCopy(buf, *buf + unenforced_size))
+        return false;
+
+    return true;
+}
+
+}  // namespace keymaster
diff --git a/google_keymaster_messages.h b/google_keymaster_messages.h
new file mode 100644
index 0000000..af6f6d6
--- /dev/null
+++ b/google_keymaster_messages.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef EXTERNAL_KEYMASTER_GOOGLE_KEYMASTER_MESSAGES_H_
+#define EXTERNAL_KEYMASTER_GOOGLE_KEYMASTER_MESSAGES_H_
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "authorization_set.h"
+
+namespace keymaster {
+
+// Commands
+const uint32_t GENERATE_KEY = 0;
+
+struct GenerateKeyRequest : public Serializable {
+    GenerateKeyRequest() {}
+    GenerateKeyRequest(uint8_t* buf, size_t size) : key_description(buf, size) {}
+
+    AuthorizationSet key_description;
+
+    size_t SerializedSize() const { return key_description.SerializedSize(); }
+    uint8_t* Serialize(uint8_t* buf) const { return key_description.Serialize(buf); }
+    bool DeserializeInPlace(uint8_t** buf, const uint8_t* end) {
+        return key_description.DeserializeInPlace(buf, end);
+    }
+    bool DeserializeToCopy(const uint8_t** buf, const uint8_t* end) {
+        return key_description.DeserializeToCopy(buf, end);
+    }
+};
+
+struct GenerateKeyResponse : public Serializable {
+    GenerateKeyResponse() {
+        error = KM_ERROR_OK;
+        key_blob.key_material = NULL;
+        key_blob.key_material_size = 0;
+    }
+    ~GenerateKeyResponse();
+
+    keymaster_error_t error;
+    keymaster_key_blob_t key_blob;
+    AuthorizationSet enforced;
+    AuthorizationSet unenforced;
+
+    size_t SerializedSize() const;
+    uint8_t* Serialize(uint8_t* buf) const;
+    bool DeserializeInPlace(uint8_t** /* buf */, const uint8_t* /* end */) { return false; }
+    bool DeserializeToCopy(const uint8_t** buf, const uint8_t* end);
+};
+
+struct SupportedAlgorithmsResponse {
+    keymaster_error_t error;
+    keymaster_algorithm_t* algorithms;
+    size_t algorithms_length;
+};
+
+template <typename T> struct SupportedResponse {
+    SupportedResponse() : results(NULL), results_length(0) {}
+    ~SupportedResponse() { delete[] results; }
+
+    template <size_t N> void SetResults(const T (&arr)[N]) {
+        delete[] results;
+        results_length = 0;
+        results = dup_array(arr);
+        if (results == NULL) {
+            error = KM_ERROR_MEMORY_ALLOCATION_FAILED;
+        } else {
+            results_length = N;
+            error = KM_ERROR_OK;
+        }
+    }
+
+    keymaster_error_t error;
+    T* results;
+    size_t results_length;
+};
+
+struct GetKeyCharacteristicsRequest {
+    keymaster_key_blob_t key_blob;
+    keymaster_blob_t client_id;
+    keymaster_blob_t app_data;
+};
+
+struct GetKeyCharacteristicsResponse {
+    keymaster_error_t error;
+    keymaster_key_blob_t key_blob;
+    AuthorizationSet enforced;
+    AuthorizationSet unenforced;
+};
+
+}  // namespace keymaster
+
+#endif  // EXTERNAL_KEYMASTER_GOOGLE_KEYMASTER_MESSAGES_H_
diff --git a/google_keymaster_messages_test.cpp b/google_keymaster_messages_test.cpp
new file mode 100644
index 0000000..6010572
--- /dev/null
+++ b/google_keymaster_messages_test.cpp
@@ -0,0 +1,168 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <UniquePtr.h>
+
+#include <gtest/gtest.h>
+
+#define KEYMASTER_NAME_TAGS
+#include "keymaster_tags.h"
+#include "google_keymaster_utils.h"
+#include "google_softkeymaster.h"
+
+int main(int argc, char** argv) {
+    ::testing::InitGoogleTest(&argc, argv);
+    int result = RUN_ALL_TESTS();
+    return result;
+}
+
+namespace keymaster {
+namespace test {
+
+TEST(GenerateKeyRequest, RoundTrip) {
+    keymaster_key_param_t params[] = {
+        Authorization(TAG_PURPOSE, KM_PURPOSE_SIGN),
+        Authorization(TAG_PURPOSE, KM_PURPOSE_VERIFY),
+        Authorization(TAG_ALGORITHM, KM_ALGORITHM_RSA),
+        Authorization(TAG_USER_ID, 7),
+        Authorization(TAG_USER_AUTH_ID, 8),
+        Authorization(TAG_APPLICATION_ID, "app_id", 6),
+        Authorization(TAG_AUTH_TIMEOUT, 300),
+    };
+    GenerateKeyRequest req;
+    req.key_description.Reinitialize(params, array_length(params));
+
+    size_t size = req.SerializedSize();
+    EXPECT_EQ(182U, size);
+
+    UniquePtr<uint8_t[]> buf(new uint8_t[size]);
+    EXPECT_EQ(buf.get() + size, req.Serialize(buf.get()));
+
+    GenerateKeyRequest deserialized1;
+    uint8_t* p = buf.get();
+    EXPECT_TRUE(deserialized1.DeserializeInPlace(&p, p + size));
+    EXPECT_EQ(7U, deserialized1.key_description.size());
+
+    // Check a few entries.
+    keymaster_purpose_t purpose;
+    EXPECT_TRUE(deserialized1.key_description.GetTagValue(TAG_PURPOSE, 0, &purpose));
+    EXPECT_EQ(KM_PURPOSE_SIGN, purpose);
+    keymaster_blob_t blob;
+    EXPECT_TRUE(deserialized1.key_description.GetTagValue(TAG_APPLICATION_ID, &blob));
+    EXPECT_EQ(6U, blob.data_length);
+    EXPECT_EQ(0, memcmp(blob.data, "app_id", 6));
+    uint32_t val;
+    EXPECT_TRUE(deserialized1.key_description.GetTagValue(TAG_USER_ID, &val));
+    EXPECT_EQ(7U, val);
+
+    GenerateKeyRequest deserialized2;
+    const uint8_t* p2 = buf.get();
+    EXPECT_TRUE(deserialized2.DeserializeToCopy(&p2, p2 + size));
+    EXPECT_EQ(7U, deserialized2.key_description.size());
+
+    // Check a few entries.
+    EXPECT_TRUE(deserialized2.key_description.GetTagValue(TAG_PURPOSE, 0, &purpose));
+    EXPECT_EQ(KM_PURPOSE_SIGN, purpose);
+    EXPECT_TRUE(deserialized2.key_description.GetTagValue(TAG_APPLICATION_ID, &blob));
+    EXPECT_EQ(6U, blob.data_length);
+    EXPECT_EQ(0, memcmp(blob.data, "app_id", 6));
+    EXPECT_TRUE(deserialized2.key_description.GetTagValue(TAG_USER_ID, &val));
+    EXPECT_EQ(7U, val);
+}
+
+uint8_t TEST_DATA[] = "a key blob";
+
+TEST(GenerateKeyResponse, RoundTrip) {
+    keymaster_key_param_t params[] = {
+        Authorization(TAG_PURPOSE, KM_PURPOSE_SIGN),
+        Authorization(TAG_PURPOSE, KM_PURPOSE_VERIFY),
+        Authorization(TAG_ALGORITHM, KM_ALGORITHM_RSA),
+        Authorization(TAG_USER_ID, 7),
+        Authorization(TAG_USER_AUTH_ID, 8),
+        Authorization(TAG_APPLICATION_ID, "app_id", 6),
+        Authorization(TAG_AUTH_TIMEOUT, 300),
+    };
+    GenerateKeyResponse rsp;
+    rsp.error = KM_ERROR_OK;
+    rsp.key_blob.key_material = dup_array(TEST_DATA);
+    rsp.key_blob.key_material_size = array_length(TEST_DATA);
+    rsp.enforced.Reinitialize(params, array_length(params));
+
+    size_t size = rsp.SerializedSize();
+    EXPECT_EQ(217U, size);
+
+    UniquePtr<uint8_t[]> buf(new uint8_t[size]);
+    EXPECT_EQ(buf.get() + size, rsp.Serialize(buf.get()));
+
+    GenerateKeyResponse deserialized;
+    uint8_t* p = buf.get();
+
+    // DeserializeInPlace is not implemented.
+    EXPECT_FALSE(deserialized.DeserializeInPlace(&p, p + size));
+
+    const uint8_t* p2 = buf.get();
+    EXPECT_TRUE(deserialized.DeserializeToCopy(&p2, p2 + size));
+    EXPECT_EQ(7U, deserialized.enforced.size());
+
+    EXPECT_EQ(0U, deserialized.unenforced.size());
+    EXPECT_EQ(KM_ERROR_OK, deserialized.error);
+
+    // Check a few entries of enforced.
+    keymaster_purpose_t purpose;
+    EXPECT_TRUE(deserialized.enforced.GetTagValue(TAG_PURPOSE, 0, &purpose));
+    EXPECT_EQ(KM_PURPOSE_SIGN, purpose);
+    keymaster_blob_t blob;
+    EXPECT_TRUE(deserialized.enforced.GetTagValue(TAG_APPLICATION_ID, &blob));
+    EXPECT_EQ(6U, blob.data_length);
+    EXPECT_EQ(0, memcmp(blob.data, "app_id", 6));
+    uint32_t val;
+    EXPECT_TRUE(deserialized.enforced.GetTagValue(TAG_USER_ID, &val));
+    EXPECT_EQ(7U, val);
+}
+
+TEST(GenerateKeyResponse, Error) {
+    keymaster_key_param_t params[] = {
+        Authorization(TAG_PURPOSE, KM_PURPOSE_SIGN),
+        Authorization(TAG_PURPOSE, KM_PURPOSE_VERIFY),
+        Authorization(TAG_ALGORITHM, KM_ALGORITHM_RSA),
+        Authorization(TAG_USER_ID, 7),
+        Authorization(TAG_USER_AUTH_ID, 8),
+        Authorization(TAG_APPLICATION_ID, "app_id", 6),
+        Authorization(TAG_AUTH_TIMEOUT, 300),
+    };
+    GenerateKeyResponse rsp;
+    rsp.error = KM_ERROR_UNSUPPORTED_ALGORITHM;
+    rsp.key_blob.key_material = dup_array(TEST_DATA);
+    rsp.key_blob.key_material_size = array_length(TEST_DATA);
+    rsp.enforced.Reinitialize(params, array_length(params));
+
+    size_t size = rsp.SerializedSize();
+    EXPECT_EQ(4U, size);
+
+    UniquePtr<uint8_t[]> buf(new uint8_t[size]);
+    EXPECT_EQ(buf.get() + size, rsp.Serialize(buf.get()));
+
+    GenerateKeyResponse deserialized;
+    const uint8_t* p = buf.get();
+    EXPECT_TRUE(deserialized.DeserializeToCopy(&p, p + size));
+    EXPECT_EQ(KM_ERROR_UNSUPPORTED_ALGORITHM, deserialized.error);
+    EXPECT_EQ(0U, deserialized.enforced.size());
+    EXPECT_EQ(0U, deserialized.unenforced.size());
+    EXPECT_EQ(0U, deserialized.key_blob.key_material_size);
+}
+
+}  // namespace test
+}  // namespace keymaster
diff --git a/google_keymaster_test.cpp b/google_keymaster_test.cpp
new file mode 100644
index 0000000..435257c
--- /dev/null
+++ b/google_keymaster_test.cpp
@@ -0,0 +1,226 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+#include <openssl/engine.h>
+
+#define KEYMASTER_NAME_TAGS
+#include "keymaster_tags.h"
+#include "google_keymaster_utils.h"
+#include "google_softkeymaster.h"
+
+int main(int argc, char** argv) {
+    ::testing::InitGoogleTest(&argc, argv);
+    int result = RUN_ALL_TESTS();
+    // Clean up stuff OpenSSL leaves around, so Valgrind doesn't complain.
+    CRYPTO_cleanup_all_ex_data();
+    ERR_free_strings();
+    return result;
+}
+
+namespace keymaster {
+namespace test {
+
+class KeymasterTest : public testing::Test {
+  protected:
+    KeymasterTest() {
+    }
+    ~KeymasterTest() {
+    }
+
+    GoogleSoftKeymaster device;
+};
+
+template <keymaster_tag_t Tag, typename KeymasterEnum>
+bool contains(const AuthorizationSet& set, TypedEnumTag<KM_ENUM, Tag, KeymasterEnum> tag,
+              KeymasterEnum val) {
+    int pos = set.find(tag);
+    return pos != -1 && set[pos].enumerated == val;
+}
+
+template <keymaster_tag_t Tag, typename KeymasterEnum>
+bool contains(const AuthorizationSet& set, TypedEnumTag<KM_ENUM_REP, Tag, KeymasterEnum> tag,
+              KeymasterEnum val) {
+    int pos = -1;
+    while ((pos = set.find(tag, pos)) != -1)
+        if (set[pos].enumerated == val)
+            return true;
+    return false;
+}
+
+template <keymaster_tag_t Tag>
+bool contains(const AuthorizationSet& set, TypedTag<KM_INT, Tag> tag, uint32_t val) {
+    int pos = set.find(tag);
+    return pos != -1 && set[pos].integer == val;
+}
+
+template <keymaster_tag_t Tag>
+bool contains(const AuthorizationSet& set, TypedTag<KM_INT_REP, Tag> tag, uint32_t val) {
+    int pos = -1;
+    while ((pos = set.find(tag, pos)) != -1)
+        if (set[pos].integer == val)
+            return true;
+    return false;
+}
+
+template <keymaster_tag_t Tag>
+bool contains(const AuthorizationSet& set, TypedTag<KM_LONG, Tag> tag, uint64_t val) {
+    int pos = set.find(tag);
+    return pos != -1 && set[pos].long_integer == val;
+}
+
+template <keymaster_tag_t Tag>
+bool contains(const AuthorizationSet& set, TypedTag<KM_BYTES, Tag> tag, const std::string& val) {
+    int pos = set.find(tag);
+    return pos != -1 &&
+           std::string(reinterpret_cast<const char*>(set[pos].blob.data),
+                       set[pos].blob.data_length) == val;
+}
+
+inline bool contains(const AuthorizationSet& set, keymaster_tag_t tag) {
+    return set.find(tag) != -1;
+}
+
+typedef KeymasterTest CheckSupported;
+TEST_F(CheckSupported, SupportedAlgorithms) {
+    // Shouldn't blow up on NULL.
+    device.SupportedAlgorithms(NULL);
+
+    SupportedResponse<keymaster_algorithm_t> response;
+    device.SupportedAlgorithms(&response);
+    EXPECT_EQ(KM_ERROR_OK, response.error);
+    EXPECT_EQ(1U, response.results_length);
+    EXPECT_EQ(KM_ALGORITHM_RSA, response.results[0]);
+}
+
+TEST_F(CheckSupported, SupportedBlockModes) {
+    // Shouldn't blow up on NULL.
+    device.SupportedBlockModes(KM_ALGORITHM_RSA, NULL);
+
+    SupportedResponse<keymaster_block_mode_t> response;
+    device.SupportedBlockModes(KM_ALGORITHM_RSA, &response);
+    EXPECT_EQ(KM_ERROR_OK, response.error);
+    EXPECT_EQ(0U, response.results_length);
+
+    device.SupportedBlockModes(KM_ALGORITHM_DSA, &response);
+    EXPECT_EQ(KM_ERROR_UNSUPPORTED_ALGORITHM, response.error);
+}
+
+TEST_F(CheckSupported, SupportedPaddingModes) {
+    // Shouldn't blow up on NULL.
+    device.SupportedPaddingModes(KM_ALGORITHM_RSA, NULL);
+
+    SupportedResponse<keymaster_padding_t> response;
+    device.SupportedPaddingModes(KM_ALGORITHM_RSA, &response);
+    EXPECT_EQ(KM_ERROR_OK, response.error);
+    EXPECT_EQ(1U, response.results_length);
+    EXPECT_EQ(KM_PAD_NONE, response.results[0]);
+
+    device.SupportedPaddingModes(KM_ALGORITHM_DSA, &response);
+    EXPECT_EQ(KM_ERROR_UNSUPPORTED_ALGORITHM, response.error);
+}
+
+TEST_F(CheckSupported, SupportedDigests) {
+    // Shouldn't blow up on NULL.
+    device.SupportedDigests(KM_ALGORITHM_RSA, NULL);
+
+    SupportedResponse<keymaster_digest_t> response;
+    device.SupportedDigests(KM_ALGORITHM_RSA, &response);
+    EXPECT_EQ(KM_ERROR_OK, response.error);
+    EXPECT_EQ(1U, response.results_length);
+    EXPECT_EQ(KM_DIGEST_NONE, response.results[0]);
+
+    device.SupportedDigests(KM_ALGORITHM_DSA, &response);
+    EXPECT_EQ(KM_ERROR_UNSUPPORTED_ALGORITHM, response.error);
+}
+
+TEST_F(CheckSupported, SupportedImportFormats) {
+    // Shouldn't blow up on NULL.
+    device.SupportedImportFormats(KM_ALGORITHM_RSA, NULL);
+
+    SupportedResponse<keymaster_key_format_t> response;
+    device.SupportedImportFormats(KM_ALGORITHM_RSA, &response);
+    EXPECT_EQ(KM_ERROR_OK, response.error);
+    EXPECT_EQ(1U, response.results_length);
+    EXPECT_EQ(KM_KEY_FORMAT_PKCS8, response.results[0]);
+
+    device.SupportedImportFormats(KM_ALGORITHM_DSA, &response);
+    EXPECT_EQ(KM_ERROR_UNSUPPORTED_ALGORITHM, response.error);
+}
+
+TEST_F(CheckSupported, SupportedExportFormats) {
+    // Shouldn't blow up on NULL.
+    device.SupportedExportFormats(KM_ALGORITHM_RSA, NULL);
+
+    SupportedResponse<keymaster_key_format_t> response;
+    device.SupportedExportFormats(KM_ALGORITHM_RSA, &response);
+    EXPECT_EQ(KM_ERROR_OK, response.error);
+    EXPECT_EQ(1U, response.results_length);
+    EXPECT_EQ(KM_KEY_FORMAT_X509, response.results[0]);
+
+    device.SupportedExportFormats(KM_ALGORITHM_DSA, &response);
+    EXPECT_EQ(KM_ERROR_UNSUPPORTED_ALGORITHM, response.error);
+}
+
+typedef KeymasterTest NewKeyGeneration;
+TEST_F(NewKeyGeneration, Rsa) {
+    keymaster_key_param_t params[] = {
+        Authorization(TAG_PURPOSE, KM_PURPOSE_SIGN),
+        Authorization(TAG_PURPOSE, KM_PURPOSE_VERIFY),
+        Authorization(TAG_ALGORITHM, KM_ALGORITHM_RSA),
+        Authorization(TAG_USER_ID, 7),
+        Authorization(TAG_USER_AUTH_ID, 8),
+        Authorization(TAG_APPLICATION_ID, reinterpret_cast<const uint8_t*>("app_id"), 6),
+        Authorization(TAG_AUTH_TIMEOUT, 300),
+    };
+    GenerateKeyRequest req;
+    req.key_description.Reinitialize(params, array_length(params));
+    GenerateKeyResponse rsp;
+
+    device.GenerateKey(req, &rsp);
+
+    ASSERT_EQ(KM_ERROR_OK, rsp.error);
+    EXPECT_EQ(0U, rsp.enforced.size());
+    EXPECT_EQ(8U, rsp.enforced.SerializedSize());
+    EXPECT_GT(rsp.unenforced.SerializedSize(), 8U);
+
+    // Check specified tags are all present in unenforced characteristics
+    EXPECT_TRUE(contains(rsp.unenforced, TAG_PURPOSE, KM_PURPOSE_SIGN));
+    EXPECT_TRUE(contains(rsp.unenforced, TAG_PURPOSE, KM_PURPOSE_VERIFY));
+
+    EXPECT_TRUE(contains(rsp.unenforced, TAG_ALGORITHM, KM_ALGORITHM_RSA));
+
+    EXPECT_TRUE(contains(rsp.unenforced, TAG_USER_ID, 7));
+    EXPECT_TRUE(contains(rsp.unenforced, TAG_USER_AUTH_ID, 8));
+    EXPECT_TRUE(contains(rsp.unenforced, TAG_APPLICATION_ID, "app_id"));
+    EXPECT_TRUE(contains(rsp.unenforced, TAG_KEY_SIZE, 2048));
+    EXPECT_TRUE(contains(rsp.unenforced, TAG_AUTH_TIMEOUT, 300));
+
+    // Just for giggles, check that some unexpected tags/values are NOT present.
+    EXPECT_FALSE(contains(rsp.unenforced, TAG_PURPOSE, KM_PURPOSE_ENCRYPT));
+    EXPECT_FALSE(contains(rsp.unenforced, TAG_PURPOSE, KM_PURPOSE_DECRYPT));
+    EXPECT_FALSE(contains(rsp.unenforced, TAG_AUTH_TIMEOUT, 301));
+    EXPECT_FALSE(contains(rsp.unenforced, TAG_RESCOPE_AUTH_TIMEOUT));
+
+    // Now check that unspecified, defaulted tags are correct.
+    EXPECT_TRUE(contains(rsp.unenforced, TAG_RSA_PUBLIC_EXPONENT, 65537));
+    EXPECT_TRUE(contains(rsp.unenforced, TAG_ORIGIN, KM_ORIGIN_SOFTWARE));
+    EXPECT_TRUE(contains(rsp.unenforced, KM_TAG_CREATION_DATETIME));
+    EXPECT_TRUE(contains(rsp.unenforced, TAG_ROOT_OF_TRUST, "SW"));
+}
+
+}  // namespace test
+}  // namespace keymaster
diff --git a/google_keymaster_utils.h b/google_keymaster_utils.h
index cf60f86..a59a71a 100644
--- a/google_keymaster_utils.h
+++ b/google_keymaster_utils.h
@@ -38,7 +38,7 @@
  * array_length() on the original array to discover the size.
  */
 template <typename T, size_t N> inline T* dup_array(const T (&a)[N]) {
-    T* dup = static_cast<T*>(malloc(array_size(a)));
+    T* dup = new T[N];
     if (dup != NULL) {
         memcpy(dup, &a, array_size(a));
     }
diff --git a/google_softkeymaster.h b/google_softkeymaster.h
new file mode 100644
index 0000000..03df5b4
--- /dev/null
+++ b/google_softkeymaster.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef EXTERNAL_KEYMASTER_GOOGLE_SOFT_KEYMASTER_H_
+#define EXTERNAL_KEYMASTER_GOOGLE_SOFT_KEYMASTER_H_
+
+#include "google_keymaster.h"
+
+namespace keymaster {
+
+class GoogleSoftKeymaster : public GoogleKeymaster {
+  public:
+    bool is_enforced(keymaster_tag_t /* tag */) { return false; }
+    keymaster_key_origin_t origin() { return KM_ORIGIN_SOFTWARE; }
+
+  private:
+    static uint8_t master_key_[];
+
+    uint8_t* MasterKey() { return master_key_; }
+
+    size_t MasterKeyLength() { return 16; }
+
+    void GetNonce(uint8_t* nonce, size_t length) {
+        for (size_t i = 0; i < length; ++i)
+            nonce[i] = 0;
+    }
+};
+
+uint8_t GoogleSoftKeymaster::master_key_[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+}  // namespace
+
+#endif  // EXTERNAL_KEYMASTER_GOOGLE_SOFT_KEYMASTER_H_
diff --git a/ocb.c b/ocb.c
new file mode 100644
index 0000000..84f367d
--- /dev/null
+++ b/ocb.c
@@ -0,0 +1,1481 @@
+/*------------------------------------------------------------------------
+/ OCB Version 3 Reference Code (Optimized C)     Last modified 12-JUN-2013
+/-------------------------------------------------------------------------
+/ Copyright (c) 2013 Ted Krovetz.
+/
+/ Permission to use, copy, modify, and/or distribute this software for any
+/ purpose with or without fee is hereby granted, provided that the above
+/ copyright notice and this permission notice appear in all copies.
+/
+/ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+/ WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+/ MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+/ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+/ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+/ ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+/ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+/
+/ Phillip Rogaway holds patents relevant to OCB. See the following for
+/ his patent grant: http://www.cs.ucdavis.edu/~rogaway/ocb/grant.htm
+/
+/ Special thanks to Keegan McAllister for suggesting several good improvements
+/
+/ Comments are welcome: Ted Krovetz <ted@krovetz.net> - Dedicated to Laurel K
+/------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------- */
+/* Usage notes                                                             */
+/* ----------------------------------------------------------------------- */
+
+/* - When AE_PENDING is passed as the 'final' parameter of any function,
+/    the length parameters must be a multiple of (BPI*16).
+/  - When available, SSE or AltiVec registers are used to manipulate data.
+/    So, when on machines with these facilities, all pointers passed to
+/    any function should be 16-byte aligned.
+/  - Plaintext and ciphertext pointers may be equal (ie, plaintext gets
+/    encrypted in-place), but no other pair of pointers may be equal.
+/  - This code assumes all x86 processors have SSE2 and SSSE3 instructions
+/    when compiling under MSVC. If untrue, alter the #define.
+/  - This code is tested for C99 and recent versions of GCC and MSVC.      */
+
+/* ----------------------------------------------------------------------- */
+/* User configuration options                                              */
+/* ----------------------------------------------------------------------- */
+
+/* Set the AES key length to use and length of authentication tag to produce.
+/  Setting either to 0 requires the value be set at runtime via ae_init().
+/  Some optimizations occur for each when set to a fixed value.            */
+#define OCB_KEY_LEN 16 /* 0, 16, 24 or 32. 0 means set in ae_init */
+#define OCB_TAG_LEN 16 /* 0 to 16. 0 means set in ae_init         */
+
+/* This implementation has built-in support for multiple AES APIs. Set any
+/  one of the following to non-zero to specify which to use.               */
+#define USE_OPENSSL_AES 1   /* http://openssl.org                      */
+#define USE_REFERENCE_AES 0 /* Internet search: rijndael-alg-fst.c     */
+#define USE_AES_NI 0        /* Uses compiler's intrinsics              */
+
+/* During encryption and decryption, various "L values" are required.
+/  The L values can be precomputed during initialization (requiring extra
+/  space in ae_ctx), generated as needed (slightly slowing encryption and
+/  decryption), or some combination of the two. L_TABLE_SZ specifies how many
+/  L values to precompute. L_TABLE_SZ must be at least 3. L_TABLE_SZ*16 bytes
+/  are used for L values in ae_ctx. Plaintext and ciphertexts shorter than
+/  2^L_TABLE_SZ blocks need no L values calculated dynamically.            */
+#define L_TABLE_SZ 16
+
+/* Set L_TABLE_SZ_IS_ENOUGH non-zero iff you know that all plaintexts
+/  will be shorter than 2^(L_TABLE_SZ+4) bytes in length. This results
+/  in better performance.                                                  */
+#define L_TABLE_SZ_IS_ENOUGH 1
+
+/* ----------------------------------------------------------------------- */
+/* Includes and compiler specific definitions                              */
+/* ----------------------------------------------------------------------- */
+
+#include "ae.h"
+#include <stdlib.h>
+#include <string.h>
+
+/* Define standard sized integers                                          */
+#if defined(_MSC_VER) && (_MSC_VER < 1600)
+typedef unsigned __int8 uint8_t;
+typedef unsigned __int32 uint32_t;
+typedef unsigned __int64 uint64_t;
+typedef __int64 int64_t;
+#else
+#include <stdint.h>
+#endif
+
+/* Compiler-specific intrinsics and fixes: bswap64, ntz                    */
+#if _MSC_VER
+#define inline __inline                           /* MSVC doesn't recognize "inline" in C */
+#define restrict __restrict                       /* MSVC doesn't recognize "restrict" in C */
+#define __SSE2__ (_M_IX86 || _M_AMD64 || _M_X64)  /* Assume SSE2  */
+#define __SSSE3__ (_M_IX86 || _M_AMD64 || _M_X64) /* Assume SSSE3 */
+#include <intrin.h>
+#pragma intrinsic(_byteswap_uint64, _BitScanForward, memcpy)
+#define bswap64(x) _byteswap_uint64(x)
+static inline unsigned ntz(unsigned x) {
+    _BitScanForward(&x, x);
+    return x;
+}
+#elif __GNUC__
+#define inline __inline__                   /* No "inline" in GCC ansi C mode */
+#define restrict __restrict__               /* No "restrict" in GCC ansi C mode */
+#define bswap64(x) __builtin_bswap64(x)     /* Assuming GCC 4.3+ */
+#define ntz(x) __builtin_ctz((unsigned)(x)) /* Assuming GCC 3.4+ */
+#else /* Assume some C99 features: stdint.h, inline, restrict */
+#define bswap32(x)                                                                                 \
+    ((((x)&0xff000000u) >> 24) | (((x)&0x00ff0000u) >> 8) | (((x)&0x0000ff00u) << 8) |             \
+     (((x)&0x000000ffu) << 24))
+
+static inline uint64_t bswap64(uint64_t x) {
+    union {
+        uint64_t u64;
+        uint32_t u32[2];
+    } in, out;
+    in.u64 = x;
+    out.u32[0] = bswap32(in.u32[1]);
+    out.u32[1] = bswap32(in.u32[0]);
+    return out.u64;
+}
+
+#if (L_TABLE_SZ <= 9) && (L_TABLE_SZ_IS_ENOUGH) /* < 2^13 byte texts */
+static inline unsigned ntz(unsigned x) {
+    static const unsigned char tz_table[] = {
+        0, 2, 3, 2, 4, 2, 3, 2, 5, 2, 3, 2, 4, 2, 3, 2, 6, 2, 3, 2, 4, 2, 3, 2, 5, 2,
+        3, 2, 4, 2, 3, 2, 7, 2, 3, 2, 4, 2, 3, 2, 5, 2, 3, 2, 4, 2, 3, 2, 6, 2, 3, 2,
+        4, 2, 3, 2, 5, 2, 3, 2, 4, 2, 3, 2, 8, 2, 3, 2, 4, 2, 3, 2, 5, 2, 3, 2, 4, 2,
+        3, 2, 6, 2, 3, 2, 4, 2, 3, 2, 5, 2, 3, 2, 4, 2, 3, 2, 7, 2, 3, 2, 4, 2, 3, 2,
+        5, 2, 3, 2, 4, 2, 3, 2, 6, 2, 3, 2, 4, 2, 3, 2, 5, 2, 3, 2, 4, 2, 3, 2};
+    return tz_table[x / 4];
+}
+#else                                           /* From http://supertech.csail.mit.edu/papers/debruijn.pdf */
+static inline unsigned ntz(unsigned x) {
+    static const unsigned char tz_table[32] = {0,  1,  28, 2,  29, 14, 24, 3,  30, 22, 20,
+                                               15, 25, 17, 4,  8,  31, 27, 13, 23, 21, 19,
+                                               16, 7,  26, 12, 18, 6,  11, 5,  10, 9};
+    return tz_table[((uint32_t)((x & -x) * 0x077CB531u)) >> 27];
+}
+#endif
+#endif
+
+/* ----------------------------------------------------------------------- */
+/* Define blocks and operations -- Patch if incorrect on your compiler.    */
+/* ----------------------------------------------------------------------- */
+
+#if __SSE2__
+#include <xmmintrin.h> /* SSE instructions and _mm_malloc */
+#include <emmintrin.h> /* SSE2 instructions               */
+typedef __m128i block;
+#define xor_block(x, y) _mm_xor_si128(x, y)
+#define zero_block() _mm_setzero_si128()
+#define unequal_blocks(x, y) (_mm_movemask_epi8(_mm_cmpeq_epi8(x, y)) != 0xffff)
+#if __SSSE3__ || USE_AES_NI
+#include <tmmintrin.h> /* SSSE3 instructions              */
+#define swap_if_le(b)                                                                              \
+    _mm_shuffle_epi8(b, _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15))
+#else
+static inline block swap_if_le(block b) {
+    block a = _mm_shuffle_epi32(b, _MM_SHUFFLE(0, 1, 2, 3));
+    a = _mm_shufflehi_epi16(a, _MM_SHUFFLE(2, 3, 0, 1));
+    a = _mm_shufflelo_epi16(a, _MM_SHUFFLE(2, 3, 0, 1));
+    return _mm_xor_si128(_mm_srli_epi16(a, 8), _mm_slli_epi16(a, 8));
+}
+#endif
+static inline block gen_offset(uint64_t KtopStr[3], unsigned bot) {
+    block hi = _mm_load_si128((__m128i*)(KtopStr + 0));  /* hi = B A */
+    block lo = _mm_loadu_si128((__m128i*)(KtopStr + 1)); /* lo = C B */
+    __m128i lshift = _mm_cvtsi32_si128(bot);
+    __m128i rshift = _mm_cvtsi32_si128(64 - bot);
+    lo = _mm_xor_si128(_mm_sll_epi64(hi, lshift), _mm_srl_epi64(lo, rshift));
+#if __SSSE3__ || USE_AES_NI
+    return _mm_shuffle_epi8(lo, _mm_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7));
+#else
+    return swap_if_le(_mm_shuffle_epi32(lo, _MM_SHUFFLE(1, 0, 3, 2)));
+#endif
+}
+static inline block double_block(block bl) {
+    const __m128i mask = _mm_set_epi32(135, 1, 1, 1);
+    __m128i tmp = _mm_srai_epi32(bl, 31);
+    tmp = _mm_and_si128(tmp, mask);
+    tmp = _mm_shuffle_epi32(tmp, _MM_SHUFFLE(2, 1, 0, 3));
+    bl = _mm_slli_epi32(bl, 1);
+    return _mm_xor_si128(bl, tmp);
+}
+#elif __ALTIVEC__
+#include <altivec.h>
+typedef vector unsigned block;
+#define xor_block(x, y) vec_xor(x, y)
+#define zero_block() vec_splat_u32(0)
+#define unequal_blocks(x, y) vec_any_ne(x, y)
+#define swap_if_le(b) (b)
+#if __PPC64__
+block gen_offset(uint64_t KtopStr[3], unsigned bot) {
+    union {
+        uint64_t u64[2];
+        block bl;
+    } rval;
+    rval.u64[0] = (KtopStr[0] << bot) | (KtopStr[1] >> (64 - bot));
+    rval.u64[1] = (KtopStr[1] << bot) | (KtopStr[2] >> (64 - bot));
+    return rval.bl;
+}
+#else
+/* Special handling: Shifts are mod 32, and no 64-bit types */
+block gen_offset(uint64_t KtopStr[3], unsigned bot) {
+    const vector unsigned k32 = {32, 32, 32, 32};
+    vector unsigned hi = *(vector unsigned*)(KtopStr + 0);
+    vector unsigned lo = *(vector unsigned*)(KtopStr + 2);
+    vector unsigned bot_vec;
+    if (bot < 32) {
+        lo = vec_sld(hi, lo, 4);
+    } else {
+        vector unsigned t = vec_sld(hi, lo, 4);
+        lo = vec_sld(hi, lo, 8);
+        hi = t;
+        bot = bot - 32;
+    }
+    if (bot == 0)
+        return hi;
+    *(unsigned*)&bot_vec = bot;
+    vector unsigned lshift = vec_splat(bot_vec, 0);
+    vector unsigned rshift = vec_sub(k32, lshift);
+    hi = vec_sl(hi, lshift);
+    lo = vec_sr(lo, rshift);
+    return vec_xor(hi, lo);
+}
+#endif
+static inline block double_block(block b) {
+    const vector unsigned char mask = {135, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
+    const vector unsigned char perm = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0};
+    const vector unsigned char shift7 = vec_splat_u8(7);
+    const vector unsigned char shift1 = vec_splat_u8(1);
+    vector unsigned char c = (vector unsigned char)b;
+    vector unsigned char t = vec_sra(c, shift7);
+    t = vec_and(t, mask);
+    t = vec_perm(t, t, perm);
+    c = vec_sl(c, shift1);
+    return (block)vec_xor(c, t);
+}
+#elif __ARM_NEON__
+#include <arm_neon.h>
+typedef int8x16_t block; /* Yay! Endian-neutral reads! */
+#define xor_block(x, y) veorq_s8(x, y)
+#define zero_block() vdupq_n_s8(0)
+static inline int unequal_blocks(block a, block b) {
+    int64x2_t t = veorq_s64((int64x2_t)a, (int64x2_t)b);
+    return (vgetq_lane_s64(t, 0) | vgetq_lane_s64(t, 1)) != 0;
+}
+#define swap_if_le(b) (b) /* Using endian-neutral int8x16_t */
+/* KtopStr is reg correct by 64 bits, return mem correct */
+block gen_offset(uint64_t KtopStr[3], unsigned bot) {
+    const union {
+        unsigned x;
+        unsigned char endian;
+    } little = {1};
+    const int64x2_t k64 = {-64, -64};
+    uint64x2_t hi = *(uint64x2_t*)(KtopStr + 0); /* hi = A B */
+    uint64x2_t lo = *(uint64x2_t*)(KtopStr + 1); /* hi = B C */
+    int64x2_t ls = vdupq_n_s64(bot);
+    int64x2_t rs = vqaddq_s64(k64, ls);
+    block rval = (block)veorq_u64(vshlq_u64(hi, ls), vshlq_u64(lo, rs));
+    if (little.endian)
+        rval = vrev64q_s8(rval);
+    return rval;
+}
+static inline block double_block(block b) {
+    const block mask = {135, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
+    block tmp = vshrq_n_s8(b, 7);
+    tmp = vandq_s8(tmp, mask);
+    tmp = vextq_s8(tmp, tmp, 1); /* Rotate high byte to end */
+    b = vshlq_n_s8(b, 1);
+    return veorq_s8(tmp, b);
+}
+#else
+typedef struct { uint64_t l, r; } block;
+static inline block xor_block(block x, block y) {
+    x.l ^= y.l;
+    x.r ^= y.r;
+    return x;
+}
+static inline block zero_block(void) {
+    const block t = {0, 0};
+    return t;
+}
+#define unequal_blocks(x, y) ((((x).l ^ (y).l) | ((x).r ^ (y).r)) != 0)
+static inline block swap_if_le(block b) {
+    const union {
+        unsigned x;
+        unsigned char endian;
+    } little = {1};
+    if (little.endian) {
+        block r;
+        r.l = bswap64(b.l);
+        r.r = bswap64(b.r);
+        return r;
+    } else
+        return b;
+}
+
+/* KtopStr is reg correct by 64 bits, return mem correct */
+block gen_offset(uint64_t KtopStr[3], unsigned bot) {
+    block rval;
+    if (bot != 0) {
+        rval.l = (KtopStr[0] << bot) | (KtopStr[1] >> (64 - bot));
+        rval.r = (KtopStr[1] << bot) | (KtopStr[2] >> (64 - bot));
+    } else {
+        rval.l = KtopStr[0];
+        rval.r = KtopStr[1];
+    }
+    return swap_if_le(rval);
+}
+
+#if __GNUC__ && __arm__
+static inline block double_block(block b) {
+    __asm__("adds %1,%1,%1\n\t"
+            "adcs %H1,%H1,%H1\n\t"
+            "adcs %0,%0,%0\n\t"
+            "adcs %H0,%H0,%H0\n\t"
+            "it cs\n\t"
+            "eorcs %1,%1,#135"
+            : "+r"(b.l), "+r"(b.r)
+            :
+            : "cc");
+    return b;
+}
+#else
+static inline block double_block(block b) {
+    uint64_t t = (uint64_t)((int64_t)b.l >> 63);
+    b.l = (b.l + b.l) ^ (b.r >> 63);
+    b.r = (b.r + b.r) ^ (t & 135);
+    return b;
+}
+#endif
+
+#endif
+
+/* ----------------------------------------------------------------------- */
+/* AES - Code uses OpenSSL API. Other implementations get mapped to it.    */
+/* ----------------------------------------------------------------------- */
+
+/*---------------*/
+#if USE_OPENSSL_AES
+/*---------------*/
+
+#include <openssl/aes.h> /* http://openssl.org/ */
+
+/* How to ECB encrypt an array of blocks, in place                         */
+static inline void AES_ecb_encrypt_blks(block* blks, unsigned nblks, AES_KEY* key) {
+    while (nblks) {
+        --nblks;
+        AES_encrypt((unsigned char*)(blks + nblks), (unsigned char*)(blks + nblks), key);
+    }
+}
+
+static inline void AES_ecb_decrypt_blks(block* blks, unsigned nblks, AES_KEY* key) {
+    while (nblks) {
+        --nblks;
+        AES_decrypt((unsigned char*)(blks + nblks), (unsigned char*)(blks + nblks), key);
+    }
+}
+
+#define BPI 4 /* Number of blocks in buffer per ECB call */
+
+/*-------------------*/
+#elif USE_REFERENCE_AES
+/*-------------------*/
+
+#include "rijndael-alg-fst.h" /* Barreto's Public-Domain Code */
+#if (OCB_KEY_LEN == 0)
+typedef struct {
+    uint32_t rd_key[60];
+    int rounds;
+} AES_KEY;
+#define ROUNDS(ctx) ((ctx)->rounds)
+#define AES_set_encrypt_key(x, y, z)                                                               \
+    do {                                                                                           \
+        rijndaelKeySetupEnc((z)->rd_key, x, y);                                                    \
+        (z)->rounds = y / 32 + 6;                                                                  \
+    } while (0)
+#define AES_set_decrypt_key(x, y, z)                                                               \
+    do {                                                                                           \
+        rijndaelKeySetupDec((z)->rd_key, x, y);                                                    \
+        (z)->rounds = y / 32 + 6;                                                                  \
+    } while (0)
+#else
+typedef struct { uint32_t rd_key[OCB_KEY_LEN + 28]; } AES_KEY;
+#define ROUNDS(ctx) (6 + OCB_KEY_LEN / 4)
+#define AES_set_encrypt_key(x, y, z) rijndaelKeySetupEnc((z)->rd_key, x, y)
+#define AES_set_decrypt_key(x, y, z) rijndaelKeySetupDec((z)->rd_key, x, y)
+#endif
+#define AES_encrypt(x, y, z) rijndaelEncrypt((z)->rd_key, ROUNDS(z), x, y)
+#define AES_decrypt(x, y, z) rijndaelDecrypt((z)->rd_key, ROUNDS(z), x, y)
+
+static void AES_ecb_encrypt_blks(block* blks, unsigned nblks, AES_KEY* key) {
+    while (nblks) {
+        --nblks;
+        AES_encrypt((unsigned char*)(blks + nblks), (unsigned char*)(blks + nblks), key);
+    }
+}
+
+void AES_ecb_decrypt_blks(block* blks, unsigned nblks, AES_KEY* key) {
+    while (nblks) {
+        --nblks;
+        AES_decrypt((unsigned char*)(blks + nblks), (unsigned char*)(blks + nblks), key);
+    }
+}
+
+#define BPI 4 /* Number of blocks in buffer per ECB call */
+
+/*----------*/
+#elif USE_AES_NI
+/*----------*/
+
+#include <wmmintrin.h>
+
+#if (OCB_KEY_LEN == 0)
+typedef struct {
+    __m128i rd_key[15];
+    int rounds;
+} AES_KEY;
+#define ROUNDS(ctx) ((ctx)->rounds)
+#else
+typedef struct { __m128i rd_key[7 + OCB_KEY_LEN / 4]; } AES_KEY;
+#define ROUNDS(ctx) (6 + OCB_KEY_LEN / 4)
+#endif
+
+#define EXPAND_ASSIST(v1, v2, v3, v4, shuff_const, aes_const)                                      \
+    v2 = _mm_aeskeygenassist_si128(v4, aes_const);                                                 \
+    v3 = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(v3), _mm_castsi128_ps(v1), 16));         \
+    v1 = _mm_xor_si128(v1, v3);                                                                    \
+    v3 = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(v3), _mm_castsi128_ps(v1), 140));        \
+    v1 = _mm_xor_si128(v1, v3);                                                                    \
+    v2 = _mm_shuffle_epi32(v2, shuff_const);                                                       \
+    v1 = _mm_xor_si128(v1, v2)
+
+#define EXPAND192_STEP(idx, aes_const)                                                             \
+    EXPAND_ASSIST(x0, x1, x2, x3, 85, aes_const);                                                  \
+    x3 = _mm_xor_si128(x3, _mm_slli_si128(x3, 4));                                                 \
+    x3 = _mm_xor_si128(x3, _mm_shuffle_epi32(x0, 255));                                            \
+    kp[idx] = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(tmp), _mm_castsi128_ps(x0), 68));   \
+    kp[idx + 1] =                                                                                  \
+        _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(x0), _mm_castsi128_ps(x3), 78));          \
+    EXPAND_ASSIST(x0, x1, x2, x3, 85, (aes_const * 2));                                            \
+    x3 = _mm_xor_si128(x3, _mm_slli_si128(x3, 4));                                                 \
+    x3 = _mm_xor_si128(x3, _mm_shuffle_epi32(x0, 255));                                            \
+    kp[idx + 2] = x0;                                                                              \
+    tmp = x3
+
+static void AES_128_Key_Expansion(const unsigned char* userkey, void* key) {
+    __m128i x0, x1, x2;
+    __m128i* kp = (__m128i*)key;
+    kp[0] = x0 = _mm_loadu_si128((__m128i*)userkey);
+    x2 = _mm_setzero_si128();
+    EXPAND_ASSIST(x0, x1, x2, x0, 255, 1);
+    kp[1] = x0;
+    EXPAND_ASSIST(x0, x1, x2, x0, 255, 2);
+    kp[2] = x0;
+    EXPAND_ASSIST(x0, x1, x2, x0, 255, 4);
+    kp[3] = x0;
+    EXPAND_ASSIST(x0, x1, x2, x0, 255, 8);
+    kp[4] = x0;
+    EXPAND_ASSIST(x0, x1, x2, x0, 255, 16);
+    kp[5] = x0;
+    EXPAND_ASSIST(x0, x1, x2, x0, 255, 32);
+    kp[6] = x0;
+    EXPAND_ASSIST(x0, x1, x2, x0, 255, 64);
+    kp[7] = x0;
+    EXPAND_ASSIST(x0, x1, x2, x0, 255, 128);
+    kp[8] = x0;
+    EXPAND_ASSIST(x0, x1, x2, x0, 255, 27);
+    kp[9] = x0;
+    EXPAND_ASSIST(x0, x1, x2, x0, 255, 54);
+    kp[10] = x0;
+}
+
+static void AES_192_Key_Expansion(const unsigned char* userkey, void* key) {
+    __m128i x0, x1, x2, x3, tmp, *kp = (__m128i*)key;
+    kp[0] = x0 = _mm_loadu_si128((__m128i*)userkey);
+    tmp = x3 = _mm_loadu_si128((__m128i*)(userkey + 16));
+    x2 = _mm_setzero_si128();
+    EXPAND192_STEP(1, 1);
+    EXPAND192_STEP(4, 4);
+    EXPAND192_STEP(7, 16);
+    EXPAND192_STEP(10, 64);
+}
+
+static void AES_256_Key_Expansion(const unsigned char* userkey, void* key) {
+    __m128i x0, x1, x2, x3, *kp = (__m128i*)key;
+    kp[0] = x0 = _mm_loadu_si128((__m128i*)userkey);
+    kp[1] = x3 = _mm_loadu_si128((__m128i*)(userkey + 16));
+    x2 = _mm_setzero_si128();
+    EXPAND_ASSIST(x0, x1, x2, x3, 255, 1);
+    kp[2] = x0;
+    EXPAND_ASSIST(x3, x1, x2, x0, 170, 1);
+    kp[3] = x3;
+    EXPAND_ASSIST(x0, x1, x2, x3, 255, 2);
+    kp[4] = x0;
+    EXPAND_ASSIST(x3, x1, x2, x0, 170, 2);
+    kp[5] = x3;
+    EXPAND_ASSIST(x0, x1, x2, x3, 255, 4);
+    kp[6] = x0;
+    EXPAND_ASSIST(x3, x1, x2, x0, 170, 4);
+    kp[7] = x3;
+    EXPAND_ASSIST(x0, x1, x2, x3, 255, 8);
+    kp[8] = x0;
+    EXPAND_ASSIST(x3, x1, x2, x0, 170, 8);
+    kp[9] = x3;
+    EXPAND_ASSIST(x0, x1, x2, x3, 255, 16);
+    kp[10] = x0;
+    EXPAND_ASSIST(x3, x1, x2, x0, 170, 16);
+    kp[11] = x3;
+    EXPAND_ASSIST(x0, x1, x2, x3, 255, 32);
+    kp[12] = x0;
+    EXPAND_ASSIST(x3, x1, x2, x0, 170, 32);
+    kp[13] = x3;
+    EXPAND_ASSIST(x0, x1, x2, x3, 255, 64);
+    kp[14] = x0;
+}
+
+static int AES_set_encrypt_key(const unsigned char* userKey, const int bits, AES_KEY* key) {
+    if (bits == 128) {
+        AES_128_Key_Expansion(userKey, key);
+    } else if (bits == 192) {
+        AES_192_Key_Expansion(userKey, key);
+    } else if (bits == 256) {
+        AES_256_Key_Expansion(userKey, key);
+    }
+#if (OCB_KEY_LEN == 0)
+    key->rounds = 6 + bits / 32;
+#endif
+    return 0;
+}
+
+static void AES_set_decrypt_key_fast(AES_KEY* dkey, const AES_KEY* ekey) {
+    int j = 0;
+    int i = ROUNDS(ekey);
+#if (OCB_KEY_LEN == 0)
+    dkey->rounds = i;
+#endif
+    dkey->rd_key[i--] = ekey->rd_key[j++];
+    while (i)
+        dkey->rd_key[i--] = _mm_aesimc_si128(ekey->rd_key[j++]);
+    dkey->rd_key[i] = ekey->rd_key[j];
+}
+
+static int AES_set_decrypt_key(const unsigned char* userKey, const int bits, AES_KEY* key) {
+    AES_KEY temp_key;
+    AES_set_encrypt_key(userKey, bits, &temp_key);
+    AES_set_decrypt_key_fast(key, &temp_key);
+    return 0;
+}
+
+static inline void AES_encrypt(const unsigned char* in, unsigned char* out, const AES_KEY* key) {
+    int j, rnds = ROUNDS(key);
+    const __m128i* sched = ((__m128i*)(key->rd_key));
+    __m128i tmp = _mm_load_si128((__m128i*)in);
+    tmp = _mm_xor_si128(tmp, sched[0]);
+    for (j = 1; j < rnds; j++)
+        tmp = _mm_aesenc_si128(tmp, sched[j]);
+    tmp = _mm_aesenclast_si128(tmp, sched[j]);
+    _mm_store_si128((__m128i*)out, tmp);
+}
+
+static inline void AES_decrypt(const unsigned char* in, unsigned char* out, const AES_KEY* key) {
+    int j, rnds = ROUNDS(key);
+    const __m128i* sched = ((__m128i*)(key->rd_key));
+    __m128i tmp = _mm_load_si128((__m128i*)in);
+    tmp = _mm_xor_si128(tmp, sched[0]);
+    for (j = 1; j < rnds; j++)
+        tmp = _mm_aesdec_si128(tmp, sched[j]);
+    tmp = _mm_aesdeclast_si128(tmp, sched[j]);
+    _mm_store_si128((__m128i*)out, tmp);
+}
+
+static inline void AES_ecb_encrypt_blks(block* blks, unsigned nblks, AES_KEY* key) {
+    unsigned i, j, rnds = ROUNDS(key);
+    const __m128i* sched = ((__m128i*)(key->rd_key));
+    for (i = 0; i < nblks; ++i)
+        blks[i] = _mm_xor_si128(blks[i], sched[0]);
+    for (j = 1; j < rnds; ++j)
+        for (i = 0; i < nblks; ++i)
+            blks[i] = _mm_aesenc_si128(blks[i], sched[j]);
+    for (i = 0; i < nblks; ++i)
+        blks[i] = _mm_aesenclast_si128(blks[i], sched[j]);
+}
+
+static inline void AES_ecb_decrypt_blks(block* blks, unsigned nblks, AES_KEY* key) {
+    unsigned i, j, rnds = ROUNDS(key);
+    const __m128i* sched = ((__m128i*)(key->rd_key));
+    for (i = 0; i < nblks; ++i)
+        blks[i] = _mm_xor_si128(blks[i], sched[0]);
+    for (j = 1; j < rnds; ++j)
+        for (i = 0; i < nblks; ++i)
+            blks[i] = _mm_aesdec_si128(blks[i], sched[j]);
+    for (i = 0; i < nblks; ++i)
+        blks[i] = _mm_aesdeclast_si128(blks[i], sched[j]);
+}
+
+#define BPI 8 /* Number of blocks in buffer per ECB call   */
+/* Set to 4 for Westmere, 8 for Sandy Bridge */
+
+#endif
+
+/* ----------------------------------------------------------------------- */
+/* Define OCB context structure.                                           */
+/* ----------------------------------------------------------------------- */
+
+/*------------------------------------------------------------------------
+/ Each item in the OCB context is stored either "memory correct" or
+/ "register correct". On big-endian machines, this is identical. On
+/ little-endian machines, one must choose whether the byte-string
+/ is in the correct order when it resides in memory or in registers.
+/ It must be register correct whenever it is to be manipulated
+/ arithmetically, but must be memory correct whenever it interacts
+/ with the plaintext or ciphertext.
+/------------------------------------------------------------------------- */
+
+struct _ae_ctx {
+    block offset;        /* Memory correct               */
+    block checksum;      /* Memory correct               */
+    block Lstar;         /* Memory correct               */
+    block Ldollar;       /* Memory correct               */
+    block L[L_TABLE_SZ]; /* Memory correct               */
+    block ad_checksum;   /* Memory correct               */
+    block ad_offset;     /* Memory correct               */
+    block cached_Top;    /* Memory correct               */
+    uint64_t KtopStr[3]; /* Register correct, each item  */
+    uint32_t ad_blocks_processed;
+    uint32_t blocks_processed;
+    AES_KEY decrypt_key;
+    AES_KEY encrypt_key;
+#if (OCB_TAG_LEN == 0)
+    unsigned tag_len;
+#endif
+};
+
+/* ----------------------------------------------------------------------- */
+/* L table lookup (or on-the-fly generation)                               */
+/* ----------------------------------------------------------------------- */
+
+#if L_TABLE_SZ_IS_ENOUGH
+#define getL(_ctx, _tz) ((_ctx)->L[_tz])
+#else
+static block getL(const ae_ctx* ctx, unsigned tz) {
+    if (tz < L_TABLE_SZ)
+        return ctx->L[tz];
+    else {
+        unsigned i;
+        /* Bring L[MAX] into registers, make it register correct */
+        block rval = swap_if_le(ctx->L[L_TABLE_SZ - 1]);
+        rval = double_block(rval);
+        for (i = L_TABLE_SZ; i < tz; i++)
+            rval = double_block(rval);
+        return swap_if_le(rval); /* To memory correct */
+    }
+}
+#endif
+
+/* ----------------------------------------------------------------------- */
+/* Public functions                                                        */
+/* ----------------------------------------------------------------------- */
+
+/* 32-bit SSE2 and Altivec systems need to be forced to allocate memory
+   on 16-byte alignments. (I believe all major 64-bit systems do already.) */
+
+ae_ctx* ae_allocate(void* misc) {
+    void* p;
+    (void)misc; /* misc unused in this implementation */
+#if (__SSE2__ && !_M_X64 && !_M_AMD64 && !__amd64__)
+    p = _mm_malloc(sizeof(ae_ctx), 16);
+#elif(__ALTIVEC__ && !__PPC64__)
+    if (posix_memalign(&p, 16, sizeof(ae_ctx)) != 0)
+        p = NULL;
+#else
+    p = malloc(sizeof(ae_ctx));
+#endif
+    return (ae_ctx*)p;
+}
+
+void ae_free(ae_ctx* ctx) {
+#if (__SSE2__ && !_M_X64 && !_M_AMD64 && !__amd64__)
+    _mm_free(ctx);
+#else
+    free(ctx);
+#endif
+}
+
+/* ----------------------------------------------------------------------- */
+
+int ae_clear(ae_ctx* ctx) /* Zero ae_ctx and undo initialization          */
+{
+    memset(ctx, 0, sizeof(ae_ctx));
+    return AE_SUCCESS;
+}
+
+int ae_ctx_sizeof(void) {
+    return (int)sizeof(ae_ctx);
+}
+
+/* ----------------------------------------------------------------------- */
+
+int ae_init(ae_ctx* ctx, const void* key, int key_len, int nonce_len, int tag_len) {
+    unsigned i;
+    block tmp_blk;
+
+    if (nonce_len != 12)
+        return AE_NOT_SUPPORTED;
+
+/* Initialize encryption & decryption keys */
+#if (OCB_KEY_LEN > 0)
+    key_len = OCB_KEY_LEN;
+#endif
+    AES_set_encrypt_key((unsigned char*)key, key_len * 8, &ctx->encrypt_key);
+#if USE_AES_NI
+    AES_set_decrypt_key_fast(&ctx->decrypt_key, &ctx->encrypt_key);
+#else
+    AES_set_decrypt_key((unsigned char*)key, (int)(key_len * 8), &ctx->decrypt_key);
+#endif
+
+    /* Zero things that need zeroing */
+    ctx->cached_Top = ctx->ad_checksum = zero_block();
+    ctx->ad_blocks_processed = 0;
+
+    /* Compute key-dependent values */
+    AES_encrypt((unsigned char*)&ctx->cached_Top, (unsigned char*)&ctx->Lstar, &ctx->encrypt_key);
+    tmp_blk = swap_if_le(ctx->Lstar);
+    tmp_blk = double_block(tmp_blk);
+    ctx->Ldollar = swap_if_le(tmp_blk);
+    tmp_blk = double_block(tmp_blk);
+    ctx->L[0] = swap_if_le(tmp_blk);
+    for (i = 1; i < L_TABLE_SZ; i++) {
+        tmp_blk = double_block(tmp_blk);
+        ctx->L[i] = swap_if_le(tmp_blk);
+    }
+
+#if (OCB_TAG_LEN == 0)
+    ctx->tag_len = tag_len;
+#else
+    (void)tag_len; /* Suppress var not used error */
+#endif
+
+    return AE_SUCCESS;
+}
+
+/* ----------------------------------------------------------------------- */
+
+static block gen_offset_from_nonce(ae_ctx* ctx, const void* nonce) {
+    const union {
+        unsigned x;
+        unsigned char endian;
+    } little = {1};
+    union {
+        uint32_t u32[4];
+        uint8_t u8[16];
+        block bl;
+    } tmp;
+    unsigned idx;
+    uint32_t tagadd;
+
+/* Replace cached nonce Top if needed */
+#if (OCB_TAG_LEN > 0)
+    if (little.endian)
+        tmp.u32[0] = 0x01000000 + ((OCB_TAG_LEN * 8 % 128) << 1);
+    else
+        tmp.u32[0] = 0x00000001 + ((OCB_TAG_LEN * 8 % 128) << 25);
+#else
+    if (little.endian)
+        tmp.u32[0] = 0x01000000 + ((ctx->tag_len * 8 % 128) << 1);
+    else
+        tmp.u32[0] = 0x00000001 + ((ctx->tag_len * 8 % 128) << 25);
+#endif
+    tmp.u32[1] = ((uint32_t*)nonce)[0];
+    tmp.u32[2] = ((uint32_t*)nonce)[1];
+    tmp.u32[3] = ((uint32_t*)nonce)[2];
+    idx = (unsigned)(tmp.u8[15] & 0x3f);           /* Get low 6 bits of nonce  */
+    tmp.u8[15] = tmp.u8[15] & 0xc0;                /* Zero low 6 bits of nonce */
+    if (unequal_blocks(tmp.bl, ctx->cached_Top)) { /* Cached?       */
+        ctx->cached_Top = tmp.bl;                  /* Update cache, KtopStr    */
+        AES_encrypt(tmp.u8, (unsigned char*)&ctx->KtopStr, &ctx->encrypt_key);
+        if (little.endian) { /* Make Register Correct    */
+            ctx->KtopStr[0] = bswap64(ctx->KtopStr[0]);
+            ctx->KtopStr[1] = bswap64(ctx->KtopStr[1]);
+        }
+        ctx->KtopStr[2] = ctx->KtopStr[0] ^ (ctx->KtopStr[0] << 8) ^ (ctx->KtopStr[1] >> 56);
+    }
+    return gen_offset(ctx->KtopStr, idx);
+}
+
+static void process_ad(ae_ctx* ctx, const void* ad, int ad_len, int final) {
+    union {
+        uint32_t u32[4];
+        uint8_t u8[16];
+        block bl;
+    } tmp;
+    block ad_offset, ad_checksum;
+    const block* adp = (block*)ad;
+    unsigned i, k, tz, remaining;
+
+    ad_offset = ctx->ad_offset;
+    ad_checksum = ctx->ad_checksum;
+    i = ad_len / (BPI * 16);
+    if (i) {
+        unsigned ad_block_num = ctx->ad_blocks_processed;
+        do {
+            block ta[BPI], oa[BPI];
+            ad_block_num += BPI;
+            tz = ntz(ad_block_num);
+            oa[0] = xor_block(ad_offset, ctx->L[0]);
+            ta[0] = xor_block(oa[0], adp[0]);
+            oa[1] = xor_block(oa[0], ctx->L[1]);
+            ta[1] = xor_block(oa[1], adp[1]);
+            oa[2] = xor_block(ad_offset, ctx->L[1]);
+            ta[2] = xor_block(oa[2], adp[2]);
+#if BPI == 4
+            ad_offset = xor_block(oa[2], getL(ctx, tz));
+            ta[3] = xor_block(ad_offset, adp[3]);
+#elif BPI == 8
+            oa[3] = xor_block(oa[2], ctx->L[2]);
+            ta[3] = xor_block(oa[3], adp[3]);
+            oa[4] = xor_block(oa[1], ctx->L[2]);
+            ta[4] = xor_block(oa[4], adp[4]);
+            oa[5] = xor_block(oa[0], ctx->L[2]);
+            ta[5] = xor_block(oa[5], adp[5]);
+            oa[6] = xor_block(ad_offset, ctx->L[2]);
+            ta[6] = xor_block(oa[6], adp[6]);
+            ad_offset = xor_block(oa[6], getL(ctx, tz));
+            ta[7] = xor_block(ad_offset, adp[7]);
+#endif
+            AES_ecb_encrypt_blks(ta, BPI, &ctx->encrypt_key);
+            ad_checksum = xor_block(ad_checksum, ta[0]);
+            ad_checksum = xor_block(ad_checksum, ta[1]);
+            ad_checksum = xor_block(ad_checksum, ta[2]);
+            ad_checksum = xor_block(ad_checksum, ta[3]);
+#if (BPI == 8)
+            ad_checksum = xor_block(ad_checksum, ta[4]);
+            ad_checksum = xor_block(ad_checksum, ta[5]);
+            ad_checksum = xor_block(ad_checksum, ta[6]);
+            ad_checksum = xor_block(ad_checksum, ta[7]);
+#endif
+            adp += BPI;
+        } while (--i);
+        ctx->ad_blocks_processed = ad_block_num;
+        ctx->ad_offset = ad_offset;
+        ctx->ad_checksum = ad_checksum;
+    }
+
+    if (final) {
+        block ta[BPI];
+
+        /* Process remaining associated data, compute its tag contribution */
+        remaining = ((unsigned)ad_len) % (BPI * 16);
+        if (remaining) {
+            k = 0;
+#if (BPI == 8)
+            if (remaining >= 64) {
+                tmp.bl = xor_block(ad_offset, ctx->L[0]);
+                ta[0] = xor_block(tmp.bl, adp[0]);
+                tmp.bl = xor_block(tmp.bl, ctx->L[1]);
+                ta[1] = xor_block(tmp.bl, adp[1]);
+                ad_offset = xor_block(ad_offset, ctx->L[1]);
+                ta[2] = xor_block(ad_offset, adp[2]);
+                ad_offset = xor_block(ad_offset, ctx->L[2]);
+                ta[3] = xor_block(ad_offset, adp[3]);
+                remaining -= 64;
+                k = 4;
+            }
+#endif
+            if (remaining >= 32) {
+                ad_offset = xor_block(ad_offset, ctx->L[0]);
+                ta[k] = xor_block(ad_offset, adp[k]);
+                ad_offset = xor_block(ad_offset, getL(ctx, ntz(k + 2)));
+                ta[k + 1] = xor_block(ad_offset, adp[k + 1]);
+                remaining -= 32;
+                k += 2;
+            }
+            if (remaining >= 16) {
+                ad_offset = xor_block(ad_offset, ctx->L[0]);
+                ta[k] = xor_block(ad_offset, adp[k]);
+                remaining = remaining - 16;
+                ++k;
+            }
+            if (remaining) {
+                ad_offset = xor_block(ad_offset, ctx->Lstar);
+                tmp.bl = zero_block();
+                memcpy(tmp.u8, adp + k, remaining);
+                tmp.u8[remaining] = (unsigned char)0x80u;
+                ta[k] = xor_block(ad_offset, tmp.bl);
+                ++k;
+            }
+            AES_ecb_encrypt_blks(ta, k, &ctx->encrypt_key);
+            switch (k) {
+#if (BPI == 8)
+            case 8:
+                ad_checksum = xor_block(ad_checksum, ta[7]);
+            case 7:
+                ad_checksum = xor_block(ad_checksum, ta[6]);
+            case 6:
+                ad_checksum = xor_block(ad_checksum, ta[5]);
+            case 5:
+                ad_checksum = xor_block(ad_checksum, ta[4]);
+#endif
+            case 4:
+                ad_checksum = xor_block(ad_checksum, ta[3]);
+            case 3:
+                ad_checksum = xor_block(ad_checksum, ta[2]);
+            case 2:
+                ad_checksum = xor_block(ad_checksum, ta[1]);
+            case 1:
+                ad_checksum = xor_block(ad_checksum, ta[0]);
+            }
+            ctx->ad_checksum = ad_checksum;
+        }
+    }
+}
+
+/* ----------------------------------------------------------------------- */
+
+int ae_encrypt(ae_ctx* ctx, const void* nonce, const void* pt, int pt_len, const void* ad,
+               int ad_len, void* ct, void* tag, int final) {
+    union {
+        uint32_t u32[4];
+        uint8_t u8[16];
+        block bl;
+    } tmp;
+    block offset, checksum;
+    unsigned i, k;
+    block* ctp = (block*)ct;
+    const block* ptp = (block*)pt;
+
+    /* Non-null nonce means start of new message, init per-message values */
+    if (nonce) {
+        ctx->offset = gen_offset_from_nonce(ctx, nonce);
+        ctx->ad_offset = ctx->checksum = zero_block();
+        ctx->ad_blocks_processed = ctx->blocks_processed = 0;
+        if (ad_len >= 0)
+            ctx->ad_checksum = zero_block();
+    }
+
+    /* Process associated data */
+    if (ad_len > 0)
+        process_ad(ctx, ad, ad_len, final);
+
+    /* Encrypt plaintext data BPI blocks at a time */
+    offset = ctx->offset;
+    checksum = ctx->checksum;
+    i = pt_len / (BPI * 16);
+    if (i) {
+        block oa[BPI];
+        unsigned block_num = ctx->blocks_processed;
+        oa[BPI - 1] = offset;
+        do {
+            block ta[BPI];
+            block_num += BPI;
+            oa[0] = xor_block(oa[BPI - 1], ctx->L[0]);
+            ta[0] = xor_block(oa[0], ptp[0]);
+            checksum = xor_block(checksum, ptp[0]);
+            oa[1] = xor_block(oa[0], ctx->L[1]);
+            ta[1] = xor_block(oa[1], ptp[1]);
+            checksum = xor_block(checksum, ptp[1]);
+            oa[2] = xor_block(oa[1], ctx->L[0]);
+            ta[2] = xor_block(oa[2], ptp[2]);
+            checksum = xor_block(checksum, ptp[2]);
+#if BPI == 4
+            oa[3] = xor_block(oa[2], getL(ctx, ntz(block_num)));
+            ta[3] = xor_block(oa[3], ptp[3]);
+            checksum = xor_block(checksum, ptp[3]);
+#elif BPI == 8
+            oa[3] = xor_block(oa[2], ctx->L[2]);
+            ta[3] = xor_block(oa[3], ptp[3]);
+            checksum = xor_block(checksum, ptp[3]);
+            oa[4] = xor_block(oa[1], ctx->L[2]);
+            ta[4] = xor_block(oa[4], ptp[4]);
+            checksum = xor_block(checksum, ptp[4]);
+            oa[5] = xor_block(oa[0], ctx->L[2]);
+            ta[5] = xor_block(oa[5], ptp[5]);
+            checksum = xor_block(checksum, ptp[5]);
+            oa[6] = xor_block(oa[7], ctx->L[2]);
+            ta[6] = xor_block(oa[6], ptp[6]);
+            checksum = xor_block(checksum, ptp[6]);
+            oa[7] = xor_block(oa[6], getL(ctx, ntz(block_num)));
+            ta[7] = xor_block(oa[7], ptp[7]);
+            checksum = xor_block(checksum, ptp[7]);
+#endif
+            AES_ecb_encrypt_blks(ta, BPI, &ctx->encrypt_key);
+            ctp[0] = xor_block(ta[0], oa[0]);
+            ctp[1] = xor_block(ta[1], oa[1]);
+            ctp[2] = xor_block(ta[2], oa[2]);
+            ctp[3] = xor_block(ta[3], oa[3]);
+#if (BPI == 8)
+            ctp[4] = xor_block(ta[4], oa[4]);
+            ctp[5] = xor_block(ta[5], oa[5]);
+            ctp[6] = xor_block(ta[6], oa[6]);
+            ctp[7] = xor_block(ta[7], oa[7]);
+#endif
+            ptp += BPI;
+            ctp += BPI;
+        } while (--i);
+        ctx->offset = offset = oa[BPI - 1];
+        ctx->blocks_processed = block_num;
+        ctx->checksum = checksum;
+    }
+
+    if (final) {
+        block ta[BPI + 1], oa[BPI];
+
+        /* Process remaining plaintext and compute its tag contribution    */
+        unsigned remaining = ((unsigned)pt_len) % (BPI * 16);
+        k = 0; /* How many blocks in ta[] need ECBing */
+        if (remaining) {
+#if (BPI == 8)
+            if (remaining >= 64) {
+                oa[0] = xor_block(offset, ctx->L[0]);
+                ta[0] = xor_block(oa[0], ptp[0]);
+                checksum = xor_block(checksum, ptp[0]);
+                oa[1] = xor_block(oa[0], ctx->L[1]);
+                ta[1] = xor_block(oa[1], ptp[1]);
+                checksum = xor_block(checksum, ptp[1]);
+                oa[2] = xor_block(oa[1], ctx->L[0]);
+                ta[2] = xor_block(oa[2], ptp[2]);
+                checksum = xor_block(checksum, ptp[2]);
+                offset = oa[3] = xor_block(oa[2], ctx->L[2]);
+                ta[3] = xor_block(offset, ptp[3]);
+                checksum = xor_block(checksum, ptp[3]);
+                remaining -= 64;
+                k = 4;
+            }
+#endif
+            if (remaining >= 32) {
+                oa[k] = xor_block(offset, ctx->L[0]);
+                ta[k] = xor_block(oa[k], ptp[k]);
+                checksum = xor_block(checksum, ptp[k]);
+                offset = oa[k + 1] = xor_block(oa[k], ctx->L[1]);
+                ta[k + 1] = xor_block(offset, ptp[k + 1]);
+                checksum = xor_block(checksum, ptp[k + 1]);
+                remaining -= 32;
+                k += 2;
+            }
+            if (remaining >= 16) {
+                offset = oa[k] = xor_block(offset, ctx->L[0]);
+                ta[k] = xor_block(offset, ptp[k]);
+                checksum = xor_block(checksum, ptp[k]);
+                remaining -= 16;
+                ++k;
+            }
+            if (remaining) {
+                tmp.bl = zero_block();
+                memcpy(tmp.u8, ptp + k, remaining);
+                tmp.u8[remaining] = (unsigned char)0x80u;
+                checksum = xor_block(checksum, tmp.bl);
+                ta[k] = offset = xor_block(offset, ctx->Lstar);
+                ++k;
+            }
+        }
+        offset = xor_block(offset, ctx->Ldollar); /* Part of tag gen */
+        ta[k] = xor_block(offset, checksum);      /* Part of tag gen */
+        AES_ecb_encrypt_blks(ta, k + 1, &ctx->encrypt_key);
+        offset = xor_block(ta[k], ctx->ad_checksum); /* Part of tag gen */
+        if (remaining) {
+            --k;
+            tmp.bl = xor_block(tmp.bl, ta[k]);
+            memcpy(ctp + k, tmp.u8, remaining);
+        }
+        switch (k) {
+#if (BPI == 8)
+        case 7:
+            ctp[6] = xor_block(ta[6], oa[6]);
+        case 6:
+            ctp[5] = xor_block(ta[5], oa[5]);
+        case 5:
+            ctp[4] = xor_block(ta[4], oa[4]);
+        case 4:
+            ctp[3] = xor_block(ta[3], oa[3]);
+#endif
+        case 3:
+            ctp[2] = xor_block(ta[2], oa[2]);
+        case 2:
+            ctp[1] = xor_block(ta[1], oa[1]);
+        case 1:
+            ctp[0] = xor_block(ta[0], oa[0]);
+        }
+
+        /* Tag is placed at the correct location
+         */
+        if (tag) {
+#if (OCB_TAG_LEN == 16)
+            *(block*)tag = offset;
+#elif(OCB_TAG_LEN > 0)
+            memcpy((char*)tag, &offset, OCB_TAG_LEN);
+#else
+            memcpy((char*)tag, &offset, ctx->tag_len);
+#endif
+        } else {
+#if (OCB_TAG_LEN > 0)
+            memcpy((char*)ct + pt_len, &offset, OCB_TAG_LEN);
+            pt_len += OCB_TAG_LEN;
+#else
+            memcpy((char*)ct + pt_len, &offset, ctx->tag_len);
+            pt_len += ctx->tag_len;
+#endif
+        }
+    }
+    return (int)pt_len;
+}
+
+/* ----------------------------------------------------------------------- */
+
+/* Compare two regions of memory, taking a constant amount of time for a
+   given buffer size -- under certain assumptions about the compiler
+   and machine, of course.
+
+   Use this to avoid timing side-channel attacks.
+
+   Returns 0 for memory regions with equal contents; non-zero otherwise. */
+static int constant_time_memcmp(const void* av, const void* bv, size_t n) {
+    const uint8_t* a = (const uint8_t*)av;
+    const uint8_t* b = (const uint8_t*)bv;
+    uint8_t result = 0;
+    size_t i;
+
+    for (i = 0; i < n; i++) {
+        result |= *a ^ *b;
+        a++;
+        b++;
+    }
+
+    return (int)result;
+}
+
+int ae_decrypt(ae_ctx* ctx, const void* nonce, const void* ct, int ct_len, const void* ad,
+               int ad_len, void* pt, const void* tag, int final) {
+    union {
+        uint32_t u32[4];
+        uint8_t u8[16];
+        block bl;
+    } tmp;
+    block offset, checksum;
+    unsigned i, k;
+    block* ctp = (block*)ct;
+    block* ptp = (block*)pt;
+
+    /* Reduce ct_len tag bundled in ct */
+    if ((final) && (!tag))
+#if (OCB_TAG_LEN > 0)
+        ct_len -= OCB_TAG_LEN;
+#else
+        ct_len -= ctx->tag_len;
+#endif
+
+    /* Non-null nonce means start of new message, init per-message values */
+    if (nonce) {
+        ctx->offset = gen_offset_from_nonce(ctx, nonce);
+        ctx->ad_offset = ctx->checksum = zero_block();
+        ctx->ad_blocks_processed = ctx->blocks_processed = 0;
+        if (ad_len >= 0)
+            ctx->ad_checksum = zero_block();
+    }
+
+    /* Process associated data */
+    if (ad_len > 0)
+        process_ad(ctx, ad, ad_len, final);
+
+    /* Encrypt plaintext data BPI blocks at a time */
+    offset = ctx->offset;
+    checksum = ctx->checksum;
+    i = ct_len / (BPI * 16);
+    if (i) {
+        block oa[BPI];
+        unsigned block_num = ctx->blocks_processed;
+        oa[BPI - 1] = offset;
+        do {
+            block ta[BPI];
+            block_num += BPI;
+            oa[0] = xor_block(oa[BPI - 1], ctx->L[0]);
+            ta[0] = xor_block(oa[0], ctp[0]);
+            oa[1] = xor_block(oa[0], ctx->L[1]);
+            ta[1] = xor_block(oa[1], ctp[1]);
+            oa[2] = xor_block(oa[1], ctx->L[0]);
+            ta[2] = xor_block(oa[2], ctp[2]);
+#if BPI == 4
+            oa[3] = xor_block(oa[2], getL(ctx, ntz(block_num)));
+            ta[3] = xor_block(oa[3], ctp[3]);
+#elif BPI == 8
+            oa[3] = xor_block(oa[2], ctx->L[2]);
+            ta[3] = xor_block(oa[3], ctp[3]);
+            oa[4] = xor_block(oa[1], ctx->L[2]);
+            ta[4] = xor_block(oa[4], ctp[4]);
+            oa[5] = xor_block(oa[0], ctx->L[2]);
+            ta[5] = xor_block(oa[5], ctp[5]);
+            oa[6] = xor_block(oa[7], ctx->L[2]);
+            ta[6] = xor_block(oa[6], ctp[6]);
+            oa[7] = xor_block(oa[6], getL(ctx, ntz(block_num)));
+            ta[7] = xor_block(oa[7], ctp[7]);
+#endif
+            AES_ecb_decrypt_blks(ta, BPI, &ctx->decrypt_key);
+            ptp[0] = xor_block(ta[0], oa[0]);
+            checksum = xor_block(checksum, ptp[0]);
+            ptp[1] = xor_block(ta[1], oa[1]);
+            checksum = xor_block(checksum, ptp[1]);
+            ptp[2] = xor_block(ta[2], oa[2]);
+            checksum = xor_block(checksum, ptp[2]);
+            ptp[3] = xor_block(ta[3], oa[3]);
+            checksum = xor_block(checksum, ptp[3]);
+#if (BPI == 8)
+            ptp[4] = xor_block(ta[4], oa[4]);
+            checksum = xor_block(checksum, ptp[4]);
+            ptp[5] = xor_block(ta[5], oa[5]);
+            checksum = xor_block(checksum, ptp[5]);
+            ptp[6] = xor_block(ta[6], oa[6]);
+            checksum = xor_block(checksum, ptp[6]);
+            ptp[7] = xor_block(ta[7], oa[7]);
+            checksum = xor_block(checksum, ptp[7]);
+#endif
+            ptp += BPI;
+            ctp += BPI;
+        } while (--i);
+        ctx->offset = offset = oa[BPI - 1];
+        ctx->blocks_processed = block_num;
+        ctx->checksum = checksum;
+    }
+
+    if (final) {
+        block ta[BPI + 1], oa[BPI];
+
+        /* Process remaining plaintext and compute its tag contribution    */
+        unsigned remaining = ((unsigned)ct_len) % (BPI * 16);
+        k = 0; /* How many blocks in ta[] need ECBing */
+        if (remaining) {
+#if (BPI == 8)
+            if (remaining >= 64) {
+                oa[0] = xor_block(offset, ctx->L[0]);
+                ta[0] = xor_block(oa[0], ctp[0]);
+                oa[1] = xor_block(oa[0], ctx->L[1]);
+                ta[1] = xor_block(oa[1], ctp[1]);
+                oa[2] = xor_block(oa[1], ctx->L[0]);
+                ta[2] = xor_block(oa[2], ctp[2]);
+                offset = oa[3] = xor_block(oa[2], ctx->L[2]);
+                ta[3] = xor_block(offset, ctp[3]);
+                remaining -= 64;
+                k = 4;
+            }
+#endif
+            if (remaining >= 32) {
+                oa[k] = xor_block(offset, ctx->L[0]);
+                ta[k] = xor_block(oa[k], ctp[k]);
+                offset = oa[k + 1] = xor_block(oa[k], ctx->L[1]);
+                ta[k + 1] = xor_block(offset, ctp[k + 1]);
+                remaining -= 32;
+                k += 2;
+            }
+            if (remaining >= 16) {
+                offset = oa[k] = xor_block(offset, ctx->L[0]);
+                ta[k] = xor_block(offset, ctp[k]);
+                remaining -= 16;
+                ++k;
+            }
+            if (remaining) {
+                block pad;
+                offset = xor_block(offset, ctx->Lstar);
+                AES_encrypt((unsigned char*)&offset, tmp.u8, &ctx->encrypt_key);
+                pad = tmp.bl;
+                memcpy(tmp.u8, ctp + k, remaining);
+                tmp.bl = xor_block(tmp.bl, pad);
+                tmp.u8[remaining] = (unsigned char)0x80u;
+                memcpy(ptp + k, tmp.u8, remaining);
+                checksum = xor_block(checksum, tmp.bl);
+            }
+        }
+        AES_ecb_decrypt_blks(ta, k, &ctx->decrypt_key);
+        switch (k) {
+#if (BPI == 8)
+        case 7:
+            ptp[6] = xor_block(ta[6], oa[6]);
+            checksum = xor_block(checksum, ptp[6]);
+        case 6:
+            ptp[5] = xor_block(ta[5], oa[5]);
+            checksum = xor_block(checksum, ptp[5]);
+        case 5:
+            ptp[4] = xor_block(ta[4], oa[4]);
+            checksum = xor_block(checksum, ptp[4]);
+        case 4:
+            ptp[3] = xor_block(ta[3], oa[3]);
+            checksum = xor_block(checksum, ptp[3]);
+#endif
+        case 3:
+            ptp[2] = xor_block(ta[2], oa[2]);
+            checksum = xor_block(checksum, ptp[2]);
+        case 2:
+            ptp[1] = xor_block(ta[1], oa[1]);
+            checksum = xor_block(checksum, ptp[1]);
+        case 1:
+            ptp[0] = xor_block(ta[0], oa[0]);
+            checksum = xor_block(checksum, ptp[0]);
+        }
+
+        /* Calculate expected tag */
+        offset = xor_block(offset, ctx->Ldollar);
+        tmp.bl = xor_block(offset, checksum);
+        AES_encrypt(tmp.u8, tmp.u8, &ctx->encrypt_key);
+        tmp.bl = xor_block(tmp.bl, ctx->ad_checksum); /* Full tag */
+
+        /* Compare with proposed tag, change ct_len if invalid */
+        if ((OCB_TAG_LEN == 16) && tag) {
+            if (unequal_blocks(tmp.bl, *(block*)tag))
+                ct_len = AE_INVALID;
+        } else {
+#if (OCB_TAG_LEN > 0)
+            int len = OCB_TAG_LEN;
+#else
+            int len = ctx->tag_len;
+#endif
+            if (tag) {
+                if (constant_time_memcmp(tag, tmp.u8, len) != 0)
+                    ct_len = AE_INVALID;
+            } else {
+                if (constant_time_memcmp((char*)ct + ct_len, tmp.u8, len) != 0)
+                    ct_len = AE_INVALID;
+            }
+        }
+    }
+    return ct_len;
+}
+
+/* ----------------------------------------------------------------------- */
+/* Simple test program                                                     */
+/* ----------------------------------------------------------------------- */
+
+#if 0
+
+#include <stdio.h>
+#include <time.h>
+
+#if __GNUC__
+#define ALIGN(n) __attribute__((aligned(n)))
+#elif _MSC_VER
+#define ALIGN(n) __declspec(align(n))
+#else /* Not GNU/Microsoft: delete alignment uses.     */
+#define ALIGN(n)
+#endif
+
+static void pbuf(void *p, unsigned len, const void *s)
+{
+    unsigned i;
+    if (s)
+        printf("%s", (char *)s);
+    for (i = 0; i < len; i++)
+        printf("%02X", (unsigned)(((unsigned char *)p)[i]));
+    printf("\n");
+}
+
+static void vectors(ae_ctx *ctx, int len)
+{
+    ALIGN(16) char pt[128];
+    ALIGN(16) char ct[144];
+    ALIGN(16) char nonce[] = {0,1,2,3,4,5,6,7,8,9,10,11};
+    int i;
+    for (i=0; i < 128; i++) pt[i] = i;
+    i = ae_encrypt(ctx,nonce,pt,len,pt,len,ct,NULL,AE_FINALIZE);
+    printf("P=%d,A=%d: ",len,len); pbuf(ct, i, NULL);
+    i = ae_encrypt(ctx,nonce,pt,0,pt,len,ct,NULL,AE_FINALIZE);
+    printf("P=%d,A=%d: ",0,len); pbuf(ct, i, NULL);
+    i = ae_encrypt(ctx,nonce,pt,len,pt,0,ct,NULL,AE_FINALIZE);
+    printf("P=%d,A=%d: ",len,0); pbuf(ct, i, NULL);
+}
+
+void validate()
+{
+    ALIGN(16) char pt[1024];
+    ALIGN(16) char ct[1024];
+    ALIGN(16) char tag[16];
+    ALIGN(16) char nonce[12] = {0,};
+    ALIGN(16) char key[32] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
+    ae_ctx ctx;
+    char *val_buf, *next;
+    int i, len;
+
+    val_buf = (char *)malloc(22400 + 16);
+    next = val_buf = (char *)(((size_t)val_buf + 16) & ~((size_t)15));
+
+    if (0) {
+		ae_init(&ctx, key, 16, 12, 16);
+		/* pbuf(&ctx, sizeof(ctx), "CTX: "); */
+		vectors(&ctx,0);
+		vectors(&ctx,8);
+		vectors(&ctx,16);
+		vectors(&ctx,24);
+		vectors(&ctx,32);
+		vectors(&ctx,40);
+    }
+
+    memset(key,0,32);
+    memset(pt,0,128);
+    ae_init(&ctx, key, OCB_KEY_LEN, 12, OCB_TAG_LEN);
+
+    /* RFC Vector test */
+    for (i = 0; i < 128; i++) {
+        int first = ((i/3)/(BPI*16))*(BPI*16);
+        int second = first;
+        int third = i - (first + second);
+
+        nonce[11] = i;
+
+        if (0) {
+            ae_encrypt(&ctx,nonce,pt,i,pt,i,ct,NULL,AE_FINALIZE);
+            memcpy(next,ct,(size_t)i+OCB_TAG_LEN);
+            next = next+i+OCB_TAG_LEN;
+
+            ae_encrypt(&ctx,nonce,pt,i,pt,0,ct,NULL,AE_FINALIZE);
+            memcpy(next,ct,(size_t)i+OCB_TAG_LEN);
+            next = next+i+OCB_TAG_LEN;
+
+            ae_encrypt(&ctx,nonce,pt,0,pt,i,ct,NULL,AE_FINALIZE);
+            memcpy(next,ct,OCB_TAG_LEN);
+            next = next+OCB_TAG_LEN;
+        } else {
+            ae_encrypt(&ctx,nonce,pt,first,pt,first,ct,NULL,AE_PENDING);
+            ae_encrypt(&ctx,NULL,pt+first,second,pt+first,second,ct+first,NULL,AE_PENDING);
+            ae_encrypt(&ctx,NULL,pt+first+second,third,pt+first+second,third,ct+first+second,NULL,AE_FINALIZE);
+            memcpy(next,ct,(size_t)i+OCB_TAG_LEN);
+            next = next+i+OCB_TAG_LEN;
+
+            ae_encrypt(&ctx,nonce,pt,first,pt,0,ct,NULL,AE_PENDING);
+            ae_encrypt(&ctx,NULL,pt+first,second,pt,0,ct+first,NULL,AE_PENDING);
+            ae_encrypt(&ctx,NULL,pt+first+second,third,pt,0,ct+first+second,NULL,AE_FINALIZE);
+            memcpy(next,ct,(size_t)i+OCB_TAG_LEN);
+            next = next+i+OCB_TAG_LEN;
+
+            ae_encrypt(&ctx,nonce,pt,0,pt,first,ct,NULL,AE_PENDING);
+            ae_encrypt(&ctx,NULL,pt,0,pt+first,second,ct,NULL,AE_PENDING);
+            ae_encrypt(&ctx,NULL,pt,0,pt+first+second,third,ct,NULL,AE_FINALIZE);
+            memcpy(next,ct,OCB_TAG_LEN);
+            next = next+OCB_TAG_LEN;
+        }
+
+    }
+    nonce[11] = 0;
+    ae_encrypt(&ctx,nonce,NULL,0,val_buf,next-val_buf,ct,tag,AE_FINALIZE);
+    pbuf(tag,OCB_TAG_LEN,0);
+
+
+    /* Encrypt/Decrypt test */
+    for (i = 0; i < 128; i++) {
+        int first = ((i/3)/(BPI*16))*(BPI*16);
+        int second = first;
+        int third = i - (first + second);
+
+        nonce[11] = i%128;
+
+        if (1) {
+            len = ae_encrypt(&ctx,nonce,val_buf,i,val_buf,i,ct,tag,AE_FINALIZE);
+            len = ae_encrypt(&ctx,nonce,val_buf,i,val_buf,-1,ct,tag,AE_FINALIZE);
+            len = ae_decrypt(&ctx,nonce,ct,len,val_buf,-1,pt,tag,AE_FINALIZE);
+            if (len == -1) { printf("Authentication error: %d\n", i); return; }
+            if (len != i) { printf("Length error: %d\n", i); return; }
+            if (memcmp(val_buf,pt,i)) { printf("Decrypt error: %d\n", i); return; }
+        } else {
+            len = ae_encrypt(&ctx,nonce,val_buf,i,val_buf,i,ct,NULL,AE_FINALIZE);
+            ae_decrypt(&ctx,nonce,ct,first,val_buf,first,pt,NULL,AE_PENDING);
+            ae_decrypt(&ctx,NULL,ct+first,second,val_buf+first,second,pt+first,NULL,AE_PENDING);
+            len = ae_decrypt(&ctx,NULL,ct+first+second,len-(first+second),val_buf+first+second,third,pt+first+second,NULL,AE_FINALIZE);
+            if (len == -1) { printf("Authentication error: %d\n", i); return; }
+            if (memcmp(val_buf,pt,i)) { printf("Decrypt error: %d\n", i); return; }
+        }
+
+    }
+    printf("Decrypt: PASS\n");
+}
+
+int main()
+{
+    validate();
+    return 0;
+}
+#endif
+
+#if USE_AES_NI
+char infoString[] = "OCB3 (AES-NI)";
+#elif USE_REFERENCE_AES
+char infoString[] = "OCB3 (Reference)";
+#elif USE_OPENSSL_AES
+char infoString[] = "OCB3 (OpenSSL)";
+#endif