external/boringssl: Sync to 8625ec4b436ccb4098ed4aac10891eff8372be41.

This includes the following changes:

https://boringssl.googlesource.com/boringssl/+log/c596415ec62b501523d80f9afa26b135406da6bf..8625ec4b436ccb4098ed4aac10891eff8372be41

Test: cts -m CtsLibcoreTestCases
Change-Id: I47a45e6b6f46b19fcbcb6c917895867d56dcd2ca
diff --git a/src/crypto/fipsmodule/aes/aes.c b/src/crypto/fipsmodule/aes/aes.c
index a988b39..f654cb1 100644
--- a/src/crypto/fipsmodule/aes/aes.c
+++ b/src/crypto/fipsmodule/aes/aes.c
@@ -6,7 +6,7 @@
  * are met:
  *
  * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer. 
+ *    notice, this list of conditions and the following disclaimer.
  *
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in
@@ -534,7 +534,8 @@
     // for 128-bit blocks, Rijndael never uses more than 10 rcon values
 };
 
-int AES_set_encrypt_key(const uint8_t *key, unsigned bits, AES_KEY *aeskey) {
+static int aes_nohw_set_encrypt_key(const uint8_t *key, unsigned bits,
+                                    AES_KEY *aeskey) {
   uint32_t *rk;
   int i = 0;
   uint32_t temp;
@@ -629,7 +630,8 @@
   return 0;
 }
 
-int AES_set_decrypt_key(const uint8_t *key, unsigned bits, AES_KEY *aeskey) {
+static int aes_nohw_set_decrypt_key(const uint8_t *key, unsigned bits,
+                                    AES_KEY *aeskey) {
   uint32_t *rk;
   int i, j, status;
   uint32_t temp;
@@ -677,12 +679,11 @@
   return 0;
 }
 
-void AES_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key) {
+static void aes_nohw_encrypt(const uint8_t *in, uint8_t *out,
+                             const AES_KEY *key) {
   const uint32_t *rk;
   uint32_t s0, s1, s2, s3, t0, t1, t2, t3;
-#ifndef FULL_UNROLL
   int r;
-#endif  // ?FULL_UNROLL
 
   assert(in && out && key);
   rk = key->rd_key;
@@ -693,130 +694,7 @@
   s1 = GETU32(in + 4) ^ rk[1];
   s2 = GETU32(in + 8) ^ rk[2];
   s3 = GETU32(in + 12) ^ rk[3];
-#ifdef FULL_UNROLL
-  // round 1:
-  t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^
-       Te3[s3 & 0xff] ^ rk[4];
-  t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^
-       Te3[s0 & 0xff] ^ rk[5];
-  t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^
-       Te3[s1 & 0xff] ^ rk[6];
-  t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^
-       Te3[s2 & 0xff] ^ rk[7];
-  // round 2:
-  s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^
-       Te3[t3 & 0xff] ^ rk[8];
-  s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^
-       Te3[t0 & 0xff] ^ rk[9];
-  s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^
-       Te3[t1 & 0xff] ^ rk[10];
-  s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^
-       Te3[t2 & 0xff] ^ rk[11];
-  // round 3:
-  t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^
-       Te3[s3 & 0xff] ^ rk[12];
-  t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^
-       Te3[s0 & 0xff] ^ rk[13];
-  t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^
-       Te3[s1 & 0xff] ^ rk[14];
-  t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^
-       Te3[s2 & 0xff] ^ rk[15];
-  // round 4:
-  s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^
-       Te3[t3 & 0xff] ^ rk[16];
-  s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^
-       Te3[t0 & 0xff] ^ rk[17];
-  s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^
-       Te3[t1 & 0xff] ^ rk[18];
-  s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^
-       Te3[t2 & 0xff] ^ rk[19];
-  // round 5:
-  t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^
-       Te3[s3 & 0xff] ^ rk[20];
-  t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^
-       Te3[s0 & 0xff] ^ rk[21];
-  t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^
-       Te3[s1 & 0xff] ^ rk[22];
-  t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^
-       Te3[s2 & 0xff] ^ rk[23];
-  // round 6:
-  s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^
-       Te3[t3 & 0xff] ^ rk[24];
-  s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^
-       Te3[t0 & 0xff] ^ rk[25];
-  s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^
-       Te3[t1 & 0xff] ^ rk[26];
-  s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^
-       Te3[t2 & 0xff] ^ rk[27];
-  // round 7:
-  t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^
-       Te3[s3 & 0xff] ^ rk[28];
-  t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^
-       Te3[s0 & 0xff] ^ rk[29];
-  t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^
-       Te3[s1 & 0xff] ^ rk[30];
-  t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^
-       Te3[s2 & 0xff] ^ rk[31];
-  // round 8:
-  s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^
-       Te3[t3 & 0xff] ^ rk[32];
-  s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^
-       Te3[t0 & 0xff] ^ rk[33];
-  s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^
-       Te3[t1 & 0xff] ^ rk[34];
-  s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^
-       Te3[t2 & 0xff] ^ rk[35];
-  // round 9:
-  t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^
-       Te3[s3 & 0xff] ^ rk[36];
-  t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^
-       Te3[s0 & 0xff] ^ rk[37];
-  t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^
-       Te3[s1 & 0xff] ^ rk[38];
-  t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^
-       Te3[s2 & 0xff] ^ rk[39];
-  if (key->rounds > 10) {
-    // round 10:
-    s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^
-         Te3[t3 & 0xff] ^ rk[40];
-    s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^
-         Te3[t0 & 0xff] ^ rk[41];
-    s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^
-         Te3[t1 & 0xff] ^ rk[42];
-    s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^
-         Te3[t2 & 0xff] ^ rk[43];
-    // round 11:
-    t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^
-         Te3[s3 & 0xff] ^ rk[44];
-    t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^
-         Te3[s0 & 0xff] ^ rk[45];
-    t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^
-         Te3[s1 & 0xff] ^ rk[46];
-    t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^
-         Te3[s2 & 0xff] ^ rk[47];
-    if (key->rounds > 12) {
-      // round 12:
-      s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^
-           Te3[t3 & 0xff] ^ rk[48];
-      s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^
-           Te3[t0 & 0xff] ^ rk[49];
-      s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^
-           Te3[t1 & 0xff] ^ rk[50];
-      s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^
-           Te3[t2 & 0xff] ^ rk[51];
-      // round 13:
-      t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^
-           Te3[s3 & 0xff] ^ rk[52];
-      t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^
-           Te3[s0 & 0xff] ^ rk[53];
-      t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^
-           Te3[s1 & 0xff] ^ rk[54];
-      t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^
-           Te3[s2 & 0xff] ^ rk[55];
-    }
-  }
-  rk += key->rounds << 2;
-#else  // !FULL_UNROLL
+
   // Nr - 1 full rounds:
   r = key->rounds >> 1;
   for (;;) {
@@ -843,7 +721,7 @@
     s3 = Te0[(t3 >> 24)] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^
          Te3[(t2) & 0xff] ^ rk[3];
   }
-#endif  // ?FULL_UNROLL
+
   //  apply last round and map cipher state to byte array block:
   s0 = (Te2[(t0 >> 24)] & 0xff000000) ^ (Te3[(t1 >> 16) & 0xff] & 0x00ff0000) ^
        (Te0[(t2 >> 8) & 0xff] & 0x0000ff00) ^ (Te1[(t3) & 0xff] & 0x000000ff) ^
@@ -863,12 +741,11 @@
   PUTU32(out + 12, s3);
 }
 
-void AES_decrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key) {
+static void aes_nohw_decrypt(const uint8_t *in, uint8_t *out,
+                             const AES_KEY *key) {
   const uint32_t *rk;
   uint32_t s0, s1, s2, s3, t0, t1, t2, t3;
-#ifndef FULL_UNROLL
   int r;
-#endif  // ?FULL_UNROLL
 
   assert(in && out && key);
   rk = key->rd_key;
@@ -879,130 +756,7 @@
   s1 = GETU32(in + 4) ^ rk[1];
   s2 = GETU32(in + 8) ^ rk[2];
   s3 = GETU32(in + 12) ^ rk[3];
-#ifdef FULL_UNROLL
-  // round 1:
-  t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^
-       Td3[s1 & 0xff] ^ rk[4];
-  t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^
-       Td3[s2 & 0xff] ^ rk[5];
-  t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^
-       Td3[s3 & 0xff] ^ rk[6];
-  t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^
-       Td3[s0 & 0xff] ^ rk[7];
-  // round 2:
-  s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^
-       Td3[t1 & 0xff] ^ rk[8];
-  s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^
-       Td3[t2 & 0xff] ^ rk[9];
-  s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^
-       Td3[t3 & 0xff] ^ rk[10];
-  s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^
-       Td3[t0 & 0xff] ^ rk[11];
-  // round 3:
-  t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^
-       Td3[s1 & 0xff] ^ rk[12];
-  t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^
-       Td3[s2 & 0xff] ^ rk[13];
-  t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^
-       Td3[s3 & 0xff] ^ rk[14];
-  t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^
-       Td3[s0 & 0xff] ^ rk[15];
-  // round 4:
-  s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^
-       Td3[t1 & 0xff] ^ rk[16];
-  s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^
-       Td3[t2 & 0xff] ^ rk[17];
-  s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^
-       Td3[t3 & 0xff] ^ rk[18];
-  s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^
-       Td3[t0 & 0xff] ^ rk[19];
-  // round 5:
-  t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^
-       Td3[s1 & 0xff] ^ rk[20];
-  t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^
-       Td3[s2 & 0xff] ^ rk[21];
-  t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^
-       Td3[s3 & 0xff] ^ rk[22];
-  t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^
-       Td3[s0 & 0xff] ^ rk[23];
-  // round 6:
-  s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^
-       Td3[t1 & 0xff] ^ rk[24];
-  s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^
-       Td3[t2 & 0xff] ^ rk[25];
-  s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^
-       Td3[t3 & 0xff] ^ rk[26];
-  s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^
-       Td3[t0 & 0xff] ^ rk[27];
-  // round 7:
-  t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^
-       Td3[s1 & 0xff] ^ rk[28];
-  t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^
-       Td3[s2 & 0xff] ^ rk[29];
-  t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^
-       Td3[s3 & 0xff] ^ rk[30];
-  t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^
-       Td3[s0 & 0xff] ^ rk[31];
-  // round 8:
-  s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^
-       Td3[t1 & 0xff] ^ rk[32];
-  s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^
-       Td3[t2 & 0xff] ^ rk[33];
-  s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^
-       Td3[t3 & 0xff] ^ rk[34];
-  s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^
-       Td3[t0 & 0xff] ^ rk[35];
-  // round 9:
-  t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^
-       Td3[s1 & 0xff] ^ rk[36];
-  t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^
-       Td3[s2 & 0xff] ^ rk[37];
-  t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^
-       Td3[s3 & 0xff] ^ rk[38];
-  t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^
-       Td3[s0 & 0xff] ^ rk[39];
-  if (key->rounds > 10) {
-    // round 10:
-    s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^
-         Td3[t1 & 0xff] ^ rk[40];
-    s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^
-         Td3[t2 & 0xff] ^ rk[41];
-    s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^
-         Td3[t3 & 0xff] ^ rk[42];
-    s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^
-         Td3[t0 & 0xff] ^ rk[43];
-    // round 11:
-    t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^
-         Td3[s1 & 0xff] ^ rk[44];
-    t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^
-         Td3[s2 & 0xff] ^ rk[45];
-    t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^
-         Td3[s3 & 0xff] ^ rk[46];
-    t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^
-         Td3[s0 & 0xff] ^ rk[47];
-    if (key->rounds > 12) {
-      // round 12:
-      s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^
-           Td3[t1 & 0xff] ^ rk[48];
-      s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^
-           Td3[t2 & 0xff] ^ rk[49];
-      s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^
-           Td3[t3 & 0xff] ^ rk[50];
-      s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^
-           Td3[t0 & 0xff] ^ rk[51];
-      // round 13:
-      t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^
-           Td3[s1 & 0xff] ^ rk[52];
-      t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^
-           Td3[s2 & 0xff] ^ rk[53];
-      t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^
-           Td3[s3 & 0xff] ^ rk[54];
-      t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^
-           Td3[s0 & 0xff] ^ rk[55];
-    }
-  }
-  rk += key->rounds << 2;
-#else  // !FULL_UNROLL
+
   // Nr - 1 full rounds:
   r = key->rounds >> 1;
   for (;;) {
@@ -1029,7 +783,7 @@
     s3 = Td0[(t3 >> 24)] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^
          Td3[(t0) & 0xff] ^ rk[3];
   }
-#endif  // ?FULL_UNROLL
+
   // apply last round and
   // map cipher state to byte array block:
   s0 = ((uint32_t)Td4[(t0 >> 24)] << 24) ^
@@ -1054,47 +808,52 @@
   PUTU32(out + 12, s3);
 }
 
-#else
+#else  // NO_ASM || (!X86 && !X86_64 && !ARM)
 
-// In this case several functions are provided by asm code. However, one cannot
-// control asm symbol visibility with command line flags and such so they are
-// always hidden and wrapped by these C functions, which can be so
-// controlled.
+// If not implemented in C, these functions will be provided by assembly code.
+void aes_nohw_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key);
+void aes_nohw_decrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key);
+int aes_nohw_set_encrypt_key(const uint8_t *key, unsigned bits,
+                             AES_KEY *aeskey);
+int aes_nohw_set_decrypt_key(const uint8_t *key, unsigned bits,
+                             AES_KEY *aeskey);
 
-void asm_AES_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key);
+#endif
+
+// Be aware that on x86(-64), the |aes_nohw_*| functions are incompatible with
+// the |aes_hw_*| functions. The latter set |AES_KEY.rounds| to one less than
+// the true value, which breaks the former. Therefore the two families of
+// functions must not be mixed. Also, on AArch64, the plain-C code above is
+// incompatible with the |aes_hw_*| functions.
+
 void AES_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key) {
   if (hwaes_capable()) {
     aes_hw_encrypt(in, out, key);
   } else {
-    asm_AES_encrypt(in, out, key);
+    aes_nohw_encrypt(in, out, key);
   }
 }
 
-void asm_AES_decrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key);
 void AES_decrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key) {
   if (hwaes_capable()) {
     aes_hw_decrypt(in, out, key);
   } else {
-    asm_AES_decrypt(in, out, key);
+    aes_nohw_decrypt(in, out, key);
   }
 }
 
-int asm_AES_set_encrypt_key(const uint8_t *key, unsigned bits, AES_KEY *aeskey);
 int AES_set_encrypt_key(const uint8_t *key, unsigned bits, AES_KEY *aeskey) {
   if (hwaes_capable()) {
     return aes_hw_set_encrypt_key(key, bits, aeskey);
   } else {
-    return asm_AES_set_encrypt_key(key, bits, aeskey);
+    return aes_nohw_set_encrypt_key(key, bits, aeskey);
   }
 }
 
-int asm_AES_set_decrypt_key(const uint8_t *key, unsigned bits, AES_KEY *aeskey);
 int AES_set_decrypt_key(const uint8_t *key, unsigned bits, AES_KEY *aeskey) {
   if (hwaes_capable()) {
     return aes_hw_set_decrypt_key(key, bits, aeskey);
   } else {
-    return asm_AES_set_decrypt_key(key, bits, aeskey);
+    return aes_nohw_set_decrypt_key(key, bits, aeskey);
   }
 }
-
-#endif  // OPENSSL_NO_ASM || (!OPENSSL_X86 && !OPENSSL_X86_64 && !OPENSSL_ARM)
diff --git a/src/crypto/fipsmodule/aes/aes_test.cc b/src/crypto/fipsmodule/aes/aes_test.cc
index 73ae255..ccec597 100644
--- a/src/crypto/fipsmodule/aes/aes_test.cc
+++ b/src/crypto/fipsmodule/aes/aes_test.cc
@@ -13,6 +13,7 @@
  * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
 
 #include <stdio.h>
+#include <stdlib.h>
 #include <string.h>
 
 #include <memory>
@@ -25,6 +26,7 @@
 #include "../../internal.h"
 #include "../../test/file_test.h"
 #include "../../test/test_util.h"
+#include "../../test/wycheproof_util.h"
 
 
 static void TestRaw(FileTest *t) {
@@ -129,3 +131,58 @@
     }
   });
 }
+
+TEST(AESTest, WycheproofKeyWrap) {
+  FileTestGTest("third_party/wycheproof_testvectors/kw_test.txt",
+                [](FileTest *t) {
+    std::string key_size;
+    ASSERT_TRUE(t->GetInstruction(&key_size, "keySize"));
+    std::vector<uint8_t> ct, key, msg;
+    ASSERT_TRUE(t->GetBytes(&ct, "ct"));
+    ASSERT_TRUE(t->GetBytes(&key, "key"));
+    ASSERT_TRUE(t->GetBytes(&msg, "msg"));
+    ASSERT_EQ(static_cast<unsigned>(atoi(key_size.c_str())), key.size() * 8);
+    WycheproofResult result;
+    ASSERT_TRUE(GetWycheproofResult(t, &result));
+
+    if (result != WycheproofResult::kInvalid) {
+      ASSERT_GE(ct.size(), 8u);
+
+      AES_KEY aes;
+      ASSERT_EQ(0, AES_set_decrypt_key(key.data(), 8 * key.size(), &aes));
+      std::vector<uint8_t> out(ct.size() - 8);
+      int len = AES_unwrap_key(&aes, nullptr, out.data(), ct.data(), ct.size());
+      ASSERT_EQ(static_cast<int>(out.size()), len);
+      EXPECT_EQ(Bytes(msg), Bytes(out));
+
+      out.resize(msg.size() + 8);
+      ASSERT_EQ(0, AES_set_encrypt_key(key.data(), 8 * key.size(), &aes));
+      len = AES_wrap_key(&aes, nullptr, out.data(), msg.data(), msg.size());
+      ASSERT_EQ(static_cast<int>(out.size()), len);
+      EXPECT_EQ(Bytes(ct), Bytes(out));
+    } else {
+      AES_KEY aes;
+      ASSERT_EQ(0, AES_set_decrypt_key(key.data(), 8 * key.size(), &aes));
+      std::vector<uint8_t> out(ct.size() < 8 ? 0 : ct.size() - 8);
+      int len = AES_unwrap_key(&aes, nullptr, out.data(), ct.data(), ct.size());
+      EXPECT_EQ(-1, len);
+    }
+  });
+}
+
+TEST(AESTest, WrapBadLengths) {
+  uint8_t key[128/8] = {0};
+  AES_KEY aes;
+  ASSERT_EQ(0, AES_set_encrypt_key(key, 128, &aes));
+
+  // Input lengths to |AES_wrap_key| must be a multiple of 8 and at least 16.
+  static const size_t kLengths[] = {0, 1,  2,  3,  4,  5,  6,  7, 8,
+                                    9, 10, 11, 12, 13, 14, 15, 20};
+  for (size_t len : kLengths) {
+    SCOPED_TRACE(len);
+    std::vector<uint8_t> in(len);
+    std::vector<uint8_t> out(len + 8);
+    EXPECT_EQ(-1,
+              AES_wrap_key(&aes, nullptr, out.data(), in.data(), in.size()));
+  }
+}
diff --git a/src/crypto/fipsmodule/aes/asm/aes-586.pl b/src/crypto/fipsmodule/aes/asm/aes-586.pl
index 09483d9..25f1813 100755
--- a/src/crypto/fipsmodule/aes/asm/aes-586.pl
+++ b/src/crypto/fipsmodule/aes/asm/aes-586.pl
@@ -1172,8 +1172,8 @@
 	&data_word(0x00000000, 0x00000000, 0x00000000, 0x00000000);
 &function_end_B("_x86_AES_encrypt");
 
-# void asm_AES_encrypt (const void *inp,void *out,const AES_KEY *key);
-&function_begin("asm_AES_encrypt");
+# void aes_nohw_encrypt (const void *inp,void *out,const AES_KEY *key);
+&function_begin("aes_nohw_encrypt");
 	&mov	($acc,&wparam(0));		# load inp
 	&mov	($key,&wparam(2));		# load key
 
@@ -1229,7 +1229,7 @@
 	&mov	(&DWP(4,$acc),$s1);
 	&mov	(&DWP(8,$acc),$s2);
 	&mov	(&DWP(12,$acc),$s3);
-&function_end("asm_AES_encrypt");
+&function_end("aes_nohw_encrypt");
 
 #--------------------------------------------------------------------#
 
@@ -1963,8 +1963,8 @@
 	&data_byte(0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d);
 &function_end_B("_x86_AES_decrypt");
 
-# void asm_AES_decrypt (const void *inp,void *out,const AES_KEY *key);
-&function_begin("asm_AES_decrypt");
+# void aes_nohw_decrypt (const void *inp,void *out,const AES_KEY *key);
+&function_begin("aes_nohw_decrypt");
 	&mov	($acc,&wparam(0));		# load inp
 	&mov	($key,&wparam(2));		# load key
 
@@ -2020,9 +2020,9 @@
 	&mov	(&DWP(4,$acc),$s1);
 	&mov	(&DWP(8,$acc),$s2);
 	&mov	(&DWP(12,$acc),$s3);
-&function_end("asm_AES_decrypt");
+&function_end("aes_nohw_decrypt");
 
-# void asm_AES_cbc_encrypt (const void char *inp, unsigned char *out,
+# void aes_nohw_cbc_encrypt (const unsigned char *inp, unsigned char *out,
 #			    size_t length, const AES_KEY *key,
 #			    unsigned char *ivp,const int enc);
 {
@@ -2047,7 +2047,7 @@
 my $aes_key=&DWP(76,"esp");	# copy of aes_key
 my $mark=&DWP(76+240,"esp");	# copy of aes_key->rounds
 
-&function_begin("asm_AES_cbc_encrypt");
+&function_begin("aes_nohw_cbc_encrypt");
 	&mov	($s2 eq "ecx"? $s2 : "",&wparam(2));	# load len
 	&cmp	($s2,0);
 	&je	(&label("drop_out"));
@@ -2638,7 +2638,7 @@
 
 	&mov	("esp",$_esp);
 	&popf	();
-&function_end("asm_AES_cbc_encrypt");
+&function_end("aes_nohw_cbc_encrypt");
 }
 
 #------------------------------------------------------------------#
@@ -2872,12 +2872,12 @@
     &set_label("exit");
 &function_end("_x86_AES_set_encrypt_key");
 
-# int asm_AES_set_encrypt_key(const unsigned char *userKey, const int bits,
-#                             AES_KEY *key)
-&function_begin_B("asm_AES_set_encrypt_key");
+# int aes_nohw_set_encrypt_key(const unsigned char *userKey, const int bits,
+#                              AES_KEY *key)
+&function_begin_B("aes_nohw_set_encrypt_key");
 	&call	("_x86_AES_set_encrypt_key");
 	&ret	();
-&function_end_B("asm_AES_set_encrypt_key");
+&function_end_B("aes_nohw_set_encrypt_key");
 
 sub deckey()
 { my ($i,$key,$tp1,$tp2,$tp4,$tp8) = @_;
@@ -2934,9 +2934,9 @@
 	&mov	(&DWP(4*$i,$key),$tp1);
 }
 
-# int asm_AES_set_decrypt_key(const unsigned char *userKey, const int bits,
-#                             AES_KEY *key)
-&function_begin_B("asm_AES_set_decrypt_key");
+# int aes_nohw_set_decrypt_key(const unsigned char *userKey, const int bits,
+#                              AES_KEY *key)
+&function_begin_B("aes_nohw_set_decrypt_key");
 	&call	("_x86_AES_set_encrypt_key");
 	&cmp	("eax",0);
 	&je	(&label("proceed"));
@@ -2992,7 +2992,7 @@
 	&jb	(&label("permute"));
 
 	&xor	("eax","eax");			# return success
-&function_end("asm_AES_set_decrypt_key");
+&function_end("aes_nohw_set_decrypt_key");
 &asciz("AES for x86, CRYPTOGAMS by <appro\@openssl.org>");
 
 &asm_finish();
diff --git a/src/crypto/fipsmodule/aes/asm/aes-armv4.pl b/src/crypto/fipsmodule/aes/asm/aes-armv4.pl
index 4b507dd..f4ae922 100644
--- a/src/crypto/fipsmodule/aes/asm/aes-armv4.pl
+++ b/src/crypto/fipsmodule/aes/asm/aes-armv4.pl
@@ -196,14 +196,14 @@
 .word	0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
 .size	AES_Te,.-AES_Te
 
-@ void asm_AES_encrypt(const unsigned char *in, unsigned char *out,
-@ 		       const AES_KEY *key) {
-.global asm_AES_encrypt
-.type   asm_AES_encrypt,%function
+@ void aes_nohw_encrypt(const unsigned char *in, unsigned char *out,
+@ 		                  const AES_KEY *key) {
+.global aes_nohw_encrypt
+.type   aes_nohw_encrypt,%function
 .align	5
-asm_AES_encrypt:
+aes_nohw_encrypt:
 #ifndef	__thumb2__
-	sub	r3,pc,#8		@ asm_AES_encrypt
+	sub	r3,pc,#8		@ aes_nohw_encrypt
 #else
 	adr	r3,.
 #endif
@@ -211,7 +211,7 @@
 #if defined(__thumb2__) || defined(__APPLE__)
 	adr	$tbl,AES_Te
 #else
-	sub	$tbl,r3,#asm_AES_encrypt-AES_Te	@ Te
+	sub	$tbl,r3,#aes_nohw_encrypt-AES_Te	@ Te
 #endif
 	mov	$rounds,r0		@ inp
 	mov	$key,r2
@@ -308,7 +308,7 @@
 	moveq	pc,lr			@ be binary compatible with V4, yet
 	bx	lr			@ interoperable with Thumb ISA:-)
 #endif
-.size	asm_AES_encrypt,.-asm_AES_encrypt
+.size	aes_nohw_encrypt,.-aes_nohw_encrypt
 
 .type   _armv4_AES_encrypt,%function
 .align	2
@@ -447,13 +447,13 @@
 	ldr	pc,[sp],#4		@ pop and return
 .size	_armv4_AES_encrypt,.-_armv4_AES_encrypt
 
-.global asm_AES_set_encrypt_key
-.type   asm_AES_set_encrypt_key,%function
+.global aes_nohw_set_encrypt_key
+.type   aes_nohw_set_encrypt_key,%function
 .align	5
-asm_AES_set_encrypt_key:
+aes_nohw_set_encrypt_key:
 _armv4_AES_set_encrypt_key:
 #ifndef	__thumb2__
-	sub	r3,pc,#8		@ asm_AES_set_encrypt_key
+	sub	r3,pc,#8		@ aes_nohw_set_encrypt_key
 #else
 	adr	r3,.
 #endif
@@ -751,22 +751,22 @@
 	moveq	pc,lr			@ be binary compatible with V4, yet
 	bx	lr			@ interoperable with Thumb ISA:-)
 #endif
-.size	asm_AES_set_encrypt_key,.-asm_AES_set_encrypt_key
+.size	aes_nohw_set_encrypt_key,.-aes_nohw_set_encrypt_key
 
-.global asm_AES_set_decrypt_key
-.type   asm_AES_set_decrypt_key,%function
+.global aes_nohw_set_decrypt_key
+.type   aes_nohw_set_decrypt_key,%function
 .align	5
-asm_AES_set_decrypt_key:
+aes_nohw_set_decrypt_key:
 	str	lr,[sp,#-4]!            @ push lr
 	bl	_armv4_AES_set_encrypt_key
 	teq	r0,#0
 	ldr	lr,[sp],#4              @ pop lr
 	bne	.Labrt
 
-	mov	r0,r2			@ asm_AES_set_encrypt_key preserves r2,
+	mov	r0,r2			@ aes_nohw_set_encrypt_key preserves r2,
 	mov	r1,r2			@ which is AES_KEY *key
 	b	_armv4_AES_set_enc2dec_key
-.size	asm_AES_set_decrypt_key,.-asm_AES_set_decrypt_key
+.size	aes_nohw_set_decrypt_key,.-aes_nohw_set_decrypt_key
 
 @ void AES_set_enc2dec_key(const AES_KEY *inp,AES_KEY *out)
 .global	AES_set_enc2dec_key
@@ -972,14 +972,14 @@
 .byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
 .size	AES_Td,.-AES_Td
 
-@ void asm_AES_decrypt(const unsigned char *in, unsigned char *out,
-@ 		       const AES_KEY *key) {
-.global asm_AES_decrypt
-.type   asm_AES_decrypt,%function
+@ void aes_nohw_decrypt(const unsigned char *in, unsigned char *out,
+@ 		                  const AES_KEY *key) {
+.global aes_nohw_decrypt
+.type   aes_nohw_decrypt,%function
 .align	5
-asm_AES_decrypt:
+aes_nohw_decrypt:
 #ifndef	__thumb2__
-	sub	r3,pc,#8		@ asm_AES_decrypt
+	sub	r3,pc,#8		@ aes_nohw_decrypt
 #else
 	adr	r3,.
 #endif
@@ -987,7 +987,7 @@
 #if defined(__thumb2__) || defined(__APPLE__)
 	adr	$tbl,AES_Td
 #else
-	sub	$tbl,r3,#asm_AES_decrypt-AES_Td	@ Td
+	sub	$tbl,r3,#aes_nohw_decrypt-AES_Td	@ Td
 #endif
 	mov	$rounds,r0		@ inp
 	mov	$key,r2
@@ -1084,7 +1084,7 @@
 	moveq	pc,lr			@ be binary compatible with V4, yet
 	bx	lr			@ interoperable with Thumb ISA:-)
 #endif
-.size	asm_AES_decrypt,.-asm_AES_decrypt
+.size	aes_nohw_decrypt,.-aes_nohw_decrypt
 
 .type   _armv4_AES_decrypt,%function
 .align	2
diff --git a/src/crypto/fipsmodule/aes/asm/aes-x86_64.pl b/src/crypto/fipsmodule/aes/asm/aes-x86_64.pl
index 8b74ef0..57c4e08 100755
--- a/src/crypto/fipsmodule/aes/asm/aes-x86_64.pl
+++ b/src/crypto/fipsmodule/aes/asm/aes-x86_64.pl
@@ -590,13 +590,13 @@
 .size	_x86_64_AES_encrypt_compact,.-_x86_64_AES_encrypt_compact
 ___
 
-# void asm_AES_encrypt (const void *inp,void *out,const AES_KEY *key);
+# void aes_nohw_encrypt (const void *inp,void *out,const AES_KEY *key);
 $code.=<<___;
 .align	16
-.globl	asm_AES_encrypt
-.type	asm_AES_encrypt,\@function,3
-.hidden	asm_AES_encrypt
-asm_AES_encrypt:
+.globl	aes_nohw_encrypt
+.type	aes_nohw_encrypt,\@function,3
+.hidden	aes_nohw_encrypt
+aes_nohw_encrypt:
 .cfi_startproc
 	mov	%rsp,%rax
 .cfi_def_cfa_register	%rax
@@ -674,7 +674,7 @@
 .Lenc_epilogue:
 	ret
 .cfi_endproc
-.size	asm_AES_encrypt,.-asm_AES_encrypt
+.size	aes_nohw_encrypt,.-aes_nohw_encrypt
 ___
 
 #------------------------------------------------------------------#
@@ -1204,13 +1204,13 @@
 .size	_x86_64_AES_decrypt_compact,.-_x86_64_AES_decrypt_compact
 ___
 
-# void asm_AES_decrypt (const void *inp,void *out,const AES_KEY *key);
+# void aes_nohw_decrypt (const void *inp,void *out,const AES_KEY *key);
 $code.=<<___;
 .align	16
-.globl	asm_AES_decrypt
-.type	asm_AES_decrypt,\@function,3
-.hidden	asm_AES_decrypt
-asm_AES_decrypt:
+.globl	aes_nohw_decrypt
+.type	aes_nohw_decrypt,\@function,3
+.hidden	aes_nohw_decrypt
+aes_nohw_decrypt:
 .cfi_startproc
 	mov	%rsp,%rax
 .cfi_def_cfa_register	%rax
@@ -1290,7 +1290,7 @@
 .Ldec_epilogue:
 	ret
 .cfi_endproc
-.size	asm_AES_decrypt,.-asm_AES_decrypt
+.size	aes_nohw_decrypt,.-aes_nohw_decrypt
 ___
 #------------------------------------------------------------------#
 
@@ -1321,12 +1321,12 @@
 ___
 }
 
-# int asm_AES_set_encrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key)
+# int aes_nohw_set_encrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key)
 $code.=<<___;
 .align	16
-.globl asm_AES_set_encrypt_key
-.type  asm_AES_set_encrypt_key,\@function,3
-asm_AES_set_encrypt_key:
+.globl aes_nohw_set_encrypt_key
+.type  aes_nohw_set_encrypt_key,\@function,3
+aes_nohw_set_encrypt_key:
 .cfi_startproc
 	push	%rbx
 .cfi_push	%rbx
@@ -1355,7 +1355,7 @@
 .Lenc_key_epilogue:
 	ret
 .cfi_endproc
-.size asm_AES_set_encrypt_key,.-asm_AES_set_encrypt_key
+.size aes_nohw_set_encrypt_key,.-aes_nohw_set_encrypt_key
 
 .type	_x86_64_AES_set_encrypt_key,\@abi-omnipotent
 .align	16
@@ -1598,12 +1598,12 @@
 ___
 }
 
-# int asm_AES_set_decrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key)
+# int aes_nohw_set_decrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key)
 $code.=<<___;
 .align	16
-.globl asm_AES_set_decrypt_key
-.type  asm_AES_set_decrypt_key,\@function,3
-asm_AES_set_decrypt_key:
+.globl aes_nohw_set_decrypt_key
+.type  aes_nohw_set_decrypt_key,\@function,3
+aes_nohw_set_decrypt_key:
 .cfi_startproc
 	push	%rbx
 .cfi_push	%rbx
@@ -1688,12 +1688,12 @@
 .Ldec_key_epilogue:
 	ret
 .cfi_endproc
-.size	asm_AES_set_decrypt_key,.-asm_AES_set_decrypt_key
+.size	aes_nohw_set_decrypt_key,.-aes_nohw_set_decrypt_key
 ___
 
-# void asm_AES_cbc_encrypt (const void char *inp, unsigned char *out,
-#			    size_t length, const AES_KEY *key,
-#			    unsigned char *ivp,const int enc);
+# void aes_nohw_cbc_encrypt (const unsigned char *inp, unsigned char *out,
+#			                       size_t length, const AES_KEY *key,
+#			                       unsigned char *ivp,const int enc);
 {
 # stack frame layout
 # -8(%rsp)		return address
@@ -1711,11 +1711,11 @@
 
 $code.=<<___;
 .align	16
-.globl	asm_AES_cbc_encrypt
-.type	asm_AES_cbc_encrypt,\@function,6
+.globl	aes_nohw_cbc_encrypt
+.type	aes_nohw_cbc_encrypt,\@function,6
 .extern	OPENSSL_ia32cap_P
-.hidden	asm_AES_cbc_encrypt
-asm_AES_cbc_encrypt:
+.hidden	aes_nohw_cbc_encrypt
+aes_nohw_cbc_encrypt:
 .cfi_startproc
 	cmp	\$0,%rdx	# check length
 	je	.Lcbc_epilogue
@@ -2184,7 +2184,7 @@
 .Lcbc_epilogue:
 	ret
 .cfi_endproc
-.size	asm_AES_cbc_encrypt,.-asm_AES_cbc_encrypt
+.size	aes_nohw_cbc_encrypt,.-aes_nohw_cbc_encrypt
 ___
 }
 
@@ -2844,45 +2844,45 @@
 
 .section	.pdata
 .align	4
-	.rva	.LSEH_begin_asm_AES_encrypt
-	.rva	.LSEH_end_asm_AES_encrypt
-	.rva	.LSEH_info_asm_AES_encrypt
+	.rva	.LSEH_begin_aes_nohw_encrypt
+	.rva	.LSEH_end_aes_nohw_encrypt
+	.rva	.LSEH_info_aes_nohw_encrypt
 
-	.rva	.LSEH_begin_asm_AES_decrypt
-	.rva	.LSEH_end_asm_AES_decrypt
-	.rva	.LSEH_info_asm_AES_decrypt
+	.rva	.LSEH_begin_aes_nohw_decrypt
+	.rva	.LSEH_end_aes_nohw_decrypt
+	.rva	.LSEH_info_aes_nohw_decrypt
 
-	.rva	.LSEH_begin_asm_AES_set_encrypt_key
-	.rva	.LSEH_end_asm_AES_set_encrypt_key
-	.rva	.LSEH_info_asm_AES_set_encrypt_key
+	.rva	.LSEH_begin_aes_nohw_set_encrypt_key
+	.rva	.LSEH_end_aes_nohw_set_encrypt_key
+	.rva	.LSEH_info_aes_nohw_set_encrypt_key
 
-	.rva	.LSEH_begin_asm_AES_set_decrypt_key
-	.rva	.LSEH_end_asm_AES_set_decrypt_key
-	.rva	.LSEH_info_asm_AES_set_decrypt_key
+	.rva	.LSEH_begin_aes_nohw_set_decrypt_key
+	.rva	.LSEH_end_aes_nohw_set_decrypt_key
+	.rva	.LSEH_info_aes_nohw_set_decrypt_key
 
-	.rva	.LSEH_begin_asm_AES_cbc_encrypt
-	.rva	.LSEH_end_asm_AES_cbc_encrypt
-	.rva	.LSEH_info_asm_AES_cbc_encrypt
+	.rva	.LSEH_begin_aes_nohw_cbc_encrypt
+	.rva	.LSEH_end_aes_nohw_cbc_encrypt
+	.rva	.LSEH_info_aes_nohw_cbc_encrypt
 
 .section	.xdata
 .align	8
-.LSEH_info_asm_AES_encrypt:
+.LSEH_info_aes_nohw_encrypt:
 	.byte	9,0,0,0
 	.rva	block_se_handler
 	.rva	.Lenc_prologue,.Lenc_epilogue	# HandlerData[]
-.LSEH_info_asm_AES_decrypt:
+.LSEH_info_aes_nohw_decrypt:
 	.byte	9,0,0,0
 	.rva	block_se_handler
 	.rva	.Ldec_prologue,.Ldec_epilogue	# HandlerData[]
-.LSEH_info_asm_AES_set_encrypt_key:
+.LSEH_info_aes_nohw_set_encrypt_key:
 	.byte	9,0,0,0
 	.rva	key_se_handler
 	.rva	.Lenc_key_prologue,.Lenc_key_epilogue	# HandlerData[]
-.LSEH_info_asm_AES_set_decrypt_key:
+.LSEH_info_aes_nohw_set_decrypt_key:
 	.byte	9,0,0,0
 	.rva	key_se_handler
 	.rva	.Ldec_key_prologue,.Ldec_key_epilogue	# HandlerData[]
-.LSEH_info_asm_AES_cbc_encrypt:
+.LSEH_info_aes_nohw_cbc_encrypt:
 	.byte	9,0,0,0
 	.rva	cbc_se_handler
 ___
diff --git a/src/crypto/fipsmodule/aes/asm/aesni-x86_64.pl b/src/crypto/fipsmodule/aes/asm/aesni-x86_64.pl
index a186941..6545274 100644
--- a/src/crypto/fipsmodule/aes/asm/aesni-x86_64.pl
+++ b/src/crypto/fipsmodule/aes/asm/aesni-x86_64.pl
@@ -188,7 +188,7 @@
 #	incurred by operations on %xmm8-15. As ECB is not considered
 #	critical, nothing was done to mitigate the problem.
 
-$PREFIX="aesni";	# if $PREFIX is set to "AES", the script
+$PREFIX="aes_hw";	# if $PREFIX is set to "AES", the script
 			# generates drop-in replacement for
 			# crypto/aes/asm/aes-x86_64.pl:-)
 
@@ -206,7 +206,7 @@
 open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
 *STDOUT=*OUT;
 
-$movkey = $PREFIX eq "aesni" ? "movups" : "movups";
+$movkey = $PREFIX eq "aes_hw" ? "movups" : "movups";
 @_4args=$win64?	("%rcx","%rdx","%r8", "%r9") :	# Win64 order
 		("%rdi","%rsi","%rdx","%rcx");	# Unix order
 
@@ -577,27 +577,27 @@
 .size	_aesni_${dir}rypt8,.-_aesni_${dir}rypt8
 ___
 }
-&aesni_generate2("enc") if ($PREFIX eq "aesni");
+&aesni_generate2("enc") if ($PREFIX eq "aes_hw");
 &aesni_generate2("dec");
-&aesni_generate3("enc") if ($PREFIX eq "aesni");
+&aesni_generate3("enc") if ($PREFIX eq "aes_hw");
 &aesni_generate3("dec");
-&aesni_generate4("enc") if ($PREFIX eq "aesni");
+&aesni_generate4("enc") if ($PREFIX eq "aes_hw");
 &aesni_generate4("dec");
-&aesni_generate6("enc") if ($PREFIX eq "aesni");
+&aesni_generate6("enc") if ($PREFIX eq "aes_hw");
 &aesni_generate6("dec");
-&aesni_generate8("enc") if ($PREFIX eq "aesni");
+&aesni_generate8("enc") if ($PREFIX eq "aes_hw");
 &aesni_generate8("dec");
 
-if ($PREFIX eq "aesni") {
+if ($PREFIX eq "aes_hw") {
 ########################################################################
 # void aesni_ecb_encrypt (const void *in, void *out,
 #			  size_t length, const AES_KEY *key,
 #			  int enc);
 $code.=<<___;
-.globl	aesni_ecb_encrypt
-.type	aesni_ecb_encrypt,\@function,5
+.globl	${PREFIX}_ecb_encrypt
+.type	${PREFIX}_ecb_encrypt,\@function,5
 .align	16
-aesni_ecb_encrypt:
+${PREFIX}_ecb_encrypt:
 ___
 $code.=<<___ if ($win64);
 	lea	-0x58(%rsp),%rsp
@@ -943,7 +943,7 @@
 ___
 $code.=<<___;
 	ret
-.size	aesni_ecb_encrypt,.-aesni_ecb_encrypt
+.size	${PREFIX}_ecb_encrypt,.-${PREFIX}_ecb_encrypt
 ___
 
 {
@@ -964,10 +964,10 @@
 my $bswap_mask="%xmm7";
 
 $code.=<<___;
-.globl	aesni_ccm64_encrypt_blocks
-.type	aesni_ccm64_encrypt_blocks,\@function,6
+.globl	${PREFIX}_ccm64_encrypt_blocks
+.type	${PREFIX}_ccm64_encrypt_blocks,\@function,6
 .align	16
-aesni_ccm64_encrypt_blocks:
+${PREFIX}_ccm64_encrypt_blocks:
 ___
 $code.=<<___ if ($win64);
 	lea	-0x58(%rsp),%rsp
@@ -1050,14 +1050,14 @@
 ___
 $code.=<<___;
 	ret
-.size	aesni_ccm64_encrypt_blocks,.-aesni_ccm64_encrypt_blocks
+.size	${PREFIX}_ccm64_encrypt_blocks,.-${PREFIX}_ccm64_encrypt_blocks
 ___
 ######################################################################
 $code.=<<___;
-.globl	aesni_ccm64_decrypt_blocks
-.type	aesni_ccm64_decrypt_blocks,\@function,6
+.globl	${PREFIX}_ccm64_decrypt_blocks
+.type	${PREFIX}_ccm64_decrypt_blocks,\@function,6
 .align	16
-aesni_ccm64_decrypt_blocks:
+${PREFIX}_ccm64_decrypt_blocks:
 ___
 $code.=<<___ if ($win64);
 	lea	-0x58(%rsp),%rsp
@@ -1157,7 +1157,7 @@
 ___
 $code.=<<___;
 	ret
-.size	aesni_ccm64_decrypt_blocks,.-aesni_ccm64_decrypt_blocks
+.size	${PREFIX}_ccm64_decrypt_blocks,.-${PREFIX}_ccm64_decrypt_blocks
 ___
 }
 ######################################################################
@@ -1178,10 +1178,10 @@
 my $frame_size = 0x80 + ($win64?160:0);
 
 $code.=<<___;
-.globl	aesni_ctr32_encrypt_blocks
-.type	aesni_ctr32_encrypt_blocks,\@function,5
+.globl	${PREFIX}_ctr32_encrypt_blocks
+.type	${PREFIX}_ctr32_encrypt_blocks,\@function,5
 .align	16
-aesni_ctr32_encrypt_blocks:
+${PREFIX}_ctr32_encrypt_blocks:
 .cfi_startproc
 	cmp	\$1,$len
 	jne	.Lctr32_bulk
@@ -1734,7 +1734,7 @@
 .Lctr32_epilogue:
 	ret
 .cfi_endproc
-.size	aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks
+.size	${PREFIX}_ctr32_encrypt_blocks,.-${PREFIX}_ctr32_encrypt_blocks
 ___
 }
 
@@ -1751,10 +1751,10 @@
 my $key_ = "%rbp";	# override so that we can use %r11 as FP
 
 $code.=<<___;
-.globl	aesni_xts_encrypt
-.type	aesni_xts_encrypt,\@function,6
+.globl	${PREFIX}_xts_encrypt
+.type	${PREFIX}_xts_encrypt,\@function,6
 .align	16
-aesni_xts_encrypt:
+${PREFIX}_xts_encrypt:
 .cfi_startproc
 	lea	(%rsp),%r11			# frame pointer
 .cfi_def_cfa_register	%r11
@@ -2230,14 +2230,14 @@
 .Lxts_enc_epilogue:
 	ret
 .cfi_endproc
-.size	aesni_xts_encrypt,.-aesni_xts_encrypt
+.size	${PREFIX}_xts_encrypt,.-${PREFIX}_xts_encrypt
 ___
 
 $code.=<<___;
-.globl	aesni_xts_decrypt
-.type	aesni_xts_decrypt,\@function,6
+.globl	${PREFIX}_xts_decrypt
+.type	${PREFIX}_xts_decrypt,\@function,6
 .align	16
-aesni_xts_decrypt:
+${PREFIX}_xts_decrypt:
 .cfi_startproc
 	lea	(%rsp),%r11			# frame pointer
 .cfi_def_cfa_register	%r11
@@ -2739,7 +2739,7 @@
 .Lxts_dec_epilogue:
 	ret
 .cfi_endproc
-.size	aesni_xts_decrypt,.-aesni_xts_decrypt
+.size	${PREFIX}_xts_decrypt,.-${PREFIX}_xts_decrypt
 ___
 }
 
@@ -2759,10 +2759,10 @@
 my $blocks = $len;
 
 $code.=<<___;
-.globl	aesni_ocb_encrypt
-.type	aesni_ocb_encrypt,\@function,6
+.globl	${PREFIX}_ocb_encrypt
+.type	${PREFIX}_ocb_encrypt,\@function,6
 .align	32
-aesni_ocb_encrypt:
+${PREFIX}_ocb_encrypt:
 .cfi_startproc
 	lea	(%rsp),%rax
 	push	%rbx
@@ -3011,7 +3011,7 @@
 .Locb_enc_epilogue:
 	ret
 .cfi_endproc
-.size	aesni_ocb_encrypt,.-aesni_ocb_encrypt
+.size	${PREFIX}_ocb_encrypt,.-${PREFIX}_ocb_encrypt
 
 .type	__ocb_encrypt6,\@abi-omnipotent
 .align	32
@@ -3219,10 +3219,10 @@
 	ret
 .size	__ocb_encrypt1,.-__ocb_encrypt1
 
-.globl	aesni_ocb_decrypt
-.type	aesni_ocb_decrypt,\@function,6
+.globl	${PREFIX}_ocb_decrypt
+.type	${PREFIX}_ocb_decrypt,\@function,6
 .align	32
-aesni_ocb_decrypt:
+${PREFIX}_ocb_decrypt:
 .cfi_startproc
 	lea	(%rsp),%rax
 	push	%rbx
@@ -3493,7 +3493,7 @@
 .Locb_dec_epilogue:
 	ret
 .cfi_endproc
-.size	aesni_ocb_decrypt,.-aesni_ocb_decrypt
+.size	${PREFIX}_ocb_decrypt,.-${PREFIX}_ocb_decrypt
 
 .type	__ocb_decrypt6,\@abi-omnipotent
 .align	32
@@ -4736,7 +4736,7 @@
 $code.=<<___;
 .extern	__imp_RtlVirtualUnwind
 ___
-$code.=<<___ if ($PREFIX eq "aesni");
+$code.=<<___ if ($PREFIX eq "aes_hw");
 .type	ecb_ccm64_se_handler,\@abi-omnipotent
 .align	16
 ecb_ccm64_se_handler:
@@ -4776,7 +4776,7 @@
 	lea	0x58(%rax),%rax		# adjust stack pointer
 
 	jmp	.Lcommon_seh_tail
-.size	ecb_ccm64_se_handler,.-ecb_ccm64_se_handler
+.size	ecb_ccm64_se_handler,.-ecb_ccm64_se_handler
 
 .type	ctr_xts_se_handler,\@abi-omnipotent
 .align	16
@@ -4968,37 +4968,37 @@
 .section	.pdata
 .align	4
 ___
-$code.=<<___ if ($PREFIX eq "aesni");
-	.rva	.LSEH_begin_aesni_ecb_encrypt
-	.rva	.LSEH_end_aesni_ecb_encrypt
+$code.=<<___ if ($PREFIX eq "aes_hw");
+	.rva	.LSEH_begin_${PREFIX}_ecb_encrypt
+	.rva	.LSEH_end_${PREFIX}_ecb_encrypt
 	.rva	.LSEH_info_ecb
 
-	.rva	.LSEH_begin_aesni_ccm64_encrypt_blocks
-	.rva	.LSEH_end_aesni_ccm64_encrypt_blocks
+	.rva	.LSEH_begin_${PREFIX}_ccm64_encrypt_blocks
+	.rva	.LSEH_end_${PREFIX}_ccm64_encrypt_blocks
 	.rva	.LSEH_info_ccm64_enc
 
-	.rva	.LSEH_begin_aesni_ccm64_decrypt_blocks
-	.rva	.LSEH_end_aesni_ccm64_decrypt_blocks
+	.rva	.LSEH_begin_${PREFIX}_ccm64_decrypt_blocks
+	.rva	.LSEH_end_${PREFIX}_ccm64_decrypt_blocks
 	.rva	.LSEH_info_ccm64_dec
 
-	.rva	.LSEH_begin_aesni_ctr32_encrypt_blocks
-	.rva	.LSEH_end_aesni_ctr32_encrypt_blocks
+	.rva	.LSEH_begin_${PREFIX}_ctr32_encrypt_blocks
+	.rva	.LSEH_end_${PREFIX}_ctr32_encrypt_blocks
 	.rva	.LSEH_info_ctr32
 
-	.rva	.LSEH_begin_aesni_xts_encrypt
-	.rva	.LSEH_end_aesni_xts_encrypt
+	.rva	.LSEH_begin_${PREFIX}_xts_encrypt
+	.rva	.LSEH_end_${PREFIX}_xts_encrypt
 	.rva	.LSEH_info_xts_enc
 
-	.rva	.LSEH_begin_aesni_xts_decrypt
-	.rva	.LSEH_end_aesni_xts_decrypt
+	.rva	.LSEH_begin_${PREFIX}_xts_decrypt
+	.rva	.LSEH_end_${PREFIX}_xts_decrypt
 	.rva	.LSEH_info_xts_dec
 
-	.rva	.LSEH_begin_aesni_ocb_encrypt
-	.rva	.LSEH_end_aesni_ocb_encrypt
+	.rva	.LSEH_begin_${PREFIX}_ocb_encrypt
+	.rva	.LSEH_end_${PREFIX}_ocb_encrypt
 	.rva	.LSEH_info_ocb_enc
 
-	.rva	.LSEH_begin_aesni_ocb_decrypt
-	.rva	.LSEH_end_aesni_ocb_decrypt
+	.rva	.LSEH_begin_${PREFIX}_ocb_decrypt
+	.rva	.LSEH_end_${PREFIX}_ocb_decrypt
 	.rva	.LSEH_info_ocb_dec
 ___
 $code.=<<___;
@@ -5016,7 +5016,7 @@
 .section	.xdata
 .align	8
 ___
-$code.=<<___ if ($PREFIX eq "aesni");
+$code.=<<___ if ($PREFIX eq "aes_hw");
 .LSEH_info_ecb:
 	.byte	9,0,0,0
 	.rva	ecb_ccm64_se_handler
diff --git a/src/crypto/fipsmodule/aes/asm/bsaes-x86_64.pl b/src/crypto/fipsmodule/aes/asm/bsaes-x86_64.pl
index 607f8e7..11b7a9d 100644
--- a/src/crypto/fipsmodule/aes/asm/bsaes-x86_64.pl
+++ b/src/crypto/fipsmodule/aes/asm/bsaes-x86_64.pl
@@ -810,8 +810,8 @@
 $code.=<<___;
 .text
 
-.extern	asm_AES_encrypt
-.extern	asm_AES_decrypt
+.extern	aes_nohw_encrypt
+.extern	aes_nohw_decrypt
 
 .type	_bsaes_encrypt8,\@abi-omnipotent
 .align	64
@@ -1327,7 +1327,7 @@
 	lea	($inp), $arg1
 	lea	($out), $arg2
 	lea	($key), $arg3
-	call	asm_AES_encrypt
+	call	aes_nohw_encrypt
 	lea	16($inp), $inp
 	lea	16($out), $out
 	dec	$len
@@ -1547,7 +1547,7 @@
 	lea	($inp), $arg1
 	lea	($out), $arg2
 	lea	($key), $arg3
-	call	asm_AES_decrypt
+	call	aes_nohw_decrypt
 	lea	16($inp), $inp
 	lea	16($out), $out
 	dec	$len
@@ -1602,7 +1602,7 @@
 ___
 }
 $code.=<<___;
-.extern	asm_AES_cbc_encrypt
+.extern	aes_nohw_cbc_encrypt
 .globl	bsaes_cbc_encrypt
 .type	bsaes_cbc_encrypt,\@abi-omnipotent
 .align	16
@@ -1614,9 +1614,9 @@
 ___
 $code.=<<___;
 	cmp	\$0,$arg6
-	jne	asm_AES_cbc_encrypt
+	jne	aes_nohw_cbc_encrypt
 	cmp	\$128,$arg3
-	jb	asm_AES_cbc_encrypt
+	jb	aes_nohw_cbc_encrypt
 
 	mov	%rsp, %rax
 .Lcbc_dec_prologue:
@@ -1855,7 +1855,7 @@
 	lea	($inp), $arg1
 	lea	0x20(%rbp), $arg2	# buffer output
 	lea	($key), $arg3
-	call	asm_AES_decrypt		# doesn't touch %xmm
+	call	aes_nohw_decrypt		# doesn't touch %xmm
 	pxor	0x20(%rbp), @XMM[15]	# ^= IV
 	movdqu	@XMM[15], ($out)	# write output
 	movdqa	@XMM[0], @XMM[15]	# IV
@@ -2097,7 +2097,7 @@
 	lea	0x20(%rbp), $arg1
 	lea	0x30(%rbp), $arg2
 	lea	($key), $arg3
-	call	asm_AES_encrypt
+	call	aes_nohw_encrypt
 	movdqu	($inp), @XMM[1]
 	lea	16($inp), $inp
 	mov	0x2c(%rbp), %eax	# load 32-bit counter
@@ -2216,7 +2216,7 @@
 	lea	($arg6), $arg1
 	lea	0x20(%rbp), $arg2
 	lea	($arg5), $arg3
-	call	asm_AES_encrypt		# generate initial tweak
+	call	aes_nohw_encrypt		# generate initial tweak
 
 	mov	240($key), %eax		# rounds
 	mov	$len, %rbx		# backup $len
@@ -2482,7 +2482,7 @@
 	lea	0x20(%rbp), $arg1
 	lea	0x20(%rbp), $arg2
 	lea	($key), $arg3
-	call	asm_AES_encrypt		# doesn't touch %xmm
+	call	aes_nohw_encrypt		# doesn't touch %xmm
 	pxor	0x20(%rbp), @XMM[0]	# ^= tweak[]
 	#pxor	@XMM[8], @XMM[0]
 	#lea	0x80(%rsp), %rax	# pass key schedule
@@ -2515,7 +2515,7 @@
 	lea	0x20(%rbp), $arg2
 	movdqa	@XMM[0], 0x20(%rbp)
 	lea	($key), $arg3
-	call	asm_AES_encrypt		# doesn't touch %xmm
+	call	aes_nohw_encrypt		# doesn't touch %xmm
 	pxor	0x20(%rbp), @XMM[7]
 	movdqu	@XMM[7], -16($out)
 
@@ -2614,7 +2614,7 @@
 	lea	($arg6), $arg1
 	lea	0x20(%rbp), $arg2
 	lea	($arg5), $arg3
-	call	asm_AES_encrypt		# generate initial tweak
+	call	aes_nohw_encrypt		# generate initial tweak
 
 	mov	240($key), %eax		# rounds
 	mov	$len, %rbx		# backup $len
@@ -2887,7 +2887,7 @@
 	lea	0x20(%rbp), $arg1
 	lea	0x20(%rbp), $arg2
 	lea	($key), $arg3
-	call	asm_AES_decrypt		# doesn't touch %xmm
+	call	aes_nohw_decrypt		# doesn't touch %xmm
 	pxor	0x20(%rbp), @XMM[0]	# ^= tweak[]
 	#pxor	@XMM[8], @XMM[0]
 	#lea	0x80(%rsp), %rax	# pass key schedule
@@ -2918,7 +2918,7 @@
 	lea	0x20(%rbp), $arg2
 	movdqa	@XMM[0], 0x20(%rbp)
 	lea	($key), $arg3
-	call	asm_AES_decrypt		# doesn't touch %xmm
+	call	aes_nohw_decrypt		# doesn't touch %xmm
 	pxor	0x20(%rbp), @XMM[7]
 	mov	$out, %rdx
 	movdqu	@XMM[7], ($out)
@@ -2939,7 +2939,7 @@
 	lea	0x20(%rbp), $arg2
 	movdqa	@XMM[0], 0x20(%rbp)
 	lea	($key), $arg3
-	call	asm_AES_decrypt		# doesn't touch %xmm
+	call	aes_nohw_decrypt		# doesn't touch %xmm
 	pxor	0x20(%rbp), @XMM[6]
 	movdqu	@XMM[6], ($out)
 
diff --git a/src/crypto/fipsmodule/aes/internal.h b/src/crypto/fipsmodule/aes/internal.h
index 45db9ee..7c19b9c 100644
--- a/src/crypto/fipsmodule/aes/internal.h
+++ b/src/crypto/fipsmodule/aes/internal.h
@@ -24,21 +24,30 @@
 #endif
 
 
-#if !defined(OPENSSL_NO_ASM) && (defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64))
+#if !defined(OPENSSL_NO_ASM)
+
+#if defined(OPENSSL_X86_64)
+#define HWAES
+#define HWAES_ECB
+
+static int hwaes_capable(void) {
+  return (OPENSSL_ia32cap_P[1] & (1 << (57 - 32))) != 0;
+}
+#elif defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)
 #define HWAES
 
 static int hwaes_capable(void) {
   return CRYPTO_is_ARMv8_AES_capable();
 }
-#endif  // !NO_ASM && (AES || AARCH64)
-
-#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_PPC64LE)
+#elif defined(OPENSSL_PPC64LE)
 #define HWAES
 
 static int hwaes_capable(void) {
   return CRYPTO_is_PPC64LE_vcrypto_capable();
 }
-#endif  // !NO_ASM && PPC64LE
+#endif
+
+#endif  // !NO_ASM
 
 
 #if defined(HWAES)
@@ -93,6 +102,12 @@
 
 #endif  // !HWAES
 
+
+#if defined(HWAES_ECB)
+void aes_hw_ecb_encrypt(const uint8_t *in, uint8_t *out, size_t length,
+                        const AES_KEY *key, const int enc);
+#endif
+
 #if defined(__cplusplus)
 }  // extern C
 #endif
diff --git a/src/crypto/fipsmodule/aes/key_wrap.c b/src/crypto/fipsmodule/aes/key_wrap.c
index feee0c7..a52c983 100644
--- a/src/crypto/fipsmodule/aes/key_wrap.c
+++ b/src/crypto/fipsmodule/aes/key_wrap.c
@@ -65,9 +65,10 @@
 
 int AES_wrap_key(const AES_KEY *key, const uint8_t *iv, uint8_t *out,
                  const uint8_t *in, size_t in_len) {
-  // See RFC 3394, section 2.2.1.
+  // See RFC 3394, section 2.2.1. Additionally, note that section 2 requires the
+  // plaintext be at least two 8-byte blocks.
 
-  if (in_len > INT_MAX - 8 || in_len < 8 || in_len % 8 != 0) {
+  if (in_len > INT_MAX - 8 || in_len < 16 || in_len % 8 != 0) {
     return -1;
   }
 
@@ -101,9 +102,11 @@
 
 int AES_unwrap_key(const AES_KEY *key, const uint8_t *iv, uint8_t *out,
                    const uint8_t *in, size_t in_len) {
-  // See RFC 3394, section 2.2.2.
+  // See RFC 3394, section 2.2.2. Additionally, note that section 2 requires the
+  // plaintext be at least two 8-byte blocks, so the ciphertext must be at least
+  // three blocks.
 
-  if (in_len > INT_MAX || in_len < 16 || in_len % 8 != 0) {
+  if (in_len > INT_MAX || in_len < 24 || in_len % 8 != 0) {
     return -1;
   }
 
diff --git a/src/crypto/fipsmodule/aes/mode_wrappers.c b/src/crypto/fipsmodule/aes/mode_wrappers.c
index 34514db..5c50c85 100644
--- a/src/crypto/fipsmodule/aes/mode_wrappers.c
+++ b/src/crypto/fipsmodule/aes/mode_wrappers.c
@@ -6,7 +6,7 @@
  * are met:
  *
  * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer. 
+ *    notice, this list of conditions and the following disclaimer.
  *
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in
@@ -50,6 +50,7 @@
 
 #include <assert.h>
 
+#include "../aes/internal.h"
 #include "../modes/internal.h"
 
 
@@ -72,27 +73,29 @@
   }
 }
 
-#if defined(OPENSSL_NO_ASM) || \
-    (!defined(OPENSSL_X86_64) && !defined(OPENSSL_X86))
+#if !defined(OPENSSL_NO_ASM) && (defined(OPENSSL_X86_64) || defined(OPENSSL_X86))
+void aes_nohw_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t len,
+                          const AES_KEY *key, uint8_t *ivec, const int enc);
+#endif
+
 void AES_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t len,
                      const AES_KEY *key, uint8_t *ivec, const int enc) {
+  if (hwaes_capable()) {
+    aes_hw_cbc_encrypt(in, out, len, key, ivec, enc);
+    return;
+  }
 
+#if !defined(OPENSSL_NO_ASM) && \
+    (defined(OPENSSL_X86_64) || defined(OPENSSL_X86))
+  aes_nohw_cbc_encrypt(in, out, len, key, ivec, enc);
+#else
   if (enc) {
     CRYPTO_cbc128_encrypt(in, out, len, key, ivec, (block128_f)AES_encrypt);
   } else {
     CRYPTO_cbc128_decrypt(in, out, len, key, ivec, (block128_f)AES_decrypt);
   }
+#endif
 }
-#else
-
-void asm_AES_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t len,
-                         const AES_KEY *key, uint8_t *ivec, const int enc);
-void AES_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t len,
-                     const AES_KEY *key, uint8_t *ivec, const int enc) {
-  asm_AES_cbc_encrypt(in, out, len, key, ivec, enc);
-}
-
-#endif  // OPENSSL_NO_ASM || (!OPENSSL_X86_64 && !OPENSSL_X86)
 
 void AES_ofb128_encrypt(const uint8_t *in, uint8_t *out, size_t length,
                         const AES_KEY *key, uint8_t *ivec, int *num) {