Revert "Revert "Revert "external/boringssl: Sync to 81080a729af568f7b5fde92b9170cc17065027c9."""

This reverts commit a5c947b7c91bac52eeb5086507b67e52a59ef980.

Reason for revert: Breaks blueline target on qt-dev-plus-aosp and pi-dev-plus-aosp

Change-Id: Ib3f71674ce7f7114e5925043ead7e8e51e9bc31e
diff --git a/src/crypto/fipsmodule/CMakeLists.txt b/src/crypto/fipsmodule/CMakeLists.txt
index e978820..d1e2cb9 100644
--- a/src/crypto/fipsmodule/CMakeLists.txt
+++ b/src/crypto/fipsmodule/CMakeLists.txt
@@ -129,10 +129,6 @@
 perlasm(x86-mont.${ASM_EXT} bn/asm/x86-mont.pl)
 
 if(FIPS_DELOCATE)
-  if(FIPS_SHARED)
-    error("Can't set both delocate and shared mode for FIPS build")
-  endif()
-
   if(OPENSSL_NO_ASM)
     # If OPENSSL_NO_ASM was defined then ASM will not have been enabled, but in
     # FIPS mode we have to have it because the module build requires going via
@@ -193,49 +189,12 @@
 
     OBJECT
 
-    fips_shared_support.c
     is_fips.c
   )
 
   add_dependencies(fipsmodule global_target)
 
   set_target_properties(fipsmodule PROPERTIES LINKER_LANGUAGE C)
-elseif(FIPS_SHARED)
-  if(NOT BUILD_SHARED_LIBS)
-    error("FIPS_SHARED set but not BUILD_SHARED_LIBS")
-  endif()
-
-  add_library(
-    fipsmodule
-
-    OBJECT
-
-    fips_shared_support.c
-    is_fips.c
-  )
-
-  add_dependencies(fipsmodule global_target)
-
-  add_library(
-    bcm_library
-
-    STATIC
-
-    bcm.c
-
-    ${BCM_ASM_SOURCES}
-  )
-
-  add_dependencies(bcm_library global_target)
-
-  add_custom_command(
-    OUTPUT bcm.o
-    COMMAND ld -r -T ${CMAKE_CURRENT_SOURCE_DIR}/fips_shared.lds -o bcm.o --whole-archive $<TARGET_FILE:bcm_library>
-    DEPENDS bcm_library fips_shared.lds
-    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
-  )
-
-  add_custom_target(bcm_o_target DEPENDS bcm.o)
 else()
   add_library(
     fipsmodule
@@ -243,7 +202,6 @@
     OBJECT
 
     bcm.c
-    fips_shared_support.c
     is_fips.c
 
     ${BCM_ASM_SOURCES}
diff --git a/src/crypto/fipsmodule/FIPS.md b/src/crypto/fipsmodule/FIPS.md
index 69719b6..a60e2bf 100644
--- a/src/crypto/fipsmodule/FIPS.md
+++ b/src/crypto/fipsmodule/FIPS.md
@@ -13,9 +13,9 @@
 
 ## Running CAVP tests
 
-CAVP results are calculated by `util/fipstools/cavp`, but that binary is almost always run by `util/fipstools/run_cavp.go`. The latter knows the set of tests to be processed and the flags needed to configure `cavp` for each one. It must be run from the top of a CAVP directory and needs the following options:
+CAVP results are calculated by `fipstools/cavp`, but that binary is almost always run by `fipstools/run_cavp.go`. The latter knows the set of tests to be processed and the flags needed to configure `cavp` for each one. It must be run from the top of a CAVP directory and needs the following options:
 
-1. `-oracle-bin`: points to the location of `util/fipstools/cavp`
+1. `-oracle-bin`: points to the location of `fipstools/cavp`
 2. `-no-fax`: this is needed to suppress checking of the FAX files, which are only included in sample sets.
 
 ## Breaking power-on and continuous tests
@@ -62,25 +62,9 @@
 
 BoringCrypto is linked (often statically) into a large number of binaries. It would be a significant cost if each of these binaries had to be post-processed in order to calculate the known-good HMAC value. We would much prefer if the value could be calculated, once, when BoringCrypto itself is compiled.
 
-In order for the value to be calculated before the final link, there can be no relocations in the hashed code and data. This document describes how we build C and assembly code in order to produce a binary file containing all the code and data for the FIPS module without that code having any relocations.
+In order for the value to be calculated before the final link, there can be no relocations in the hashed code and data. This document describes how we build C and assembly code in order to produce an object file containing all the code and data for the FIPS module without that code having any relocations.
 
-There are two build configurations supported: static and shared. The shared build produces `libcrypto.so`, which includes the FIPS module and is significantly more straightforward and so is described first:
-
-### Shared build
-
-First, all the C source files for the module are compiled as a single unit by compiling a single source file that `#include`s them all (this is `bcm.c`). This, along with some assembly sources, comprise the FIPS module.
-
-The object files resulting from compiling (or assembling) those files is linked in partial-linking mode with a linker script that causes the linker to insert symbols marking the beginning and end of the text and rodata sections. The linker script also discards other types of data sections to ensure that no unhashed data is used by the module.
-
-One source of such data are `rel.ro` sections, which contain data that includes function pointers. Since these function pointers are absolute, they are written by the dynamic linker at run-time and so we must eliminate them. The pattern that causes them is when we have a static `EVP_MD` or `EVP_CIPHER` object thus, inside the module, this pattern is changed to instead reserve space in the BSS for the object, and to add a `CRYPTO_once_t` to protect its initialisation.
-
-Once the partially-linked result is linked again, with other parts of libcrypto, to produce `libcrypto.so`, the contents of the module are fixed, as required. The module code uses the linker-added symbols to find the its code and data at run-time and hashes them upon initialisation. The result is compared against a value stored inside `libcrypto.so`, but outside of the module. That value will, initially, be incorrect, but `inject-hash.go` can inject the correct value.
-
-### Static build
-
-The static build cannot depend on the shared-object link to resolve relocations and thus must take another path.
-
-As with the shared build, all the C sources are build in a single compilation unit. The `-fPIC` flag is used to cause the compiler to use IP-relative addressing in many (but not all) cases. Also the `-S` flag is used to instruct the compiler to produce a textual assembly file rather than a binary object file.
+First, all the C source files for the module are compiled as a single unit by compiling a single source file that `#include`s them all (this is `bcm.c`). The `-fPIC` flag is used to cause the compiler to use IP-relative addressing in many (but not all) cases. Also the `-S` flag is used to instruct the compiler to produce a textual assembly file rather than a binary object file.
 
 The textual assembly file is then processed by a script to merge in assembly implementations of some primitives and to eliminate the remaining sources of relocations.
 
@@ -96,9 +80,9 @@
 
 ##### Read-only data
 
-Normally read-only data is placed in an `.rodata` segment that doesn't get mapped into memory with execute permissions. However, the offset of the data segment from the text segment is another thing that isn't determined until the final link. In order to fix data offsets before the link, read-only data is simply placed in the module's `.text` segment. This might make building ROP chains easier for an attacker, but so it goes.
+Normally read-only data is placed in an `.rodata` segment that doesn't get mapped into memory with execute permissions. However, the offset of the data segment from the text segment is another thing that isn't determined until the final link. In order to fix data offsets before the link, read-only data is simply placed in the module's `.text` segment. This might make building ROP chains easier for an attacker, but so it goes.
 
-Data containing function pointers remains an issue. The source-code changes described above for the shared build apply here too, but no direct references to a BSS section are possible because the offset to that section is not known at compile time. Instead, the script generates functions outside of the module that return pointers to these areas of memory—they effectively act like special-purpose malloc calls that cannot fail.
+One special case is `rel.ro` data, which is data that contains function pointers. Since these function pointers are absolute, they are written by the dynamic linker at run-time and so we must eliminate them. The pattern that causes them is when we have a static `EVP_MD` or `EVP_CIPHER` object; thus, inside the module, we change this pattern to instead reserve space in the BSS for the object, and add a `CRYPTO_once_t` to protect its initialisation. The script will generate functions outside of the module that return pointers to these areas of memory—they effectively act like special-purpose malloc calls that cannot fail.
 
 ##### Read-write data
 
@@ -108,7 +92,7 @@
 
 ##### Other transforms
 
-The script performs a number of other transformations which are worth noting but do not warrant their own discussions:
+The script performs a number of other transformations which are worth noting but do not warrant their own sections:
 
 1.  It duplicates each global symbol with a local symbol that has `_local_target` appended to the name. References to the global symbols are rewritten to use these duplicates instead. Otherwise, although the generated code uses IP-relative references, relocations are emitted for global symbols in case they are overridden by a different object file during the link.
 1.  Various sections, notably `.rodata`, are moved to the `.text` section, inside the module, so module code may reference it without relocations.
@@ -128,15 +112,15 @@
 
 (This is based on reading OpenSSL's [user guide](https://www.openssl.org/docs/fips/UserGuide-2.0.pdf) and inspecting the code of OpenSSL FIPS 2.0.12.)
 
-OpenSSL's solution to this problem is very similar to our shared build, with just a few differences:
+OpenSSL's solution to this problem is broadly similar but has a number of differences:
 
 1.  OpenSSL deals with run-time relocations by not hashing parts of the module's data.
-1.  OpenSSL uses `ld -r` (the partial linking mode) to merge a number of object files into their `fipscanister.o`. For BoringCrypto's static build, we merge all the C source files by building a single C file that #includes all the others, and we merge the assembly sources by appending them to the assembly output from the C compiler.
-1.  OpenSSL depends on the link order and inserts two object files, `fips_start.o` and `fips_end.o`, in order to establish the `module_start` and `module_end` values. BoringCrypto adds labels at the correct places in the assembly for the static build, or uses a linker script for the shared build.
+1.  OpenSSL uses `ld -r` (the partial linking mode) to merge a number of object files into their `fipscanister.o`. For BoringCrypto, we merge all the C source files by building a single C file that #includes all the others, and we merge the assembly sources by appending them to the assembly output from the C compiler.
+1.  OpenSSL depends on the link order and inserts two object files, `fips_start.o` and `fips_end.o`, in order to establish the `module_start` and `module_end` values. BoringCrypto adds labels at the correct places in the assembly.
 1.  OpenSSL calculates the hash after the final link and either injects it into the binary or recompiles with the value of the hash passed in as a #define. BoringCrypto calculates it prior to the final link and injects it into the object file.
 1.  OpenSSL references read-write data directly, since it can know the offsets to it. BoringCrypto indirects these loads and stores.
 1.  OpenSSL doesn't run the power-on test until `FIPS_module_mode_set` is called. BoringCrypto does it in a constructor function. Failure of the test is non-fatal in OpenSSL, BoringCrypto will crash.
-1.  Since the contents of OpenSSL's module change between compilation and use, OpenSSL generates `fipscanister.o.sha1` to check that the compiled object doesn't change before linking. Since BoringCrypto's module is fixed after compilation (in the static case), the final integrity check is unbroken through the linking process.
+1.  Since the contents of OpenSSL's module change between compilation and use, OpenSSL generates `fipscanister.o.sha1` to check that the compiled object doesn't change before linking. Since BoringCrypto's module is fixed after compilation, the final integrity check is unbroken through the linking process.
 
 Some of the similarities are worth noting:
 
diff --git a/src/crypto/fipsmodule/aes/aes_test.cc b/src/crypto/fipsmodule/aes/aes_test.cc
index 7fadb35..1f9a491 100644
--- a/src/crypto/fipsmodule/aes/aes_test.cc
+++ b/src/crypto/fipsmodule/aes/aes_test.cc
@@ -122,40 +122,12 @@
                                ciphertext.data(), ciphertext.size()));
 }
 
-static void TestKeyWrapWithPadding(FileTest *t) {
-  std::vector<uint8_t> key, plaintext, ciphertext;
-  ASSERT_TRUE(t->GetBytes(&key, "Key"));
-  ASSERT_TRUE(t->GetBytes(&plaintext, "Plaintext"));
-  ASSERT_TRUE(t->GetBytes(&ciphertext, "Ciphertext"));
-
-  // Test encryption.
-  AES_KEY aes_key;
-  ASSERT_EQ(0, AES_set_encrypt_key(key.data(), 8 * key.size(), &aes_key));
-  std::unique_ptr<uint8_t[]> buf(new uint8_t[plaintext.size() + 15]);
-  size_t len;
-  ASSERT_TRUE(AES_wrap_key_padded(&aes_key, buf.get(), &len,
-                                  plaintext.size() + 15, plaintext.data(),
-                                  plaintext.size()));
-  EXPECT_EQ(Bytes(ciphertext), Bytes(buf.get(), static_cast<size_t>(len)));
-
-  // Test decryption
-  ASSERT_EQ(0, AES_set_decrypt_key(key.data(), 8 * key.size(), &aes_key));
-  buf.reset(new uint8_t[ciphertext.size() - 8]);
-  ASSERT_TRUE(AES_unwrap_key_padded(&aes_key, buf.get(), &len,
-                                    ciphertext.size() - 8, ciphertext.data(),
-                                    ciphertext.size()));
-  ASSERT_EQ(len, plaintext.size());
-  EXPECT_EQ(Bytes(plaintext), Bytes(buf.get(), static_cast<size_t>(len)));
-}
-
 TEST(AESTest, TestVectors) {
   FileTestGTest("crypto/fipsmodule/aes/aes_tests.txt", [](FileTest *t) {
     if (t->GetParameter() == "Raw") {
       TestRaw(t);
     } else if (t->GetParameter() == "KeyWrap") {
       TestKeyWrap(t);
-    } else if (t->GetParameter() == "KeyWrapWithPadding") {
-      TestKeyWrapWithPadding(t);
     } else {
       ADD_FAILURE() << "Unknown mode " << t->GetParameter();
     }
@@ -200,48 +172,6 @@
   });
 }
 
-TEST(AESTest, WycheproofKeyWrapWithPadding) {
-  FileTestGTest("third_party/wycheproof_testvectors/kwp_test.txt",
-                [](FileTest *t) {
-    std::string key_size;
-    ASSERT_TRUE(t->GetInstruction(&key_size, "keySize"));
-    std::vector<uint8_t> ct, key, msg;
-    ASSERT_TRUE(t->GetBytes(&ct, "ct"));
-    ASSERT_TRUE(t->GetBytes(&key, "key"));
-    ASSERT_TRUE(t->GetBytes(&msg, "msg"));
-    ASSERT_EQ(static_cast<unsigned>(atoi(key_size.c_str())), key.size() * 8);
-    WycheproofResult result;
-    ASSERT_TRUE(GetWycheproofResult(t, &result));
-
-    // Wycheproof contains test vectors with empty messages that it believes
-    // should pass. However, both RFC 5649 and SP 800-38F section 5.3.1 say that
-    // the minimum length is one. Therefore we consider test cases with an empty
-    // message to be invalid.
-    if (result != WycheproofResult::kInvalid && !msg.empty()) {
-      AES_KEY aes;
-      ASSERT_EQ(0, AES_set_decrypt_key(key.data(), 8 * key.size(), &aes));
-      std::vector<uint8_t> out(ct.size() - 8);
-      size_t len;
-      ASSERT_TRUE(AES_unwrap_key_padded(&aes, out.data(), &len, ct.size() - 8,
-                                        ct.data(), ct.size()));
-      EXPECT_EQ(Bytes(msg), Bytes(out.data(), len));
-
-      out.resize(msg.size() + 15);
-      ASSERT_EQ(0, AES_set_encrypt_key(key.data(), 8 * key.size(), &aes));
-      ASSERT_TRUE(AES_wrap_key_padded(&aes, out.data(), &len, msg.size() + 15,
-                                      msg.data(), msg.size()));
-      EXPECT_EQ(Bytes(ct), Bytes(out.data(), len));
-    } else {
-      AES_KEY aes;
-      ASSERT_EQ(0, AES_set_decrypt_key(key.data(), 8 * key.size(), &aes));
-      std::vector<uint8_t> out(ct.size());
-      size_t len;
-      ASSERT_FALSE(AES_unwrap_key_padded(&aes, out.data(), &len, ct.size(),
-                                         ct.data(), ct.size()));
-    }
-  });
-}
-
 TEST(AESTest, WrapBadLengths) {
   uint8_t key[128/8] = {0};
   AES_KEY aes;
diff --git a/src/crypto/fipsmodule/aes/aes_tests.txt b/src/crypto/fipsmodule/aes/aes_tests.txt
index efbe294..d4e4c61 100644
--- a/src/crypto/fipsmodule/aes/aes_tests.txt
+++ b/src/crypto/fipsmodule/aes/aes_tests.txt
@@ -48,16 +48,3 @@
 Key = 000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f
 Plaintext = 00112233445566778899aabbccddeeff000102030405060708090a0b0c0d0e0f
 Ciphertext = 28c9f404c4b810f4cbccb35cfb87f8263f5786e2d80ed326cbc7f0e71a99f43bfb988b9b7a02dd21
-
-
-# Test vectors from https://tools.ietf.org/html/rfc5649#section-6
-
-Mode = KeyWrapWithPadding
-Key = 5840df6e29b02af1ab493b705bf16ea1ae8338f4dcc176a8
-Plaintext = c37b7e6492584340bed12207808941155068f738
-Ciphertext = 138bdeaa9b8fa7fc61f97742e72248ee5ae6ae5360d1ae6a5f54f373fa543b6a
-
-Mode = KeyWrapWithPadding
-Key = 5840df6e29b02af1ab493b705bf16ea1ae8338f4dcc176a8
-Plaintext = 466f7250617369
-Ciphertext = afbeb0f07dfbf5419200f2ccb50bb24f
diff --git a/src/crypto/fipsmodule/aes/asm/aes-586.pl b/src/crypto/fipsmodule/aes/asm/aes-586.pl
index 9b373de..25f1813 100755
--- a/src/crypto/fipsmodule/aes/asm/aes-586.pl
+++ b/src/crypto/fipsmodule/aes/asm/aes-586.pl
@@ -2997,4 +2997,4 @@
 
 &asm_finish();
 
-close STDOUT or die "error closing STDOUT";
+close STDOUT;
diff --git a/src/crypto/fipsmodule/aes/asm/aes-armv4.pl b/src/crypto/fipsmodule/aes/asm/aes-armv4.pl
index 9eebb22..f4ae922 100644
--- a/src/crypto/fipsmodule/aes/asm/aes-armv4.pl
+++ b/src/crypto/fipsmodule/aes/asm/aes-armv4.pl
@@ -1247,4 +1247,4 @@
 close SELF;
 
 print $code;
-close STDOUT or die "error closing STDOUT";	# enforce flush
+close STDOUT;	# enforce flush
diff --git a/src/crypto/fipsmodule/aes/asm/aes-x86_64.pl b/src/crypto/fipsmodule/aes/asm/aes-x86_64.pl
index 5b95785..ea8b9a4 100755
--- a/src/crypto/fipsmodule/aes/asm/aes-x86_64.pl
+++ b/src/crypto/fipsmodule/aes/asm/aes-x86_64.pl
@@ -2906,4 +2906,4 @@
 
 print $code;
 
-close STDOUT or die "error closing STDOUT";
+close STDOUT;
diff --git a/src/crypto/fipsmodule/aes/asm/aesni-x86.pl b/src/crypto/fipsmodule/aes/asm/aesni-x86.pl
index d57127a..fcb5b98 100644
--- a/src/crypto/fipsmodule/aes/asm/aesni-x86.pl
+++ b/src/crypto/fipsmodule/aes/asm/aesni-x86.pl
@@ -2551,4 +2551,4 @@
 
 &asm_finish();
 
-close STDOUT or die "error closing STDOUT";
+close STDOUT;
diff --git a/src/crypto/fipsmodule/aes/asm/aesni-x86_64.pl b/src/crypto/fipsmodule/aes/asm/aesni-x86_64.pl
index 15f6805..b608425 100644
--- a/src/crypto/fipsmodule/aes/asm/aesni-x86_64.pl
+++ b/src/crypto/fipsmodule/aes/asm/aesni-x86_64.pl
@@ -5108,4 +5108,4 @@
 
 print $code;
 
-close STDOUT or die "error closing STDOUT";
+close STDOUT;
diff --git a/src/crypto/fipsmodule/aes/asm/aesp8-ppc.pl b/src/crypto/fipsmodule/aes/asm/aesp8-ppc.pl
index 62d4842..201da1a 100644
--- a/src/crypto/fipsmodule/aes/asm/aesp8-ppc.pl
+++ b/src/crypto/fipsmodule/aes/asm/aesp8-ppc.pl
@@ -1,5 +1,5 @@
 #! /usr/bin/env perl
-# Copyright 2014-2018 The OpenSSL Project Authors. All Rights Reserved.
+# Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
 #
 # Licensed under the OpenSSL license (the "License").  You may not use
 # this file except in compliance with the License.  You can obtain a copy
@@ -40,8 +40,6 @@
 #		CBC en-/decrypt	CTR	XTS
 # POWER8[le]	3.96/0.72	0.74	1.1
 # POWER8[be]	3.75/0.65	0.66	1.0
-# POWER9[le]	4.02/0.86	0.84	1.05
-# POWER9[be]	3.99/0.78	0.79	0.97
 
 $flavour = shift;
 
@@ -1829,7 +1827,7 @@
 	stvx_u		$out1,$x10,$out
 	stvx_u		$out2,$x20,$out
 	addi		$out,$out,0x30
-	b		Lctr32_enc8x_done
+	b		Lcbc_dec8x_done
 
 .align	5
 Lctr32_enc8x_two:
@@ -1841,7 +1839,7 @@
 	stvx_u		$out0,$x00,$out
 	stvx_u		$out1,$x10,$out
 	addi		$out,$out,0x20
-	b		Lctr32_enc8x_done
+	b		Lcbc_dec8x_done
 
 .align	5
 Lctr32_enc8x_one:
@@ -3804,4 +3802,4 @@
         print $_,"\n";
 }
 
-close STDOUT or die "error closing STDOUT";
+close STDOUT;
diff --git a/src/crypto/fipsmodule/aes/asm/aesv8-armx.pl b/src/crypto/fipsmodule/aes/asm/aesv8-armx.pl
index 187c221..13f86a0 100644
--- a/src/crypto/fipsmodule/aes/asm/aesv8-armx.pl
+++ b/src/crypto/fipsmodule/aes/asm/aesv8-armx.pl
@@ -1018,4 +1018,4 @@
     }
 }
 
-close STDOUT or die "error closing STDOUT";
+close STDOUT;
diff --git a/src/crypto/fipsmodule/aes/asm/bsaes-armv7.pl b/src/crypto/fipsmodule/aes/asm/bsaes-armv7.pl
index 932b3b6..d4db3b4 100644
--- a/src/crypto/fipsmodule/aes/asm/bsaes-armv7.pl
+++ b/src/crypto/fipsmodule/aes/asm/bsaes-armv7.pl
@@ -2431,4 +2431,4 @@
 
 print $code;
 
-close STDOUT or die "error closing STDOUT";
+close STDOUT;
diff --git a/src/crypto/fipsmodule/aes/asm/vpaes-armv8.pl b/src/crypto/fipsmodule/aes/asm/vpaes-armv8.pl
index bae5e7e..5fa06d8 100755
--- a/src/crypto/fipsmodule/aes/asm/vpaes-armv8.pl
+++ b/src/crypto/fipsmodule/aes/asm/vpaes-armv8.pl
@@ -1359,4 +1359,4 @@
 
 print $code;
 
-close STDOUT or die "error closing STDOUT";
+close STDOUT;
diff --git a/src/crypto/fipsmodule/aes/asm/vpaes-x86.pl b/src/crypto/fipsmodule/aes/asm/vpaes-x86.pl
index 2b40362..81e7e84 100644
--- a/src/crypto/fipsmodule/aes/asm/vpaes-x86.pl
+++ b/src/crypto/fipsmodule/aes/asm/vpaes-x86.pl
@@ -920,4 +920,4 @@
 
 &asm_finish();
 
-close STDOUT or die "error closing STDOUT";
+close STDOUT;
diff --git a/src/crypto/fipsmodule/aes/asm/vpaes-x86_64.pl b/src/crypto/fipsmodule/aes/asm/vpaes-x86_64.pl
index 45463e8..9429344 100644
--- a/src/crypto/fipsmodule/aes/asm/vpaes-x86_64.pl
+++ b/src/crypto/fipsmodule/aes/asm/vpaes-x86_64.pl
@@ -1550,4 +1550,4 @@
 
 print $code;
 
-close STDOUT or die "error closing STDOUT";
+close STDOUT;
diff --git a/src/crypto/fipsmodule/aes/key_wrap.c b/src/crypto/fipsmodule/aes/key_wrap.c
index 9a5b28d..a52c983 100644
--- a/src/crypto/fipsmodule/aes/key_wrap.c
+++ b/src/crypto/fipsmodule/aes/key_wrap.c
@@ -48,7 +48,6 @@
 
 #include <openssl/aes.h>
 
-#include <assert.h>
 #include <limits.h>
 #include <string.h>
 
@@ -101,17 +100,18 @@
   return (int)in_len + 8;
 }
 
-// aes_unwrap_key_inner performs steps one and two from
-// https://tools.ietf.org/html/rfc3394#section-2.2.2
-static int aes_unwrap_key_inner(const AES_KEY *key, uint8_t *out,
-                                uint8_t out_iv[8], const uint8_t *in,
-                                size_t in_len) {
+int AES_unwrap_key(const AES_KEY *key, const uint8_t *iv, uint8_t *out,
+                   const uint8_t *in, size_t in_len) {
   // See RFC 3394, section 2.2.2. Additionally, note that section 2 requires the
   // plaintext be at least two 8-byte blocks, so the ciphertext must be at least
   // three blocks.
 
   if (in_len > INT_MAX || in_len < 24 || in_len % 8 != 0) {
-    return 0;
+    return -1;
+  }
+
+  if (iv == NULL) {
+    iv = kDefaultIV;
   }
 
   uint8_t A[AES_BLOCK_SIZE];
@@ -133,104 +133,9 @@
     }
   }
 
-  memcpy(out_iv, A, 8);
-  return 1;
-}
-
-int AES_unwrap_key(const AES_KEY *key, const uint8_t *iv, uint8_t *out,
-                   const uint8_t *in, size_t in_len) {
-  uint8_t calculated_iv[8];
-  if (!aes_unwrap_key_inner(key, out, calculated_iv, in, in_len)) {
-    return -1;
-  }
-
-  if (iv == NULL) {
-    iv = kDefaultIV;
-  }
-  if (CRYPTO_memcmp(calculated_iv, iv, 8) != 0) {
+  if (CRYPTO_memcmp(A, iv, 8) != 0) {
     return -1;
   }
 
   return (int)in_len - 8;
 }
-
-// kPaddingConstant is used in Key Wrap with Padding. See
-// https://tools.ietf.org/html/rfc5649#section-3
-static const uint8_t kPaddingConstant[4] = {0xa6, 0x59, 0x59, 0xa6};
-
-int AES_wrap_key_padded(const AES_KEY *key, uint8_t *out, size_t *out_len,
-                        size_t max_out, const uint8_t *in, size_t in_len) {
-  // See https://tools.ietf.org/html/rfc5649#section-4.1
-  const uint32_t in_len32_be = CRYPTO_bswap4(in_len);
-  const uint64_t in_len64 = in_len;
-  const size_t padded_len = (in_len + 7) & ~7;
-
-  *out_len = 0;
-  if (in_len == 0 || in_len64 > 0xffffffffu || in_len + 7 < in_len ||
-      padded_len + 8 < padded_len || max_out < padded_len + 8) {
-    return 0;
-  }
-
-  uint8_t block[AES_BLOCK_SIZE];
-  memcpy(block, kPaddingConstant, sizeof(kPaddingConstant));
-  memcpy(block + 4, &in_len32_be, sizeof(in_len32_be));
-
-  if (in_len <= 8) {
-    memset(block + 8, 0, 8);
-    memcpy(block + 8, in, in_len);
-    AES_encrypt(block, out, key);
-    *out_len = AES_BLOCK_SIZE;
-    return 1;
-  }
-
-  uint8_t *padded_in = OPENSSL_malloc(padded_len);
-  if (padded_in == NULL) {
-    return 0;
-  }
-  assert(padded_len >= 8);
-  memset(padded_in + padded_len - 8, 0, 8);
-  memcpy(padded_in, in, in_len);
-  const int ret = AES_wrap_key(key, block, out, padded_in, padded_len);
-  OPENSSL_free(padded_in);
-  if (ret < 0) {
-    return 0;
-  }
-  *out_len = ret;
-  return 1;
-}
-
-int AES_unwrap_key_padded(const AES_KEY *key, uint8_t *out, size_t *out_len,
-                          size_t max_out, const uint8_t *in, size_t in_len) {
-  *out_len = 0;
-  if (in_len < AES_BLOCK_SIZE || max_out < in_len - 8) {
-    return 0;
-  }
-
-  uint8_t iv[8];
-  if (in_len == AES_BLOCK_SIZE) {
-    uint8_t block[AES_BLOCK_SIZE];
-    AES_decrypt(in, block, key);
-    memcpy(iv, block, sizeof(iv));
-    memcpy(out, block + 8, 8);
-  } else if (!aes_unwrap_key_inner(key, out, iv, in, in_len)) {
-    return 0;
-  }
-  assert(in_len % 8 == 0);
-
-  crypto_word_t ok = constant_time_eq_int(
-      CRYPTO_memcmp(iv, kPaddingConstant, sizeof(kPaddingConstant)), 0);
-
-  uint32_t claimed_len32;
-  memcpy(&claimed_len32, iv + 4, sizeof(claimed_len32));
-  const size_t claimed_len = CRYPTO_bswap4(claimed_len32);
-  ok &= ~constant_time_is_zero_w(claimed_len);
-  ok &= constant_time_eq_w((claimed_len - 1) >> 3, (in_len - 9) >> 3);
-
-  // Check that padding bytes are all zero.
-  for (size_t i = in_len - 15; i < in_len - 8; i++) {
-    ok &= constant_time_is_zero_w(constant_time_ge_8(i, claimed_len) & out[i]);
-  }
-
-  *out_len = constant_time_select_w(ok, claimed_len, 0);
-  return ok & 1;
-}
diff --git a/src/crypto/fipsmodule/bcm.c b/src/crypto/fipsmodule/bcm.c
index 7666222..e15ecb8 100644
--- a/src/crypto/fipsmodule/bcm.c
+++ b/src/crypto/fipsmodule/bcm.c
@@ -76,6 +76,7 @@
 #include "md4/md4.c"
 #include "md5/md5.c"
 #include "modes/cbc.c"
+#include "modes/ccm.c"
 #include "modes/cfb.c"
 #include "modes/ctr.c"
 #include "modes/gcm.c"
@@ -99,16 +100,11 @@
 #if defined(BORINGSSL_FIPS)
 
 #if !defined(OPENSSL_ASAN)
-// These symbols are filled in by delocate.go (in static builds) or a linker
-// script (in shared builds). They point to the start and end of the module, and
-// the location of the integrity hash, respectively.
+// These symbols are filled in by delocate.go. They point to the start and end
+// of the module, and the location of the integrity hash, respectively.
 extern const uint8_t BORINGSSL_bcm_text_start[];
 extern const uint8_t BORINGSSL_bcm_text_end[];
 extern const uint8_t BORINGSSL_bcm_text_hash[];
-#if defined(BORINGSSL_SHARED_LIBRARY)
-extern const uint8_t BORINGSSL_bcm_rodata_start[];
-extern const uint8_t BORINGSSL_bcm_rodata_end[];
-#endif
 #endif
 
 static void __attribute__((constructor))
@@ -120,39 +116,17 @@
   // .text section, which triggers the global-buffer overflow detection.
   const uint8_t *const start = BORINGSSL_bcm_text_start;
   const uint8_t *const end = BORINGSSL_bcm_text_end;
-#if defined(BORINGSSL_SHARED_LIBRARY)
-  const uint8_t *const rodata_start = BORINGSSL_bcm_rodata_start;
-  const uint8_t *const rodata_end = BORINGSSL_bcm_rodata_end;
-#endif
 
   static const uint8_t kHMACKey[64] = {0};
   uint8_t result[SHA512_DIGEST_LENGTH];
 
   unsigned result_len;
-  HMAC_CTX hmac_ctx;
-  HMAC_CTX_init(&hmac_ctx);
-  if (!HMAC_Init_ex(&hmac_ctx, kHMACKey, sizeof(kHMACKey), EVP_sha512(),
-                    NULL /* no ENGINE */)) {
-    fprintf(stderr, "HMAC_Init_ex failed.\n");
-    goto err;
-  }
-#if defined(BORINGSSL_SHARED_LIBRARY)
-  uint64_t length = end - start;
-  HMAC_Update(&hmac_ctx, (const uint8_t *) &length, sizeof(length));
-  HMAC_Update(&hmac_ctx, start, length);
-
-  length = rodata_end - rodata_start;
-  HMAC_Update(&hmac_ctx, (const uint8_t *) &length, sizeof(length));
-  HMAC_Update(&hmac_ctx, rodata_start, length);
-#else
-  HMAC_Update(&hmac_ctx, start, end - start);
-#endif
-  if (!HMAC_Final(&hmac_ctx, result, &result_len) ||
+  if (!HMAC(EVP_sha512(), kHMACKey, sizeof(kHMACKey), start, end - start,
+            result, &result_len) ||
       result_len != sizeof(result)) {
     fprintf(stderr, "HMAC failed.\n");
     goto err;
   }
-  HMAC_CTX_cleanup(&hmac_ctx);
 
   const uint8_t *expected = BORINGSSL_bcm_text_hash;
 
diff --git a/src/crypto/fipsmodule/bn/asm/armv4-mont.pl b/src/crypto/fipsmodule/bn/asm/armv4-mont.pl
index f3aa4be..2ee389e 100644
--- a/src/crypto/fipsmodule/bn/asm/armv4-mont.pl
+++ b/src/crypto/fipsmodule/bn/asm/armv4-mont.pl
@@ -759,4 +759,4 @@
 	print $_,"\n";
 }
 
-close STDOUT or die "error closing STDOUT";
+close STDOUT;
diff --git a/src/crypto/fipsmodule/bn/asm/armv8-mont.pl b/src/crypto/fipsmodule/bn/asm/armv8-mont.pl
index db2ba49..aab9eaa 100644
--- a/src/crypto/fipsmodule/bn/asm/armv8-mont.pl
+++ b/src/crypto/fipsmodule/bn/asm/armv8-mont.pl
@@ -1507,4 +1507,4 @@
 
 print $code;
 
-close STDOUT or die "error closing STDOUT";
+close STDOUT;
diff --git a/src/crypto/fipsmodule/bn/asm/bn-586.pl b/src/crypto/fipsmodule/bn/asm/bn-586.pl
index 05ef28c..16818d5 100644
--- a/src/crypto/fipsmodule/bn/asm/bn-586.pl
+++ b/src/crypto/fipsmodule/bn/asm/bn-586.pl
@@ -31,7 +31,7 @@
 
 &asm_finish();
 
-close STDOUT or die "error closing STDOUT";
+close STDOUT;
 
 sub bn_mul_add_words
 	{
diff --git a/src/crypto/fipsmodule/bn/asm/co-586.pl b/src/crypto/fipsmodule/bn/asm/co-586.pl
index abe328a..5eeeef9 100644
--- a/src/crypto/fipsmodule/bn/asm/co-586.pl
+++ b/src/crypto/fipsmodule/bn/asm/co-586.pl
@@ -22,7 +22,7 @@
 
 &asm_finish();
 
-close STDOUT or die "error closing STDOUT";
+close STDOUT;
 
 sub mul_add_c
 	{
diff --git a/src/crypto/fipsmodule/bn/asm/rsaz-avx2.pl b/src/crypto/fipsmodule/bn/asm/rsaz-avx2.pl
index a0da239..51feb69 100755
--- a/src/crypto/fipsmodule/bn/asm/rsaz-avx2.pl
+++ b/src/crypto/fipsmodule/bn/asm/rsaz-avx2.pl
@@ -1940,4 +1940,4 @@
 ___
 }}}
 
-close STDOUT or die "error closing STDOUT";
+close STDOUT;
diff --git a/src/crypto/fipsmodule/bn/asm/x86-mont.pl b/src/crypto/fipsmodule/bn/asm/x86-mont.pl
index 1f61ae5..214f2b0 100755
--- a/src/crypto/fipsmodule/bn/asm/x86-mont.pl
+++ b/src/crypto/fipsmodule/bn/asm/x86-mont.pl
@@ -628,4 +628,4 @@
 
 &asm_finish();
 
-close STDOUT or die "error closing STDOUT";
+close STDOUT;
diff --git a/src/crypto/fipsmodule/bn/asm/x86_64-mont.pl b/src/crypto/fipsmodule/bn/asm/x86_64-mont.pl
index 0a9e4d1..3d98e72 100755
--- a/src/crypto/fipsmodule/bn/asm/x86_64-mont.pl
+++ b/src/crypto/fipsmodule/bn/asm/x86_64-mont.pl
@@ -1578,4 +1578,4 @@
 }
 
 print $code;
-close STDOUT or die "error closing STDOUT";
+close STDOUT;
diff --git a/src/crypto/fipsmodule/bn/asm/x86_64-mont5.pl b/src/crypto/fipsmodule/bn/asm/x86_64-mont5.pl
index b2ff114..abcfe6a 100755
--- a/src/crypto/fipsmodule/bn/asm/x86_64-mont5.pl
+++ b/src/crypto/fipsmodule/bn/asm/x86_64-mont5.pl
@@ -3930,4 +3930,4 @@
 $code =~ s/\`([^\`]*)\`/eval($1)/gem;
 
 print $code;
-close STDOUT or die "error closing STDOUT";
+close STDOUT;
diff --git a/src/crypto/fipsmodule/delocate.h b/src/crypto/fipsmodule/delocate.h
index d6564e4..59effde 100644
--- a/src/crypto/fipsmodule/delocate.h
+++ b/src/crypto/fipsmodule/delocate.h
@@ -20,8 +20,7 @@
 #include "../internal.h"
 
 
-#if !defined(BORINGSSL_SHARED_LIBRARY) && defined(BORINGSSL_FIPS) && \
-    !defined(OPENSSL_ASAN) && !defined(OPENSSL_MSAN)
+#if defined(BORINGSSL_FIPS) && !defined(OPENSSL_ASAN) && !defined(OPENSSL_MSAN)
 #define DEFINE_BSS_GET(type, name)        \
   static type name __attribute__((used)); \
   type *name##_bss_get(void) __attribute__((const));
diff --git a/src/crypto/fipsmodule/digest/digest.c b/src/crypto/fipsmodule/digest/digest.c
index 68e81c4..6705867 100644
--- a/src/crypto/fipsmodule/digest/digest.c
+++ b/src/crypto/fipsmodule/digest/digest.c
@@ -115,11 +115,6 @@
 
 void EVP_MD_CTX_destroy(EVP_MD_CTX *ctx) { EVP_MD_CTX_free(ctx); }
 
-int EVP_DigestFinalXOF(EVP_MD_CTX *ctx, uint8_t *out, size_t len) {
-  OPENSSL_PUT_ERROR(DIGEST, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
-  return 0;
-}
-
 int EVP_MD_CTX_copy_ex(EVP_MD_CTX *out, const EVP_MD_CTX *in) {
   // |in->digest| may be NULL if this is a signing |EVP_MD_CTX| for, e.g.,
   // Ed25519 which does not hash with |EVP_MD_CTX|.
diff --git a/src/crypto/fipsmodule/ec/asm/p256-x86_64-asm.pl b/src/crypto/fipsmodule/ec/asm/p256-x86_64-asm.pl
index 994cb82..5402885 100755
--- a/src/crypto/fipsmodule/ec/asm/p256-x86_64-asm.pl
+++ b/src/crypto/fipsmodule/ec/asm/p256-x86_64-asm.pl
@@ -3112,24 +3112,17 @@
 
 	or	$acc5, $acc4			# see if result is zero
 	or	$acc0, $acc4
-	or	$acc1, $acc4			# !is_equal(U1, U2)
+	or	$acc1, $acc4
 
+	.byte	0x3e				# predict taken
+	jnz	.Ladd_proceed$x			# is_equal(U1,U2)?
 	movq	%xmm2, $acc0
 	movq	%xmm3, $acc1
-	or	$acc0, $acc4
-	.byte	0x3e				# predict taken
-	jnz	.Ladd_proceed$x			# !is_equal(U1, U2) || in1infty || in2infty
-
-	# We now know A = B or A = -B and neither is infinity. Compare the
-	# y-coordinates via S1 and S2.
+	test	$acc0, $acc0
+	jnz	.Ladd_proceed$x			# (in1infty || in2infty)?
 	test	$acc1, $acc1
-	jz	.Ladd_double$x			# is_equal(S1, S2)
+	jz	.Ladd_double$x			# is_equal(S1,S2)?
 
-	# A = -B, so the result is infinity.
-	#
-	# TODO(davidben): Does .Ladd_proceed handle this case? It seems to, in
-	# which case we should eliminate this special-case and simplify the
-	# timing analysis.
 	movq	%xmm0, $r_ptr			# restore $r_ptr
 	pxor	%xmm0, %xmm0
 	movdqu	%xmm0, 0x00($r_ptr)
@@ -4153,4 +4146,4 @@
 
 $code =~ s/\`([^\`]*)\`/eval $1/gem;
 print $code;
-close STDOUT or die "error closing STDOUT";
+close STDOUT;
diff --git a/src/crypto/fipsmodule/ec/asm/p256_beeu-x86_64-asm.pl b/src/crypto/fipsmodule/ec/asm/p256_beeu-x86_64-asm.pl
index c05abba..0bb6547 100644
--- a/src/crypto/fipsmodule/ec/asm/p256_beeu-x86_64-asm.pl
+++ b/src/crypto/fipsmodule/ec/asm/p256_beeu-x86_64-asm.pl
@@ -400,4 +400,4 @@
 ___
 
 print $code;
-close STDOUT or die "error closing STDOUT";
+close STDOUT;
diff --git a/src/crypto/fipsmodule/ec/ec.c b/src/crypto/fipsmodule/ec/ec.c
index 158d66c..a0305a6 100644
--- a/src/crypto/fipsmodule/ec/ec.c
+++ b/src/crypto/fipsmodule/ec/ec.c
@@ -892,6 +892,8 @@
   }
 
   int ret = 0;
+  EC_SCALAR g_scalar_storage, p_scalar_storage;
+  EC_SCALAR *g_scalar_arg = NULL, *p_scalar_arg = NULL;
   BN_CTX *new_ctx = NULL;
   if (ctx == NULL) {
     new_ctx = BN_CTX_new();
@@ -901,50 +903,35 @@
     ctx = new_ctx;
   }
 
-  // If both |g_scalar| and |p_scalar| are non-NULL,
-  // |ec_point_mul_scalar_public| would share the doublings between the two
-  // products, which would be more efficient. However, we conservatively assume
-  // the caller needs a constant-time operation. (ECDSA verification does not
-  // use this function.)
-  //
-  // Previously, the low-level constant-time multiplication function aligned
-  // with this function's calling convention, but this was misleading. Curves
-  // which combined the two multiplications did not avoid the doubling case
-  // in the incomplete addition formula and were not constant-time.
-
   if (g_scalar != NULL) {
-    EC_SCALAR scalar;
-    if (!arbitrary_bignum_to_scalar(group, &scalar, g_scalar, ctx) ||
-        !ec_point_mul_scalar_base(group, &r->raw, &scalar)) {
+    if (!arbitrary_bignum_to_scalar(group, &g_scalar_storage, g_scalar, ctx)) {
       goto err;
     }
+    g_scalar_arg = &g_scalar_storage;
   }
 
   if (p_scalar != NULL) {
-    EC_SCALAR scalar;
-    EC_RAW_POINT tmp;
-    if (!arbitrary_bignum_to_scalar(group, &scalar, p_scalar, ctx) ||
-        !ec_point_mul_scalar(group, &tmp, &p->raw, &scalar)) {
+    if (!arbitrary_bignum_to_scalar(group, &p_scalar_storage, p_scalar, ctx)) {
       goto err;
     }
-    if (g_scalar == NULL) {
-      OPENSSL_memcpy(&r->raw, &tmp, sizeof(EC_RAW_POINT));
-    } else {
-      group->meth->add(group, &r->raw, &r->raw, &tmp);
-    }
+    p_scalar_arg = &p_scalar_storage;
   }
 
-  ret = 1;
+  ret = ec_point_mul_scalar(group, &r->raw, g_scalar_arg,
+                            p == NULL ? NULL : &p->raw, p_scalar_arg);
 
 err:
   BN_CTX_free(new_ctx);
+  OPENSSL_cleanse(&g_scalar_storage, sizeof(g_scalar_storage));
+  OPENSSL_cleanse(&p_scalar_storage, sizeof(p_scalar_storage));
   return ret;
 }
 
 int ec_point_mul_scalar_public(const EC_GROUP *group, EC_RAW_POINT *r,
                                const EC_SCALAR *g_scalar, const EC_RAW_POINT *p,
                                const EC_SCALAR *p_scalar) {
-  if (g_scalar == NULL || p_scalar == NULL || p == NULL) {
+  if ((g_scalar == NULL && p_scalar == NULL) ||
+      (p == NULL) != (p_scalar == NULL)) {
     OPENSSL_PUT_ERROR(EC, ERR_R_PASSED_NULL_PARAMETER);
     return 0;
   }
@@ -954,24 +941,15 @@
 }
 
 int ec_point_mul_scalar(const EC_GROUP *group, EC_RAW_POINT *r,
-                        const EC_RAW_POINT *p, const EC_SCALAR *scalar) {
-  if (p == NULL || scalar == NULL) {
+                        const EC_SCALAR *g_scalar, const EC_RAW_POINT *p,
+                        const EC_SCALAR *p_scalar) {
+  if ((g_scalar == NULL && p_scalar == NULL) ||
+      (p == NULL) != (p_scalar == NULL)) {
     OPENSSL_PUT_ERROR(EC, ERR_R_PASSED_NULL_PARAMETER);
     return 0;
   }
 
-  group->meth->mul(group, r, p, scalar);
-  return 1;
-}
-
-int ec_point_mul_scalar_base(const EC_GROUP *group, EC_RAW_POINT *r,
-                             const EC_SCALAR *scalar) {
-  if (scalar == NULL) {
-    OPENSSL_PUT_ERROR(EC, ERR_R_PASSED_NULL_PARAMETER);
-    return 0;
-  }
-
-  group->meth->mul_base(group, r, scalar);
+  group->meth->mul(group, r, g_scalar, p, p_scalar);
   return 1;
 }
 
diff --git a/src/crypto/fipsmodule/ec/ec_key.c b/src/crypto/fipsmodule/ec/ec_key.c
index 3851c19..3ef17d9 100644
--- a/src/crypto/fipsmodule/ec/ec_key.c
+++ b/src/crypto/fipsmodule/ec/ec_key.c
@@ -322,8 +322,8 @@
   if (eckey->priv_key != NULL) {
     point = EC_POINT_new(eckey->group);
     if (point == NULL ||
-        !ec_point_mul_scalar_base(eckey->group, &point->raw,
-                                  &eckey->priv_key->scalar)) {
+        !ec_point_mul_scalar(eckey->group, &point->raw,
+                             &eckey->priv_key->scalar, NULL, NULL)) {
       OPENSSL_PUT_ERROR(EC, ERR_R_EC_LIB);
       goto err;
     }
@@ -440,7 +440,8 @@
       // Generate the private key by testing candidates (FIPS 186-4 B.4.2).
       !ec_random_nonzero_scalar(key->group, &priv_key->scalar,
                                 kDefaultAdditionalData) ||
-      !ec_point_mul_scalar_base(key->group, &pub_key->raw, &priv_key->scalar)) {
+      !ec_point_mul_scalar(key->group, &pub_key->raw, &priv_key->scalar, NULL,
+                           NULL)) {
     EC_POINT_free(pub_key);
     ec_wrapped_scalar_free(priv_key);
     return 0;
diff --git a/src/crypto/fipsmodule/ec/ec_montgomery.c b/src/crypto/fipsmodule/ec/ec_montgomery.c
index 0cf1d91..caa1966 100644
--- a/src/crypto/fipsmodule/ec/ec_montgomery.c
+++ b/src/crypto/fipsmodule/ec/ec_montgomery.c
@@ -282,8 +282,7 @@
   BN_ULONG yneq = ec_felem_non_zero_mask(group, &r);
 
   // This case will never occur in the constant-time |ec_GFp_mont_mul|.
-  BN_ULONG is_nontrivial_double = ~xneq & ~yneq & z1nz & z2nz;
-  if (is_nontrivial_double) {
+  if (!xneq && !yneq && z1nz && z2nz) {
     ec_GFp_mont_dbl(group, out, a);
     return;
   }
@@ -471,7 +470,6 @@
   out->add = ec_GFp_mont_add;
   out->dbl = ec_GFp_mont_dbl;
   out->mul = ec_GFp_mont_mul;
-  out->mul_base = ec_GFp_mont_mul_base;
   out->mul_public = ec_GFp_mont_mul_public;
   out->felem_mul = ec_GFp_mont_felem_mul;
   out->felem_sqr = ec_GFp_mont_felem_sqr;
diff --git a/src/crypto/fipsmodule/ec/ec_test.cc b/src/crypto/fipsmodule/ec/ec_test.cc
index c0ad61f..1219e2b 100644
--- a/src/crypto/fipsmodule/ec/ec_test.cc
+++ b/src/crypto/fipsmodule/ec/ec_test.cc
@@ -764,15 +764,7 @@
   ASSERT_TRUE(BN_set_word(bn32.get(), 32));
   ASSERT_TRUE(EC_POINT_mul(group(), ret.get(), bn32.get(), p.get(), bn31.get(),
                            nullptr));
-  EXPECT_EQ(0, EC_POINT_cmp(group(), ret.get(), g, nullptr));
 
-  // Repeat the computation with |ec_point_mul_scalar_public|, which ties the
-  // additions together.
-  EC_SCALAR sc31, sc32;
-  ASSERT_TRUE(ec_bignum_to_scalar(group(), &sc31, bn31.get()));
-  ASSERT_TRUE(ec_bignum_to_scalar(group(), &sc32, bn32.get()));
-  ASSERT_TRUE(
-      ec_point_mul_scalar_public(group(), &ret->raw, &sc32, &p->raw, &sc31));
   EXPECT_EQ(0, EC_POINT_cmp(group(), ret.get(), g, nullptr));
 }
 
diff --git a/src/crypto/fipsmodule/ec/internal.h b/src/crypto/fipsmodule/ec/internal.h
index 7934c3a..05175a5 100644
--- a/src/crypto/fipsmodule/ec/internal.h
+++ b/src/crypto/fipsmodule/ec/internal.h
@@ -140,15 +140,16 @@
   // dbl sets |r| to |a| + |a|.
   void (*dbl)(const EC_GROUP *group, EC_RAW_POINT *r, const EC_RAW_POINT *a);
 
-  // mul sets |r| to |scalar|*|p|.
-  void (*mul)(const EC_GROUP *group, EC_RAW_POINT *r, const EC_RAW_POINT *p,
-              const EC_SCALAR *scalar);
-  // mul_base sets |r| to |scalar|*generator.
-  void (*mul_base)(const EC_GROUP *group, EC_RAW_POINT *r,
-                   const EC_SCALAR *scalar);
-  // mul_public sets |r| to |g_scalar|*generator + |p_scalar|*|p|. It assumes
-  // that the inputs are public so there is no concern about leaking their
-  // values through timing.
+  // Computes |r = g_scalar*generator + p_scalar*p| if |g_scalar| and |p_scalar|
+  // are both non-null. Computes |r = g_scalar*generator| if |p_scalar| is null.
+  // Computes |r = p_scalar*p| if g_scalar is null. At least one of |g_scalar|
+  // and |p_scalar| must be non-null, and |p| must be non-null if |p_scalar| is
+  // non-null.
+  void (*mul)(const EC_GROUP *group, EC_RAW_POINT *r, const EC_SCALAR *g_scalar,
+              const EC_RAW_POINT *p, const EC_SCALAR *p_scalar);
+  // mul_public performs the same computation as mul. It further assumes that
+  // the inputs are public so there is no concern about leaking their values
+  // through timing.
   void (*mul_public)(const EC_GROUP *group, EC_RAW_POINT *r,
                      const EC_SCALAR *g_scalar, const EC_RAW_POINT *p,
                      const EC_SCALAR *p_scalar);
@@ -324,15 +325,13 @@
 int ec_scalar_inv_montgomery_vartime(const EC_GROUP *group, EC_SCALAR *r,
                                      const EC_SCALAR *a);
 
-// ec_point_mul_scalar sets |r| to |p| * |scalar|. Both inputs are considered
-// secret.
+// ec_point_mul_scalar sets |r| to generator * |g_scalar| + |p| *
+// |p_scalar|. Unlike other functions which take |EC_SCALAR|, |g_scalar| and
+// |p_scalar| need not be fully reduced. They need only contain as many bits as
+// the order.
 int ec_point_mul_scalar(const EC_GROUP *group, EC_RAW_POINT *r,
-                        const EC_RAW_POINT *p, const EC_SCALAR *scalar);
-
-// ec_point_mul_scalar_base sets |r| to generator * |scalar|. |scalar| is
-// treated as secret.
-int ec_point_mul_scalar_base(const EC_GROUP *group, EC_RAW_POINT *r,
-                             const EC_SCALAR *scalar);
+                        const EC_SCALAR *g_scalar, const EC_RAW_POINT *p,
+                        const EC_SCALAR *p_scalar);
 
 // ec_point_mul_scalar_public performs the same computation as
 // ec_point_mul_scalar.  It further assumes that the inputs are public so
@@ -371,9 +370,8 @@
 int ec_field_element_to_scalar(const EC_GROUP *group, BIGNUM *r);
 
 void ec_GFp_mont_mul(const EC_GROUP *group, EC_RAW_POINT *r,
-                     const EC_RAW_POINT *p, const EC_SCALAR *scalar);
-void ec_GFp_mont_mul_base(const EC_GROUP *group, EC_RAW_POINT *r,
-                          const EC_SCALAR *scalar);
+                     const EC_SCALAR *g_scalar, const EC_RAW_POINT *p,
+                     const EC_SCALAR *p_scalar);
 
 // ec_compute_wNAF writes the modified width-(w+1) Non-Adjacent Form (wNAF) of
 // |scalar| to |out|. |out| must have room for |bits| + 1 elements, each of
diff --git a/src/crypto/fipsmodule/ec/p224-64.c b/src/crypto/fipsmodule/ec/p224-64.c
index f8af39b..2749686 100644
--- a/src/crypto/fipsmodule/ec/p224-64.c
+++ b/src/crypto/fipsmodule/ec/p224-64.c
@@ -758,9 +758,7 @@
   z1_is_zero = p224_felem_is_zero(z1);
   z2_is_zero = p224_felem_is_zero(z2);
   // In affine coordinates, (X_1, Y_1) == (X_2, Y_2)
-  p224_limb is_nontrivial_double =
-      x_equal & y_equal & (1 - z1_is_zero) & (1 - z2_is_zero);
-  if (is_nontrivial_double) {
+  if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) {
     p224_point_double(x3, y3, z3, x1, y1, z1);
     return;
   }
@@ -873,6 +871,95 @@
   return (in[i >> 3] >> (i & 7)) & 1;
 }
 
+// Interleaved point multiplication using precomputed point multiples:
+// The small point multiples 0*P, 1*P, ..., 16*P are in p_pre_comp, the scalars
+// in p_scalar, if non-NULL. If g_scalar is non-NULL, we also add this multiple
+// of the generator, using certain (large) precomputed multiples in
+// g_p224_pre_comp. Output point (X, Y, Z) is stored in x_out, y_out, z_out
+static void p224_batch_mul(p224_felem x_out, p224_felem y_out, p224_felem z_out,
+                           const uint8_t *p_scalar, const uint8_t *g_scalar,
+                           const p224_felem p_pre_comp[17][3]) {
+  p224_felem nq[3], tmp[4];
+  uint64_t bits;
+  uint8_t sign, digit;
+
+  // set nq to the point at infinity
+  OPENSSL_memset(nq, 0, 3 * sizeof(p224_felem));
+
+  // Loop over both scalars msb-to-lsb, interleaving additions of multiples of
+  // the generator (two in each of the last 28 rounds) and additions of p (every
+  // 5th round).
+  int skip = 1;  // save two point operations in the first round
+  size_t i = p_scalar != NULL ? 220 : 27;
+  for (;;) {
+    // double
+    if (!skip) {
+      p224_point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
+    }
+
+    // add multiples of the generator
+    if (g_scalar != NULL && i <= 27) {
+      // first, look 28 bits upwards
+      bits = p224_get_bit(g_scalar, i + 196) << 3;
+      bits |= p224_get_bit(g_scalar, i + 140) << 2;
+      bits |= p224_get_bit(g_scalar, i + 84) << 1;
+      bits |= p224_get_bit(g_scalar, i + 28);
+      // select the point to add, in constant time
+      p224_select_point(bits, 16, g_p224_pre_comp[1], tmp);
+
+      if (!skip) {
+        p224_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 1 /* mixed */,
+                       tmp[0], tmp[1], tmp[2]);
+      } else {
+        OPENSSL_memcpy(nq, tmp, 3 * sizeof(p224_felem));
+        skip = 0;
+      }
+
+      // second, look at the current position
+      bits = p224_get_bit(g_scalar, i + 168) << 3;
+      bits |= p224_get_bit(g_scalar, i + 112) << 2;
+      bits |= p224_get_bit(g_scalar, i + 56) << 1;
+      bits |= p224_get_bit(g_scalar, i);
+      // select the point to add, in constant time
+      p224_select_point(bits, 16, g_p224_pre_comp[0], tmp);
+      p224_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 1 /* mixed */,
+                     tmp[0], tmp[1], tmp[2]);
+    }
+
+    // do other additions every 5 doublings
+    if (p_scalar != NULL && i % 5 == 0) {
+      bits = p224_get_bit(p_scalar, i + 4) << 5;
+      bits |= p224_get_bit(p_scalar, i + 3) << 4;
+      bits |= p224_get_bit(p_scalar, i + 2) << 3;
+      bits |= p224_get_bit(p_scalar, i + 1) << 2;
+      bits |= p224_get_bit(p_scalar, i) << 1;
+      bits |= p224_get_bit(p_scalar, i - 1);
+      ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
+
+      // select the point to add or subtract
+      p224_select_point(digit, 17, p_pre_comp, tmp);
+      p224_felem_neg(tmp[3], tmp[1]);  // (X, -Y, Z) is the negative point
+      p224_copy_conditional(tmp[1], tmp[3], sign);
+
+      if (!skip) {
+        p224_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 0 /* mixed */,
+                       tmp[0], tmp[1], tmp[2]);
+      } else {
+        OPENSSL_memcpy(nq, tmp, 3 * sizeof(p224_felem));
+        skip = 0;
+      }
+    }
+
+    if (i == 0) {
+      break;
+    }
+    --i;
+  }
+  p224_felem_assign(x_out, nq[0]);
+  p224_felem_assign(y_out, nq[1]);
+  p224_felem_assign(z_out, nq[2]);
+}
+
 // Takes the Jacobian coordinates (X, Y, Z) of a point and returns
 // (X', Y') = (X/Z^2, Y/Z^3)
 static int ec_GFp_nistp224_point_get_affine_coordinates(
@@ -940,197 +1027,49 @@
   p224_felem_to_generic(&r->Z, z);
 }
 
-static void ec_GFp_nistp224_make_precomp(p224_felem out[17][3],
-                                         const EC_RAW_POINT *p) {
-  OPENSSL_memset(out[0], 0, sizeof(p224_felem) * 3);
-
-  p224_generic_to_felem(out[1][0], &p->X);
-  p224_generic_to_felem(out[1][1], &p->Y);
-  p224_generic_to_felem(out[1][2], &p->Z);
-
-  for (size_t j = 2; j <= 16; ++j) {
-    if (j & 1) {
-      p224_point_add(out[j][0], out[j][1], out[j][2], out[1][0], out[1][1],
-                     out[1][2], 0, out[j - 1][0], out[j - 1][1], out[j - 1][2]);
-    } else {
-      p224_point_double(out[j][0], out[j][1], out[j][2], out[j / 2][0],
-                        out[j / 2][1], out[j / 2][2]);
-    }
-  }
-}
-
-static void ec_GFp_nistp224_point_mul(const EC_GROUP *group, EC_RAW_POINT *r,
-                                      const EC_RAW_POINT *p,
-                                      const EC_SCALAR *scalar) {
+static void ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_RAW_POINT *r,
+                                       const EC_SCALAR *g_scalar,
+                                       const EC_RAW_POINT *p,
+                                       const EC_SCALAR *p_scalar) {
   p224_felem p_pre_comp[17][3];
-  ec_GFp_nistp224_make_precomp(p_pre_comp, p);
+  p224_felem x_out, y_out, z_out;
 
-  // Set nq to the point at infinity.
-  p224_felem nq[3], tmp[4];
-  OPENSSL_memset(nq, 0, 3 * sizeof(p224_felem));
+  if (p != NULL && p_scalar != NULL) {
+    // We treat NULL scalars as 0, and NULL points as points at infinity, i.e.,
+    // they contribute nothing to the linear combination.
+    OPENSSL_memset(&p_pre_comp, 0, sizeof(p_pre_comp));
+    // precompute multiples
+    p224_generic_to_felem(x_out, &p->X);
+    p224_generic_to_felem(y_out, &p->Y);
+    p224_generic_to_felem(z_out, &p->Z);
 
-  int skip = 1;  // Save two point operations in the first round.
-  for (size_t i = 220; i < 221; i--) {
-    if (!skip) {
-      p224_point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
-    }
+    p224_felem_assign(p_pre_comp[1][0], x_out);
+    p224_felem_assign(p_pre_comp[1][1], y_out);
+    p224_felem_assign(p_pre_comp[1][2], z_out);
 
-    // Add every 5 doublings.
-    if (i % 5 == 0) {
-      uint64_t bits = p224_get_bit(scalar->bytes, i + 4) << 5;
-      bits |= p224_get_bit(scalar->bytes, i + 3) << 4;
-      bits |= p224_get_bit(scalar->bytes, i + 2) << 3;
-      bits |= p224_get_bit(scalar->bytes, i + 1) << 2;
-      bits |= p224_get_bit(scalar->bytes, i) << 1;
-      bits |= p224_get_bit(scalar->bytes, i - 1);
-      uint8_t sign, digit;
-      ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
-
-      // Select the point to add or subtract.
-      p224_select_point(digit, 17, (const p224_felem(*)[3])p_pre_comp, tmp);
-      p224_felem_neg(tmp[3], tmp[1]);  // (X, -Y, Z) is the negative point
-      p224_copy_conditional(tmp[1], tmp[3], sign);
-
-      if (!skip) {
-        p224_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 0 /* mixed */,
-                       tmp[0], tmp[1], tmp[2]);
+    for (size_t j = 2; j <= 16; ++j) {
+      if (j & 1) {
+        p224_point_add(p_pre_comp[j][0], p_pre_comp[j][1], p_pre_comp[j][2],
+                       p_pre_comp[1][0], p_pre_comp[1][1], p_pre_comp[1][2], 0,
+                       p_pre_comp[j - 1][0], p_pre_comp[j - 1][1],
+                       p_pre_comp[j - 1][2]);
       } else {
-        OPENSSL_memcpy(nq, tmp, 3 * sizeof(p224_felem));
-        skip = 0;
+        p224_point_double(p_pre_comp[j][0], p_pre_comp[j][1], p_pre_comp[j][2],
+                          p_pre_comp[j / 2][0], p_pre_comp[j / 2][1],
+                          p_pre_comp[j / 2][2]);
       }
     }
   }
 
-  // Reduce the output to its unique minimal representation.
-  p224_felem_to_generic(&r->X, nq[0]);
-  p224_felem_to_generic(&r->Y, nq[1]);
-  p224_felem_to_generic(&r->Z, nq[2]);
-}
+  p224_batch_mul(x_out, y_out, z_out,
+                 (p != NULL && p_scalar != NULL) ? p_scalar->bytes : NULL,
+                 g_scalar != NULL ? g_scalar->bytes : NULL,
+                 (const p224_felem(*)[3])p_pre_comp);
 
-static void ec_GFp_nistp224_point_mul_base(const EC_GROUP *group,
-                                           EC_RAW_POINT *r,
-                                           const EC_SCALAR *scalar) {
-  // Set nq to the point at infinity.
-  p224_felem nq[3], tmp[3];
-  OPENSSL_memset(nq, 0, 3 * sizeof(p224_felem));
-
-  int skip = 1;  // Save two point operations in the first round.
-  for (size_t i = 27; i < 28; i--) {
-    // double
-    if (!skip) {
-      p224_point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
-    }
-
-    // First, look 28 bits upwards.
-    uint64_t bits = p224_get_bit(scalar->bytes, i + 196) << 3;
-    bits |= p224_get_bit(scalar->bytes, i + 140) << 2;
-    bits |= p224_get_bit(scalar->bytes, i + 84) << 1;
-    bits |= p224_get_bit(scalar->bytes, i + 28);
-    // Select the point to add, in constant time.
-    p224_select_point(bits, 16, g_p224_pre_comp[1], tmp);
-
-    if (!skip) {
-      p224_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 1 /* mixed */,
-                     tmp[0], tmp[1], tmp[2]);
-    } else {
-      OPENSSL_memcpy(nq, tmp, 3 * sizeof(p224_felem));
-      skip = 0;
-    }
-
-    // Second, look at the current position/
-    bits = p224_get_bit(scalar->bytes, i + 168) << 3;
-    bits |= p224_get_bit(scalar->bytes, i + 112) << 2;
-    bits |= p224_get_bit(scalar->bytes, i + 56) << 1;
-    bits |= p224_get_bit(scalar->bytes, i);
-    // Select the point to add, in constant time.
-    p224_select_point(bits, 16, g_p224_pre_comp[0], tmp);
-    p224_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 1 /* mixed */,
-                   tmp[0], tmp[1], tmp[2]);
-  }
-
-  // Reduce the output to its unique minimal representation.
-  p224_felem_to_generic(&r->X, nq[0]);
-  p224_felem_to_generic(&r->Y, nq[1]);
-  p224_felem_to_generic(&r->Z, nq[2]);
-}
-
-static void ec_GFp_nistp224_point_mul_public(const EC_GROUP *group,
-                                             EC_RAW_POINT *r,
-                                             const EC_SCALAR *g_scalar,
-                                             const EC_RAW_POINT *p,
-                                             const EC_SCALAR *p_scalar) {
-  // TODO(davidben): If P-224 ECDSA verify performance ever matters, using
-  // |ec_compute_wNAF| for |p_scalar| would likely be an easy improvement.
-  p224_felem p_pre_comp[17][3];
-  ec_GFp_nistp224_make_precomp(p_pre_comp, p);
-
-  // Set nq to the point at infinity.
-  p224_felem nq[3], tmp[3];
-  OPENSSL_memset(nq, 0, 3 * sizeof(p224_felem));
-
-  // Loop over both scalars msb-to-lsb, interleaving additions of multiples of
-  // the generator (two in each of the last 28 rounds) and additions of p (every
-  // 5th round).
-  int skip = 1;  // Save two point operations in the first round.
-  for (size_t i = 220; i < 221; i--) {
-    if (!skip) {
-      p224_point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
-    }
-
-    // Add multiples of the generator.
-    if (i <= 27) {
-      // First, look 28 bits upwards.
-      uint64_t bits = p224_get_bit(g_scalar->bytes, i + 196) << 3;
-      bits |= p224_get_bit(g_scalar->bytes, i + 140) << 2;
-      bits |= p224_get_bit(g_scalar->bytes, i + 84) << 1;
-      bits |= p224_get_bit(g_scalar->bytes, i + 28);
-
-      p224_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 1 /* mixed */,
-                     g_p224_pre_comp[1][bits][0], g_p224_pre_comp[1][bits][1],
-                     g_p224_pre_comp[1][bits][2]);
-      assert(!skip);
-
-      // Second, look at the current position.
-      bits = p224_get_bit(g_scalar->bytes, i + 168) << 3;
-      bits |= p224_get_bit(g_scalar->bytes, i + 112) << 2;
-      bits |= p224_get_bit(g_scalar->bytes, i + 56) << 1;
-      bits |= p224_get_bit(g_scalar->bytes, i);
-      p224_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 1 /* mixed */,
-                     g_p224_pre_comp[0][bits][0], g_p224_pre_comp[0][bits][1],
-                     g_p224_pre_comp[0][bits][2]);
-    }
-
-    // Incorporate |p_scalar| every 5 doublings.
-    if (i % 5 == 0) {
-      uint64_t bits = p224_get_bit(p_scalar->bytes, i + 4) << 5;
-      bits |= p224_get_bit(p_scalar->bytes, i + 3) << 4;
-      bits |= p224_get_bit(p_scalar->bytes, i + 2) << 3;
-      bits |= p224_get_bit(p_scalar->bytes, i + 1) << 2;
-      bits |= p224_get_bit(p_scalar->bytes, i) << 1;
-      bits |= p224_get_bit(p_scalar->bytes, i - 1);
-      uint8_t sign, digit;
-      ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
-
-      // Select the point to add or subtract.
-      OPENSSL_memcpy(tmp, p_pre_comp[digit], 3 * sizeof(p224_felem));
-      if (sign) {
-        p224_felem_neg(tmp[1], tmp[1]);  // (X, -Y, Z) is the negative point
-      }
-
-      if (!skip) {
-        p224_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 0 /* mixed */,
-                       tmp[0], tmp[1], tmp[2]);
-      } else {
-        OPENSSL_memcpy(nq, tmp, 3 * sizeof(p224_felem));
-        skip = 0;
-      }
-    }
-  }
-
-  // Reduce the output to its unique minimal representation.
-  p224_felem_to_generic(&r->X, nq[0]);
-  p224_felem_to_generic(&r->Y, nq[1]);
-  p224_felem_to_generic(&r->Z, nq[2]);
+  // reduce the output to its unique minimal representation
+  p224_felem_to_generic(&r->X, x_out);
+  p224_felem_to_generic(&r->Y, y_out);
+  p224_felem_to_generic(&r->Z, z_out);
 }
 
 static void ec_GFp_nistp224_felem_mul(const EC_GROUP *group, EC_FELEM *r,
@@ -1172,9 +1111,8 @@
       ec_GFp_nistp224_point_get_affine_coordinates;
   out->add = ec_GFp_nistp224_add;
   out->dbl = ec_GFp_nistp224_dbl;
-  out->mul = ec_GFp_nistp224_point_mul;
-  out->mul_base = ec_GFp_nistp224_point_mul_base;
-  out->mul_public = ec_GFp_nistp224_point_mul_public;
+  out->mul = ec_GFp_nistp224_points_mul;
+  out->mul_public = ec_GFp_nistp224_points_mul;
   out->felem_mul = ec_GFp_nistp224_felem_mul;
   out->felem_sqr = ec_GFp_nistp224_felem_sqr;
   out->bignum_to_felem = ec_GFp_nistp224_bignum_to_felem;
diff --git a/src/crypto/fipsmodule/ec/p256-x86_64.c b/src/crypto/fipsmodule/ec/p256-x86_64.c
index b4af544..dd8108d 100644
--- a/src/crypto/fipsmodule/ec/p256-x86_64.c
+++ b/src/crypto/fipsmodule/ec/p256-x86_64.c
@@ -48,8 +48,7 @@
 // Precomputed tables for the default generator
 #include "p256-x86_64-table.h"
 
-// Recode window to a signed digit, see |ec_GFp_nistp_recode_scalar_bits| in
-// util.c for details
+// Recode window to a signed digit, see util-64.c for details
 static unsigned booth_recode_w5(unsigned in) {
   unsigned s, d;
 
@@ -317,57 +316,74 @@
   return booth_recode_w7(wvalue);
 }
 
-static void ecp_nistz256_point_mul(const EC_GROUP *group, EC_RAW_POINT *r,
-                                   const EC_RAW_POINT *p,
-                                   const EC_SCALAR *scalar) {
-  alignas(32) P256_POINT out;
-  ecp_nistz256_windowed_mul(group, &out, p, scalar);
+static void mul_p_add_and_store(const EC_GROUP *group, EC_RAW_POINT *r,
+                                const EC_SCALAR *g_scalar,
+                                const EC_RAW_POINT *p_,
+                                const EC_SCALAR *p_scalar,
+                                p256_point_union_t *t, p256_point_union_t *p) {
+  const int p_is_infinity = g_scalar == NULL;
+  if (p_scalar != NULL) {
+    P256_POINT *out = &t->p;
+    if (p_is_infinity) {
+      out = &p->p;
+    }
 
-  assert(group->field.width == P256_LIMBS);
-  OPENSSL_memcpy(r->X.words, out.X, P256_LIMBS * sizeof(BN_ULONG));
-  OPENSSL_memcpy(r->Y.words, out.Y, P256_LIMBS * sizeof(BN_ULONG));
-  OPENSSL_memcpy(r->Z.words, out.Z, P256_LIMBS * sizeof(BN_ULONG));
-}
-
-static void ecp_nistz256_point_mul_base(const EC_GROUP *group, EC_RAW_POINT *r,
-                                        const EC_SCALAR *scalar) {
-  alignas(32) p256_point_union_t t, p;
-
-  uint8_t p_str[33];
-  OPENSSL_memcpy(p_str, scalar->bytes, 32);
-  p_str[32] = 0;
-
-  // First window
-  unsigned index = 0;
-  unsigned wvalue = calc_first_wvalue(&index, p_str);
-
-  ecp_nistz256_select_w7(&p.a, ecp_nistz256_precomputed[0], wvalue >> 1);
-  ecp_nistz256_neg(p.p.Z, p.p.Y);
-  copy_conditional(p.p.Y, p.p.Z, wvalue & 1);
-
-  // Convert |p| from affine to Jacobian coordinates. We set Z to zero if |p|
-  // is infinity and |ONE| otherwise. |p| was computed from the table, so it
-  // is infinity iff |wvalue >> 1| is zero.
-  OPENSSL_memset(p.p.Z, 0, sizeof(p.p.Z));
-  copy_conditional(p.p.Z, ONE, is_not_zero(wvalue >> 1));
-
-  for (int i = 1; i < 37; i++) {
-    wvalue = calc_wvalue(&index, p_str);
-
-    ecp_nistz256_select_w7(&t.a, ecp_nistz256_precomputed[i], wvalue >> 1);
-
-    ecp_nistz256_neg(t.p.Z, t.a.Y);
-    copy_conditional(t.a.Y, t.p.Z, wvalue & 1);
-
-    // Note |ecp_nistz256_point_add_affine| does not work if |p.p| and |t.a|
-    // are the same non-infinity point.
-    ecp_nistz256_point_add_affine(&p.p, &p.p, &t.a);
+    ecp_nistz256_windowed_mul(group, out, p_, p_scalar);
+    if (!p_is_infinity) {
+      ecp_nistz256_point_add(&p->p, &p->p, out);
+    }
   }
 
   assert(group->field.width == P256_LIMBS);
-  OPENSSL_memcpy(r->X.words, p.p.X, P256_LIMBS * sizeof(BN_ULONG));
-  OPENSSL_memcpy(r->Y.words, p.p.Y, P256_LIMBS * sizeof(BN_ULONG));
-  OPENSSL_memcpy(r->Z.words, p.p.Z, P256_LIMBS * sizeof(BN_ULONG));
+  OPENSSL_memcpy(r->X.words, p->p.X, P256_LIMBS * sizeof(BN_ULONG));
+  OPENSSL_memcpy(r->Y.words, p->p.Y, P256_LIMBS * sizeof(BN_ULONG));
+  OPENSSL_memcpy(r->Z.words, p->p.Z, P256_LIMBS * sizeof(BN_ULONG));
+}
+
+static void ecp_nistz256_points_mul(const EC_GROUP *group, EC_RAW_POINT *r,
+                                    const EC_SCALAR *g_scalar,
+                                    const EC_RAW_POINT *p_,
+                                    const EC_SCALAR *p_scalar) {
+  assert((p_ != NULL) == (p_scalar != NULL));
+
+  alignas(32) p256_point_union_t t, p;
+
+  if (g_scalar != NULL) {
+    uint8_t p_str[33];
+    OPENSSL_memcpy(p_str, g_scalar->bytes, 32);
+    p_str[32] = 0;
+
+    // First window
+    unsigned index = 0;
+    unsigned wvalue = calc_first_wvalue(&index, p_str);
+
+    ecp_nistz256_select_w7(&p.a, ecp_nistz256_precomputed[0], wvalue >> 1);
+
+    ecp_nistz256_neg(p.p.Z, p.p.Y);
+    copy_conditional(p.p.Y, p.p.Z, wvalue & 1);
+
+    // Convert |p| from affine to Jacobian coordinates. We set Z to zero if |p|
+    // is infinity and |ONE| otherwise. |p| was computed from the table, so it
+    // is infinity iff |wvalue >> 1| is zero.
+    OPENSSL_memset(p.p.Z, 0, sizeof(p.p.Z));
+    copy_conditional(p.p.Z, ONE, is_not_zero(wvalue >> 1));
+
+    for (int i = 1; i < 37; i++) {
+      wvalue = calc_wvalue(&index, p_str);
+
+      ecp_nistz256_select_w7(&t.a, ecp_nistz256_precomputed[i], wvalue >> 1);
+
+      ecp_nistz256_neg(t.p.Z, t.a.Y);
+      copy_conditional(t.a.Y, t.p.Z, wvalue & 1);
+
+      // Note |ecp_nistz256_point_add_affine| does not work if |p.p| and |t.a|
+      // are the same non-infinity point, so it is important that we compute the
+      // |g_scalar| term before the |p_scalar| term.
+      ecp_nistz256_point_add_affine(&p.p, &p.p, &t.a);
+    }
+  }
+
+  mul_p_add_and_store(group, r, g_scalar, p_, p_scalar, &t, &p);
 }
 
 static void ecp_nistz256_points_mul_public(const EC_GROUP *group,
@@ -422,13 +438,7 @@
     ecp_nistz256_point_add_affine(&p.p, &p.p, &t.a);
   }
 
-  ecp_nistz256_windowed_mul(group, &t.p, p_, p_scalar);
-  ecp_nistz256_point_add(&p.p, &p.p, &t.p);
-
-  assert(group->field.width == P256_LIMBS);
-  OPENSSL_memcpy(r->X.words, p.p.X, P256_LIMBS * sizeof(BN_ULONG));
-  OPENSSL_memcpy(r->Y.words, p.p.Y, P256_LIMBS * sizeof(BN_ULONG));
-  OPENSSL_memcpy(r->Z.words, p.p.Z, P256_LIMBS * sizeof(BN_ULONG));
+  mul_p_add_and_store(group, r, g_scalar, p_, p_scalar, &t, &p);
 }
 
 static int ecp_nistz256_get_affine(const EC_GROUP *group,
@@ -635,8 +645,7 @@
   out->point_get_affine_coordinates = ecp_nistz256_get_affine;
   out->add = ecp_nistz256_add;
   out->dbl = ecp_nistz256_dbl;
-  out->mul = ecp_nistz256_point_mul;
-  out->mul_base = ecp_nistz256_point_mul_base;
+  out->mul = ecp_nistz256_points_mul;
   out->mul_public = ecp_nistz256_points_mul_public;
   out->felem_mul = ec_GFp_mont_felem_mul;
   out->felem_sqr = ec_GFp_mont_felem_sqr;
diff --git a/src/crypto/fipsmodule/ec/simple_mul.c b/src/crypto/fipsmodule/ec/simple_mul.c
index 4ed6c48..e05f491 100644
--- a/src/crypto/fipsmodule/ec/simple_mul.c
+++ b/src/crypto/fipsmodule/ec/simple_mul.c
@@ -21,8 +21,9 @@
 #include "../../internal.h"
 
 
-void ec_GFp_mont_mul(const EC_GROUP *group, EC_RAW_POINT *r,
-                     const EC_RAW_POINT *p, const EC_SCALAR *scalar) {
+static void ec_GFp_mont_mul_single(const EC_GROUP *group, EC_RAW_POINT *r,
+                                   const EC_RAW_POINT *p,
+                                   const EC_SCALAR *scalar) {
   // This is a generic implementation for uncommon curves that not do not
   // warrant a tuned one. It uses unsigned digits so that the doubling case in
   // |ec_GFp_mont_add| is always unreachable, erring on safety and simplicity.
@@ -78,7 +79,21 @@
   }
 }
 
-void ec_GFp_mont_mul_base(const EC_GROUP *group, EC_RAW_POINT *r,
-                          const EC_SCALAR *scalar) {
-  ec_GFp_mont_mul(group, r, &group->generator->raw, scalar);
+void ec_GFp_mont_mul(const EC_GROUP *group, EC_RAW_POINT *r,
+                     const EC_SCALAR *g_scalar, const EC_RAW_POINT *p,
+                     const EC_SCALAR *p_scalar) {
+  assert(g_scalar != NULL || p_scalar != NULL);  // At least one term.
+  if (p_scalar == NULL) {  // r = g_scalar * generator
+    ec_GFp_mont_mul_single(group, r, &group->generator->raw, g_scalar);
+  } else if (g_scalar == NULL) {  // r = p_scalar * p
+    ec_GFp_mont_mul_single(group, r, p, p_scalar);
+  } else {  // r = g_scalar * generator + p_scalar * p
+    // Support constant-time two-point multiplication for compatibility.  This
+    // does not actually come up in keygen, ECDH, or ECDSA, so we implement it
+    // the naive way.
+    ec_GFp_mont_mul_single(group, r, &group->generator->raw, g_scalar);
+    EC_RAW_POINT tmp;
+    ec_GFp_mont_mul_single(group, &tmp, p, p_scalar);
+    ec_GFp_mont_add(group, r, r, &tmp);
+  }
 }
diff --git a/src/crypto/fipsmodule/ec/util.c b/src/crypto/fipsmodule/ec/util.c
index 4f39f18..7303a15 100644
--- a/src/crypto/fipsmodule/ec/util.c
+++ b/src/crypto/fipsmodule/ec/util.c
@@ -18,7 +18,6 @@
 
 #include "internal.h"
 
-
 // This function looks at 5+1 scalar bits (5 current, 1 adjacent less
 // significant bit), and recodes them into a signed digit for use in fast point
 // multiplication: the use of signed rather than unsigned digits means that
@@ -44,13 +43,13 @@
 //     of a nonnegative integer (b_k in {0, 1}), rewrite it in digits 0, 1, -1
 //     by using bit-wise subtraction as follows:
 //
-//        b_k     b_(k-1)  ...  b_2  b_1  b_0
-//      -         b_k      ...  b_3  b_2  b_1  b_0
-//       -----------------------------------------
-//        s_(k+1) s_k      ...  s_3  s_2  s_1  s_0
+//        b_k b_(k-1)  ...  b_2  b_1  b_0
+//      -     b_k      ...  b_3  b_2  b_1  b_0
+//       -------------------------------------
+//        s_k b_(k-1)  ...  s_3  s_2  s_1  s_0
 //
 //     A left-shift followed by subtraction of the original value yields a new
-//     representation of the same value, using signed bits s_i = b_(i-1) - b_i.
+//     representation of the same value, using signed bits s_i = b_(i+1) - b_i.
 //     This representation from Booth's paper has since appeared in the
 //     literature under a variety of different names including "reversed binary
 //     form", "alternating greedy expansion", "mutual opposite form", and
@@ -74,7 +73,7 @@
 // (1961), pp. 67-91), in a radix-2^5 setting.  That is, we always combine five
 // signed bits into a signed digit:
 //
-//       s_(5j + 4) s_(5j + 3) s_(5j + 2) s_(5j + 1) s_(5j)
+//       s_(5j + 4) s_(5j + 3) s_(5j + 2) s_(5j + 1) s_(5j)
 //
 // The sign-alternating property implies that the resulting digit values are
 // integers from -16 to 16.
@@ -82,164 +81,14 @@
 // Of course, we don't actually need to compute the signed digits s_i as an
 // intermediate step (that's just a nice way to see how this scheme relates
 // to the wNAF): a direct computation obtains the recoded digit from the
-// six bits b_(5j + 4) ... b_(5j - 1).
+// six bits b_(5j + 4) ... b_(5j - 1).
 //
-// This function takes those six bits as an integer (0 .. 63), writing the
+// This function takes those six bits as an integer (0 .. 63), writing the
 // recoded digit to *sign (0 for positive, 1 for negative) and *digit (absolute
-// value, in the range 0 .. 16).  Note that this integer essentially provides
-// the input bits "shifted to the left" by one position: for example, the input
-// to compute the least significant recoded digit, given that there's no bit
-// b_-1, has to be b_4 b_3 b_2 b_1 b_0 0.
-//
-// DOUBLING CASE:
-//
-// Point addition formulas for short Weierstrass curves are often incomplete.
-// Edge cases such as P + P or P + ∞ must be handled separately. This
-// complicates constant-time requirements. P + ∞ cannot be avoided (any window
-// may be zero) and is handled with constant-time selects. P + P (where P is not
-// ∞) usually is not. Instead, windowing strategies are chosen to avoid this
-// case. Whether this happens depends on the group order.
-//
-// Let w be the window width (in this function, w = 5). The non-trivial doubling
-// case in single-point scalar multiplication may occur if and only if the
-// 2^(w-1) bit of the group order is zero.
-//
-// Note the above only holds if the scalar is fully reduced and the group order
-// is a prime that is much larger than 2^w. It also only holds when windows
-// are applied from most significant to least significant, doubling between each
-// window. It does not apply to more complex table strategies such as
-// |EC_GFp_nistz256_method|.
-//
-// PROOF:
-//
-// Let n be the group order. Let l be the number of bits needed to represent n.
-// Assume there exists some 0 <= k < n such that signed w-bit windowed
-// multiplication hits the doubling case.
-//
-// Windowed multiplication consists of iterating over groups of s_i (defined
-// above based on k's binary representation) from most to least significant. At
-// iteration i (for i = ..., 3w, 2w, w, 0, starting from the most significant
-// window), we:
-//
-//  1. Double the accumulator A, w times. Let A_i be the value of A at this
-//     point.
-//
-//  2. Set A to T_i + A_i, where T_i is a precomputed multiple of P
-//     corresponding to the window s_(i+w-1) ... s_i.
-//
-// Let j be the index such that A_j = T_j ≠ ∞. Looking at A_i and T_i as
-// multiples of P, define a_i and t_i to be scalar coefficients of A_i and T_i.
-// Thus a_j = t_j ≠ 0 (mod n). Note a_i and t_i may not be reduced mod n. t_i is
-// the value of the w signed bits s_(i+w-1) ... s_i. a_i is computed as a_i =
-// 2^w * (a_(i+w) + t_(i+w)).
-//
-// t_i is bounded by -2^(w-1) <= t_i <= 2^(w-1). Additionally, we may write it
-// in terms of unsigned bits b_i. t_i consists of signed bits s_(i+w-1) ... s_i.
-// This is computed as:
-//
-//         b_(i+w-2) b_(i+w-3)  ...  b_i      b_(i-1)
-//      -  b_(i+w-1) b_(i+w-2)  ...  b_(i+1)  b_i
-//       --------------------------------------------
-//   t_i = s_(i+w-1) s_(i+w-2)  ...  s_(i+1)  s_i
-//
-// Observe that b_(i+w-2) through b_i occur in both terms. Let x be the integer
-// represented by that bit string, i.e. 2^(w-2)*b_(i+w-2) + ... + b_i.
-//
-//   t_i = (2*x + b_(i-1)) - (2^(w-1)*b_(i+w-1) + x)
-//       = x - 2^(w-1)*b_(i+w-1) + b_(i-1)
-//
-// Or, using C notation for bit operations:
-//
-//   t_i = (k>>i) & ((1<<(w-1)) - 1) - (k>>i) & (1<<(w-1)) + (k>>(i-1)) & 1
-//
-// Note b_(i-1) is added in left-shifted by one (or doubled) from its place.
-// This is compensated by t_(i-w)'s subtraction term. Thus, a_i may be computed
-// by adding b_l b_(l-1) ... b_(i+1) b_i and an extra copy of b_(i-1). In C
-// notation, this is:
-//
-//   a_i = (k>>(i+w)) << w + ((k>>(i+w-1)) & 1) << w
-//
-// Observe that, while t_i may be positive or negative, a_i is bounded by
-// 0 <= a_i < n + 2^w. Additionally, a_i can only be zero if b_(i+w-1) and up
-// are all zero. (Note this implies a non-trivial P + (-P) is unreachable for
-// all groups. That would imply the subsequent a_i is zero, which means all
-// terms thus far were zero.)
-//
-// Returning to our doubling position, we have a_j = t_j (mod n). We now
-// determine the value of a_j - t_j, which must be divisible by n. Our bounds on
-// a_j and t_j imply a_j - t_j is 0 or n. If it is 0, a_j = t_j. However, 2^w
-// divides a_j and -2^(w-1) <= t_j <= 2^(w-1), so this can only happen if
-// a_j = t_j = 0, which is a trivial doubling. Therefore, a_j - t_j = n.
-//
-// Now we determine j. Suppose j > 0. w divides j, so j >= w. Then,
-//
-//   n = a_j - t_j = (k>>(j+w)) << w + ((k>>(j+w-1)) & 1) << w - t_j
-//                <= k/2^j + 2^w - t_j
-//                 < n/2^w + 2^w + 2^(w-1)
-//
-// n is much larger than 2^w, so this is impossible. Thus, j = 0: only the final
-// addition may hit the doubling case.
-//
-// Finally, we consider bit patterns for n and k. Divide k into k_H + k_M + k_L
-// such that k_H is the contribution from b_(l-1) .. b_w, k_M is the
-// contribution from b_(w-1), and k_L is the contribution from b_(w-2) ... b_0.
-// That is:
-//
-// - 2^w divides k_H
-// - k_M is 0 or 2^(w-1)
-// - 0 <= k_L < 2^(w-1)
-//
-// Divide n into n_H + n_M + n_L similarly. We thus have:
-//
-//   t_0 = (k>>0) & ((1<<(w-1)) - 1) - (k>>0) & (1<<(w-1)) + (k>>(0-1)) & 1
-//       = k & ((1<<(w-1)) - 1) - k & (1<<(w-1))
-//       = k_L - k_M
-//
-//   a_0 = (k>>(0+w)) << w + ((k>>(0+w-1)) & 1) << w
-//       = (k>>w) << w + ((k>>(w-1)) & 1) << w
-//       = k_H + 2*k_M
-//
-//                 n = a_0 - t_0
-//   n_H + n_M + n_L = (k_H + 2*k_M) - (k_L - k_M)
-//                   = k_H + 3*k_M - k_L
-//
-// k_H - k_L < k and k < n, so k_H - k_L ≠ n. Therefore k_M is not 0 and must be
-// 2^(w-1). Now we consider k_H and n_H. We know k_H <= n_H. Suppose k_H = n_H.
-// Then,
-//
-//   n_M + n_L = 3*(2^(w-1)) - k_L
-//             > 3*(2^(w-1)) - 2^(w-1)
-//             = 2^w
-//
-// Contradiction (n_M + n_L is the bottom w bits of n). Thus k_H < n_H. Suppose
-// k_H < n_H - 2*2^w. Then,
-//
-//   n_H + n_M + n_L = k_H + 3*(2^(w-1)) - k_L
-//                   < n_H - 2*2^w + 3*(2^(w-1)) - k_L
-//         n_M + n_L < -2^(w-1) - k_L
-//
-// Contradiction. Thus, k_H = n_H - 2^w. (Note 2^w divides n_H and k_H.) Thus,
-//
-//   n_H + n_M + n_L = k_H + 3*(2^(w-1)) - k_L
-//                   = n_H - 2^w + 3*(2^(w-1)) - k_L
-//         n_M + n_L = 2^(w-1) - k_L
-//                  <= 2^(w-1)
-//
-// Equality would mean 2^(w-1) divides n, which is impossible if n is prime.
-// Thus n_M + n_L < 2^(w-1), so n_M is zero, proving our condition.
-//
-// This proof constructs k, so, to show the converse, let k_H = n_H - 2^w,
-// k_M = 2^(w-1), k_L = 2^(w-1) - n_L. This will result in a non-trivial point
-// doubling in the final addition and is the only such scalar.
-//
-// COMMON CURVES:
-//
-// The group orders for common curves end in the following bit patterns:
-//
-//   P-521: ...00001001; w = 4 is okay
-//   P-384: ...01110011; w = 2, 5, 6, 7 are okay
-//   P-256: ...01010001; w = 5, 7 are okay
-//   P-224: ...00111101; w = 3, 4, 5, 6 are okay
+// value, in the range 0 .. 16).  Note that this integer essentially provides
+// the input bits "shifted to the left" by one position: for example, the input
+// to compute the least significant recoded digit, given that there's no bit
+// b_-1, has to be b_4 b_3 b_2 b_1 b_0 0.
 void ec_GFp_nistp_recode_scalar_bits(uint8_t *sign, uint8_t *digit,
                                      uint8_t in) {
   uint8_t s, d;
diff --git a/src/crypto/fipsmodule/ecdh/ecdh.c b/src/crypto/fipsmodule/ecdh/ecdh.c
index a7b2f08..b9dc237 100644
--- a/src/crypto/fipsmodule/ecdh/ecdh.c
+++ b/src/crypto/fipsmodule/ecdh/ecdh.c
@@ -93,7 +93,7 @@
   EC_RAW_POINT shared_point;
   uint8_t buf[EC_MAX_BYTES];
   size_t buflen;
-  if (!ec_point_mul_scalar(group, &shared_point, &pub_key->raw, priv) ||
+  if (!ec_point_mul_scalar(group, &shared_point, NULL, &pub_key->raw, priv) ||
       !ec_point_get_affine_coordinate_bytes(group, buf, NULL, &buflen,
                                             sizeof(buf), &shared_point)) {
     OPENSSL_PUT_ERROR(ECDH, ECDH_R_POINT_ARITHMETIC_FAILURE);
diff --git a/src/crypto/fipsmodule/ecdsa/ecdsa.c b/src/crypto/fipsmodule/ecdsa/ecdsa.c
index 38771d5..010ee02 100644
--- a/src/crypto/fipsmodule/ecdsa/ecdsa.c
+++ b/src/crypto/fipsmodule/ecdsa/ecdsa.c
@@ -232,7 +232,7 @@
     ec_scalar_from_montgomery(group, out_kinv_mont, out_kinv_mont);
 
     // Compute r, the x-coordinate of generator * k.
-    if (!ec_point_mul_scalar_base(group, &tmp_point, &k) ||
+    if (!ec_point_mul_scalar(group, &tmp_point, &k, NULL, NULL) ||
         !ec_get_x_coordinate_as_scalar(group, out_r, &tmp_point)) {
       goto err;
     }
diff --git a/src/crypto/fipsmodule/fips_shared.lds b/src/crypto/fipsmodule/fips_shared.lds
deleted file mode 100644
index 6d44abc..0000000
--- a/src/crypto/fipsmodule/fips_shared.lds
+++ /dev/null
@@ -1,19 +0,0 @@
-SECTIONS
-{
-  .text : {
-    BORINGSSL_bcm_text_start = .;
-    *(.text)
-    BORINGSSL_bcm_text_end = .;
-  }
-  .rodata : {
-    BORINGSSL_bcm_rodata_start = .;
-    *(.rodata)
-    BORINGSSL_bcm_rodata_end = .;
-  }
-
-  /DISCARD/ : {
-    *(.rela.dyn)
-    *(.data)
-    *(.rel.ro)
-  }
-}
diff --git a/src/crypto/fipsmodule/fips_shared_support.c b/src/crypto/fipsmodule/fips_shared_support.c
deleted file mode 100644
index 2a66a1f..0000000
--- a/src/crypto/fipsmodule/fips_shared_support.c
+++ /dev/null
@@ -1,32 +0,0 @@
-/* Copyright (c) 2019, Google Inc.
- *
- * Permission to use, copy, modify, and/or distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
- * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
-
-#include <stdint.h>
-
-
-#if defined(BORINGSSL_FIPS) && defined(BORINGSSL_SHARED_LIBRARY)
-// BORINGSSL_bcm_text_hash is is default hash value for the FIPS integrity check
-// that must be replaced with the real value during the build process. This
-// value need only be distinct, i.e. so that we can safely search-and-replace it
-// in an object file.
-const uint8_t BORINGSSL_bcm_text_hash[64];
-const uint8_t BORINGSSL_bcm_text_hash[64] = {
-    0xae, 0x2c, 0xea, 0x2a, 0xbd, 0xa6, 0xf3, 0xec, 0x97, 0x7f, 0x9b,
-    0xf6, 0x94, 0x9a, 0xfc, 0x83, 0x68, 0x27, 0xcb, 0xa0, 0xa0, 0x9f,
-    0x6b, 0x6f, 0xde, 0x52, 0xcd, 0xe2, 0xcd, 0xff, 0x31, 0x80, 0xa2,
-    0xd4, 0xc3, 0x66, 0x0f, 0xc2, 0x6a, 0x7b, 0xf4, 0xbe, 0x39, 0xa2,
-    0xd7, 0x25, 0xdb, 0x21, 0x98, 0xe9, 0xd5, 0x53, 0xbf, 0x5c, 0x32,
-    0x06, 0x83, 0x34, 0x0c, 0x65, 0x89, 0x52, 0xbd, 0x1f,
-};
-#endif  // FIPS && SHARED_LIBRARY
diff --git a/src/crypto/fipsmodule/md5/asm/md5-586.pl b/src/crypto/fipsmodule/md5/asm/md5-586.pl
index 20c226b..ded9442 100644
--- a/src/crypto/fipsmodule/md5/asm/md5-586.pl
+++ b/src/crypto/fipsmodule/md5/asm/md5-586.pl
@@ -36,7 +36,7 @@
 &md5_block("md5_block_asm_data_order");
 &asm_finish();
 
-close STDOUT or die "error closing STDOUT";
+close STDOUT;
 
 sub Np
 	{
diff --git a/src/crypto/fipsmodule/md5/asm/md5-x86_64.pl b/src/crypto/fipsmodule/md5/asm/md5-x86_64.pl
index 509bcde..6eb33c0 100644
--- a/src/crypto/fipsmodule/md5/asm/md5-x86_64.pl
+++ b/src/crypto/fipsmodule/md5/asm/md5-x86_64.pl
@@ -380,4 +380,4 @@
 
 print $code;
 
-close STDOUT or die "error closing STDOUT";
+close STDOUT;
diff --git a/src/crypto/fipsmodule/modes/asm/aesni-gcm-x86_64.pl b/src/crypto/fipsmodule/modes/asm/aesni-gcm-x86_64.pl
index d3e3763..b9edb79 100644
--- a/src/crypto/fipsmodule/modes/asm/aesni-gcm-x86_64.pl
+++ b/src/crypto/fipsmodule/modes/asm/aesni-gcm-x86_64.pl
@@ -1143,4 +1143,4 @@
 
 print $code;
 
-close STDOUT or die "error closing STDOUT";
+close STDOUT;
diff --git a/src/crypto/fipsmodule/modes/asm/ghash-armv4.pl b/src/crypto/fipsmodule/modes/asm/ghash-armv4.pl
index 54c80f7..778b543 100644
--- a/src/crypto/fipsmodule/modes/asm/ghash-armv4.pl
+++ b/src/crypto/fipsmodule/modes/asm/ghash-armv4.pl
@@ -553,4 +553,4 @@
 
 	print $_,"\n";
 }
-close STDOUT or die "error closing STDOUT"; # enforce flush
+close STDOUT; # enforce flush
diff --git a/src/crypto/fipsmodule/modes/asm/ghash-neon-armv8.pl b/src/crypto/fipsmodule/modes/asm/ghash-neon-armv8.pl
index f57017d..972be41 100644
--- a/src/crypto/fipsmodule/modes/asm/ghash-neon-armv8.pl
+++ b/src/crypto/fipsmodule/modes/asm/ghash-neon-armv8.pl
@@ -284,4 +284,4 @@
 
 	print $_,"\n";
 }
-close STDOUT or die "error closing STDOUT"; # enforce flush
+close STDOUT; # enforce flush
diff --git a/src/crypto/fipsmodule/modes/asm/ghash-ssse3-x86.pl b/src/crypto/fipsmodule/modes/asm/ghash-ssse3-x86.pl
index 45e1ee1..0d9ce15 100644
--- a/src/crypto/fipsmodule/modes/asm/ghash-ssse3-x86.pl
+++ b/src/crypto/fipsmodule/modes/asm/ghash-ssse3-x86.pl
@@ -285,4 +285,4 @@
 
 &asm_finish();
 
-close STDOUT or die "error closing STDOUT";
+close STDOUT;
diff --git a/src/crypto/fipsmodule/modes/asm/ghash-ssse3-x86_64.pl b/src/crypto/fipsmodule/modes/asm/ghash-ssse3-x86_64.pl
index e0e5010..1dd2519 100644
--- a/src/crypto/fipsmodule/modes/asm/ghash-ssse3-x86_64.pl
+++ b/src/crypto/fipsmodule/modes/asm/ghash-ssse3-x86_64.pl
@@ -410,4 +410,4 @@
 }
 
 print $code;
-close STDOUT or die "error closing STDOUT";
+close STDOUT;
diff --git a/src/crypto/fipsmodule/modes/asm/ghash-x86.pl b/src/crypto/fipsmodule/modes/asm/ghash-x86.pl
index a2ab859..02edf03 100644
--- a/src/crypto/fipsmodule/modes/asm/ghash-x86.pl
+++ b/src/crypto/fipsmodule/modes/asm/ghash-x86.pl
@@ -1150,7 +1150,7 @@
 &asciz("GHASH for x86, CRYPTOGAMS by <appro\@openssl.org>");
 &asm_finish();
 
-close STDOUT or die "error closing STDOUT";
+close STDOUT;
 
 # A question was risen about choice of vanilla MMX. Or rather why wasn't
 # SSE2 chosen instead? In addition to the fact that MMX runs on legacy
diff --git a/src/crypto/fipsmodule/modes/asm/ghash-x86_64.pl b/src/crypto/fipsmodule/modes/asm/ghash-x86_64.pl
index 5c4122f..b267698 100644
--- a/src/crypto/fipsmodule/modes/asm/ghash-x86_64.pl
+++ b/src/crypto/fipsmodule/modes/asm/ghash-x86_64.pl
@@ -1803,4 +1803,4 @@
 
 print $code;
 
-close STDOUT or die "error closing STDOUT";
+close STDOUT;
diff --git a/src/crypto/fipsmodule/modes/asm/ghashp8-ppc.pl b/src/crypto/fipsmodule/modes/asm/ghashp8-ppc.pl
index 7a1259b..c46cdb5 100644
--- a/src/crypto/fipsmodule/modes/asm/ghashp8-ppc.pl
+++ b/src/crypto/fipsmodule/modes/asm/ghashp8-ppc.pl
@@ -667,4 +667,4 @@
 	print $_,"\n";
 }
 
-close STDOUT or die "error closing STDOUT"; # enforce flush
+close STDOUT; # enforce flush
diff --git a/src/crypto/fipsmodule/modes/asm/ghashv8-armx.pl b/src/crypto/fipsmodule/modes/asm/ghashv8-armx.pl
index 99124a2..1435db5 100644
--- a/src/crypto/fipsmodule/modes/asm/ghashv8-armx.pl
+++ b/src/crypto/fipsmodule/modes/asm/ghashv8-armx.pl
@@ -425,4 +425,4 @@
     }
 }
 
-close STDOUT or die "error closing STDOUT"; # enforce flush
+close STDOUT; # enforce flush
diff --git a/src/crypto/fipsmodule/modes/ccm.c b/src/crypto/fipsmodule/modes/ccm.c
new file mode 100644
index 0000000..5a153f4
--- /dev/null
+++ b/src/crypto/fipsmodule/modes/ccm.c
@@ -0,0 +1,256 @@
+/* ====================================================================
+ * Copyright (c) 2011 The OpenSSL Project.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ *    software must display the following acknowledgment:
+ *    "This product includes software developed by the OpenSSL Project
+ *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ *    endorse or promote products derived from this software without
+ *    prior written permission. For written permission, please contact
+ *    openssl-core@openssl.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ *    nor may "OpenSSL" appear in their names without prior written
+ *    permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ *    acknowledgment:
+ *    "This product includes software developed by the OpenSSL Project
+ *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ */
+
+#include <assert.h>
+#include <string.h>
+
+#include <openssl/cpu.h>
+#include <openssl/mem.h>
+
+#include "../../internal.h"
+#include "internal.h"
+
+
+struct ccm128_state {
+  union {
+    uint64_t u[2];
+    uint8_t c[16];
+  } nonce, cmac;
+};
+
+int CRYPTO_ccm128_init(CCM128_CONTEXT *ctx, const AES_KEY *key,
+                       block128_f block, ctr128_f ctr, unsigned M, unsigned L) {
+  if (M < 4 || M > 16 || (M & 1) != 0 || L < 2 || L > 8) {
+    return 0;
+  }
+  ctx->block = block;
+  ctx->ctr = ctr;
+  ctx->M = M;
+  ctx->L = L;
+  return 1;
+}
+
+size_t CRYPTO_ccm128_max_input(const CCM128_CONTEXT *ctx) {
+  return ctx->L >= sizeof(size_t) ? (size_t)-1
+                                  : (((size_t)1) << (ctx->L * 8)) - 1;
+}
+
+static int ccm128_init_state(const CCM128_CONTEXT *ctx,
+                             struct ccm128_state *state, const AES_KEY *key,
+                             const uint8_t *nonce, size_t nonce_len,
+                             const uint8_t *aad, size_t aad_len,
+                             size_t plaintext_len) {
+  const block128_f block = ctx->block;
+  const unsigned M = ctx->M;
+  const unsigned L = ctx->L;
+
+  // |L| determines the expected |nonce_len| and the limit for |plaintext_len|.
+  if (plaintext_len > CRYPTO_ccm128_max_input(ctx) ||
+      nonce_len != 15 - L) {
+    return 0;
+  }
+
+  // Assemble the first block for computing the MAC.
+  OPENSSL_memset(state, 0, sizeof(*state));
+  state->nonce.c[0] = (uint8_t)((L - 1) | ((M - 2) / 2) << 3);
+  if (aad_len != 0) {
+    state->nonce.c[0] |= 0x40;  // Set AAD Flag
+  }
+  OPENSSL_memcpy(&state->nonce.c[1], nonce, nonce_len);
+  for (unsigned i = 0; i < L; i++) {
+    state->nonce.c[15 - i] = (uint8_t)(plaintext_len >> (8 * i));
+  }
+
+  (*block)(state->nonce.c, state->cmac.c, key);
+  size_t blocks = 1;  // Block cipher calls made so far.
+
+  if (aad_len != 0) {
+    unsigned i;  // Offset in |state->cmac.c| where AAD bytes start.
+    // Cast to u64 to avoid the compiler complaining about invalid shifts.
+    uint64_t aad_len_u64 = aad_len;
+    if (aad_len_u64 < 0x10000 - 0x100) {
+      state->cmac.c[0] ^= (uint8_t)(aad_len_u64 >> 8);
+      state->cmac.c[1] ^= (uint8_t)aad_len_u64;
+      i = 2;  // The length encoding took two bytes.
+    } else if (aad_len_u64 <= 0xffffffff) {
+      state->cmac.c[0] ^= 0xff;
+      state->cmac.c[1] ^= 0xfe;
+      state->cmac.c[2] ^= (uint8_t)(aad_len_u64 >> 24);
+      state->cmac.c[3] ^= (uint8_t)(aad_len_u64 >> 16);
+      state->cmac.c[4] ^= (uint8_t)(aad_len_u64 >> 8);
+      state->cmac.c[5] ^= (uint8_t)aad_len_u64;
+      i = 6;  // The length encoding took six bytes.
+    } else {
+      state->cmac.c[0] ^= 0xff;
+      state->cmac.c[1] ^= 0xff;
+      state->cmac.c[2] ^= (uint8_t)(aad_len_u64 >> 56);
+      state->cmac.c[3] ^= (uint8_t)(aad_len_u64 >> 48);
+      state->cmac.c[4] ^= (uint8_t)(aad_len_u64 >> 40);
+      state->cmac.c[5] ^= (uint8_t)(aad_len_u64 >> 32);
+      state->cmac.c[6] ^= (uint8_t)(aad_len_u64 >> 24);
+      state->cmac.c[7] ^= (uint8_t)(aad_len_u64 >> 16);
+      state->cmac.c[8] ^= (uint8_t)(aad_len_u64 >> 8);
+      state->cmac.c[9] ^= (uint8_t)aad_len_u64;
+      i = 10;  // The length encoding took ten bytes.
+    }
+
+    do {
+      for (; i < 16 && aad_len != 0; i++) {
+        state->cmac.c[i] ^= *aad;
+        aad++;
+        aad_len--;
+      }
+      (*block)(state->cmac.c, state->cmac.c, key);
+      blocks++;
+      i = 0;  // Later blocks carry AAD bytes only.
+    } while (aad_len != 0);
+  }
+
+  // Per RFC 3610, section 2.6, the total number of block cipher operations done
+  // must not exceed 2^61. There are two block cipher operations remaining per
+  // message block, plus one block at the end to encrypt the MAC.
+  size_t remaining_blocks = 2 * ((plaintext_len + 15) / 16) + 1;
+  if (plaintext_len + 15 < plaintext_len ||
+      remaining_blocks + blocks < blocks ||
+      (uint64_t) remaining_blocks + blocks > UINT64_C(1) << 61) {
+    return 0;
+  }
+
+  // Assemble the first block for encrypting and decrypting. The bottom |L|
+  // bytes are replaced with a counter and all but the encoding of |L| is
+  // cleared in the first byte.
+  state->nonce.c[0] &= 7;
+  return 1;
+}
+
+static int ccm128_encrypt(const CCM128_CONTEXT *ctx, struct ccm128_state *state,
+                          const AES_KEY *key, uint8_t *out, const uint8_t *in,
+                          size_t len) {
+  // The counter for encryption begins at one.
+  for (unsigned i = 0; i < ctx->L; i++) {
+    state->nonce.c[15 - i] = 0;
+  }
+  state->nonce.c[15] = 1;
+
+  uint8_t partial_buf[16];
+  unsigned num = 0;
+  if (ctx->ctr != NULL) {
+    CRYPTO_ctr128_encrypt_ctr32(in, out, len, key, state->nonce.c, partial_buf,
+                                &num, ctx->ctr);
+  } else {
+    CRYPTO_ctr128_encrypt(in, out, len, key, state->nonce.c, partial_buf, &num,
+                          ctx->block);
+  }
+  return 1;
+}
+
+static int ccm128_compute_mac(const CCM128_CONTEXT *ctx,
+                              struct ccm128_state *state, const AES_KEY *key,
+                              uint8_t *out_tag, size_t tag_len,
+                              const uint8_t *in, size_t len) {
+  block128_f block = ctx->block;
+  if (tag_len != ctx->M) {
+    return 0;
+  }
+
+  // Incorporate |in| into the MAC.
+  union {
+    uint64_t u[2];
+    uint8_t c[16];
+  } tmp;
+  while (len >= 16) {
+    OPENSSL_memcpy(tmp.c, in, 16);
+    state->cmac.u[0] ^= tmp.u[0];
+    state->cmac.u[1] ^= tmp.u[1];
+    (*block)(state->cmac.c, state->cmac.c, key);
+    in += 16;
+    len -= 16;
+  }
+  if (len > 0) {
+    for (size_t i = 0; i < len; i++) {
+      state->cmac.c[i] ^= in[i];
+    }
+    (*block)(state->cmac.c, state->cmac.c, key);
+  }
+
+  // Encrypt the MAC with counter zero.
+  for (unsigned i = 0; i < ctx->L; i++) {
+    state->nonce.c[15 - i] = 0;
+  }
+  (*block)(state->nonce.c, tmp.c, key);
+  state->cmac.u[0] ^= tmp.u[0];
+  state->cmac.u[1] ^= tmp.u[1];
+
+  OPENSSL_memcpy(out_tag, state->cmac.c, tag_len);
+  return 1;
+}
+
+int CRYPTO_ccm128_encrypt(const CCM128_CONTEXT *ctx, const AES_KEY *key,
+                          uint8_t *out, uint8_t *out_tag, size_t tag_len,
+                          const uint8_t *nonce, size_t nonce_len,
+                          const uint8_t *in, size_t len, const uint8_t *aad,
+                          size_t aad_len) {
+  struct ccm128_state state;
+  return ccm128_init_state(ctx, &state, key, nonce, nonce_len, aad, aad_len,
+                           len) &&
+         ccm128_compute_mac(ctx, &state, key, out_tag, tag_len, in, len) &&
+         ccm128_encrypt(ctx, &state, key, out, in, len);
+}
+
+int CRYPTO_ccm128_decrypt(const CCM128_CONTEXT *ctx, const AES_KEY *key,
+                          uint8_t *out, uint8_t *out_tag, size_t tag_len,
+                          const uint8_t *nonce, size_t nonce_len,
+                          const uint8_t *in, size_t len, const uint8_t *aad,
+                          size_t aad_len) {
+  struct ccm128_state state;
+  return ccm128_init_state(ctx, &state, key, nonce, nonce_len, aad, aad_len,
+                           len) &&
+         ccm128_encrypt(ctx, &state, key, out, in, len) &&
+         ccm128_compute_mac(ctx, &state, key, out_tag, tag_len, out, len);
+}
diff --git a/src/crypto/fipsmodule/modes/internal.h b/src/crypto/fipsmodule/modes/internal.h
index 0971a90..dec1e56 100644
--- a/src/crypto/fipsmodule/modes/internal.h
+++ b/src/crypto/fipsmodule/modes/internal.h
@@ -345,6 +345,42 @@
 #endif  // GHASH_ASM
 
 
+// CCM.
+
+typedef struct ccm128_context {
+  block128_f block;
+  ctr128_f ctr;
+  unsigned M, L;
+} CCM128_CONTEXT;
+
+// CRYPTO_ccm128_init initialises |ctx| to use |block| (typically AES) with the
+// specified |M| and |L| parameters. It returns one on success and zero if |M|
+// or |L| is invalid.
+int CRYPTO_ccm128_init(CCM128_CONTEXT *ctx, const AES_KEY *key,
+                       block128_f block, ctr128_f ctr, unsigned M, unsigned L);
+
+// CRYPTO_ccm128_max_input returns the maximum input length accepted by |ctx|.
+size_t CRYPTO_ccm128_max_input(const CCM128_CONTEXT *ctx);
+
+// CRYPTO_ccm128_encrypt encrypts |len| bytes from |in| to |out| writing the tag
+// to |out_tag|. |key| must be the same key that was passed to
+// |CRYPTO_ccm128_init|. It returns one on success and zero otherwise.
+int CRYPTO_ccm128_encrypt(const CCM128_CONTEXT *ctx, const AES_KEY *key,
+                          uint8_t *out, uint8_t *out_tag, size_t tag_len,
+                          const uint8_t *nonce, size_t nonce_len,
+                          const uint8_t *in, size_t len, const uint8_t *aad,
+                          size_t aad_len);
+
+// CRYPTO_ccm128_decrypt decrypts |len| bytes from |in| to |out|, writing the
+// expected tag to |out_tag|. |key| must be the same key that was passed to
+// |CRYPTO_ccm128_init|. It returns one on success and zero otherwise.
+int CRYPTO_ccm128_decrypt(const CCM128_CONTEXT *ctx, const AES_KEY *key,
+                          uint8_t *out, uint8_t *out_tag, size_t tag_len,
+                          const uint8_t *nonce, size_t nonce_len,
+                          const uint8_t *in, size_t len, const uint8_t *aad,
+                          size_t aad_len);
+
+
 // CBC.
 
 // cbc128_f is the type of a function that performs CBC-mode encryption.
diff --git a/src/crypto/fipsmodule/rand/asm/rdrand-x86_64.pl b/src/crypto/fipsmodule/rand/asm/rdrand-x86_64.pl
index eb2a592..76b5f9b 100644
--- a/src/crypto/fipsmodule/rand/asm/rdrand-x86_64.pl
+++ b/src/crypto/fipsmodule/rand/asm/rdrand-x86_64.pl
@@ -84,4 +84,4 @@
 .size CRYPTO_rdrand_multiple8_buf,.-CRYPTO_rdrand_multiple8_buf
 ___
 
-close STDOUT or die "error closing STDOUT";	# flush
+close STDOUT;	# flush
diff --git a/src/crypto/fipsmodule/rsa/rsa_impl.c b/src/crypto/fipsmodule/rsa/rsa_impl.c
index ab2abe9..903ba9a 100644
--- a/src/crypto/fipsmodule/rsa/rsa_impl.c
+++ b/src/crypto/fipsmodule/rsa/rsa_impl.c
@@ -554,7 +554,7 @@
   if (!ret) {
     OPENSSL_PUT_ERROR(RSA, RSA_R_PADDING_CHECK_FAILED);
   } else {
-    CONSTTIME_DECLASSIFY(out, *out_len);
+    CONSTTIME_DECLASSIFY(out, *out_len);
   }
 
 err:
diff --git a/src/crypto/fipsmodule/sha/asm/sha1-586.pl b/src/crypto/fipsmodule/sha/asm/sha1-586.pl
index bf42961..87fd361 100644
--- a/src/crypto/fipsmodule/sha/asm/sha1-586.pl
+++ b/src/crypto/fipsmodule/sha/asm/sha1-586.pl
@@ -1483,4 +1483,4 @@
 
 &asm_finish();
 
-close STDOUT or die "error closing STDOUT";
+close STDOUT;
diff --git a/src/crypto/fipsmodule/sha/asm/sha1-armv4-large.pl b/src/crypto/fipsmodule/sha/asm/sha1-armv4-large.pl
index ca82514..27187dd 100644
--- a/src/crypto/fipsmodule/sha/asm/sha1-armv4-large.pl
+++ b/src/crypto/fipsmodule/sha/asm/sha1-armv4-large.pl
@@ -740,4 +740,4 @@
 	print $_,$/;
 }
 
-close STDOUT or die "error closing STDOUT"; # enforce flush
+close STDOUT; # enforce flush
diff --git a/src/crypto/fipsmodule/sha/asm/sha1-armv8.pl b/src/crypto/fipsmodule/sha/asm/sha1-armv8.pl
index c147462..7c8880f 100644
--- a/src/crypto/fipsmodule/sha/asm/sha1-armv8.pl
+++ b/src/crypto/fipsmodule/sha/asm/sha1-armv8.pl
@@ -353,4 +353,4 @@
 	print $_,"\n";
 }
 
-close STDOUT or die "error closing STDOUT";
+close STDOUT;
diff --git a/src/crypto/fipsmodule/sha/asm/sha1-x86_64.pl b/src/crypto/fipsmodule/sha/asm/sha1-x86_64.pl
index df7cbc3..fd4ff2a 100755
--- a/src/crypto/fipsmodule/sha/asm/sha1-x86_64.pl
+++ b/src/crypto/fipsmodule/sha/asm/sha1-x86_64.pl
@@ -2122,4 +2122,4 @@
 
 	print $_,"\n";
 }
-close STDOUT or die "error closing STDOUT";
+close STDOUT;
diff --git a/src/crypto/fipsmodule/sha/asm/sha256-586.pl b/src/crypto/fipsmodule/sha/asm/sha256-586.pl
index 240a604..129a9f4 100644
--- a/src/crypto/fipsmodule/sha/asm/sha256-586.pl
+++ b/src/crypto/fipsmodule/sha/asm/sha256-586.pl
@@ -1287,4 +1287,4 @@
 
 &asm_finish();
 
-close STDOUT or die "error closing STDOUT";
+close STDOUT;
diff --git a/src/crypto/fipsmodule/sha/asm/sha256-armv4.pl b/src/crypto/fipsmodule/sha/asm/sha256-armv4.pl
index 15d78de..e5ecdfd 100644
--- a/src/crypto/fipsmodule/sha/asm/sha256-armv4.pl
+++ b/src/crypto/fipsmodule/sha/asm/sha256-armv4.pl
@@ -735,4 +735,4 @@
 	print $_,"\n";
 }
 
-close STDOUT or die "error closing STDOUT"; # enforce flush
+close STDOUT; # enforce flush
diff --git a/src/crypto/fipsmodule/sha/asm/sha512-586.pl b/src/crypto/fipsmodule/sha/asm/sha512-586.pl
index 01acf67..25a5f25 100644
--- a/src/crypto/fipsmodule/sha/asm/sha512-586.pl
+++ b/src/crypto/fipsmodule/sha/asm/sha512-586.pl
@@ -922,4 +922,4 @@
 
 &asm_finish();
 
-close STDOUT or die "error closing STDOUT";
+close STDOUT;
diff --git a/src/crypto/fipsmodule/sha/asm/sha512-armv4.pl b/src/crypto/fipsmodule/sha/asm/sha512-armv4.pl
index c8c715e..cc247a4 100644
--- a/src/crypto/fipsmodule/sha/asm/sha512-armv4.pl
+++ b/src/crypto/fipsmodule/sha/asm/sha512-armv4.pl
@@ -670,4 +670,4 @@
 close SELF;
 
 print $code;
-close STDOUT or die "error closing STDOUT"; # enforce flush
+close STDOUT; # enforce flush
diff --git a/src/crypto/fipsmodule/sha/asm/sha512-armv8.pl b/src/crypto/fipsmodule/sha/asm/sha512-armv8.pl
index 64306be..3f69071 100644
--- a/src/crypto/fipsmodule/sha/asm/sha512-armv8.pl
+++ b/src/crypto/fipsmodule/sha/asm/sha512-armv8.pl
@@ -457,4 +457,4 @@
 	print $_,"\n";
 }
 
-close STDOUT or die "error closing STDOUT";
+close STDOUT;
diff --git a/src/crypto/fipsmodule/sha/asm/sha512-x86_64.pl b/src/crypto/fipsmodule/sha/asm/sha512-x86_64.pl
index 8c5a5f3..4927850 100755
--- a/src/crypto/fipsmodule/sha/asm/sha512-x86_64.pl
+++ b/src/crypto/fipsmodule/sha/asm/sha512-x86_64.pl
@@ -278,6 +278,7 @@
 	jnz	_shaext_shortcut
 ___
     # XOP codepath removed.
+___
 $code.=<<___ if ($avx>1);
 	and	\$`1<<8|1<<5|1<<3`,%r11d	# check for BMI2+AVX2+BMI1
 	cmp	\$`1<<8|1<<5|1<<3`,%r11d
@@ -2082,4 +2083,4 @@
 
 	print $_,"\n";
 }
-close STDOUT or die "error closing STDOUT";
+close STDOUT;