external/boringssl: Sync to c3889634a1aa52575c5d26497696238208fbd0f5.

This includes the following changes:

https://boringssl.googlesource.com/boringssl/+log/41c10e2b5f37edce8b9f292f7f3bacb7e30e25c4..c3889634a1aa52575c5d26497696238208fbd0f5

Test: atest CtsLibcoreTestCases
Change-Id: Ia1c2941ccf58a9e0d736b3409a2d13c21603a205
diff --git a/src/crypto/test/CMakeLists.txt b/src/crypto/test/CMakeLists.txt
index 3e02c3c..b968fd7 100644
--- a/src/crypto/test/CMakeLists.txt
+++ b/src/crypto/test/CMakeLists.txt
@@ -1,15 +1,24 @@
 add_library(
-  test_support
+  test_support_lib
 
-  OBJECT
+  STATIC
 
+  abi_test.cc
   file_test.cc
   malloc.cc
   test_util.cc
   wycheproof_util.cc
 )
 
-add_dependencies(test_support global_target)
+if (LIBUNWIND_FOUND)
+  target_compile_options(test_support_lib PRIVATE ${LIBUNWIND_CFLAGS_OTHER})
+  target_include_directories(test_support_lib PRIVATE ${LIBUNWIND_INCLUDE_DIRS})
+  target_link_libraries(test_support_lib ${LIBUNWIND_LDFLAGS})
+endif()
+if(WIN32)
+  target_link_libraries(test_support_lib dbghelp)
+endif()
+add_dependencies(test_support_lib global_target)
 
 add_library(
   boringssl_gtest_main
diff --git a/src/crypto/test/abi_test.cc b/src/crypto/test/abi_test.cc
new file mode 100644
index 0000000..9844c73
--- /dev/null
+++ b/src/crypto/test/abi_test.cc
@@ -0,0 +1,786 @@
+/* Copyright (c) 2018, Google Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
+
+#include "abi_test.h"
+
+#include <stdarg.h>
+#include <stdio.h>
+
+#include <algorithm>
+#include <array>
+
+#include <openssl/buf.h>
+#include <openssl/mem.h>
+#include <openssl/rand.h>
+#include <openssl/span.h>
+
+#if defined(OPENSSL_X86_64) && defined(SUPPORTS_ABI_TEST)
+#if defined(OPENSSL_LINUX) && defined(BORINGSSL_HAVE_LIBUNWIND)
+#define SUPPORTS_UNWIND_TEST
+#define UNW_LOCAL_ONLY
+#include <errno.h>
+#include <fcntl.h>
+#include <libunwind.h>
+#include <pthread.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#elif defined(OPENSSL_WINDOWS)
+#define SUPPORTS_UNWIND_TEST
+OPENSSL_MSVC_PRAGMA(warning(push, 3))
+#include <windows.h>
+#include <dbghelp.h>
+OPENSSL_MSVC_PRAGMA(warning(pop))
+#endif
+#endif  // X86_64 && SUPPORTS_ABI_TEST
+
+
+namespace abi_test {
+
+namespace internal {
+
+static bool g_unwind_tests_enabled = false;
+
+std::string FixVAArgsString(const char *str) {
+  std::string ret = str;
+  size_t idx = ret.find(',');
+  if (idx == std::string::npos) {
+    return ret + "()";
+  }
+  size_t idx2 = idx + 1;
+  while (idx2 < ret.size() && ret[idx2] == ' ') {
+    idx2++;
+  }
+  while (idx > 0 && ret[idx - 1] == ' ') {
+    idx--;
+  }
+  return ret.substr(0, idx) + "(" + ret.substr(idx2) + ")";
+}
+
+#if defined(SUPPORTS_ABI_TEST)
+// ForEachMismatch calls |func| for each register where |a| and |b| differ.
+template <typename Func>
+static void ForEachMismatch(const CallerState &a, const CallerState &b,
+                            const Func &func) {
+#define CALLER_STATE_REGISTER(type, name) \
+  if (a.name != b.name) {                 \
+    func(#name);                          \
+  }
+  LOOP_CALLER_STATE_REGISTERS()
+#undef CALLER_STATE_REGISTER
+}
+
+// ReadUnwindResult adds the results of the most recent unwind test to |out|.
+static void ReadUnwindResult(Result *out);
+
+crypto_word_t RunTrampoline(Result *out, crypto_word_t func,
+                            const crypto_word_t *argv, size_t argc,
+                            bool unwind) {
+  CallerState state;
+  RAND_bytes(reinterpret_cast<uint8_t *>(&state), sizeof(state));
+
+  unwind &= g_unwind_tests_enabled;
+  CallerState state2 = state;
+  crypto_word_t ret = abi_test_trampoline(func, &state2, argv, argc, unwind);
+#if defined(OPENSSL_X86_64) || defined(OPENSSL_X86)
+  // Query and clear the direction flag early, so negative tests do not
+  // interfere with |malloc|.
+  bool direction_flag = abi_test_get_and_clear_direction_flag();
+#endif  // OPENSSL_X86_64 || OPENSSL_X86
+
+  *out = Result();
+  ForEachMismatch(state, state2, [&](const char *reg) {
+    out->errors.push_back(std::string(reg) + " was not restored after return");
+  });
+#if defined(OPENSSL_X86_64) || defined(OPENSSL_X86)
+  // Linux and Windows ABIs for x86 require the direction flag be cleared on
+  // return. (Some OpenSSL assembly preserves it, which is stronger, but we only
+  // require what is specified by the ABI so |CHECK_ABI| works with C compiler
+  // output.)
+  if (direction_flag) {
+    out->errors.emplace_back("Direction flag set after return");
+  }
+#endif  // OPENSSL_X86_64 || OPENSSL_X86
+  if (unwind) {
+    ReadUnwindResult(out);
+  }
+  return ret;
+}
+#endif  // SUPPORTS_ABI_TEST
+
+#if defined(SUPPORTS_UNWIND_TEST)
+// We test unwind metadata by running the function under test with the trap flag
+// set. This results in |SIGTRAP| and |EXCEPTION_SINGLE_STEP| on Linux and
+// Windows, respectively. We handle these and verify libunwind or the Windows
+// unwind APIs unwind successfully.
+
+// IsAncestorStackFrame returns true if |a_sp| is an ancestor stack frame of
+// |b_sp|.
+static bool IsAncestorStackFrame(crypto_word_t a_sp, crypto_word_t b_sp) {
+#if defined(OPENSSL_X86_64)
+  // The stack grows down, so ancestor stack frames have higher addresses.
+  return a_sp > b_sp;
+#else
+#error "unknown architecture"
+#endif
+}
+
+// Implement some string formatting utilities. Ideally we would use |snprintf|,
+// but this is called in a signal handler and |snprintf| is not async-signal-
+// safe.
+
+#if !defined(OPENSSL_WINDOWS)
+static std::array<char, DECIMAL_SIZE(crypto_word_t) + 1> WordToDecimal(
+    crypto_word_t v) {
+  std::array<char, DECIMAL_SIZE(crypto_word_t) + 1> ret;
+  size_t len = 0;
+  do {
+    ret[len++] = '0' + v % 10;
+    v /= 10;
+  } while (v != 0);
+  for (size_t i = 0; i < len / 2; i++) {
+    std::swap(ret[i], ret[len - 1 - i]);
+  }
+  ret[len] = '\0';
+  return ret;
+}
+#endif  // !OPENSSL_WINDOWS
+
+static std::array<char, sizeof(crypto_word_t) * 2 + 1> WordToHex(
+    crypto_word_t v) {
+  static const char kHex[] = "0123456789abcdef";
+  std::array<char, sizeof(crypto_word_t) * 2 + 1> ret;
+  for (size_t i = sizeof(crypto_word_t) - 1; i < sizeof(crypto_word_t); i--) {
+    uint8_t b = v & 0xff;
+    v >>= 8;
+    ret[i * 2] = kHex[b >> 4];
+    ret[i * 2 + 1] = kHex[b & 0xf];
+  }
+  ret[sizeof(crypto_word_t) * 2] = '\0';
+  return ret;
+}
+
+static void StrCatSignalSafeImpl(bssl::Span<char> out) {}
+
+template <typename... Args>
+static void StrCatSignalSafeImpl(bssl::Span<char> out, const char *str,
+                                 Args... args) {
+  BUF_strlcat(out.data(), str, out.size());
+  StrCatSignalSafeImpl(out, args...);
+}
+
+template <typename... Args>
+static void StrCatSignalSafe(bssl::Span<char> out, Args... args) {
+  if (out.empty()) {
+    return;
+  }
+  out[0] = '\0';
+  StrCatSignalSafeImpl(out, args...);
+}
+
+template <typename... Args>
+[[noreturn]] static void FatalError(Args... args) {
+  // We cannot use |snprintf| here because it is not async-signal-safe.
+  char buf[512];
+  StrCatSignalSafe(buf, args..., "\n");
+#if defined(OPENSSL_WINDOWS)
+  HANDLE stderr_handle = GetStdHandle(STD_ERROR_HANDLE);
+  if (stderr_handle != INVALID_HANDLE_VALUE) {
+    DWORD unused;
+    WriteFile(stderr_handle, buf, strlen(buf), &unused, nullptr);
+  }
+#else
+  write(STDERR_FILENO, buf, strlen(buf));
+#endif
+  abort();
+}
+
+class UnwindStatus {
+ public:
+  UnwindStatus() : err_(nullptr) {}
+  explicit UnwindStatus(const char *err) : err_(err) {}
+
+  bool ok() const { return err_ == nullptr; }
+  const char *Error() const { return err_; }
+
+ private:
+  const char *err_;
+};
+
+template<typename T>
+class UnwindStatusOr {
+ public:
+  UnwindStatusOr(UnwindStatus status) : status_(status) {
+    assert(!status_.ok());
+  }
+
+  UnwindStatusOr(const T &value) : status_(UnwindStatus()), value_(value) {}
+
+  bool ok() const { return status_.ok(); }
+  const char *Error() const { return status_.Error(); }
+
+  const T &ValueOrDie(const char *msg = "Unexpected error") const {
+    if (!ok()) {
+      FatalError(msg, ": ", Error());
+    }
+    return value_;
+  }
+
+ private:
+  UnwindStatus status_;
+  T value_;
+};
+
+// UnwindCursor abstracts between libunwind and Windows unwind APIs. It is
+// async-signal-safe.
+#if defined(OPENSSL_WINDOWS)
+class UnwindCursor {
+ public:
+  explicit UnwindCursor(const CONTEXT &ctx) : ctx_(ctx) {
+    starting_ip_ = ctx_.Rip;
+  }
+
+  crypto_word_t starting_ip() const { return starting_ip_; }
+
+  // Step unwinds the cursor by one frame. On success, it returns whether there
+  // were more frames to unwind.
+  UnwindStatusOr<bool> Step() {
+    bool is_top = is_top_;
+    is_top_ = false;
+
+    DWORD64 image_base;
+    RUNTIME_FUNCTION *entry =
+        RtlLookupFunctionEntry(ctx_.Rip, &image_base, nullptr);
+    if (entry == nullptr) {
+      // This is a leaf function. Leaf functions do not touch stack or
+      // callee-saved registers, so they may be unwound by simulating a ret.
+      if (!is_top) {
+        return UnwindStatus("leaf function found below the top frame");
+      }
+      memcpy(&ctx_.Rip, reinterpret_cast<const void *>(ctx_.Rsp),
+             sizeof(ctx_.Rip));
+      ctx_.Rsp += 8;
+      return true;
+    }
+
+    // This is a frame function. Call into the Windows unwinder.
+    void *handler_data;
+    DWORD64 establisher_frame;
+    RtlVirtualUnwind(UNW_FLAG_NHANDLER, image_base, ctx_.Rip, entry, &ctx_,
+                     &handler_data, &establisher_frame, nullptr);
+    return ctx_.Rip != 0;
+  }
+
+  // GetIP returns the instruction pointer at the current frame.
+  UnwindStatusOr<crypto_word_t> GetIP() { return ctx_.Rip; }
+
+  // GetSP returns the stack pointer at the current frame.
+  UnwindStatusOr<crypto_word_t> GetSP() { return ctx_.Rsp; }
+
+  // GetCallerState returns the callee-saved registers at the current frame.
+  UnwindStatusOr<CallerState> GetCallerState() {
+    CallerState state;
+    state.rbx = ctx_.Rbx;
+    state.rbp = ctx_.Rbp;
+    state.rdi = ctx_.Rdi;
+    state.rsi = ctx_.Rsi;
+    state.r12 = ctx_.R12;
+    state.r13 = ctx_.R13;
+    state.r14 = ctx_.R14;
+    state.r15 = ctx_.R15;
+    memcpy(&state.xmm6, &ctx_.Xmm6, sizeof(Reg128));
+    memcpy(&state.xmm7, &ctx_.Xmm7, sizeof(Reg128));
+    memcpy(&state.xmm8, &ctx_.Xmm8, sizeof(Reg128));
+    memcpy(&state.xmm9, &ctx_.Xmm9, sizeof(Reg128));
+    memcpy(&state.xmm10, &ctx_.Xmm10, sizeof(Reg128));
+    memcpy(&state.xmm11, &ctx_.Xmm11, sizeof(Reg128));
+    memcpy(&state.xmm12, &ctx_.Xmm12, sizeof(Reg128));
+    memcpy(&state.xmm13, &ctx_.Xmm13, sizeof(Reg128));
+    memcpy(&state.xmm14, &ctx_.Xmm14, sizeof(Reg128));
+    memcpy(&state.xmm15, &ctx_.Xmm15, sizeof(Reg128));
+    return state;
+  }
+
+  // ToString returns a human-readable representation of the address the cursor
+  // started at.
+  const char *ToString() {
+    StrCatSignalSafe(starting_ip_buf_, "0x", WordToHex(starting_ip_).data());
+    return starting_ip_buf_;
+  }
+
+ private:
+  CONTEXT ctx_;
+  crypto_word_t starting_ip_;
+  char starting_ip_buf_[64];
+  bool is_top_ = true;
+};
+#else  // !OPENSSL_WINDOWS
+class UnwindCursor {
+ public:
+  explicit UnwindCursor(unw_context_t *ctx) : ctx_(ctx) {
+    int ret = InitAtSignalFrame(&cursor_);
+    if (ret < 0) {
+      FatalError("Error getting unwind context: ", unw_strerror(ret));
+    }
+    starting_ip_ = GetIP().ValueOrDie("Error getting instruction pointer");
+  }
+
+  // Step unwinds the cursor by one frame. On success, it returns whether there
+  // were more frames to unwind.
+  UnwindStatusOr<bool> Step() {
+    int ret = unw_step(&cursor_);
+    if (ret < 0) {
+      return UNWError(ret);
+    }
+    return ret != 0;
+  }
+
+  // GetIP returns the instruction pointer at the current frame.
+  UnwindStatusOr<crypto_word_t> GetIP() {
+    crypto_word_t ip;
+    int ret = GetReg(&ip, UNW_REG_IP);
+    if (ret < 0) {
+      return UNWError(ret);
+    }
+    return ip;
+  }
+
+  // GetSP returns the stack pointer at the current frame.
+  UnwindStatusOr<crypto_word_t> GetSP() {
+    crypto_word_t sp;
+    int ret = GetReg(&sp, UNW_REG_SP);
+    if (ret < 0) {
+      return UNWError(ret);
+    }
+    return sp;
+  }
+
+  // GetCallerState returns the callee-saved registers at the current frame.
+  UnwindStatusOr<CallerState> GetCallerState() {
+    CallerState state;
+    int ret = 0;
+#if defined(OPENSSL_X86_64)
+    ret = ret < 0 ? ret : GetReg(&state.rbx, UNW_X86_64_RBX);
+    ret = ret < 0 ? ret : GetReg(&state.rbp, UNW_X86_64_RBP);
+    ret = ret < 0 ? ret : GetReg(&state.r12, UNW_X86_64_R12);
+    ret = ret < 0 ? ret : GetReg(&state.r13, UNW_X86_64_R13);
+    ret = ret < 0 ? ret : GetReg(&state.r14, UNW_X86_64_R14);
+    ret = ret < 0 ? ret : GetReg(&state.r15, UNW_X86_64_R15);
+#else
+#error "unknown architecture"
+#endif
+    if (ret < 0) {
+      return UNWError(ret);
+    }
+    return state;
+  }
+
+  // ToString returns a human-readable representation of the address the cursor
+  // started at, using debug information if available.
+  const char *ToString() {
+    // Use a new cursor. |cursor_| has already been unwound, and
+    // |unw_get_proc_name| is slow so we do not sample it unconditionally in the
+    // constructor.
+    unw_cursor_t cursor;
+    unw_word_t off;
+    if (InitAtSignalFrame(&cursor) != 0 ||
+        unw_get_proc_name(&cursor, starting_ip_buf_, sizeof(starting_ip_buf_),
+                          &off) != 0) {
+      StrCatSignalSafe(starting_ip_buf_, "0x", WordToHex(starting_ip_).data());
+      return starting_ip_buf_;
+    }
+    size_t len = strlen(starting_ip_buf_);
+    // Print the offset in decimal, to match gdb's disassembly output and ease
+    // debugging.
+    StrCatSignalSafe(bssl::Span<char>(starting_ip_buf_).subspan(len), "+",
+                     WordToDecimal(off).data(), " (0x",
+                     WordToHex(starting_ip_).data(), ")");
+    return starting_ip_buf_;
+  }
+
+ private:
+  static UnwindStatus UNWError(int ret) {
+    assert(ret < 0);
+    const char *msg = unw_strerror(ret);
+    return UnwindStatus(msg == nullptr ? "unknown error" : msg);
+  }
+
+  int InitAtSignalFrame(unw_cursor_t *cursor) {
+    // Work around a bug in libunwind which breaks rax and rdx recovery. This
+    // breaks functions which temporarily use rax as the CFA register. See
+    // https://git.savannah.gnu.org/gitweb/?p=libunwind.git;a=commit;h=819bf51bbd2da462c2ec3401e8ac9153b6e725e3
+    OPENSSL_memset(cursor, 0, sizeof(*cursor));
+    int ret = unw_init_local(cursor, ctx_);
+    if (ret < 0) {
+      return ret;
+    }
+    for (;;) {
+      ret = unw_is_signal_frame(cursor);
+      if (ret < 0) {
+        return ret;
+      }
+      if (ret != 0) {
+        return 0;  // Found the signal frame.
+      }
+      ret = unw_step(cursor);
+      if (ret < 0) {
+        return ret;
+      }
+    }
+  }
+
+  int GetReg(crypto_word_t *out, unw_regnum_t reg) {
+    unw_word_t val;
+    int ret = unw_get_reg(&cursor_, reg, &val);
+    if (ret >= 0) {
+      static_assert(sizeof(crypto_word_t) == sizeof(unw_word_t),
+                    "crypto_word_t and unw_word_t are inconsistent");
+      *out = val;
+    }
+    return ret;
+  }
+
+  unw_context_t *ctx_;
+  unw_cursor_t cursor_;
+  crypto_word_t starting_ip_;
+  char starting_ip_buf_[64];
+};
+#endif  // OPENSSL_WINDOWS
+
+// g_in_trampoline is true if we are in an instrumented |abi_test_trampoline|
+// call, in the region that triggers |SIGTRAP|.
+static bool g_in_trampoline = false;
+// g_unwind_function_done, if |g_in_trampoline| is true, is whether the function
+// under test has returned. It is undefined otherwise.
+static bool g_unwind_function_done;
+// g_trampoline_state, if |g_in_trampoline| is true, is the state the function
+// under test must preserve. It is undefined otherwise.
+static CallerState g_trampoline_state;
+// g_trampoline_sp, if |g_in_trampoline| is true, is the stack pointer of the
+// trampoline frame. It is undefined otherwise.
+static crypto_word_t g_trampoline_sp;
+
+// kMaxUnwindErrors is the maximum number of unwind errors reported per
+// function. If a function's unwind tables are wrong, we are otherwise likely to
+// repeat the same error at multiple addresses.
+static constexpr size_t kMaxUnwindErrors = 10;
+
+// Errors are saved in a signal handler. We use a static buffer to avoid
+// allocation.
+static size_t g_num_unwind_errors = 0;
+
+struct UnwindError {
+#if defined(OPENSSL_WINDOWS)
+  crypto_word_t ip;
+#endif
+  char str[512];
+};
+
+static UnwindError g_unwind_errors[kMaxUnwindErrors];
+
+template <typename... Args>
+static void AddUnwindError(UnwindCursor *cursor, Args... args) {
+  if (g_num_unwind_errors >= kMaxUnwindErrors) {
+    return;
+  }
+#if defined(OPENSSL_WINDOWS)
+  // Windows symbol functions should not be called when handling an
+  // exception. Stash the instruction pointer, to be symbolized later.
+  g_unwind_errors[g_num_unwind_errors].ip = cursor->starting_ip();
+  StrCatSignalSafe(g_unwind_errors[g_num_unwind_errors].str, args...);
+#else
+  StrCatSignalSafe(g_unwind_errors[g_num_unwind_errors].str,
+                   "unwinding at ", cursor->ToString(), ": ", args...);
+#endif
+  g_num_unwind_errors++;
+}
+
+static void CheckUnwind(UnwindCursor *cursor) {
+  const crypto_word_t kStartAddress =
+      reinterpret_cast<crypto_word_t>(&abi_test_unwind_start);
+  const crypto_word_t kReturnAddress =
+      reinterpret_cast<crypto_word_t>(&abi_test_unwind_return);
+  const crypto_word_t kStopAddress =
+      reinterpret_cast<crypto_word_t>(&abi_test_unwind_stop);
+
+  crypto_word_t sp = cursor->GetSP().ValueOrDie("Error getting stack pointer");
+  crypto_word_t ip =
+      cursor->GetIP().ValueOrDie("Error getting instruction pointer");
+  if (!g_in_trampoline) {
+    if (ip != kStartAddress) {
+      FatalError("Unexpected SIGTRAP at ", cursor->ToString());
+    }
+
+    // Save the current state and begin.
+    g_in_trampoline = true;
+    g_unwind_function_done = false;
+    g_trampoline_sp = sp;
+    g_trampoline_state = cursor->GetCallerState().ValueOrDie(
+        "Error getting initial caller state");
+  } else {
+    if (sp == g_trampoline_sp || g_unwind_function_done) {
+      // |g_unwind_function_done| should imply |sp| is |g_trampoline_sp|, but
+      // clearing the trap flag in x86 briefly displaces the stack pointer.
+      //
+      // Also note we check both |ip| and |sp| below, in case the function under
+      // test is also |abi_test_trampoline|.
+      if (ip == kReturnAddress && sp == g_trampoline_sp) {
+        g_unwind_function_done = true;
+      }
+      if (ip == kStopAddress && sp == g_trampoline_sp) {
+        // |SIGTRAP| is fatal again.
+        g_in_trampoline = false;
+      }
+    } else if (IsAncestorStackFrame(sp, g_trampoline_sp)) {
+      // This should never happen. We went past |g_trampoline_sp| without
+      // stopping at |kStopAddress|.
+      AddUnwindError(cursor, "stack frame is before caller");
+      g_in_trampoline = false;
+    } else if (g_num_unwind_errors < kMaxUnwindErrors) {
+      for (;;) {
+        UnwindStatusOr<bool> step_ret = cursor->Step();
+        if (!step_ret.ok()) {
+          AddUnwindError(cursor, "error unwinding: ", step_ret.Error());
+          break;
+        }
+        // |Step| returns whether there was a frame to unwind.
+        if (!step_ret.ValueOrDie()) {
+          AddUnwindError(cursor, "could not unwind to starting frame");
+          break;
+        }
+
+        UnwindStatusOr<crypto_word_t> cur_sp = cursor->GetSP();
+        if (!cur_sp.ok()) {
+          AddUnwindError(cursor,
+                         "error recovering stack pointer: ", cur_sp.Error());
+          break;
+        }
+        if (IsAncestorStackFrame(cur_sp.ValueOrDie(), g_trampoline_sp)) {
+          AddUnwindError(cursor, "unwound past starting frame");
+          break;
+        }
+        if (cur_sp.ValueOrDie() == g_trampoline_sp) {
+          // We found the parent frame. Check the return address.
+          UnwindStatusOr<crypto_word_t> cur_ip = cursor->GetIP();
+          if (!cur_ip.ok()) {
+            AddUnwindError(cursor,
+                           "error recovering return address: ", cur_ip.Error());
+          } else if (cur_ip.ValueOrDie() != kReturnAddress) {
+            AddUnwindError(cursor, "wrong return address");
+          }
+
+          // Check the remaining registers.
+          UnwindStatusOr<CallerState> state = cursor->GetCallerState();
+          if (!state.ok()) {
+            AddUnwindError(cursor,
+                           "error recovering registers: ", state.Error());
+          } else {
+            ForEachMismatch(state.ValueOrDie(), g_trampoline_state,
+                            [&](const char *reg) {
+                              AddUnwindError(cursor, reg, " was not recovered");
+                            });
+          }
+          break;
+        }
+      }
+    }
+  }
+}
+
+static void ReadUnwindResult(Result *out) {
+  for (size_t i = 0; i < g_num_unwind_errors; i++) {
+#if defined(OPENSSL_WINDOWS)
+    const crypto_word_t ip = g_unwind_errors[i].ip;
+    char buf[256];
+    DWORD64 displacement;
+    struct {
+      SYMBOL_INFO info;
+      char name_buf[128];
+    } symbol;
+    memset(&symbol, 0, sizeof(symbol));
+    symbol.info.SizeOfStruct = sizeof(symbol.info);
+    symbol.info.MaxNameLen = sizeof(symbol.name_buf);
+    if (SymFromAddr(GetCurrentProcess(), ip, &displacement, &symbol.info)) {
+      snprintf(buf, sizeof(buf), "unwinding at %s+%llu (0x%s): %s",
+               symbol.info.Name, displacement, WordToHex(ip).data(),
+               g_unwind_errors[i].str);
+    } else {
+      snprintf(buf, sizeof(buf), "unwinding at 0x%s: %s",
+               WordToHex(ip).data(), g_unwind_errors[i].str);
+    }
+    out->errors.emplace_back(buf);
+#else
+    out->errors.emplace_back(g_unwind_errors[i].str);
+#endif
+  }
+  if (g_num_unwind_errors == kMaxUnwindErrors) {
+    out->errors.emplace_back("(additional errors omitted)");
+  }
+  g_num_unwind_errors = 0;
+}
+
+#if defined(OPENSSL_WINDOWS)
+static DWORD g_main_thread;
+
+static long ExceptionHandler(EXCEPTION_POINTERS *info) {
+  if (info->ExceptionRecord->ExceptionCode != EXCEPTION_SINGLE_STEP ||
+      GetCurrentThreadId() != g_main_thread) {
+    return EXCEPTION_CONTINUE_SEARCH;
+  }
+
+  UnwindCursor cursor(*info->ContextRecord);
+  CheckUnwind(&cursor);
+  if (g_in_trampoline) {
+    // Windows clears the trap flag, so we must restore it.
+    info->ContextRecord->EFlags |= 0x100;
+  }
+  return EXCEPTION_CONTINUE_EXECUTION;
+}
+
+static void EnableUnwindTestsImpl() {
+  if (IsDebuggerPresent()) {
+    // Unwind tests drive logic via |EXCEPTION_SINGLE_STEP|, which conflicts with
+    // debuggers.
+    fprintf(stderr, "Debugger detected. Disabling unwind tests.\n");
+    return;
+  }
+
+  g_main_thread = GetCurrentThreadId();
+
+  SymSetOptions(SYMOPT_DEFERRED_LOADS);
+  if (!SymInitialize(GetCurrentProcess(), nullptr, TRUE)) {
+    fprintf(stderr, "Could not initialize symbols.\n");
+  }
+
+  if (AddVectoredExceptionHandler(0, ExceptionHandler) == nullptr) {
+    fprintf(stderr, "Error installing exception handler.\n");
+    abort();
+  }
+
+  g_unwind_tests_enabled = true;
+}
+#else  // !OPENSSL_WINDOWS
+// HandleEINTR runs |func| and returns the result, retrying the operation on
+// |EINTR|.
+template <typename Func>
+static auto HandleEINTR(const Func &func) -> decltype(func()) {
+  decltype(func()) ret;
+  do {
+    ret = func();
+  } while (ret < 0 && errno == EINTR);
+  return ret;
+}
+
+static bool ReadFileToString(std::string *out, const char *path) {
+  out->clear();
+
+  int fd = HandleEINTR([&] { return open(path, O_RDONLY); });
+  if (fd < 0) {
+    return false;
+  }
+
+  for (;;) {
+    char buf[1024];
+    ssize_t ret = HandleEINTR([&] { return read(fd, buf, sizeof(buf)); });
+    if (ret < 0) {
+      close(fd);
+      return false;
+    }
+    if (ret == 0) {
+      close(fd);
+      return true;
+    }
+    out->append(buf, static_cast<size_t>(ret));
+  }
+}
+
+static bool IsBeingDebugged() {
+  std::string status;
+  if (!ReadFileToString(&status, "/proc/self/status")) {
+    perror("error reading /proc/self/status");
+    return false;
+  }
+  std::string key = "\nTracerPid:\t";
+  size_t idx = status.find(key);
+  if (idx == std::string::npos) {
+    return false;
+  }
+  idx += key.size();
+  return idx < status.size() && status[idx] != '0';
+}
+
+static pthread_t g_main_thread;
+
+static void TrapHandler(int sig) {
+  // Note this is a signal handler, so only async-signal-safe functions may be
+  // used here. See signal-safety(7). libunwind promises local unwind is
+  // async-signal-safe.
+
+  // |pthread_equal| is not listed as async-signal-safe, but this is clearly an
+  // oversight.
+  if (!pthread_equal(g_main_thread, pthread_self())) {
+    FatalError("SIGTRAP on background thread");
+  }
+
+  unw_context_t ctx;
+  int ret = unw_getcontext(&ctx);
+  if (ret < 0) {
+    FatalError("Error getting unwind context: ", unw_strerror(ret));
+  }
+
+  UnwindCursor cursor(&ctx);
+  CheckUnwind(&cursor);
+}
+
+static void EnableUnwindTestsImpl() {
+  if (IsBeingDebugged()) {
+    // Unwind tests drive logic via |SIGTRAP|, which conflicts with debuggers.
+    fprintf(stderr, "Debugger detected. Disabling unwind tests.\n");
+    return;
+  }
+
+  g_main_thread = pthread_self();
+
+  struct sigaction trap_action;
+  OPENSSL_memset(&trap_action, 0, sizeof(trap_action));
+  sigemptyset(&trap_action.sa_mask);
+  trap_action.sa_handler = TrapHandler;
+  if (sigaction(SIGTRAP, &trap_action, NULL) != 0) {
+    perror("sigaction");
+    abort();
+  }
+
+  g_unwind_tests_enabled = true;
+}
+#endif  // OPENSSL_WINDOWS
+
+#else  // !SUPPORTS_UNWIND_TEST
+
+#if defined(SUPPORTS_ABI_TEST)
+static void ReadUnwindResult(Result *) {}
+#endif
+static void EnableUnwindTestsImpl() {}
+
+#endif  // SUPPORTS_UNWIND_TEST
+
+}  // namespace internal
+
+void EnableUnwindTests() { internal::EnableUnwindTestsImpl(); }
+
+bool UnwindTestsEnabled() { return internal::g_unwind_tests_enabled; }
+
+}  // namespace abi_test
diff --git a/src/crypto/test/abi_test.h b/src/crypto/test/abi_test.h
new file mode 100644
index 0000000..44547f8
--- /dev/null
+++ b/src/crypto/test/abi_test.h
@@ -0,0 +1,475 @@
+/* Copyright (c) 2018, Google Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
+
+#ifndef OPENSSL_HEADER_ABI_TEST_H
+#define OPENSSL_HEADER_ABI_TEST_H
+
+#include <gtest/gtest.h>
+
+#include <string>
+#include <type_traits>
+#include <vector>
+
+#include <openssl/base.h>
+
+#include "../internal.h"
+
+
+// abi_test provides routines for verifying that functions satisfy platform ABI
+// requirements.
+namespace abi_test {
+
+// Result stores the result of an ABI test.
+struct Result {
+  bool ok() const { return errors.empty(); }
+
+  std::vector<std::string> errors;
+};
+
+namespace internal {
+
+// DeductionGuard wraps |T| in a template, so that template argument deduction
+// does not apply to it. This may be used to force C++ to deduce template
+// arguments from another parameter.
+template <typename T>
+struct DeductionGuard {
+  using Type = T;
+};
+
+// Reg128 contains storage space for a 128-bit register.
+struct alignas(16) Reg128 {
+  bool operator==(const Reg128 &x) const { return x.lo == lo && x.hi == hi; }
+  bool operator!=(const Reg128 &x) const { return !((*this) == x); }
+  uint64_t lo, hi;
+};
+
+// LOOP_CALLER_STATE_REGISTERS is a macro that iterates over all registers the
+// callee is expected to save for the caller, with the exception of the stack
+// pointer. The stack pointer is tested implicitly by the function successfully
+// returning at all.
+#if defined(OPENSSL_X86_64)
+
+// References:
+// SysV64: https://github.com/hjl-tools/x86-psABI/wiki/x86-64-psABI-1.0.pdf
+// Win64: https://docs.microsoft.com/en-us/cpp/build/x64-software-conventions?view=vs-2017#register-usage
+#if defined(OPENSSL_WINDOWS)
+#define LOOP_CALLER_STATE_REGISTERS()  \
+  CALLER_STATE_REGISTER(uint64_t, rbx) \
+  CALLER_STATE_REGISTER(uint64_t, rbp) \
+  CALLER_STATE_REGISTER(uint64_t, rdi) \
+  CALLER_STATE_REGISTER(uint64_t, rsi) \
+  CALLER_STATE_REGISTER(uint64_t, r12) \
+  CALLER_STATE_REGISTER(uint64_t, r13) \
+  CALLER_STATE_REGISTER(uint64_t, r14) \
+  CALLER_STATE_REGISTER(uint64_t, r15) \
+  CALLER_STATE_REGISTER(Reg128, xmm6)  \
+  CALLER_STATE_REGISTER(Reg128, xmm7)  \
+  CALLER_STATE_REGISTER(Reg128, xmm8)  \
+  CALLER_STATE_REGISTER(Reg128, xmm9)  \
+  CALLER_STATE_REGISTER(Reg128, xmm10) \
+  CALLER_STATE_REGISTER(Reg128, xmm11) \
+  CALLER_STATE_REGISTER(Reg128, xmm12) \
+  CALLER_STATE_REGISTER(Reg128, xmm13) \
+  CALLER_STATE_REGISTER(Reg128, xmm14) \
+  CALLER_STATE_REGISTER(Reg128, xmm15)
+#else
+#define LOOP_CALLER_STATE_REGISTERS()  \
+  CALLER_STATE_REGISTER(uint64_t, rbx) \
+  CALLER_STATE_REGISTER(uint64_t, rbp) \
+  CALLER_STATE_REGISTER(uint64_t, r12) \
+  CALLER_STATE_REGISTER(uint64_t, r13) \
+  CALLER_STATE_REGISTER(uint64_t, r14) \
+  CALLER_STATE_REGISTER(uint64_t, r15)
+#endif  // OPENSSL_WINDOWS
+
+#elif defined(OPENSSL_X86)
+
+// References:
+// SysV32: https://uclibc.org/docs/psABI-i386.pdf
+// Win32: https://docs.microsoft.com/en-us/cpp/cpp/argument-passing-and-naming-conventions?view=vs-2017
+#define LOOP_CALLER_STATE_REGISTERS()  \
+  CALLER_STATE_REGISTER(uint32_t, esi) \
+  CALLER_STATE_REGISTER(uint32_t, edi) \
+  CALLER_STATE_REGISTER(uint32_t, ebx) \
+  CALLER_STATE_REGISTER(uint32_t, ebp)
+
+#elif defined(OPENSSL_ARM)
+
+// References:
+// AAPCS: http://infocenter.arm.com/help/topic/com.arm.doc.ihi0042f/IHI0042F_aapcs.pdf
+// iOS32: https://developer.apple.com/library/archive/documentation/Xcode/Conceptual/iPhoneOSABIReference/Articles/ARMv6FunctionCallingConventions.html
+// Linux: http://sourcery.mentor.com/sgpp/lite/arm/portal/kbattach142/arm_gnu_linux_%20abi.pdf
+//
+// ARM specifies a common calling convention, except r9 is left to the platform.
+// Linux treats r9 as callee-saved, while iOS 3+ treats it as caller-saved. Most
+// of our assembly treats it as callee-saved to be uniform, but we match the
+// platform to avoid false positives when testing compiler-generated output.
+#define LOOP_CALLER_STATE_REGISTERS_PRE_R9() \
+  CALLER_STATE_REGISTER(uint64_t, d8)        \
+  CALLER_STATE_REGISTER(uint64_t, d9)        \
+  CALLER_STATE_REGISTER(uint64_t, d10)       \
+  CALLER_STATE_REGISTER(uint64_t, d11)       \
+  CALLER_STATE_REGISTER(uint64_t, d12)       \
+  CALLER_STATE_REGISTER(uint64_t, d13)       \
+  CALLER_STATE_REGISTER(uint64_t, d14)       \
+  CALLER_STATE_REGISTER(uint64_t, d15)       \
+  CALLER_STATE_REGISTER(uint32_t, r4)        \
+  CALLER_STATE_REGISTER(uint32_t, r5)        \
+  CALLER_STATE_REGISTER(uint32_t, r6)        \
+  CALLER_STATE_REGISTER(uint32_t, r7)        \
+  CALLER_STATE_REGISTER(uint32_t, r8)
+#define LOOP_CALLER_STATE_REGISTERS_POST_R9() \
+  CALLER_STATE_REGISTER(uint32_t, r10)        \
+  CALLER_STATE_REGISTER(uint32_t, r11)
+#if defined(OPENSSL_APPLE)
+#define LOOP_CALLER_STATE_REGISTERS()  \
+  LOOP_CALLER_STATE_REGISTERS_PRE_R9() \
+  LOOP_CALLER_STATE_REGISTERS_POST_R9()
+#else  // !OPENSSL_APPLE
+#define LOOP_CALLER_STATE_REGISTERS()  \
+  LOOP_CALLER_STATE_REGISTERS_PRE_R9() \
+  CALLER_STATE_REGISTER(uint32_t, r9)  \
+  LOOP_CALLER_STATE_REGISTERS_POST_R9()
+#endif  // OPENSSL_APPLE
+
+#elif defined(OPENSSL_AARCH64)
+
+// References:
+// AAPCS64: http://infocenter.arm.com/help/topic/com.arm.doc.ihi0055b/IHI0055B_aapcs64.pdf
+// iOS64: https://developer.apple.com/library/archive/documentation/Xcode/Conceptual/iPhoneOSABIReference/Articles/ARM64FunctionCallingConventions.html
+//
+// In aarch64, r18 (x18 in a 64-bit context) is the platform register. iOS says
+// user code may not touch it. We found no clear reference for Linux. The iOS
+// behavior implies portable assembly cannot use it, and aarch64 has many
+// registers. Thus this framework ignores the register's existence. We can test
+// r18 violations with grep.
+#define LOOP_CALLER_STATE_REGISTERS()                                \
+  /* Per AAPCS64, section 5.1.2, only the bottom 64 bits of v8-v15 */ \
+  /* are preserved. These are accessed as dN. */                     \
+  CALLER_STATE_REGISTER(uint64_t, d8)                                \
+  CALLER_STATE_REGISTER(uint64_t, d9)                                \
+  CALLER_STATE_REGISTER(uint64_t, d10)                               \
+  CALLER_STATE_REGISTER(uint64_t, d11)                               \
+  CALLER_STATE_REGISTER(uint64_t, d12)                               \
+  CALLER_STATE_REGISTER(uint64_t, d13)                               \
+  CALLER_STATE_REGISTER(uint64_t, d14)                               \
+  CALLER_STATE_REGISTER(uint64_t, d15)                               \
+  /* For consistency with dN, use the 64-bit name xN, rather than */ \
+  /* the generic rN. */                                              \
+  CALLER_STATE_REGISTER(uint64_t, x19)                               \
+  CALLER_STATE_REGISTER(uint64_t, x20)                               \
+  CALLER_STATE_REGISTER(uint64_t, x21)                               \
+  CALLER_STATE_REGISTER(uint64_t, x22)                               \
+  CALLER_STATE_REGISTER(uint64_t, x23)                               \
+  CALLER_STATE_REGISTER(uint64_t, x24)                               \
+  CALLER_STATE_REGISTER(uint64_t, x25)                               \
+  CALLER_STATE_REGISTER(uint64_t, x26)                               \
+  CALLER_STATE_REGISTER(uint64_t, x27)                               \
+  CALLER_STATE_REGISTER(uint64_t, x28)                               \
+  CALLER_STATE_REGISTER(uint64_t, x29)
+
+#endif  // X86_64 || X86 || ARM || AARCH64
+
+// Enable ABI testing if all of the following are true.
+//
+// - We have CallerState and trampoline support for the architecture.
+//
+// - Assembly is enabled.
+//
+// - This is not a shared library build. Assembly functions are not reachable
+//   from tests in shared library builds.
+#if defined(LOOP_CALLER_STATE_REGISTERS) && !defined(OPENSSL_NO_ASM) && \
+    !defined(BORINGSSL_SHARED_LIBRARY)
+#define SUPPORTS_ABI_TEST
+
+// CallerState contains all caller state that the callee is expected to
+// preserve.
+struct CallerState {
+#define CALLER_STATE_REGISTER(type, name) type name;
+  LOOP_CALLER_STATE_REGISTERS()
+#undef CALLER_STATE_REGISTER
+};
+
+// RunTrampoline runs |func| on |argv|, recording ABI errors in |out|. It does
+// not perform any type-checking. If |unwind| is true and unwind tests have been
+// enabled, |func| is single-stepped under an unwind test.
+crypto_word_t RunTrampoline(Result *out, crypto_word_t func,
+                            const crypto_word_t *argv, size_t argc,
+                            bool unwind);
+
+template <typename T>
+inline crypto_word_t ToWord(T t) {
+#if !defined(OPENSSL_X86) && !defined(OPENSSL_X86_64) && \
+    !defined(OPENSSL_ARM) && !defined(OPENSSL_AARCH64)
+#error "Unknown architecture"
+#endif
+  static_assert(sizeof(T) <= sizeof(crypto_word_t),
+                "T is larger than crypto_word_t");
+  static_assert(sizeof(T) >= 4, "types under four bytes are complicated");
+
+  // ABIs are complex around arguments that are smaller than native words. For
+  // 32-bit architectures, the rules above imply we only have word-sized
+  // arguments. For 64-bit architectures, we still have assembly functions which
+  // take |int|.
+  //
+  // For aarch64, AAPCS64, section 5.4.2, clauses C.7 and C.14 say any
+  // remaining bits are unspecified. iOS64 contradicts this and says the callee
+  // extends arguments up to 32 bits, and only the upper 32 bits are
+  // unspecified. Rejecting parameters smaller than 32 bits avoids the
+  // divergence.
+  //
+  // TODO(davidben): Find authoritative citations for x86_64. For x86_64, I
+  // observed the behavior of Clang, GCC, and MSVC. ABI rules here may be
+  // inferred from two kinds of experiments:
+  //
+  // 1. When passing a value to a small-argument-taking function, does the
+  //    compiler ensure unused bits are cleared, sign-extended, etc.? Tests for
+  //    register parameters are confounded by x86_64's implicit clearing of
+  //    registers' upper halves, but passing some_u64 >> 1 usually clears this.
+  //
+  // 2. When compiling a small-argument-taking function, does the compiler make
+  //    assumptions about unused bits of arguments?
+  //
+  // MSVC for x86_64 is straightforward. It appears to tolerate and produce
+  // arbitrary values for unused bits, like AAPCS64.
+  //
+  // GCC and Clang for x86_64 are more complex. They match MSVC for stack
+  // parameters. However, for register parameters, they behave like iOS64 and,
+  // as callers, extend up to 32 bits, leaving the remainder arbitrary. When
+  // compiling a callee, Clang takes advantage of this conversion, but I was
+  // unable to make GCC do so.
+  //
+  // Note that, although the Win64 rules are sufficient to require our assembly
+  // be conservative, we wish for |CHECK_ABI| to support C-compiled functions,
+  // so it must enforce the correct rules for each platform.
+  //
+  // Fortunately, the |static_assert|s above cause all supported architectures
+  // to behave the same.
+  crypto_word_t ret;
+  // Filling extra bits with 0xaa will be vastly out of bounds for code
+  // expecting either sign- or zero-extension. (0xaa is 0b10101010.)
+  OPENSSL_memset(&ret, 0xaa, sizeof(ret));
+  OPENSSL_memcpy(&ret, &t, sizeof(t));
+  return ret;
+}
+
+// CheckImpl runs |func| on |args|, recording ABI errors in |out|. If |unwind|
+// is true and unwind tests have been enabled, |func| is single-stepped under an
+// unwind test.
+//
+// It returns the value as a |crypto_word_t| to work around problems when |R| is
+// void. |args| is wrapped in a |DeductionGuard| so |func| determines the
+// template arguments. Otherwise, |args| may deduce |Args| incorrectly. For
+// instance, if |func| takes const int *, and the caller passes an int *, the
+// compiler will complain the deduced types do not match.
+template <typename R, typename... Args>
+inline crypto_word_t CheckImpl(Result *out, bool unwind, R (*func)(Args...),
+                               typename DeductionGuard<Args>::Type... args) {
+  // We only support up to 8 arguments. This ensures all arguments on aarch64
+  // are passed in registers and avoids the iOS discrepancy around packing small
+  // arguments on the stack.
+  //
+  // https://developer.apple.com/library/archive/documentation/Xcode/Conceptual/iPhoneOSABIReference/Articles/ARM64FunctionCallingConventions.html
+  static_assert(sizeof...(args) <= 8,
+                "too many arguments for abi_test_trampoline");
+
+  // Allocate one extra entry so MSVC does not complain about zero-size arrays.
+  crypto_word_t argv[sizeof...(args) + 1] = {
+      ToWord(args)...,
+  };
+  return RunTrampoline(out, reinterpret_cast<crypto_word_t>(func), argv,
+                       sizeof...(args), unwind);
+}
+#else
+// To simplify callers when ABI testing support is unavailable, provide a backup
+// CheckImpl implementation. It must be specialized for void returns because we
+// call |func| directly.
+template <typename R, typename... Args>
+inline typename std::enable_if<!std::is_void<R>::value, crypto_word_t>::type
+CheckImpl(Result *out, bool /* unwind */, R (*func)(Args...),
+          typename DeductionGuard<Args>::Type... args) {
+  *out = Result();
+  return func(args...);
+}
+
+template <typename... Args>
+inline crypto_word_t CheckImpl(Result *out, bool /* unwind */,
+                               void (*func)(Args...),
+                               typename DeductionGuard<Args>::Type... args) {
+  *out = Result();
+  func(args...);
+  return 0;
+}
+#endif  // SUPPORTS_ABI_TEST
+
+// FixVAArgsString takes a string like "f, 1, 2" and returns a string like
+// "f(1, 2)".
+//
+// This is needed because the |CHECK_ABI| macro below cannot be defined as
+// CHECK_ABI(func, ...). The C specification requires that variadic macros bind
+// at least one variadic argument. Clang, GCC, and MSVC all ignore this, but
+// there are issues with trailing commas and different behaviors across
+// compilers.
+std::string FixVAArgsString(const char *str);
+
+// CheckGTest behaves like |CheckImpl|, but it returns the correct type and
+// raises GTest assertions on failure. If |unwind| is true and unwind tests are
+// enabled, |func| is single-stepped under an unwind test.
+template <typename R, typename... Args>
+inline R CheckGTest(const char *va_args_str, const char *file, int line,
+                    bool unwind, R (*func)(Args...),
+                    typename DeductionGuard<Args>::Type... args) {
+  Result result;
+  crypto_word_t ret = CheckImpl(&result, unwind, func, args...);
+  if (!result.ok()) {
+    testing::Message msg;
+    msg << "ABI failures in " << FixVAArgsString(va_args_str) << ":\n";
+    for (const auto &error : result.errors) {
+      msg << "    " << error << "\n";
+    }
+    ADD_FAILURE_AT(file, line) << msg;
+  }
+  return (R)ret;
+}
+
+}  // namespace internal
+
+// Check runs |func| on |args| and returns the result. If ABI-testing is
+// supported in this build configuration, it writes any ABI failures to |out|.
+// Otherwise, it runs the function transparently.
+template <typename R, typename... Args>
+inline R Check(Result *out, R (*func)(Args...),
+               typename internal::DeductionGuard<Args>::Type... args) {
+  return (R)internal::CheckImpl(out, false, func, args...);
+}
+
+// EnableUnwindTests enables unwind tests, if supported. If not supported, it
+// does nothing.
+void EnableUnwindTests();
+
+// UnwindTestsEnabled returns true if unwind tests are enabled and false
+// otherwise.
+bool UnwindTestsEnabled();
+
+}  // namespace abi_test
+
+// CHECK_ABI calls the first argument on the remaining arguments and returns the
+// result. If ABI-testing is supported in this build configuration, it adds a
+// non-fatal GTest failure if the call did not satisfy ABI requirements.
+//
+// |CHECK_ABI| does return the value and thus may replace any function call,
+// provided it takes only simple parameters. However, it is recommended to test
+// ABI separately from functional tests of assembly. Fully instrumenting a
+// function for ABI checking requires single-stepping the function, which is
+// inefficient.
+//
+// Functional testing requires coverage of input values, while ABI testing only
+// requires branch coverage. Most of our assembly is constant-time, so usually
+// only a few instrumented calls are necessary.
+//
+// TODO(https://crbug.com/boringssl/259): Most of Windows assembly currently
+// fails SEH testing. For now, |CHECK_ABI| behaves like |CHECK_ABI_NO_UNWIND|
+// on Windows. Functions which work with unwind testing on Windows should use
+// |CHECK_ABI_SEH|.
+#if defined(OPENSSL_WINDOWS)
+#define CHECK_ABI(...) CHECK_ABI_NO_UNWIND(__VA_ARGS__)
+#else
+#define CHECK_ABI(...) CHECK_ABI_SEH(__VA_ARGS__)
+#endif
+
+// CHECK_ABI_SEH behaves like |CHECK_ABI| but enables unwind testing on Windows.
+#define CHECK_ABI_SEH(...)                                               \
+  abi_test::internal::CheckGTest(#__VA_ARGS__, __FILE__, __LINE__, true, \
+                                 __VA_ARGS__)
+
+// CHECK_ABI_NO_UNWIND behaves like |CHECK_ABI| but disables unwind testing.
+#define CHECK_ABI_NO_UNWIND(...)                                          \
+  abi_test::internal::CheckGTest(#__VA_ARGS__, __FILE__, __LINE__, false, \
+                                 __VA_ARGS__)
+
+
+// Internal functions.
+
+#if defined(SUPPORTS_ABI_TEST)
+struct Uncallable {
+  Uncallable() = delete;
+};
+
+extern "C" {
+
+// abi_test_trampoline loads callee-saved registers from |state|, calls |func|
+// with |argv|, then saves the callee-saved registers into |state|. It returns
+// the result of |func|. If |unwind| is non-zero, this function triggers unwind
+// instrumentation.
+//
+// We give |func| type |crypto_word_t| to avoid tripping MSVC's warning 4191.
+crypto_word_t abi_test_trampoline(crypto_word_t func,
+                                  abi_test::internal::CallerState *state,
+                                  const crypto_word_t *argv, size_t argc,
+                                  crypto_word_t unwind);
+
+#if defined(OPENSSL_X86_64)
+// abi_test_unwind_start points at the instruction that starts unwind testing in
+// |abi_test_trampoline|. This is the value of the instruction pointer at the
+// first |SIGTRAP| during unwind testing.
+//
+// This symbol is not a function and should not be called.
+void abi_test_unwind_start(Uncallable);
+
+// abi_test_unwind_return points at the instruction immediately after the call in
+// |abi_test_trampoline|. When unwinding the function under test, this is the
+// expected address in the |abi_test_trampoline| frame. After this address, the
+// unwind tester should ignore |SIGTRAP| until |abi_test_unwind_stop|.
+//
+// This symbol is not a function and should not be called.
+void abi_test_unwind_return(Uncallable);
+
+// abi_test_unwind_stop is the value of the instruction pointer at the final
+// |SIGTRAP| during unwind testing.
+//
+// This symbol is not a function and should not be called.
+void abi_test_unwind_stop(Uncallable);
+
+// abi_test_bad_unwind_wrong_register preserves the ABI, but annotates the wrong
+// register in unwind metadata.
+void abi_test_bad_unwind_wrong_register(void);
+
+// abi_test_bad_unwind_temporary preserves the ABI, but temporarily corrupts the
+// storage space for a saved register, breaking unwind.
+void abi_test_bad_unwind_temporary(void);
+
+#if defined(OPENSSL_WINDOWS)
+// abi_test_bad_unwind_epilog preserves the ABI, and correctly annotates the
+// prolog, but the epilog does not match Win64's rules, breaking unwind during
+// the epilog.
+void abi_test_bad_unwind_epilog(void);
+#endif
+#endif  // OPENSSL_X86_64
+
+#if defined(OPENSSL_X86_64) || defined(OPENSSL_X86)
+// abi_test_get_and_clear_direction_flag clears the direction flag. If the flag
+// was previously set, it returns one. Otherwise, it returns zero.
+int abi_test_get_and_clear_direction_flag(void);
+
+// abi_test_set_direction_flag sets the direction flag. This does not conform to
+// ABI requirements and must only be called within a |CHECK_ABI| guard to avoid
+// errors later in the program.
+int abi_test_set_direction_flag(void);
+#endif  // OPENSSL_X86_64 || OPENSSL_X86
+
+}  // extern "C"
+#endif  // SUPPORTS_ABI_TEST
+
+
+#endif  // OPENSSL_HEADER_ABI_TEST_H
diff --git a/src/crypto/test/asm/trampoline-armv4.pl b/src/crypto/test/asm/trampoline-armv4.pl
new file mode 100755
index 0000000..30f510e
--- /dev/null
+++ b/src/crypto/test/asm/trampoline-armv4.pl
@@ -0,0 +1,182 @@
+#!/usr/bin/env perl
+# Copyright (c) 2019, Google Inc.
+#
+# Permission to use, copy, modify, and/or distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notice and this permission notice appear in all copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+# This file defines helper functions for crypto/test/abi_test.h on 32-bit
+# ARM. See that header for details on how to use this.
+#
+# For convenience, this file is linked into libcrypto, where consuming builds
+# already support architecture-specific sources. The static linker should drop
+# this code in non-test binaries. This includes a shared library build of
+# libcrypto, provided --gc-sections (ELF), -dead_strip (iOS), or equivalent is
+# used.
+#
+# References:
+#
+# AAPCS: http://infocenter.arm.com/help/topic/com.arm.doc.ihi0042f/IHI0042F_aapcs.pdf
+# iOS ARMv6: https://developer.apple.com/library/archive/documentation/Xcode/Conceptual/iPhoneOSABIReference/Articles/ARMv6FunctionCallingConventions.html
+# iOS ARMv7: https://developer.apple.com/library/archive/documentation/Xcode/Conceptual/iPhoneOSABIReference/Articles/ARMv7FunctionCallingConventions.html
+# Linux: http://sourcery.mentor.com/sgpp/lite/arm/portal/kbattach142/arm_gnu_linux_%20abi.pdf
+
+use strict;
+
+my $flavour = shift;
+my $output  = shift;
+if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/;
+my $dir = $1;
+my $xlate;
+( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
+die "can't locate arm-xlate.pl";
+
+open OUT, "| \"$^X\" \"$xlate\" $flavour \"$output\"";
+*STDOUT = *OUT;
+
+my ($func, $state, $argv, $argc) = ("r0", "r1", "r2", "r3");
+my $code = <<____;
+.syntax	unified
+
+.arch	armv7-a
+.fpu	vfp
+
+.text
+
+@ abi_test_trampoline loads callee-saved registers from |state|, calls |func|
+@ with |argv|, then saves the callee-saved registers into |state|. It returns
+@ the result of |func|. The |unwind| argument is unused.
+@ uint32_t abi_test_trampoline(void (*func)(...), CallerState *state,
+@                              const uint32_t *argv, size_t argc,
+@                              int unwind);
+.type	abi_test_trampoline, %function
+.globl	abi_test_trampoline
+.align	4
+abi_test_trampoline:
+.Labi_test_trampoline_begin:
+	@ Save parameters and all callee-saved registers. For convenience, we
+	@ save r9 on iOS even though it's volatile.
+	vstmdb	sp!, {d8-d15}
+	stmdb	sp!, {r0-r11,lr}
+
+	@ Reserve stack space for six (10-4) stack parameters, plus an extra 4
+	@ bytes to keep it 8-byte-aligned (see AAPCS, section 5.3).
+	sub     sp, sp, #28
+
+	@ Every register in AAPCS is either non-volatile or a parameter (except
+	@ r9 on iOS), so this code, by the actual call, loses all its scratch
+	@ registers. First fill in stack parameters while there are registers
+	@ to spare.
+	cmp	$argc, #4
+	bls	.Lstack_args_done
+	mov	r4, sp				@ r4 is the output pointer.
+	add	r5, $argv, $argc, lsl #2	@ Set r5 to the end of argv.
+	add	$argv, $argv, #16		@ Skip four arguments.
+.Lstack_args_loop:
+	ldr	r6, [$argv], #4
+	cmp	$argv, r5
+	str	r6, [r4], #4
+	bne	.Lstack_args_loop
+
+.Lstack_args_done:
+	@ Load registers from |$state|.
+	vldmia	$state!, {d8-d15}
+#if defined(__APPLE__)
+	@ r9 is volatile on iOS, so it is not part of |state|.
+	ldmia	$state!, {r4-r8,r10-r11}
+#else
+	ldmia	$state!, {r4-r11}
+#endif
+
+	@ Load register parameters. This uses up our remaining registers, so we
+	@ repurpose lr as scratch space.
+	ldr	$argc, [sp, #40]	@ Reload argc.
+	ldr	lr, [sp, #36]		@ Load argv into lr.
+	cmp	$argc, #3
+	bhi	.Larg_r3
+	beq	.Larg_r2
+	cmp	$argc, #1
+	bhi	.Larg_r1
+	beq	.Larg_r0
+	b	.Largs_done
+
+.Larg_r3:
+	ldr	r3, [lr, #12]	@ argv[3]
+.Larg_r2:
+	ldr	r2, [lr, #8]	@ argv[2]
+.Larg_r1:
+	ldr	r1, [lr, #4]	@ argv[1]
+.Larg_r0:
+	ldr	r0, [lr]	@ argv[0]
+.Largs_done:
+
+	@ With every other register in use, load the function pointer into lr
+	@ and call the function.
+	ldr	lr, [sp, #28]
+	blx	lr
+
+	@ r1-r3 are free for use again. The trampoline only supports
+	@ single-return functions. Pass r4-r11 to the caller.
+	ldr	$state, [sp, #32]
+	vstmia	$state!, {d8-d15}
+#if defined(__APPLE__)
+	@ r9 is volatile on iOS, so it is not part of |state|.
+	stmia	$state!, {r4-r8,r10-r11}
+#else
+	stmia	$state!, {r4-r11}
+#endif
+
+	@ Unwind the stack and restore registers.
+	add	sp, sp, #44		@ 44 = 28+16
+	ldmia	sp!, {r4-r11,lr}	@ Skip r0-r3 (see +16 above).
+	vldmia	sp!, {d8-d15}
+
+	bx	lr
+.size	abi_test_trampoline,.-abi_test_trampoline
+____
+
+# abi_test_clobber_* zeros the corresponding register. These are used to test
+# the ABI-testing framework.
+foreach (0..12) {
+  # This loop skips r13 (sp), r14 (lr, implicitly clobbered by every call), and
+  # r15 (pc).
+  $code .= <<____;
+.type	abi_test_clobber_r$_, %function
+.globl	abi_test_clobber_r$_
+.align	4
+abi_test_clobber_r$_:
+	mov	r$_, #0
+	bx	lr
+.size	abi_test_clobber_r$_,.-abi_test_clobber_r$_
+____
+}
+
+foreach (0..15) {
+  my $lo = "s".(2*$_);
+  my $hi = "s".(2*$_+1);
+  $code .= <<____;
+.type	abi_test_clobber_d$_, %function
+.globl	abi_test_clobber_d$_
+.align	4
+abi_test_clobber_d$_:
+	mov	r0, #0
+	vmov	$lo, r0
+	vmov	$hi, r0
+	bx	lr
+.size	abi_test_clobber_d$_,.-abi_test_clobber_d$_
+____
+}
+
+print $code;
+close STDOUT;
diff --git a/src/crypto/test/asm/trampoline-armv8.pl b/src/crypto/test/asm/trampoline-armv8.pl
new file mode 100755
index 0000000..aab5250
--- /dev/null
+++ b/src/crypto/test/asm/trampoline-armv8.pl
@@ -0,0 +1,209 @@
+#!/usr/bin/env perl
+# Copyright (c) 2019, Google Inc.
+#
+# Permission to use, copy, modify, and/or distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notice and this permission notice appear in all copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+# This file defines helper functions for crypto/test/abi_test.h on aarch64. See
+# that header for details on how to use this.
+#
+# For convenience, this file is linked into libcrypto, where consuming builds
+# already support architecture-specific sources. The static linker should drop
+# this code in non-test binaries. This includes a shared library build of
+# libcrypto, provided --gc-sections (ELF), -dead_strip (iOS), or equivalent is
+# used.
+#
+# References:
+#
+# AAPCS64: http://infocenter.arm.com/help/topic/com.arm.doc.ihi0055b/IHI0055B_aapcs64.pdf
+# iOS ARM64: https://developer.apple.com/library/archive/documentation/Xcode/Conceptual/iPhoneOSABIReference/Articles/ARM64FunctionCallingConventions.html
+
+use strict;
+
+my $flavour = shift;
+my $output  = shift;
+if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/;
+my $dir = $1;
+my $xlate;
+( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
+die "can't locate arm-xlate.pl";
+
+open OUT, "| \"$^X\" \"$xlate\" $flavour \"$output\"";
+*STDOUT = *OUT;
+
+my ($func, $state, $argv, $argc) = ("x0", "x1", "x2", "x3");
+my $code = <<____;
+.text
+
+// abi_test_trampoline loads callee-saved registers from |state|, calls |func|
+// with |argv|, then saves the callee-saved registers into |state|. It returns
+// the result of |func|. The |unwind| argument is unused.
+// uint64_t abi_test_trampoline(void (*func)(...), CallerState *state,
+//                              const uint64_t *argv, size_t argc,
+//                              uint64_t unwind);
+.type	abi_test_trampoline, %function
+.globl	abi_test_trampoline
+.align	4
+abi_test_trampoline:
+.Labi_test_trampoline_begin:
+	// Stack layout (low to high addresses)
+	//   x29,x30 (16 bytes)
+	//    d8-d15 (64 bytes)
+	//   x19-x28 (80 bytes)
+	//    $state (8 bytes)
+	//   padding (8 bytes)
+	stp	x29, x30, [sp, #-176]!
+	mov	x29, sp
+
+	// Save callee-saved registers and |state|.
+	stp	d8, d9, [sp, #16]
+	stp	d10, d11, [sp, #32]
+	stp	d12, d13, [sp, #48]
+	stp	d14, d15, [sp, #64]
+	stp	x19, x20, [sp, #80]
+	stp	x21, x22, [sp, #96]
+	stp	x23, x24, [sp, #112]
+	stp	x25, x26, [sp, #128]
+	stp	x27, x28, [sp, #144]
+	str	$state, [sp, #160]
+
+	// Load registers from |state|, with the exception of x29. x29 is the
+	// frame pointer and also callee-saved, but AAPCS64 allows platforms to
+	// mandate that x29 always point to a frame. iOS64 does so, which means
+	// we cannot fill x29 with entropy without violating ABI rules
+	// ourselves. x29 is tested separately below.
+	ldp	d8, d9, [$state], #16
+	ldp	d10, d11, [$state], #16
+	ldp	d12, d13, [$state], #16
+	ldp	d14, d15, [$state], #16
+	ldp	x19, x20, [$state], #16
+	ldp	x21, x22, [$state], #16
+	ldp	x23, x24, [$state], #16
+	ldp	x25, x26, [$state], #16
+	ldp	x27, x28, [$state], #16
+
+	// Move parameters into temporary registers.
+	mov	x9, $func
+	mov	x10, $argv
+	mov	x11, $argc
+
+	// Load parameters into registers.
+	cbz	x11, .Largs_done
+	ldr	x0, [x10], #8
+	subs	x11, x11, #1
+	b.eq	.Largs_done
+	ldr	x1, [x10], #8
+	subs	x11, x11, #1
+	b.eq	.Largs_done
+	ldr	x2, [x10], #8
+	subs	x11, x11, #1
+	b.eq	.Largs_done
+	ldr	x3, [x10], #8
+	subs	x11, x11, #1
+	b.eq	.Largs_done
+	ldr	x4, [x10], #8
+	subs	x11, x11, #1
+	b.eq	.Largs_done
+	ldr	x5, [x10], #8
+	subs	x11, x11, #1
+	b.eq	.Largs_done
+	ldr	x6, [x10], #8
+	subs	x11, x11, #1
+	b.eq	.Largs_done
+	ldr	x7, [x10], #8
+
+.Largs_done:
+	blr	x9
+
+	// Reload |state| and store registers.
+	ldr	$state, [sp, #160]
+	stp	d8, d9, [$state], #16
+	stp	d10, d11, [$state], #16
+	stp	d12, d13, [$state], #16
+	stp	d14, d15, [$state], #16
+	stp	x19, x20, [$state], #16
+	stp	x21, x22, [$state], #16
+	stp	x23, x24, [$state], #16
+	stp	x25, x26, [$state], #16
+	stp	x27, x28, [$state], #16
+
+	// |func| is required to preserve x29, the frame pointer. We cannot load
+	// random values into x29 (see comment above), so compare it against the
+	// expected value and zero the field of |state| if corrupted.
+	mov	x9, sp
+	cmp	x29, x9
+	b.eq	.Lx29_ok
+	str	xzr, [$state]
+
+.Lx29_ok:
+	// Restore callee-saved registers.
+	ldp	d8, d9, [sp, #16]
+	ldp	d10, d11, [sp, #32]
+	ldp	d12, d13, [sp, #48]
+	ldp	d14, d15, [sp, #64]
+	ldp	x19, x20, [sp, #80]
+	ldp	x21, x22, [sp, #96]
+	ldp	x23, x24, [sp, #112]
+	ldp	x25, x26, [sp, #128]
+	ldp	x27, x28, [sp, #144]
+
+	ldp	x29, x30, [sp], #176
+	ret
+.size	abi_test_trampoline,.-abi_test_trampoline
+____
+
+# abi_test_clobber_* zeros the corresponding register. These are used to test
+# the ABI-testing framework.
+foreach (0..29) {
+  # x18 is the platform register and off limits.
+  next if ($_ == 18);
+  $code .= <<____;
+.type	abi_test_clobber_x$_, %function
+.globl	abi_test_clobber_x$_
+.align	4
+abi_test_clobber_x$_:
+	mov	x$_, xzr
+	ret
+.size	abi_test_clobber_x$_,.-abi_test_clobber_x$_
+____
+}
+foreach (0..31) {
+  $code .= <<____;
+.type	abi_test_clobber_d$_, %function
+.globl	abi_test_clobber_d$_
+.align	4
+abi_test_clobber_d$_:
+	fmov	d$_, xzr
+	ret
+.size	abi_test_clobber_d$_,.-abi_test_clobber_d$_
+____
+}
+
+# abi_test_clobber_v*_upper clobbers only the upper half of v*. AAPCS64 only
+# requires the lower half (d*) be preserved.
+foreach (8..15) {
+  $code .= <<____;
+.type	abi_test_clobber_v${_}_upper, %function
+.globl	abi_test_clobber_v${_}_upper
+.align	4
+abi_test_clobber_v${_}_upper:
+	fmov	v${_}.d[1], xzr
+	ret
+.size	abi_test_clobber_v${_}_upper,.-abi_test_clobber_v${_}_upper
+____
+}
+
+print $code;
+close STDOUT;
diff --git a/src/crypto/test/asm/trampoline-x86.pl b/src/crypto/test/asm/trampoline-x86.pl
new file mode 100755
index 0000000..569a3dd
--- /dev/null
+++ b/src/crypto/test/asm/trampoline-x86.pl
@@ -0,0 +1,123 @@
+#!/usr/bin/env perl
+# Copyright (c) 2018, Google Inc.
+#
+# Permission to use, copy, modify, and/or distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notice and this permission notice appear in all copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+# This file defines helper functions for crypto/test/abi_test.h on x86. See
+# that header for details on how to use this.
+#
+# For convenience, this file is linked into libcrypto, where consuming builds
+# already support architecture-specific sources. The static linker should drop
+# this code in non-test binaries. This includes a shared library build of
+# libcrypto, provided --gc-sections (ELF), -dead_strip (Mac), or equivalent is
+# used.
+#
+# References:
+#
+# SysV ABI: https://uclibc.org/docs/psABI-i386.pdf
+# Win32 ABI: https://docs.microsoft.com/en-us/cpp/cpp/argument-passing-and-naming-conventions?view=vs-2017
+
+use strict;
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/;
+my $dir = $1;
+push(@INC, "${dir}", "${dir}../../perlasm");
+require "x86asm.pl";
+
+my $output = pop;
+open STDOUT, ">$output";
+
+&asm_init($ARGV[0]);
+
+# abi_test_trampoline loads callee-saved registers from |state|, calls |func|
+# with |argv|, then saves the callee-saved registers into |state|. It returns
+# the result of |func|. |unwind| is ignored.
+# uint32_t abi_test_trampoline(void (*func)(...), CallerState *state,
+#                              const uint32_t *argv, size_t argc,
+#                              int unwind);
+&function_begin("abi_test_trampoline");
+	# Load registers from |state|. Note |function_begin| (as opposed to
+	# |function_begin_B|) automatically saves all callee-saved registers, so we
+	# may freely clobber them.
+	&mov("ecx", &wparam(1));		# ecx = |state|
+	&mov("esi", &DWP(4*0, "ecx"));
+	&mov("edi", &DWP(4*1, "ecx"));
+	&mov("ebx", &DWP(4*2, "ecx"));
+	&mov("ebp", &DWP(4*3, "ecx"));
+
+	# Use a fixed stack allocation so |wparam| continues to work. abi_test.h
+	# supports at most 10 arguments. The SysV ABI requires a 16-byte-aligned
+	# stack on process entry, so round up to 3 (mod 4).
+	&stack_push(11);
+
+	# Copy parameters to stack.
+	&mov("eax", &wparam(2));		# eax = |argv|
+	&xor("ecx", "ecx");			# ecx = index of the next argument
+&set_label("loop");
+	&cmp("ecx", &wparam(3));		# Stop once ecx reaches |argc|.
+	&jae(&label("loop_done"));
+	&mov("edx", &DWP(0, "eax", "ecx", 4));
+	&mov(&DWP(0, "esp", "ecx", 4), "edx");
+	&add("ecx", 1);
+	&jmp(&label("loop"));
+
+&set_label("loop_done");
+	&call_ptr(&wparam(0));			# Call |func|.
+
+	&stack_pop(11);
+
+	# Save registers back into |state|.
+	&mov("ecx", &wparam(1));
+	&mov(&DWP(4*0, "ecx"), "esi");
+	&mov(&DWP(4*1, "ecx"), "edi");
+	&mov(&DWP(4*2, "ecx"), "ebx");
+	&mov(&DWP(4*3, "ecx"), "ebp");
+&function_end("abi_test_trampoline");
+
+# abi_test_get_and_clear_direction_flag clears the direction flag. If the flag
+# was previously set, it returns one. Otherwise, it returns zero.
+# int abi_test_get_and_clear_direction_flag(void);
+&function_begin_B("abi_test_get_and_clear_direction_flag");
+	&pushf();			# Copy the flags register into eax via the stack.
+	&pop("eax");
+	&and("eax", 0x400);		# Keep only the direction flag (mask 0x400).
+	&shr("eax", 10);		# Shift it down to bit 0, giving 0 or 1.
+	&cld();				# Clear the flag only after it has been read.
+	&ret();
+&function_end_B("abi_test_get_and_clear_direction_flag");
+
+# abi_test_set_direction_flag sets the direction flag.
+# void abi_test_set_direction_flag(void);
+&function_begin_B("abi_test_set_direction_flag");
+	&std();		# The flag is left set on return; nothing here restores it.
+	&ret();
+&function_end_B("abi_test_set_direction_flag");
+
+# Each abi_test_clobber_* function zeroes the register it is named after. They
+# exist so that the ABI-testing framework itself can be tested.
+foreach my $reg (qw(eax ebx ecx edx edi esi ebp)) {
+&function_begin_B("abi_test_clobber_$reg");
+	&xor($reg, $reg);
+	&ret();
+&function_end_B("abi_test_clobber_$reg");
+}
+foreach my $i (0..7) {
+&function_begin_B("abi_test_clobber_xmm$i");
+	&pxor("xmm$i", "xmm$i");
+	&ret();
+&function_end_B("abi_test_clobber_xmm$i");
+}
+
+&asm_finish();
+# Fail the build if the generated output could not be fully written.
+close STDOUT or die "error closing STDOUT: $!";
diff --git a/src/crypto/test/asm/trampoline-x86_64.pl b/src/crypto/test/asm/trampoline-x86_64.pl
new file mode 100755
index 0000000..8cb1410
--- /dev/null
+++ b/src/crypto/test/asm/trampoline-x86_64.pl
@@ -0,0 +1,559 @@
+#!/usr/bin/env perl
+# Copyright (c) 2018, Google Inc.
+#
+# Permission to use, copy, modify, and/or distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notice and this permission notice appear in all copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+# This file defines helper functions for crypto/test/abi_test.h on x86_64. See
+# that header for details on how to use this.
+#
+# For convenience, this file is linked into libcrypto, where consuming builds
+# already support architecture-specific sources. The static linker should drop
+# this code in non-test binaries. This includes a shared library build of
+# libcrypto, provided --gc-sections (ELF), -dead_strip (Mac), or equivalent is
+# used.
+#
+# References:
+#
+# SysV ABI: https://github.com/hjl-tools/x86-psABI/wiki/x86-64-psABI-1.0.pdf
+# Win64 ABI: https://docs.microsoft.com/en-us/cpp/build/x64-software-conventions?view=vs-2017
+
+use strict;
+
+my $flavour = shift;
+my $output  = shift;
+if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
+
+my $win64 = 0;
+$win64 = 1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/;
+my $dir = $1;
+my $xlate;
+( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
+die "can't locate x86_64-xlate.pl";
+# Send everything printed to STDOUT through the translator; die if it cannot start.
+open OUT, "| \"$^X\" \"$xlate\" $flavour \"$output\"" or die "can't call $xlate: $!";
+*STDOUT = *OUT;
+
+# @inp lists the registers used for function inputs, in argument order.
+my @inp = $win64 ? ("%rcx", "%rdx", "%r8", "%r9") :  # Win64: four registers
+                   ("%rdi", "%rsi", "%rdx", "%rcx", "%r8", "%r9");  # SysV: six
+
+# @caller_state lists the registers the callee must preserve on behalf of its
+# caller. It must stay in sync with the definition of CallerState in abi_test.h.
+my @caller_state = $win64 ?
+  ("%rbx", "%rbp", "%rdi", "%rsi", "%r12", "%r13", "%r14", "%r15",
+   "%xmm6", "%xmm7", "%xmm8", "%xmm9", "%xmm10", "%xmm11", "%xmm12",
+   "%xmm13", "%xmm14", "%xmm15") :
+  # Otherwise (SysV), just six general-purpose registers.
+  ("%rbx", "%rbp", "%r12", "%r13", "%r14", "%r15");
+
+# $caller_state_size is the size of CallerState in bytes: each general-purpose
+# register contributes 8 bytes and each XMM register contributes 16.
+my $caller_state_size = 0;
+foreach my $reg (@caller_state) {
+  # Classify by name prefix; anything unrecognized is a bug in @caller_state.
+  my $width = $reg =~ /^%r/   ? 8
+            : $reg =~ /^%xmm/ ? 16
+            : undef;
+  die "unknown register $reg" unless defined($width);
+  $caller_state_size += $width;
+}
+
+# load_caller_state returns assembly that loads the CallerState structure at
+# $offset($base) into the registers listed in @caller_state. It touches no
+# other registers, so $base must not itself be a CallerState register.
+# $callback, if given, is called after each movq or movdqa with that register's
+# offset (relative to $base) and name; its return value is appended as well.
+sub load_caller_state {
+  my ($offset, $base, $callback) = @_;
+  my $asm = "";
+  foreach my $r (@caller_state) {
+    my $this_offset = $offset;
+    if ($r =~ /^%r/) {
+      $asm .= "\tmovq\t$offset($base), $r\n";
+      $offset += 8;
+    } elsif ($r =~ /^%xmm/) {
+      $asm .= "\tmovdqa\t$offset($base), $r\n";
+      $offset += 16;
+    } else {
+      die "unknown register $r";
+    }
+    $asm .= $callback->($this_offset, $r) if (defined($callback));
+  }
+  return $asm;
+}
+
+# store_caller_state is the mirror image of load_caller_state: the assembly it
+# returns writes the current values of the registers out to $offset($base).
+sub store_caller_state {
+  my ($offset, $base, $callback) = @_;
+  my $asm = "";
+  foreach my $r (@caller_state) {
+    my $this_offset = $offset;
+    if ($r =~ /^%r/) {
+      $asm .= "\tmovq\t$r, $offset($base)\n";
+      $offset += 8;
+    } elsif ($r =~ /^%xmm/) {
+      $asm .= "\tmovdqa\t$r, $offset($base)\n";
+      $offset += 16;
+    } else {
+      die "unknown register $r";
+    }
+    $asm .= $callback->($this_offset, $r) if (defined($callback));
+  }
+  return $asm;
+}
+
+# $max_params is the maximum number of parameters abi_test_trampoline supports.
+my $max_params = 10;
+
+# Windows reserves stack space for the register-based parameters, while SysV
+# only reserves space for the overflow ones.
+my $stack_params_skip = $win64 ? scalar(@inp) : 0;
+my $num_stack_params = $win64 ? $max_params : $max_params - scalar(@inp);
+
+my ($func, $state, $argv, $argc, $unwind) = @inp;
+my $code = <<____;
+.text
+
+# abi_test_trampoline loads callee-saved registers from |state|, calls |func|
+# with |argv|, then saves the callee-saved registers into |state|. It returns
+# the result of |func|. If |unwind| is non-zero, this function triggers unwind
+# instrumentation.
+# uint64_t abi_test_trampoline(void (*func)(...), CallerState *state,
+#                              const uint64_t *argv, size_t argc,
+#                              int unwind);
+.type	abi_test_trampoline, \@abi-omnipotent
+.globl	abi_test_trampoline
+.align	16
+abi_test_trampoline:
+.Labi_test_trampoline_seh_begin:
+.cfi_startproc
+	# Stack layout:
+	#   8 bytes - align
+	#   $caller_state_size bytes - saved caller registers
+	#   8 bytes - scratch space
+	#   8 bytes - saved copy of \$unwind (SysV-only)
+	#   8 bytes - saved copy of \$state
+	#   8 bytes - saved copy of \$func
+	#   8 bytes - if needed for stack alignment
+	#   8*$num_stack_params bytes - parameters for \$func
+____
+my $stack_alloc_size = 8 + $caller_state_size + 8*3 + 8*$num_stack_params;
+if (!$win64) {
+  $stack_alloc_size += 8;  # Room for the saved copy of $unwind (SysV-only).
+}
+# SysV and Windows both require the stack to be 16-byte-aligned. The call
+# instruction offsets it by 8, so stack allocations must be 8 mod 16.
+if ($stack_alloc_size % 16 != 8) {
+  $num_stack_params++;  # The "if needed for stack alignment" slot.
+  $stack_alloc_size += 8;
+}
+my $stack_params_offset = 8 * $stack_params_skip;  # Non-zero only on Win64.
+my $func_offset = 8 * $num_stack_params;  # First slot above the parameters.
+my $state_offset = $func_offset + 8;
+# On Win64, unwind is already passed in memory. On SysV, it is passed in a
+# register and we must reserve stack space for it.
+my ($unwind_offset, $scratch_offset);
+if ($win64) {
+  $unwind_offset = $stack_alloc_size + 5*8;  # Past our frame, ret addr, 4 slots.
+  $scratch_offset = $state_offset + 8;
+} else {
+  $unwind_offset = $state_offset + 8;
+  $scratch_offset = $unwind_offset + 8;
+}
+my $caller_state_offset = $scratch_offset + 8;  # Saved registers sit on top.
+$code .= <<____;
+	subq	\$$stack_alloc_size, %rsp
+.cfi_adjust_cfa_offset	$stack_alloc_size
+.Labi_test_trampoline_seh_prolog_alloc:
+____
+$code .= <<____ if (!$win64);
+	movq	$unwind, $unwind_offset(%rsp)
+____
+# Store our caller's state. This is needed because we modify it ourselves, and
+# also to isolate the test infrastructure from the function under test failing
+# to save some register.
+my %reg_offsets;
+$code .= store_caller_state($caller_state_offset, "%rsp", sub {
+  my ($off, $reg) = @_;
+  $reg = substr($reg, 1);
+  $reg_offsets{$reg} = $off;
+  $off -= $stack_alloc_size + 8;
+  return <<____;
+.cfi_offset	$reg, $off
+.Labi_test_trampoline_seh_prolog_$reg:
+____
+});
+$code .= <<____;
+.Labi_test_trampoline_seh_prolog_end:
+____
+
+$code .= load_caller_state(0, $state);
+$code .= <<____;
+	# Stash \$func and \$state, so they are available after the call returns.
+	movq	$func, $func_offset(%rsp)
+	movq	$state, $state_offset(%rsp)
+
+	# Load parameters. Note this will clobber \$argv and \$argc, so we can
+	# only use non-parameter volatile registers. There are three, and they
+	# are the same between SysV and Win64: %rax, %r10, and %r11.
+	movq	$argv, %r10
+	movq	$argc, %r11
+____
+foreach (@inp) {
+  $code .= <<____;
+	dec	%r11
+	js	.Largs_done
+	movq	(%r10), $_
+	addq	\$8, %r10
+____
+}
+$code .= <<____;
+	leaq	$stack_params_offset(%rsp), %rax
+.Largs_loop:
+	dec	%r11
+	js	.Largs_done
+
+	# This block should be:
+	#    movq (%r10), %rtmp
+	#    movq %rtmp, (%rax)
+	# There are no spare registers available, so we spill into the scratch
+	# space.
+	movq	%r11, $scratch_offset(%rsp)
+	movq	(%r10), %r11
+	movq	%r11, (%rax)
+	movq	$scratch_offset(%rsp), %r11
+
+	addq	\$8, %r10
+	addq	\$8, %rax
+	jmp	.Largs_loop
+
+.Largs_done:
+	movq	$func_offset(%rsp), %rax
+	movq	$unwind_offset(%rsp), %r10
+	testq	%r10, %r10
+	jz	.Lno_unwind
+
+	# Set the trap flag.
+	pushfq
+	orq	\$0x100, 0(%rsp)
+	popfq
+
+	# Run an instruction to trigger a breakpoint immediately before the
+	# call.
+	nop
+.globl	abi_test_unwind_start
+abi_test_unwind_start:
+
+	call	*%rax
+.globl	abi_test_unwind_return
+abi_test_unwind_return:
+
+	# Clear the trap flag. Note this assumes the trap flag was clear on
+	# entry. We do not support instrumenting an unwind-instrumented
+	# |abi_test_trampoline|.
+	pushfq
+	andq	\$-0x101, 0(%rsp)	# -0x101 is ~0x100
+	popfq
+.globl	abi_test_unwind_stop
+abi_test_unwind_stop:
+
+	jmp	.Lcall_done
+
+.Lno_unwind:
+	call	*%rax
+
+.Lcall_done:
+	# Store what \$func did to our state, so our caller can check.
+	movq  $state_offset(%rsp), $state
+____
+$code .= store_caller_state(0, $state);
+
+# Restore our caller's state.
+$code .= load_caller_state($caller_state_offset, "%rsp", sub {
+  my ($off, $reg) = @_;
+  $reg = substr($reg, 1);
+  return ".cfi_restore\t$reg\n";
+});
+$code .= <<____;
+	addq	\$$stack_alloc_size, %rsp
+.cfi_adjust_cfa_offset	-$stack_alloc_size
+
+	# %rax already contains \$func's return value, unmodified.
+	ret
+.cfi_endproc
+.Labi_test_trampoline_seh_end:
+.size	abi_test_trampoline,.-abi_test_trampoline
+____
+
+# Each abi_test_clobber_* function zeroes the register it is named after. They
+# exist so that the ABI-testing framework itself can be tested.
+foreach my $suffix ("ax", "bx", "cx", "dx", "di", "si", "bp", 8..15) {
+  $code .= <<____;
+.type	abi_test_clobber_r${suffix}, \@abi-omnipotent
+.globl	abi_test_clobber_r${suffix}
+.align	16
+abi_test_clobber_r${suffix}:
+	xorq	%r${suffix}, %r${suffix}
+	ret
+.size	abi_test_clobber_r${suffix},.-abi_test_clobber_r${suffix}
+____
+}
+
+foreach my $i (0..15) {
+  $code .= <<____;
+.type	abi_test_clobber_xmm${i}, \@abi-omnipotent
+.globl	abi_test_clobber_xmm${i}
+.align	16
+abi_test_clobber_xmm${i}:
+	pxor	%xmm${i}, %xmm${i}
+	ret
+.size	abi_test_clobber_xmm${i},.-abi_test_clobber_xmm${i}
+____
+}
+
+$code .= <<____;
+# abi_test_bad_unwind_wrong_register preserves the ABI, but annotates the wrong
+# register in unwind metadata.
+# void abi_test_bad_unwind_wrong_register(void);
+.type	abi_test_bad_unwind_wrong_register, \@abi-omnipotent
+.globl	abi_test_bad_unwind_wrong_register
+.align	16
+abi_test_bad_unwind_wrong_register:
+.cfi_startproc
+.Labi_test_bad_unwind_wrong_register_seh_begin:
+	pushq	%r12
+.cfi_push	%r13	# This should be %r12
+.Labi_test_bad_unwind_wrong_register_seh_push_r13:
+	# Windows evaluates epilogs directly in the unwinder, rather than using
+	# unwind codes. Add a nop so there is one non-epilog point (immediately
+	# before the nop) where the unwinder can observe the mistake.
+	nop
+	popq	%r12
+.cfi_pop	%r12
+	ret
+.Labi_test_bad_unwind_wrong_register_seh_end:
+.cfi_endproc
+.size	abi_test_bad_unwind_wrong_register,.-abi_test_bad_unwind_wrong_register
+
+# abi_test_bad_unwind_temporary preserves the ABI, but temporarily corrupts the
+# storage space for a saved register, breaking unwind.
+# void abi_test_bad_unwind_temporary(void);
+.type	abi_test_bad_unwind_temporary, \@abi-omnipotent
+.globl	abi_test_bad_unwind_temporary
+.align	16
+abi_test_bad_unwind_temporary:
+.cfi_startproc
+.Labi_test_bad_unwind_temporary_seh_begin:
+	pushq	%r12
+.cfi_push	%r12
+.Labi_test_bad_unwind_temporary_seh_push_r12:
+
+	movq	%r12, %rax
+	inc	%rax
+	movq	%rax, (%rsp)
+	# Unwinding from here is incorrect. Although %r12 itself has not been
+	# changed, the unwind codes say to look in (%rsp) instead.
+
+	movq	%r12, (%rsp)
+	# Unwinding is now fixed.
+
+	popq	%r12
+.cfi_pop	%r12
+	ret
+.Labi_test_bad_unwind_temporary_seh_end:
+.cfi_endproc
+.size	abi_test_bad_unwind_temporary,.-abi_test_bad_unwind_temporary
+
+# abi_test_get_and_clear_direction_flag clears the direction flag. If the flag
+# was previously set, it returns one. Otherwise, it returns zero.
+# int abi_test_get_and_clear_direction_flag(void);
+.type	abi_test_get_and_clear_direction_flag, \@abi-omnipotent
+.globl	abi_test_get_and_clear_direction_flag
+abi_test_get_and_clear_direction_flag:
+	pushfq
+	popq	%rax
+	andq	\$0x400, %rax
+	shrq	\$10, %rax
+	cld
+	ret
+.size abi_test_get_and_clear_direction_flag,.-abi_test_get_and_clear_direction_flag
+
+# abi_test_set_direction_flag sets the direction flag.
+# void abi_test_set_direction_flag(void);
+.type	abi_test_set_direction_flag, \@abi-omnipotent
+.globl	abi_test_set_direction_flag
+abi_test_set_direction_flag:
+	std
+	ret
+.size abi_test_set_direction_flag,.-abi_test_set_direction_flag
+____
+
+if ($win64) {
+  $code .= <<____;
+# abi_test_bad_unwind_epilog preserves the ABI, and correctly annotates the
+# prolog, but the epilog does not match Win64's rules, breaking unwind during
+# the epilog.
+# void abi_test_bad_unwind_epilog(void);
+.type	abi_test_bad_unwind_epilog, \@abi-omnipotent
+.globl	abi_test_bad_unwind_epilog
+.align	16
+abi_test_bad_unwind_epilog:
+.Labi_test_bad_unwind_epilog_seh_begin:
+	pushq	%r12
+.Labi_test_bad_unwind_epilog_seh_push_r12:
+
+	nop
+
+	# The epilog should begin here, but the nop makes it invalid.
+	popq	%r12
+	nop
+	ret
+.Labi_test_bad_unwind_epilog_seh_end:
+.size	abi_test_bad_unwind_epilog,.-abi_test_bad_unwind_epilog
+____
+
+  # Add unwind metadata for SEH.
+  #
+  # TODO(davidben): This is all manual right now. Once we've added SEH tests,
+  # add support for emitting these in x86_64-xlate.pl, probably based on MASM
+  # and Yasm's unwind directives, and unify with CFI. (Sadly, NASM does not
+  # support these directives.) Then push that upstream to replace the
+  # error-prone and non-standard custom handlers.
+
+  # See https://docs.microsoft.com/en-us/cpp/build/struct-unwind-code?view=vs-2017
+  my $UWOP_PUSH_NONVOL = 0;
+  my $UWOP_ALLOC_LARGE = 1;
+  my $UWOP_ALLOC_SMALL = 2;
+  my $UWOP_SAVE_NONVOL = 4;
+  my $UWOP_SAVE_XMM128 = 8;
+
+  my %UWOP_REG_NUMBER = (rax => 0, rcx => 1, rdx => 2, rbx => 3, rsp => 4,
+                         rbp => 5, rsi => 6, rdi => 7,
+                         map(("r$_" => $_), (8..15)));
+
+  my $unwind_codes = "";
+  my $num_slots = 0;
+  foreach my $reg (reverse @caller_state) {
+    $reg = substr($reg, 1);
+    die "unknown register $reg" unless exists($reg_offsets{$reg});
+    if ($reg =~ /^r/) {
+      die "unknown register $reg" unless exists($UWOP_REG_NUMBER{$reg});
+      my $info = $UWOP_SAVE_NONVOL | ($UWOP_REG_NUMBER{$reg} << 4);
+      my $value = $reg_offsets{$reg} / 8;
+      $unwind_codes .= <<____;
+	.byte	.Labi_test_trampoline_seh_prolog_$reg-.Labi_test_trampoline_seh_begin
+	.byte	$info
+	.value	$value
+____
+      $num_slots += 2;
+    } elsif ($reg =~ /^xmm/) {
+      my $info = $UWOP_SAVE_XMM128 | (substr($reg, 3) << 4);
+      my $value = $reg_offsets{$reg} / 16;
+      $unwind_codes .= <<____;
+	.byte	.Labi_test_trampoline_seh_prolog_$reg-.Labi_test_trampoline_seh_begin
+	.byte	$info
+	.value	$value
+____
+      $num_slots += 2;
+    } else {
+      die "unknown register $reg";
+    }
+  }
+
+  if ($stack_alloc_size <= 128) {  # UWOP_ALLOC_SMALL: one slot, (size - 8) / 8.
+    my $info = $UWOP_ALLOC_SMALL | ((($stack_alloc_size - 8) / 8) << 4);
+    $unwind_codes .= <<____;
+	.byte	.Labi_test_trampoline_seh_prolog_alloc-.Labi_test_trampoline_seh_begin
+	.byte	$info
+____
+    $num_slots++;
+  } else {  # UWOP_ALLOC_LARGE: size / 8 in a 16-bit slot, so at most 512K - 8.
+    die "stack allocation needs three unwind slots" if ($stack_alloc_size > 512 * 1024 - 8);
+    my $info = $UWOP_ALLOC_LARGE;
+    my $value = $stack_alloc_size / 8;
+    $unwind_codes .= <<____;
+	.byte	.Labi_test_trampoline_seh_prolog_alloc-.Labi_test_trampoline_seh_begin
+	.byte	$info
+	.value	$value
+____
+    $num_slots += 2;
+  }
+
+  $code .= <<____;
+.section	.pdata
+.align	4
+	# https://docs.microsoft.com/en-us/cpp/build/struct-runtime-function?view=vs-2017
+	.rva	.Labi_test_trampoline_seh_begin
+	.rva	.Labi_test_trampoline_seh_end
+	.rva	.Labi_test_trampoline_seh_info
+
+	.rva	.Labi_test_bad_unwind_wrong_register_seh_begin
+	.rva	.Labi_test_bad_unwind_wrong_register_seh_end
+	.rva	.Labi_test_bad_unwind_wrong_register_seh_info
+
+	.rva	.Labi_test_bad_unwind_temporary_seh_begin
+	.rva	.Labi_test_bad_unwind_temporary_seh_end
+	.rva	.Labi_test_bad_unwind_temporary_seh_info
+
+	.rva	.Labi_test_bad_unwind_epilog_seh_begin
+	.rva	.Labi_test_bad_unwind_epilog_seh_end
+	.rva	.Labi_test_bad_unwind_epilog_seh_info
+
+.section	.xdata
+.align	8
+.Labi_test_trampoline_seh_info:
+	# https://docs.microsoft.com/en-us/cpp/build/struct-unwind-info?view=vs-2017
+	.byte	1	# version 1, no flags
+	.byte	.Labi_test_trampoline_seh_prolog_end-.Labi_test_trampoline_seh_begin
+	.byte	$num_slots
+	.byte	0	# no frame register
+$unwind_codes
+
+.align	8
+.Labi_test_bad_unwind_wrong_register_seh_info:
+	.byte	1	# version 1, no flags
+	.byte	.Labi_test_bad_unwind_wrong_register_seh_push_r13-.Labi_test_bad_unwind_wrong_register_seh_begin
+	.byte	1	# one slot
+	.byte	0	# no frame register
+
+	.byte	.Labi_test_bad_unwind_wrong_register_seh_push_r13-.Labi_test_bad_unwind_wrong_register_seh_begin
+	.byte	@{[$UWOP_PUSH_NONVOL | ($UWOP_REG_NUMBER{r13} << 4)]}
+
+.align	8
+.Labi_test_bad_unwind_temporary_seh_info:
+	.byte	1	# version 1, no flags
+	.byte	.Labi_test_bad_unwind_temporary_seh_push_r12-.Labi_test_bad_unwind_temporary_seh_begin
+	.byte	1	# one slot
+	.byte	0	# no frame register
+
+	.byte	.Labi_test_bad_unwind_temporary_seh_push_r12-.Labi_test_bad_unwind_temporary_seh_begin
+	.byte	@{[$UWOP_PUSH_NONVOL | ($UWOP_REG_NUMBER{r12} << 4)]}
+
+.align	8
+.Labi_test_bad_unwind_epilog_seh_info:
+	.byte	1	# version 1, no flags
+	.byte	.Labi_test_bad_unwind_epilog_seh_push_r12-.Labi_test_bad_unwind_epilog_seh_begin
+	.byte	1	# one slot
+	.byte	0	# no frame register
+
+	.byte	.Labi_test_bad_unwind_epilog_seh_push_r12-.Labi_test_bad_unwind_epilog_seh_begin
+	.byte	@{[$UWOP_PUSH_NONVOL | ($UWOP_REG_NUMBER{r12} << 4)]}
+____
+}
+
+print $code;  # Feed the accumulated assembly to x86_64-xlate.pl.
+close STDOUT or die "error closing STDOUT: $!";  # Catch a failing translator.
diff --git a/src/crypto/test/gtest_main.cc b/src/crypto/test/gtest_main.cc
index a557168..aeec0f5 100644
--- a/src/crypto/test/gtest_main.cc
+++ b/src/crypto/test/gtest_main.cc
@@ -20,6 +20,7 @@
 #include <openssl/cpu.h>
 #include <openssl/rand.h>
 
+#include "abi_test.h"
 #include "gtest_main.h"
 #include "../internal.h"
 
@@ -34,16 +35,15 @@
   testing::InitGoogleTest(&argc, argv);
   bssl::SetupGoogleTest();
 
-#if !defined(OPENSSL_WINDOWS)
+  bool unwind_tests = true;
   for (int i = 1; i < argc; i++) {
+#if !defined(OPENSSL_WINDOWS)
     if (strcmp(argv[i], "--fork_unsafe_buffering") == 0) {
       RAND_enable_fork_unsafe_buffering(-1);
     }
-  }
 #endif
 
 #if defined(TEST_ARM_CPUS)
-  for (int i = 1; i < argc; i++) {
     if (strncmp(argv[i], "--cpu=", 6) == 0) {
       const char *cpu = argv[i] + 6;
       uint32_t armcap;
@@ -68,8 +68,28 @@
       printf("Simulating CPU '%s'\n", cpu);
       *armcap_ptr = armcap;
     }
-  }
 #endif  // TEST_ARM_CPUS
 
-  return RUN_ALL_TESTS();
+    if (strcmp(argv[i], "--no_unwind_tests") == 0) {
+      unwind_tests = false;
+    }
+  }
+
+  if (unwind_tests) {
+    abi_test::EnableUnwindTests();
+  }
+
+  // Run the entire test suite under an ABI check. This is less effective than
+  // testing the individual assembly functions, but will catch issues with
+  // rarely-used registers.
+  abi_test::Result abi;
+  int ret = abi_test::Check(&abi, RUN_ALL_TESTS);
+  if (!abi.ok()) {
+    fprintf(stderr, "ABI failure in test suite:\n");
+    for (const auto &error : abi.errors) {
+      fprintf(stderr, "    %s\n", error.c_str());
+    }
+    exit(1);
+  }
+  return ret;
 }