external/boringssl: Sync to 5e578c9dba73460c3eb17f771c77fc8e36f7812e.
This includes the following changes:
https://boringssl.googlesource.com/boringssl/+log/58e449904e248f34bdfc2be7a609c58bcb0257b7..5e578c9dba73460c3eb17f771c77fc8e36f7812e
Test: BoringSSL CTS Presubmits
Change-Id: Ic1541b034545fa58a284ca35134b3719303455c7
diff --git a/linux-aarch64/crypto/fipsmodule/sha512-armv8.S b/linux-aarch64/crypto/fipsmodule/sha512-armv8.S
index 4645722..bb052b7 100644
--- a/linux-aarch64/crypto/fipsmodule/sha512-armv8.S
+++ b/linux-aarch64/crypto/fipsmodule/sha512-armv8.S
@@ -1,5 +1,46 @@
#if defined(__aarch64__)
-#include <openssl/arm_arch.h>
+// Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
+//
+// Licensed under the OpenSSL license (the "License"). You may not use
+// this file except in compliance with the License. You can obtain a copy
+// in the file LICENSE in the source distribution or at
+// https://www.openssl.org/source/license.html
+
+// ====================================================================
+// Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+// project. The module is, however, dual licensed under OpenSSL and
+// CRYPTOGAMS licenses depending on where you obtain it. For further
+// details see http://www.openssl.org/~appro/cryptogams/.
+//
+// Permission to use under GPLv2 terms is granted.
+// ====================================================================
+//
+// SHA256/512 for ARMv8.
+//
+// Performance in cycles per processed byte and improvement coefficient
+// over code generated with "default" compiler:
+//
+// SHA256-hw SHA256(*) SHA512
+// Apple A7 1.97 10.5 (+33%) 6.73 (-1%(**))
+// Cortex-A53 2.38 15.5 (+115%) 10.0 (+150%(***))
+// Cortex-A57 2.31 11.6 (+86%) 7.51 (+260%(***))
+// Denver 2.01 10.5 (+26%) 6.70 (+8%)
+// X-Gene 20.0 (+100%) 12.8 (+300%(***))
+// Mongoose 2.36 13.0 (+50%) 8.36 (+33%)
+//
+// (*) Software SHA256 results are of lesser relevance, presented
+// mostly for informational purposes.
+// (**) The result is a trade-off: it's possible to improve it by
+// 10% (or by 1 cycle per round), but at the cost of 20% loss
+// on Cortex-A53 (or by 4 cycles per round).
+// (***) Super-impressive coefficients over gcc-generated code are
+// indication of some compiler "pathology", most notably code
+// generated with -mgeneral-regs-only is significanty faster
+// and the gap is only 40-90%.
+
+#ifndef __KERNEL__
+# include <openssl/arm_arch.h>
+#endif
.text
@@ -1016,11 +1057,19 @@
.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
.quad 0 // terminator
.size .LK512,.-.LK512
+#ifndef __KERNEL__
.align 3
.LOPENSSL_armcap_P:
+# ifdef __ILP32__
+.long OPENSSL_armcap_P-.
+# else
.quad OPENSSL_armcap_P-.
+# endif
+#endif
.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
+#ifndef __KERNEL__
.comm OPENSSL_armcap_P,4,4
#endif
+#endif