external/boringssl: Sync to 3cbdc346.
This includes the following changes:
https://boringssl.googlesource.com/boringssl/+log/e34bcc91c07c0bf65ecc53a814d51f5246007150..3cbdc34619daafb9f8527fb9dd27afc8ee7dcf19
This removes android_compat_keywrap.c, as these APIs are now provided
natively by BoringSSL.
Test: cts-tradefed run cts -m CtsLibcoreTestCases -m
CtsLibcoreOkHttpTestCases -a arm64-v8a
Change-Id: I29bce93c45eb5b80fa739667bf6e357e0af03b7f
diff --git a/src/crypto/CMakeLists.txt b/src/crypto/CMakeLists.txt
index 5182a4a..4cc5ae2 100644
--- a/src/crypto/CMakeLists.txt
+++ b/src/crypto/CMakeLists.txt
@@ -17,6 +17,8 @@
elseif (${ARCH} STREQUAL "x86")
set(PERLASM_FLAGS "-fPIC -DOPENSSL_IA32_SSE2")
set(PERLASM_STYLE elf)
+ elseif (${ARCH} STREQUAL "ppc64le")
+ set(PERLASM_STYLE ppc64le)
else()
set(PERLASM_STYLE elf)
endif()
@@ -45,6 +47,7 @@
DEPENDS
${src}
${PROJECT_SOURCE_DIR}/crypto/perlasm/arm-xlate.pl
+ ${PROJECT_SOURCE_DIR}/crypto/perlasm/ppc-xlate.pl
${PROJECT_SOURCE_DIR}/crypto/perlasm/x86_64-xlate.pl
${PROJECT_SOURCE_DIR}/crypto/perlasm/x86asm.pl
${PROJECT_SOURCE_DIR}/crypto/perlasm/x86gas.pl
@@ -116,6 +119,7 @@
cpu-arm.c
cpu-arm-linux.c
cpu-intel.c
+ cpu-ppc64le.c
crypto.c
ex_data.c
mem.c
diff --git a/src/crypto/aes/CMakeLists.txt b/src/crypto/aes/CMakeLists.txt
index 0566e39..33eebf5 100644
--- a/src/crypto/aes/CMakeLists.txt
+++ b/src/crypto/aes/CMakeLists.txt
@@ -39,12 +39,21 @@
)
endif()
+if (${ARCH} STREQUAL "ppc64le")
+ set(
+ AES_ARCH_SOURCES
+
+ aesp8-ppc.${ASM_EXT}
+ )
+endif()
+
add_library(
aes
OBJECT
aes.c
+ key_wrap.c
mode_wrappers.c
${AES_ARCH_SOURCES}
@@ -60,6 +69,7 @@
perlasm(aes-armv4.${ASM_EXT} asm/aes-armv4.pl)
perlasm(bsaes-armv7.${ASM_EXT} asm/bsaes-armv7.pl)
perlasm(aesv8-armx.${ASM_EXT} asm/aesv8-armx.pl)
+perlasm(aesp8-ppc.${ASM_EXT} asm/aesp8-ppc.pl)
add_executable(
aes_test
diff --git a/src/crypto/aes/aes.c b/src/crypto/aes/aes.c
index 8823919..1aed63e 100644
--- a/src/crypto/aes/aes.c
+++ b/src/crypto/aes/aes.c
@@ -1066,12 +1066,12 @@
return CRYPTO_is_ARMv8_AES_capable();
}
-int aes_v8_set_encrypt_key(const uint8_t *user_key, const int bits,
+int aes_hw_set_encrypt_key(const uint8_t *user_key, const int bits,
AES_KEY *key);
-int aes_v8_set_decrypt_key(const uint8_t *user_key, const int bits,
+int aes_hw_set_decrypt_key(const uint8_t *user_key, const int bits,
AES_KEY *key);
-void aes_v8_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key);
-void aes_v8_decrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key);
+void aes_hw_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key);
+void aes_hw_decrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key);
#else
@@ -1079,19 +1079,19 @@
return 0;
}
-static int aes_v8_set_encrypt_key(const uint8_t *user_key, int bits, AES_KEY *key) {
+static int aes_hw_set_encrypt_key(const uint8_t *user_key, int bits, AES_KEY *key) {
abort();
}
-static int aes_v8_set_decrypt_key(const uint8_t *user_key, int bits, AES_KEY *key) {
+static int aes_hw_set_decrypt_key(const uint8_t *user_key, int bits, AES_KEY *key) {
abort();
}
-static void aes_v8_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key) {
+static void aes_hw_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key) {
abort();
}
-static void aes_v8_decrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key) {
+static void aes_hw_decrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key) {
abort();
}
@@ -1106,7 +1106,7 @@
void asm_AES_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key);
void AES_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key) {
if (hwaes_capable()) {
- aes_v8_encrypt(in, out, key);
+ aes_hw_encrypt(in, out, key);
} else {
asm_AES_encrypt(in, out, key);
}
@@ -1115,7 +1115,7 @@
void asm_AES_decrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key);
void AES_decrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key) {
if (hwaes_capable()) {
- aes_v8_decrypt(in, out, key);
+ aes_hw_decrypt(in, out, key);
} else {
asm_AES_decrypt(in, out, key);
}
@@ -1124,7 +1124,7 @@
int asm_AES_set_encrypt_key(const uint8_t *key, unsigned bits, AES_KEY *aeskey);
int AES_set_encrypt_key(const uint8_t *key, unsigned bits, AES_KEY *aeskey) {
if (hwaes_capable()) {
- return aes_v8_set_encrypt_key(key, bits, aeskey);
+ return aes_hw_set_encrypt_key(key, bits, aeskey);
} else {
return asm_AES_set_encrypt_key(key, bits, aeskey);
}
@@ -1133,7 +1133,7 @@
int asm_AES_set_decrypt_key(const uint8_t *key, unsigned bits, AES_KEY *aeskey);
int AES_set_decrypt_key(const uint8_t *key, unsigned bits, AES_KEY *aeskey) {
if (hwaes_capable()) {
- return aes_v8_set_decrypt_key(key, bits, aeskey);
+ return aes_hw_set_decrypt_key(key, bits, aeskey);
} else {
return asm_AES_set_decrypt_key(key, bits, aeskey);
}
diff --git a/src/crypto/aes/aes_test.cc b/src/crypto/aes/aes_test.cc
index e488d81..4fb3a31 100644
--- a/src/crypto/aes/aes_test.cc
+++ b/src/crypto/aes/aes_test.cc
@@ -15,88 +15,168 @@
#include <stdio.h>
#include <string.h>
+#include <memory>
+#include <vector>
+
#include <openssl/aes.h>
#include <openssl/crypto.h>
+#include "../test/file_test.h"
-static bool TestAES(const uint8_t *key, size_t key_len,
- const uint8_t plaintext[AES_BLOCK_SIZE],
- const uint8_t ciphertext[AES_BLOCK_SIZE]) {
+
+static bool TestRaw(FileTest *t) {
+ std::vector<uint8_t> key, plaintext, ciphertext;
+ if (!t->GetBytes(&key, "Key") ||
+ !t->GetBytes(&plaintext, "Plaintext") ||
+ !t->GetBytes(&ciphertext, "Ciphertext")) {
+ return false;
+ }
+
+ if (plaintext.size() != AES_BLOCK_SIZE ||
+ ciphertext.size() != AES_BLOCK_SIZE) {
+ t->PrintLine("Plaintext or Ciphertext not a block size.");
+ return false;
+ }
+
AES_KEY aes_key;
- if (AES_set_encrypt_key(key, key_len * 8, &aes_key) != 0) {
- fprintf(stderr, "AES_set_encrypt_key failed\n");
+ if (AES_set_encrypt_key(key.data(), 8 * key.size(), &aes_key) != 0) {
+ t->PrintLine("AES_set_encrypt_key failed.");
return false;
}
// Test encryption.
uint8_t block[AES_BLOCK_SIZE];
- AES_encrypt(plaintext, block, &aes_key);
- if (memcmp(block, ciphertext, AES_BLOCK_SIZE) != 0) {
- fprintf(stderr, "AES_encrypt gave the wrong output\n");
+ AES_encrypt(plaintext.data(), block, &aes_key);
+ if (!t->ExpectBytesEqual(block, AES_BLOCK_SIZE, ciphertext.data(),
+ ciphertext.size())) {
+ t->PrintLine("AES_encrypt gave the wrong output.");
return false;
}
// Test in-place encryption.
- memcpy(block, plaintext, AES_BLOCK_SIZE);
+ memcpy(block, plaintext.data(), AES_BLOCK_SIZE);
AES_encrypt(block, block, &aes_key);
- if (memcmp(block, ciphertext, AES_BLOCK_SIZE) != 0) {
- fprintf(stderr, "AES_encrypt gave the wrong output\n");
+ if (!t->ExpectBytesEqual(block, AES_BLOCK_SIZE, ciphertext.data(),
+ ciphertext.size())) {
+ t->PrintLine("In-place AES_encrypt gave the wrong output.");
return false;
}
- if (AES_set_decrypt_key(key, key_len * 8, &aes_key) != 0) {
- fprintf(stderr, "AES_set_decrypt_key failed\n");
+ if (AES_set_decrypt_key(key.data(), 8 * key.size(), &aes_key) != 0) {
+ t->PrintLine("AES_set_decrypt_key failed.");
return false;
}
// Test decryption.
- AES_decrypt(ciphertext, block, &aes_key);
- if (memcmp(block, plaintext, AES_BLOCK_SIZE) != 0) {
- fprintf(stderr, "AES_decrypt gave the wrong output\n");
+ AES_decrypt(ciphertext.data(), block, &aes_key);
+ if (!t->ExpectBytesEqual(block, AES_BLOCK_SIZE, plaintext.data(),
+ plaintext.size())) {
+ t->PrintLine("AES_decrypt gave the wrong output.");
return false;
}
// Test in-place decryption.
- memcpy(block, ciphertext, AES_BLOCK_SIZE);
+ memcpy(block, ciphertext.data(), AES_BLOCK_SIZE);
AES_decrypt(block, block, &aes_key);
- if (memcmp(block, plaintext, AES_BLOCK_SIZE) != 0) {
- fprintf(stderr, "AES_decrypt gave the wrong output\n");
+ if (!t->ExpectBytesEqual(block, AES_BLOCK_SIZE, plaintext.data(),
+ plaintext.size())) {
+ t->PrintLine("In-place AES_decrypt gave the wrong output.");
return false;
}
+
return true;
}
-int main() {
- CRYPTO_library_init();
+static bool TestKeyWrap(FileTest *t) {
+ // All test vectors use the default IV, so test both with implicit and
+ // explicit IV.
+ //
+ // TODO(davidben): Find test vectors that use a different IV.
+ static const uint8_t kDefaultIV[] = {
+ 0xa6, 0xa6, 0xa6, 0xa6, 0xa6, 0xa6, 0xa6, 0xa6,
+ };
- // Test vectors from FIPS-197, Appendix C.
- if (!TestAES((const uint8_t *)"\x00\x01\x02\x03\x04\x05\x06\x07"
- "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
- 128 / 8,
- (const uint8_t *)"\x00\x11\x22\x33\x44\x55\x66\x77"
- "\x88\x99\xaa\xbb\xcc\xdd\xee\xff",
- (const uint8_t *)"\x69\xc4\xe0\xd8\x6a\x7b\x04\x30"
- "\xd8\xcd\xb7\x80\x70\xb4\xc5\x5a") ||
- !TestAES((const uint8_t *)"\x00\x01\x02\x03\x04\x05\x06\x07"
- "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
- "\x10\x11\x12\x13\x14\x15\x16\x17",
- 192 / 8,
- (const uint8_t *)"\x00\x11\x22\x33\x44\x55\x66\x77"
- "\x88\x99\xaa\xbb\xcc\xdd\xee\xff",
- (const uint8_t *)"\xdd\xa9\x7c\xa4\x86\x4c\xdf\xe0"
- "\x6e\xaf\x70\xa0\xec\x0d\x71\x91") ||
- !TestAES((const uint8_t *)"\x00\x01\x02\x03\x04\x05\x06\x07"
- "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
- "\x10\x11\x12\x13\x14\x15\x16\x17"
- "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
- 256 / 8,
- (const uint8_t *)"\x00\x11\x22\x33\x44\x55\x66\x77"
- "\x88\x99\xaa\xbb\xcc\xdd\xee\xff",
- (const uint8_t *)"\x8e\xa2\xb7\xca\x51\x67\x45\xbf"
- "\xea\xfc\x49\x90\x4b\x49\x60\x89")) {
+ std::vector<uint8_t> key, plaintext, ciphertext;
+ if (!t->GetBytes(&key, "Key") ||
+ !t->GetBytes(&plaintext, "Plaintext") ||
+ !t->GetBytes(&ciphertext, "Ciphertext")) {
return false;
}
- printf("PASS\n");
- return 0;
+ if (plaintext.size() + 8 != ciphertext.size()) {
+ t->PrintLine("Invalid Plaintext and Ciphertext lengths.");
+ return false;
+ }
+
+ AES_KEY aes_key;
+ if (AES_set_encrypt_key(key.data(), 8 * key.size(), &aes_key) != 0) {
+ t->PrintLine("AES_set_encrypt_key failed.");
+ return false;
+ }
+
+ std::unique_ptr<uint8_t[]> buf(new uint8_t[ciphertext.size()]);
+ if (AES_wrap_key(&aes_key, nullptr /* iv */, buf.get(), plaintext.data(),
+ plaintext.size()) != static_cast<int>(ciphertext.size()) ||
+ !t->ExpectBytesEqual(buf.get(), ciphertext.size(), ciphertext.data(),
+ ciphertext.size())) {
+ t->PrintLine("AES_wrap_key with implicit IV failed.");
+ return false;
+ }
+
+ memset(buf.get(), 0, ciphertext.size());
+ if (AES_wrap_key(&aes_key, kDefaultIV, buf.get(), plaintext.data(),
+ plaintext.size()) != static_cast<int>(ciphertext.size()) ||
+ !t->ExpectBytesEqual(buf.get(), ciphertext.size(), ciphertext.data(),
+ ciphertext.size())) {
+ t->PrintLine("AES_wrap_key with explicit IV failed.");
+ return false;
+ }
+
+ if (AES_set_decrypt_key(key.data(), 8 * key.size(), &aes_key) != 0) {
+ t->PrintLine("AES_set_decrypt_key failed.");
+ return false;
+ }
+
+ buf.reset(new uint8_t[plaintext.size()]);
+ if (AES_unwrap_key(&aes_key, nullptr /* iv */, buf.get(), ciphertext.data(),
+ ciphertext.size()) != static_cast<int>(plaintext.size()) ||
+ !t->ExpectBytesEqual(buf.get(), plaintext.size(), plaintext.data(),
+ plaintext.size())) {
+ t->PrintLine("AES_unwrap_key with implicit IV failed.");
+ return false;
+ }
+
+ memset(buf.get(), 0, plaintext.size());
+ if (AES_unwrap_key(&aes_key, kDefaultIV, buf.get(), ciphertext.data(),
+ ciphertext.size()) != static_cast<int>(plaintext.size()) ||
+ !t->ExpectBytesEqual(buf.get(), plaintext.size(), plaintext.data(),
+ plaintext.size())) {
+ t->PrintLine("AES_unwrap_key with explicit IV failed.");
+ return false;
+ }
+
+ return true;
+}
+
+static bool TestAES(FileTest *t, void *arg) {
+ if (t->GetParameter() == "Raw") {
+ return TestRaw(t);
+ }
+ if (t->GetParameter() == "KeyWrap") {
+ return TestKeyWrap(t);
+ }
+
+ t->PrintLine("Unknown mode '%s'.", t->GetParameter().c_str());
+ return false;
+}
+
+int main(int argc, char **argv) {
+ CRYPTO_library_init();
+
+ if (argc != 2) {
+ fprintf(stderr, "%s <test file.txt>\n", argv[0]);
+ return 1;
+ }
+
+ return FileTestMain(TestAES, nullptr, argv[1]);
}
diff --git a/src/crypto/aes/aes_tests.txt b/src/crypto/aes/aes_tests.txt
new file mode 100644
index 0000000..d4e4c61
--- /dev/null
+++ b/src/crypto/aes/aes_tests.txt
@@ -0,0 +1,50 @@
+# Test vectors from FIPS-197, Appendix C.
+
+Mode = Raw
+Key = 000102030405060708090a0b0c0d0e0f
+Plaintext = 00112233445566778899aabbccddeeff
+Ciphertext = 69c4e0d86a7b0430d8cdb78070b4c55a
+
+Mode = Raw
+Key = 000102030405060708090a0b0c0d0e0f1011121314151617
+Plaintext = 00112233445566778899aabbccddeeff
+Ciphertext = dda97ca4864cdfe06eaf70a0ec0d7191
+
+Mode = Raw
+Key = 000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f
+Plaintext = 00112233445566778899aabbccddeeff
+Ciphertext = 8ea2b7ca516745bfeafc49904b496089
+
+
+# Test vectors from
+# http://csrc.nist.gov/groups/ST/toolkit/documents/kms/key-wrap.pdf
+
+Mode = KeyWrap
+Key = 000102030405060708090a0b0c0d0e0f
+Plaintext = 00112233445566778899aabbccddeeff
+Ciphertext = 1fa68b0a8112b447aef34bd8fb5a7b829d3e862371d2cfe5
+
+Mode = KeyWrap
+Key = 000102030405060708090a0b0c0d0e0f1011121314151617
+Plaintext = 00112233445566778899aabbccddeeff
+Ciphertext = 96778b25ae6ca435f92b5b97c050aed2468ab8a17ad84e5d
+
+Mode = KeyWrap
+Key = 000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f
+Plaintext = 00112233445566778899aabbccddeeff
+Ciphertext = 64e8c3f9ce0f5ba263e9777905818a2a93c8191e7d6e8ae7
+
+Mode = KeyWrap
+Key = 000102030405060708090a0b0c0d0e0f1011121314151617
+Plaintext = 00112233445566778899aabbccddeeff0001020304050607
+Ciphertext = 031d33264e15d33268f24ec260743edce1c6c7ddee725a936ba814915c6762d2
+
+Mode = KeyWrap
+Key = 000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f
+Plaintext = 00112233445566778899aabbccddeeff0001020304050607
+Ciphertext = a8f9bc1612c68b3ff6e6f4fbe30e71e4769c8b80a32cb8958cd5d17d6b254da1
+
+Mode = KeyWrap
+Key = 000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f
+Plaintext = 00112233445566778899aabbccddeeff000102030405060708090a0b0c0d0e0f
+Ciphertext = 28c9f404c4b810f4cbccb35cfb87f8263f5786e2d80ed326cbc7f0e71a99f43bfb988b9b7a02dd21
diff --git a/src/crypto/aes/asm/aesp8-ppc.pl b/src/crypto/aes/asm/aesp8-ppc.pl
new file mode 100644
index 0000000..4bdcff7
--- /dev/null
+++ b/src/crypto/aes/asm/aesp8-ppc.pl
@@ -0,0 +1,3805 @@
+#! /usr/bin/env perl
+# Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+#
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+# This module implements support for AES instructions as per PowerISA
+# specification version 2.07, first implemented by POWER8 processor.
+# The module is endian-agnostic in sense that it supports both big-
+# and little-endian cases. Data alignment in parallelizable modes is
+# handled with VSX loads and stores, which implies MSR.VSX flag being
+# set. It should also be noted that ISA specification doesn't prohibit
+# alignment exceptions for these instructions on page boundaries.
+# Initially alignment was handled in pure AltiVec/VMX way [when data
+# is aligned programmatically, which in turn guarantees exception-
+# free execution], but it turned to hamper performance when vcipher
+# instructions are interleaved. It's reckoned that eventual
+# misalignment penalties at page boundaries are in average lower
+# than additional overhead in pure AltiVec approach.
+#
+# May 2016
+#
+# Add XTS subroutine, 9x on little- and 12x improvement on big-endian
+# systems were measured.
+#
+######################################################################
+# Current large-block performance in cycles per byte processed with
+# 128-bit key (less is better).
+#
+# CBC en-/decrypt CTR XTS
+# POWER8[le] 3.96/0.72 0.74 1.1
+# POWER8[be] 3.75/0.65 0.66 1.0
+
+$flavour = shift;
+
+if ($flavour =~ /64/) {
+ $SIZE_T =8;
+ $LRSAVE =2*$SIZE_T;
+ $STU ="stdu";
+ $POP ="ld";
+ $PUSH ="std";
+ $UCMP ="cmpld";
+ $SHL ="sldi";
+} elsif ($flavour =~ /32/) {
+ $SIZE_T =4;
+ $LRSAVE =$SIZE_T;
+ $STU ="stwu";
+ $POP ="lwz";
+ $PUSH ="stw";
+ $UCMP ="cmplw";
+ $SHL ="slwi";
+} else { die "nonsense $flavour"; }
+
+$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
+die "can't locate ppc-xlate.pl";
+
+open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";
+
+$FRAME=8*$SIZE_T;
+$prefix="aes_hw";
+
+$sp="r1";
+$vrsave="r12";
+
+#########################################################################
+{{{ # Key setup procedures #
+my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
+my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
+my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));
+
+$code.=<<___;
+.machine "any"
+
+.text
+
+.align 7
+rcon:
+.long 0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev
+.long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev
+.long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev
+.long 0,0,0,0 ?asis
+Lconsts:
+ mflr r0
+ bcl 20,31,\$+4
+ mflr $ptr #vvvvv "distance between . and rcon
+ addi $ptr,$ptr,-0x48
+ mtlr r0
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,0,0
+.asciz "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
+
+.globl .${prefix}_set_encrypt_key
+.align 5
+.${prefix}_set_encrypt_key:
+Lset_encrypt_key:
+ mflr r11
+ $PUSH r11,$LRSAVE($sp)
+
+ li $ptr,-1
+ ${UCMP}i $inp,0
+ beq- Lenc_key_abort # if ($inp==0) return -1;
+ ${UCMP}i $out,0
+ beq- Lenc_key_abort # if ($out==0) return -1;
+ li $ptr,-2
+ cmpwi $bits,128
+ blt- Lenc_key_abort
+ cmpwi $bits,256
+ bgt- Lenc_key_abort
+ andi. r0,$bits,0x3f
+ bne- Lenc_key_abort
+
+ lis r0,0xfff0
+ mfspr $vrsave,256
+ mtspr 256,r0
+
+ bl Lconsts
+ mtlr r11
+
+ neg r9,$inp
+ lvx $in0,0,$inp
+ addi $inp,$inp,15 # 15 is not typo
+ lvsr $key,0,r9 # borrow $key
+ li r8,0x20
+ cmpwi $bits,192
+ lvx $in1,0,$inp
+ le?vspltisb $mask,0x0f # borrow $mask
+ lvx $rcon,0,$ptr
+ le?vxor $key,$key,$mask # adjust for byte swap
+ lvx $mask,r8,$ptr
+ addi $ptr,$ptr,0x10
+ vperm $in0,$in0,$in1,$key # align [and byte swap in LE]
+ li $cnt,8
+ vxor $zero,$zero,$zero
+ mtctr $cnt
+
+ ?lvsr $outperm,0,$out
+ vspltisb $outmask,-1
+ lvx $outhead,0,$out
+ ?vperm $outmask,$zero,$outmask,$outperm
+
+ blt Loop128
+ addi $inp,$inp,8
+ beq L192
+ addi $inp,$inp,8
+ b L256
+
+.align 4
+Loop128:
+ vperm $key,$in0,$in0,$mask # rotate-n-splat
+ vsldoi $tmp,$zero,$in0,12 # >>32
+ vperm $outtail,$in0,$in0,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ vcipherlast $key,$key,$rcon
+ stvx $stage,0,$out
+ addi $out,$out,16
+
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ vadduwm $rcon,$rcon,$rcon
+ vxor $in0,$in0,$key
+ bdnz Loop128
+
+ lvx $rcon,0,$ptr # last two round keys
+
+ vperm $key,$in0,$in0,$mask # rotate-n-splat
+ vsldoi $tmp,$zero,$in0,12 # >>32
+ vperm $outtail,$in0,$in0,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ vcipherlast $key,$key,$rcon
+ stvx $stage,0,$out
+ addi $out,$out,16
+
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ vadduwm $rcon,$rcon,$rcon
+ vxor $in0,$in0,$key
+
+ vperm $key,$in0,$in0,$mask # rotate-n-splat
+ vsldoi $tmp,$zero,$in0,12 # >>32
+ vperm $outtail,$in0,$in0,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ vcipherlast $key,$key,$rcon
+ stvx $stage,0,$out
+ addi $out,$out,16
+
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ vxor $in0,$in0,$key
+ vperm $outtail,$in0,$in0,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ stvx $stage,0,$out
+
+ addi $inp,$out,15 # 15 is not typo
+ addi $out,$out,0x50
+
+ li $rounds,10
+ b Ldone
+
+.align 4
+L192:
+ lvx $tmp,0,$inp
+ li $cnt,4
+ vperm $outtail,$in0,$in0,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ stvx $stage,0,$out
+ addi $out,$out,16
+ vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
+ vspltisb $key,8 # borrow $key
+ mtctr $cnt
+ vsububm $mask,$mask,$key # adjust the mask
+
+Loop192:
+ vperm $key,$in1,$in1,$mask # roate-n-splat
+ vsldoi $tmp,$zero,$in0,12 # >>32
+ vcipherlast $key,$key,$rcon
+
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+
+ vsldoi $stage,$zero,$in1,8
+ vspltw $tmp,$in0,3
+ vxor $tmp,$tmp,$in1
+ vsldoi $in1,$zero,$in1,12 # >>32
+ vadduwm $rcon,$rcon,$rcon
+ vxor $in1,$in1,$tmp
+ vxor $in0,$in0,$key
+ vxor $in1,$in1,$key
+ vsldoi $stage,$stage,$in0,8
+
+ vperm $key,$in1,$in1,$mask # rotate-n-splat
+ vsldoi $tmp,$zero,$in0,12 # >>32
+ vperm $outtail,$stage,$stage,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ vcipherlast $key,$key,$rcon
+ stvx $stage,0,$out
+ addi $out,$out,16
+
+ vsldoi $stage,$in0,$in1,8
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vperm $outtail,$stage,$stage,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ stvx $stage,0,$out
+ addi $out,$out,16
+
+ vspltw $tmp,$in0,3
+ vxor $tmp,$tmp,$in1
+ vsldoi $in1,$zero,$in1,12 # >>32
+ vadduwm $rcon,$rcon,$rcon
+ vxor $in1,$in1,$tmp
+ vxor $in0,$in0,$key
+ vxor $in1,$in1,$key
+ vperm $outtail,$in0,$in0,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ stvx $stage,0,$out
+ addi $inp,$out,15 # 15 is not typo
+ addi $out,$out,16
+ bdnz Loop192
+
+ li $rounds,12
+ addi $out,$out,0x20
+ b Ldone
+
+.align 4
+L256:
+ lvx $tmp,0,$inp
+ li $cnt,7
+ li $rounds,14
+ vperm $outtail,$in0,$in0,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ stvx $stage,0,$out
+ addi $out,$out,16
+ vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
+ mtctr $cnt
+
+Loop256:
+ vperm $key,$in1,$in1,$mask # rotate-n-splat
+ vsldoi $tmp,$zero,$in0,12 # >>32
+ vperm $outtail,$in1,$in1,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ vcipherlast $key,$key,$rcon
+ stvx $stage,0,$out
+ addi $out,$out,16
+
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ vadduwm $rcon,$rcon,$rcon
+ vxor $in0,$in0,$key
+ vperm $outtail,$in0,$in0,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ stvx $stage,0,$out
+ addi $inp,$out,15 # 15 is not typo
+ addi $out,$out,16
+ bdz Ldone
+
+ vspltw $key,$in0,3 # just splat
+ vsldoi $tmp,$zero,$in1,12 # >>32
+ vsbox $key,$key
+
+ vxor $in1,$in1,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in1,$in1,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in1,$in1,$tmp
+
+ vxor $in1,$in1,$key
+ b Loop256
+
+.align 4
+Ldone:
+ lvx $in1,0,$inp # redundant in aligned case
+ vsel $in1,$outhead,$in1,$outmask
+ stvx $in1,0,$inp
+ li $ptr,0
+ mtspr 256,$vrsave
+ stw $rounds,0($out)
+
+Lenc_key_abort:
+ mr r3,$ptr
+ blr
+ .long 0
+ .byte 0,12,0x14,1,0,0,3,0
+ .long 0
+.size .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key
+
+.globl .${prefix}_set_decrypt_key
+.align 5
+.${prefix}_set_decrypt_key:
+ $STU $sp,-$FRAME($sp)
+ mflr r10
+ $PUSH r10,$FRAME+$LRSAVE($sp)
+ bl Lset_encrypt_key
+ mtlr r10
+
+ cmpwi r3,0
+ bne- Ldec_key_abort
+
+ slwi $cnt,$rounds,4
+ subi $inp,$out,240 # first round key
+ srwi $rounds,$rounds,1
+ add $out,$inp,$cnt # last round key
+ mtctr $rounds
+
+Ldeckey:
+ lwz r0, 0($inp)
+ lwz r6, 4($inp)
+ lwz r7, 8($inp)
+ lwz r8, 12($inp)
+ addi $inp,$inp,16
+ lwz r9, 0($out)
+ lwz r10,4($out)
+ lwz r11,8($out)
+ lwz r12,12($out)
+ stw r0, 0($out)
+ stw r6, 4($out)
+ stw r7, 8($out)
+ stw r8, 12($out)
+ subi $out,$out,16
+ stw r9, -16($inp)
+ stw r10,-12($inp)
+ stw r11,-8($inp)
+ stw r12,-4($inp)
+ bdnz Ldeckey
+
+ xor r3,r3,r3 # return value
+Ldec_key_abort:
+ addi $sp,$sp,$FRAME
+ blr
+ .long 0
+ .byte 0,12,4,1,0x80,0,3,0
+ .long 0
+.size .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
+___
+}}}
+#########################################################################
+{{{ # Single block en- and decrypt procedures #
+sub gen_block () {
+my $dir = shift;
+my $n = $dir eq "de" ? "n" : "";
+my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));
+
+$code.=<<___;
+.globl .${prefix}_${dir}crypt
+.align 5
+.${prefix}_${dir}crypt:
+ lwz $rounds,240($key)
+ lis r0,0xfc00
+ mfspr $vrsave,256
+ li $idx,15 # 15 is not typo
+ mtspr 256,r0
+
+ lvx v0,0,$inp
+ neg r11,$out
+ lvx v1,$idx,$inp
+ lvsl v2,0,$inp # inpperm
+ le?vspltisb v4,0x0f
+ ?lvsl v3,0,r11 # outperm
+ le?vxor v2,v2,v4
+ li $idx,16
+ vperm v0,v0,v1,v2 # align [and byte swap in LE]
+ lvx v1,0,$key
+ ?lvsl v5,0,$key # keyperm
+ srwi $rounds,$rounds,1
+ lvx v2,$idx,$key
+ addi $idx,$idx,16
+ subi $rounds,$rounds,1
+ ?vperm v1,v1,v2,v5 # align round key
+
+ vxor v0,v0,v1
+ lvx v1,$idx,$key
+ addi $idx,$idx,16
+ mtctr $rounds
+
+Loop_${dir}c:
+ ?vperm v2,v2,v1,v5
+ v${n}cipher v0,v0,v2
+ lvx v2,$idx,$key
+ addi $idx,$idx,16
+ ?vperm v1,v1,v2,v5
+ v${n}cipher v0,v0,v1
+ lvx v1,$idx,$key
+ addi $idx,$idx,16
+ bdnz Loop_${dir}c
+
+ ?vperm v2,v2,v1,v5
+ v${n}cipher v0,v0,v2
+ lvx v2,$idx,$key
+ ?vperm v1,v1,v2,v5
+ v${n}cipherlast v0,v0,v1
+
+ vspltisb v2,-1
+ vxor v1,v1,v1
+ li $idx,15 # 15 is not typo
+ ?vperm v2,v1,v2,v3 # outmask
+ le?vxor v3,v3,v4
+ lvx v1,0,$out # outhead
+ vperm v0,v0,v0,v3 # rotate [and byte swap in LE]
+ vsel v1,v1,v0,v2
+ lvx v4,$idx,$out
+ stvx v1,0,$out
+ vsel v0,v0,v4,v2
+ stvx v0,$idx,$out
+
+ mtspr 256,$vrsave
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,3,0
+ .long 0
+.size .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
+___
+}
+&gen_block("en");
+&gen_block("de");
+}}}
+#########################################################################
+{{{ # CBC en- and decrypt procedures #
+my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
+my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
+my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
+ map("v$_",(4..10));
+$code.=<<___;
+.globl .${prefix}_cbc_encrypt
+.align 5
+.${prefix}_cbc_encrypt:
+ ${UCMP}i $len,16
+ bltlr-
+
+ cmpwi $enc,0 # test direction
+ lis r0,0xffe0
+ mfspr $vrsave,256
+ mtspr 256,r0
+
+ li $idx,15
+ vxor $rndkey0,$rndkey0,$rndkey0
+ le?vspltisb $tmp,0x0f
+
+ lvx $ivec,0,$ivp # load [unaligned] iv
+ lvsl $inpperm,0,$ivp
+ lvx $inptail,$idx,$ivp
+ le?vxor $inpperm,$inpperm,$tmp
+ vperm $ivec,$ivec,$inptail,$inpperm
+
+ neg r11,$inp
+ ?lvsl $keyperm,0,$key # prepare for unaligned key
+ lwz $rounds,240($key)
+
+ lvsr $inpperm,0,r11 # prepare for unaligned load
+ lvx $inptail,0,$inp
+ addi $inp,$inp,15 # 15 is not typo
+ le?vxor $inpperm,$inpperm,$tmp
+
+ ?lvsr $outperm,0,$out # prepare for unaligned store
+ vspltisb $outmask,-1
+ lvx $outhead,0,$out
+ ?vperm $outmask,$rndkey0,$outmask,$outperm
+ le?vxor $outperm,$outperm,$tmp
+
+ srwi $rounds,$rounds,1
+ li $idx,16
+ subi $rounds,$rounds,1
+ beq Lcbc_dec
+
+Lcbc_enc:
+ vmr $inout,$inptail
+ lvx $inptail,0,$inp
+ addi $inp,$inp,16
+ mtctr $rounds
+ subi $len,$len,16 # len-=16
+
+ lvx $rndkey0,0,$key
+ vperm $inout,$inout,$inptail,$inpperm
+ lvx $rndkey1,$idx,$key
+ addi $idx,$idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vxor $inout,$inout,$rndkey0
+ lvx $rndkey0,$idx,$key
+ addi $idx,$idx,16
+ vxor $inout,$inout,$ivec
+
+Loop_cbc_enc:
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vcipher $inout,$inout,$rndkey1
+ lvx $rndkey1,$idx,$key
+ addi $idx,$idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vcipher $inout,$inout,$rndkey0
+ lvx $rndkey0,$idx,$key
+ addi $idx,$idx,16
+ bdnz Loop_cbc_enc
+
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vcipher $inout,$inout,$rndkey1
+ lvx $rndkey1,$idx,$key
+ li $idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vcipherlast $ivec,$inout,$rndkey0
+ ${UCMP}i $len,16
+
+ vperm $tmp,$ivec,$ivec,$outperm
+ vsel $inout,$outhead,$tmp,$outmask
+ vmr $outhead,$tmp
+ stvx $inout,0,$out
+ addi $out,$out,16
+ bge Lcbc_enc
+
+ b Lcbc_done
+
+.align 4
+Lcbc_dec:
+ ${UCMP}i $len,128
+ bge _aesp8_cbc_decrypt8x
+ vmr $tmp,$inptail
+ lvx $inptail,0,$inp
+ addi $inp,$inp,16
+ mtctr $rounds
+ subi $len,$len,16 # len-=16
+
+ lvx $rndkey0,0,$key
+ vperm $tmp,$tmp,$inptail,$inpperm
+ lvx $rndkey1,$idx,$key
+ addi $idx,$idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vxor $inout,$tmp,$rndkey0
+ lvx $rndkey0,$idx,$key
+ addi $idx,$idx,16
+
+Loop_cbc_dec:
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vncipher $inout,$inout,$rndkey1
+ lvx $rndkey1,$idx,$key
+ addi $idx,$idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vncipher $inout,$inout,$rndkey0
+ lvx $rndkey0,$idx,$key
+ addi $idx,$idx,16
+ bdnz Loop_cbc_dec
+
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vncipher $inout,$inout,$rndkey1
+ lvx $rndkey1,$idx,$key
+ li $idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vncipherlast $inout,$inout,$rndkey0
+ ${UCMP}i $len,16
+
+ vxor $inout,$inout,$ivec
+ vmr $ivec,$tmp
+ vperm $tmp,$inout,$inout,$outperm
+ vsel $inout,$outhead,$tmp,$outmask
+ vmr $outhead,$tmp
+ stvx $inout,0,$out
+ addi $out,$out,16
+ bge Lcbc_dec
+
+Lcbc_done:
+ addi $out,$out,-1
+ lvx $inout,0,$out # redundant in aligned case
+ vsel $inout,$outhead,$inout,$outmask
+ stvx $inout,0,$out
+
+ neg $enc,$ivp # write [unaligned] iv
+ li $idx,15 # 15 is not typo
+ vxor $rndkey0,$rndkey0,$rndkey0
+ vspltisb $outmask,-1
+ le?vspltisb $tmp,0x0f
+ ?lvsl $outperm,0,$enc
+ ?vperm $outmask,$rndkey0,$outmask,$outperm
+ le?vxor $outperm,$outperm,$tmp
+ lvx $outhead,0,$ivp
+ vperm $ivec,$ivec,$ivec,$outperm
+ vsel $inout,$outhead,$ivec,$outmask
+ lvx $inptail,$idx,$ivp
+ stvx $inout,0,$ivp
+ vsel $inout,$ivec,$inptail,$outmask
+ stvx $inout,$idx,$ivp
+
+ mtspr 256,$vrsave
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,6,0
+ .long 0
+___
+#########################################################################
+{{ # Optimized CBC decrypt procedure #
+my $key_="r11";
+my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
+ $x00=0 if ($flavour =~ /osx/);
+my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
+my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
+my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
+ # v26-v31 last 6 round keys
+my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
+
+$code.=<<___;
+.align 5
+_aesp8_cbc_decrypt8x:
+ $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
+ li r10,`$FRAME+8*16+15`
+ li r11,`$FRAME+8*16+31`
+ stvx v20,r10,$sp # ABI says so
+ addi r10,r10,32
+ stvx v21,r11,$sp
+ addi r11,r11,32
+ stvx v22,r10,$sp
+ addi r10,r10,32
+ stvx v23,r11,$sp
+ addi r11,r11,32
+ stvx v24,r10,$sp
+ addi r10,r10,32
+ stvx v25,r11,$sp
+ addi r11,r11,32
+ stvx v26,r10,$sp
+ addi r10,r10,32
+ stvx v27,r11,$sp
+ addi r11,r11,32
+ stvx v28,r10,$sp
+ addi r10,r10,32
+ stvx v29,r11,$sp
+ addi r11,r11,32
+ stvx v30,r10,$sp
+ stvx v31,r11,$sp
+ li r0,-1
+ stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
+ li $x10,0x10
+ $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
+ li $x20,0x20
+ $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
+ li $x30,0x30
+ $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
+ li $x40,0x40
+ $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
+ li $x50,0x50
+ $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
+ li $x60,0x60
+ $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
+ li $x70,0x70
+ mtspr 256,r0
+
+ subi $rounds,$rounds,3 # -4 in total
+ subi $len,$len,128 # bias
+
+ lvx $rndkey0,$x00,$key # load key schedule
+ lvx v30,$x10,$key
+ addi $key,$key,0x20
+ lvx v31,$x00,$key
+ ?vperm $rndkey0,$rndkey0,v30,$keyperm
+ addi $key_,$sp,$FRAME+15
+ mtctr $rounds
+
+Load_cbc_dec_key:
+ ?vperm v24,v30,v31,$keyperm
+ lvx v30,$x10,$key
+ addi $key,$key,0x20
+ stvx v24,$x00,$key_ # off-load round[1]
+ ?vperm v25,v31,v30,$keyperm
+ lvx v31,$x00,$key
+ stvx v25,$x10,$key_ # off-load round[2]
+ addi $key_,$key_,0x20
+ bdnz Load_cbc_dec_key
+
+ lvx v26,$x10,$key
+ ?vperm v24,v30,v31,$keyperm
+ lvx v27,$x20,$key
+ stvx v24,$x00,$key_ # off-load round[3]
+ ?vperm v25,v31,v26,$keyperm
+ lvx v28,$x30,$key
+ stvx v25,$x10,$key_ # off-load round[4]
+ addi $key_,$sp,$FRAME+15 # rewind $key_
+ ?vperm v26,v26,v27,$keyperm
+ lvx v29,$x40,$key
+ ?vperm v27,v27,v28,$keyperm
+ lvx v30,$x50,$key
+ ?vperm v28,v28,v29,$keyperm
+ lvx v31,$x60,$key
+ ?vperm v29,v29,v30,$keyperm
+ lvx $out0,$x70,$key # borrow $out0
+ ?vperm v30,v30,v31,$keyperm
+ lvx v24,$x00,$key_ # pre-load round[1]
+ ?vperm v31,v31,$out0,$keyperm
+ lvx v25,$x10,$key_ # pre-load round[2]
+
+ #lvx $inptail,0,$inp # "caller" already did this
+ #addi $inp,$inp,15 # 15 is not typo
+ subi $inp,$inp,15 # undo "caller"
+
+ le?li $idx,8
+ lvx_u $in0,$x00,$inp # load first 8 "words"
+ le?lvsl $inpperm,0,$idx
+ le?vspltisb $tmp,0x0f
+ lvx_u $in1,$x10,$inp
+ le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
+ lvx_u $in2,$x20,$inp
+ le?vperm $in0,$in0,$in0,$inpperm
+ lvx_u $in3,$x30,$inp
+ le?vperm $in1,$in1,$in1,$inpperm
+ lvx_u $in4,$x40,$inp
+ le?vperm $in2,$in2,$in2,$inpperm
+ vxor $out0,$in0,$rndkey0
+ lvx_u $in5,$x50,$inp
+ le?vperm $in3,$in3,$in3,$inpperm
+ vxor $out1,$in1,$rndkey0
+ lvx_u $in6,$x60,$inp
+ le?vperm $in4,$in4,$in4,$inpperm
+ vxor $out2,$in2,$rndkey0
+ lvx_u $in7,$x70,$inp
+ addi $inp,$inp,0x80
+ le?vperm $in5,$in5,$in5,$inpperm
+ vxor $out3,$in3,$rndkey0
+ le?vperm $in6,$in6,$in6,$inpperm
+ vxor $out4,$in4,$rndkey0
+ le?vperm $in7,$in7,$in7,$inpperm
+ vxor $out5,$in5,$rndkey0
+ vxor $out6,$in6,$rndkey0
+ vxor $out7,$in7,$rndkey0
+
+ mtctr $rounds
+ b Loop_cbc_dec8x
+.align 5
+Loop_cbc_dec8x:
+ vncipher $out0,$out0,v24
+ vncipher $out1,$out1,v24
+ vncipher $out2,$out2,v24
+ vncipher $out3,$out3,v24
+ vncipher $out4,$out4,v24
+ vncipher $out5,$out5,v24
+ vncipher $out6,$out6,v24
+ vncipher $out7,$out7,v24
+ lvx v24,$x20,$key_ # round[3]
+ addi $key_,$key_,0x20
+
+ vncipher $out0,$out0,v25
+ vncipher $out1,$out1,v25
+ vncipher $out2,$out2,v25
+ vncipher $out3,$out3,v25
+ vncipher $out4,$out4,v25
+ vncipher $out5,$out5,v25
+ vncipher $out6,$out6,v25
+ vncipher $out7,$out7,v25
+ lvx v25,$x10,$key_ # round[4]
+ bdnz Loop_cbc_dec8x
+
+ subic $len,$len,128 # $len-=128
+ vncipher $out0,$out0,v24
+ vncipher $out1,$out1,v24
+ vncipher $out2,$out2,v24
+ vncipher $out3,$out3,v24
+ vncipher $out4,$out4,v24
+ vncipher $out5,$out5,v24
+ vncipher $out6,$out6,v24
+ vncipher $out7,$out7,v24
+
+ subfe. r0,r0,r0 # borrow?-1:0
+ vncipher $out0,$out0,v25
+ vncipher $out1,$out1,v25
+ vncipher $out2,$out2,v25
+ vncipher $out3,$out3,v25
+ vncipher $out4,$out4,v25
+ vncipher $out5,$out5,v25
+ vncipher $out6,$out6,v25
+ vncipher $out7,$out7,v25
+
+ and r0,r0,$len
+ vncipher $out0,$out0,v26
+ vncipher $out1,$out1,v26
+ vncipher $out2,$out2,v26
+ vncipher $out3,$out3,v26
+ vncipher $out4,$out4,v26
+ vncipher $out5,$out5,v26
+ vncipher $out6,$out6,v26
+ vncipher $out7,$out7,v26
+
+ add $inp,$inp,r0 # $inp is adjusted in such
+ # way that at exit from the
+ # loop inX-in7 are loaded
+ # with last "words"
+ vncipher $out0,$out0,v27
+ vncipher $out1,$out1,v27
+ vncipher $out2,$out2,v27
+ vncipher $out3,$out3,v27
+ vncipher $out4,$out4,v27
+ vncipher $out5,$out5,v27
+ vncipher $out6,$out6,v27
+ vncipher $out7,$out7,v27
+
+ addi $key_,$sp,$FRAME+15 # rewind $key_
+ vncipher $out0,$out0,v28
+ vncipher $out1,$out1,v28
+ vncipher $out2,$out2,v28
+ vncipher $out3,$out3,v28
+ vncipher $out4,$out4,v28
+ vncipher $out5,$out5,v28
+ vncipher $out6,$out6,v28
+ vncipher $out7,$out7,v28
+ lvx v24,$x00,$key_ # re-pre-load round[1]
+
+ vncipher $out0,$out0,v29
+ vncipher $out1,$out1,v29
+ vncipher $out2,$out2,v29
+ vncipher $out3,$out3,v29
+ vncipher $out4,$out4,v29
+ vncipher $out5,$out5,v29
+ vncipher $out6,$out6,v29
+ vncipher $out7,$out7,v29
+ lvx v25,$x10,$key_ # re-pre-load round[2]
+
+ vncipher $out0,$out0,v30
+ vxor $ivec,$ivec,v31 # xor with last round key
+ vncipher $out1,$out1,v30
+ vxor $in0,$in0,v31
+ vncipher $out2,$out2,v30
+ vxor $in1,$in1,v31
+ vncipher $out3,$out3,v30
+ vxor $in2,$in2,v31
+ vncipher $out4,$out4,v30
+ vxor $in3,$in3,v31
+ vncipher $out5,$out5,v30
+ vxor $in4,$in4,v31
+ vncipher $out6,$out6,v30
+ vxor $in5,$in5,v31
+ vncipher $out7,$out7,v30
+ vxor $in6,$in6,v31
+
+ vncipherlast $out0,$out0,$ivec
+ vncipherlast $out1,$out1,$in0
+ lvx_u $in0,$x00,$inp # load next input block
+ vncipherlast $out2,$out2,$in1
+ lvx_u $in1,$x10,$inp
+ vncipherlast $out3,$out3,$in2
+ le?vperm $in0,$in0,$in0,$inpperm
+ lvx_u $in2,$x20,$inp
+ vncipherlast $out4,$out4,$in3
+ le?vperm $in1,$in1,$in1,$inpperm
+ lvx_u $in3,$x30,$inp
+ vncipherlast $out5,$out5,$in4
+ le?vperm $in2,$in2,$in2,$inpperm
+ lvx_u $in4,$x40,$inp
+ vncipherlast $out6,$out6,$in5
+ le?vperm $in3,$in3,$in3,$inpperm
+ lvx_u $in5,$x50,$inp
+ vncipherlast $out7,$out7,$in6
+ le?vperm $in4,$in4,$in4,$inpperm
+ lvx_u $in6,$x60,$inp
+ vmr $ivec,$in7
+ le?vperm $in5,$in5,$in5,$inpperm
+ lvx_u $in7,$x70,$inp
+ addi $inp,$inp,0x80
+
+ le?vperm $out0,$out0,$out0,$inpperm
+ le?vperm $out1,$out1,$out1,$inpperm
+ stvx_u $out0,$x00,$out
+ le?vperm $in6,$in6,$in6,$inpperm
+ vxor $out0,$in0,$rndkey0
+ le?vperm $out2,$out2,$out2,$inpperm
+ stvx_u $out1,$x10,$out
+ le?vperm $in7,$in7,$in7,$inpperm
+ vxor $out1,$in1,$rndkey0
+ le?vperm $out3,$out3,$out3,$inpperm
+ stvx_u $out2,$x20,$out
+ vxor $out2,$in2,$rndkey0
+ le?vperm $out4,$out4,$out4,$inpperm
+ stvx_u $out3,$x30,$out
+ vxor $out3,$in3,$rndkey0
+ le?vperm $out5,$out5,$out5,$inpperm
+ stvx_u $out4,$x40,$out
+ vxor $out4,$in4,$rndkey0
+ le?vperm $out6,$out6,$out6,$inpperm
+ stvx_u $out5,$x50,$out
+ vxor $out5,$in5,$rndkey0
+ le?vperm $out7,$out7,$out7,$inpperm
+ stvx_u $out6,$x60,$out
+ vxor $out6,$in6,$rndkey0
+ stvx_u $out7,$x70,$out
+ addi $out,$out,0x80
+ vxor $out7,$in7,$rndkey0
+
+ mtctr $rounds
+ beq Loop_cbc_dec8x # did $len-=128 borrow?
+
+ addic. $len,$len,128
+ beq Lcbc_dec8x_done
+ nop
+ nop
+
+Loop_cbc_dec8x_tail: # up to 7 "words" tail...
+ vncipher $out1,$out1,v24
+ vncipher $out2,$out2,v24
+ vncipher $out3,$out3,v24
+ vncipher $out4,$out4,v24
+ vncipher $out5,$out5,v24
+ vncipher $out6,$out6,v24
+ vncipher $out7,$out7,v24
+ lvx v24,$x20,$key_ # round[3]
+ addi $key_,$key_,0x20
+
+ vncipher $out1,$out1,v25
+ vncipher $out2,$out2,v25
+ vncipher $out3,$out3,v25
+ vncipher $out4,$out4,v25
+ vncipher $out5,$out5,v25
+ vncipher $out6,$out6,v25
+ vncipher $out7,$out7,v25
+ lvx v25,$x10,$key_ # round[4]
+ bdnz Loop_cbc_dec8x_tail
+
+ vncipher $out1,$out1,v24
+ vncipher $out2,$out2,v24
+ vncipher $out3,$out3,v24
+ vncipher $out4,$out4,v24
+ vncipher $out5,$out5,v24
+ vncipher $out6,$out6,v24
+ vncipher $out7,$out7,v24
+
+ vncipher $out1,$out1,v25
+ vncipher $out2,$out2,v25
+ vncipher $out3,$out3,v25
+ vncipher $out4,$out4,v25
+ vncipher $out5,$out5,v25
+ vncipher $out6,$out6,v25
+ vncipher $out7,$out7,v25
+
+ vncipher $out1,$out1,v26
+ vncipher $out2,$out2,v26
+ vncipher $out3,$out3,v26
+ vncipher $out4,$out4,v26
+ vncipher $out5,$out5,v26
+ vncipher $out6,$out6,v26
+ vncipher $out7,$out7,v26
+
+ vncipher $out1,$out1,v27
+ vncipher $out2,$out2,v27
+ vncipher $out3,$out3,v27
+ vncipher $out4,$out4,v27
+ vncipher $out5,$out5,v27
+ vncipher $out6,$out6,v27
+ vncipher $out7,$out7,v27
+
+ vncipher $out1,$out1,v28
+ vncipher $out2,$out2,v28
+ vncipher $out3,$out3,v28
+ vncipher $out4,$out4,v28
+ vncipher $out5,$out5,v28
+ vncipher $out6,$out6,v28
+ vncipher $out7,$out7,v28
+
+ vncipher $out1,$out1,v29
+ vncipher $out2,$out2,v29
+ vncipher $out3,$out3,v29
+ vncipher $out4,$out4,v29
+ vncipher $out5,$out5,v29
+ vncipher $out6,$out6,v29
+ vncipher $out7,$out7,v29
+
+ vncipher $out1,$out1,v30
+ vxor $ivec,$ivec,v31 # last round key
+ vncipher $out2,$out2,v30
+ vxor $in1,$in1,v31
+ vncipher $out3,$out3,v30
+ vxor $in2,$in2,v31
+ vncipher $out4,$out4,v30
+ vxor $in3,$in3,v31
+ vncipher $out5,$out5,v30
+ vxor $in4,$in4,v31
+ vncipher $out6,$out6,v30
+ vxor $in5,$in5,v31
+ vncipher $out7,$out7,v30
+ vxor $in6,$in6,v31
+
+ cmplwi $len,32 # switch($len)
+ blt Lcbc_dec8x_one
+ nop
+ beq Lcbc_dec8x_two
+ cmplwi $len,64
+ blt Lcbc_dec8x_three
+ nop
+ beq Lcbc_dec8x_four
+ cmplwi $len,96
+ blt Lcbc_dec8x_five
+ nop
+ beq Lcbc_dec8x_six
+
+Lcbc_dec8x_seven:
+ vncipherlast $out1,$out1,$ivec
+ vncipherlast $out2,$out2,$in1
+ vncipherlast $out3,$out3,$in2
+ vncipherlast $out4,$out4,$in3
+ vncipherlast $out5,$out5,$in4
+ vncipherlast $out6,$out6,$in5
+ vncipherlast $out7,$out7,$in6
+ vmr $ivec,$in7
+
+ le?vperm $out1,$out1,$out1,$inpperm
+ le?vperm $out2,$out2,$out2,$inpperm
+ stvx_u $out1,$x00,$out
+ le?vperm $out3,$out3,$out3,$inpperm
+ stvx_u $out2,$x10,$out
+ le?vperm $out4,$out4,$out4,$inpperm
+ stvx_u $out3,$x20,$out
+ le?vperm $out5,$out5,$out5,$inpperm
+ stvx_u $out4,$x30,$out
+ le?vperm $out6,$out6,$out6,$inpperm
+ stvx_u $out5,$x40,$out
+ le?vperm $out7,$out7,$out7,$inpperm
+ stvx_u $out6,$x50,$out
+ stvx_u $out7,$x60,$out
+ addi $out,$out,0x70
+ b Lcbc_dec8x_done
+
+.align 5
+Lcbc_dec8x_six:
+ vncipherlast $out2,$out2,$ivec
+ vncipherlast $out3,$out3,$in2
+ vncipherlast $out4,$out4,$in3
+ vncipherlast $out5,$out5,$in4
+ vncipherlast $out6,$out6,$in5
+ vncipherlast $out7,$out7,$in6
+ vmr $ivec,$in7
+
+ le?vperm $out2,$out2,$out2,$inpperm
+ le?vperm $out3,$out3,$out3,$inpperm
+ stvx_u $out2,$x00,$out
+ le?vperm $out4,$out4,$out4,$inpperm
+ stvx_u $out3,$x10,$out
+ le?vperm $out5,$out5,$out5,$inpperm
+ stvx_u $out4,$x20,$out
+ le?vperm $out6,$out6,$out6,$inpperm
+ stvx_u $out5,$x30,$out
+ le?vperm $out7,$out7,$out7,$inpperm
+ stvx_u $out6,$x40,$out
+ stvx_u $out7,$x50,$out
+ addi $out,$out,0x60
+ b Lcbc_dec8x_done
+
+.align 5
+Lcbc_dec8x_five:
+ vncipherlast $out3,$out3,$ivec
+ vncipherlast $out4,$out4,$in3
+ vncipherlast $out5,$out5,$in4
+ vncipherlast $out6,$out6,$in5
+ vncipherlast $out7,$out7,$in6
+ vmr $ivec,$in7
+
+ le?vperm $out3,$out3,$out3,$inpperm
+ le?vperm $out4,$out4,$out4,$inpperm
+ stvx_u $out3,$x00,$out
+ le?vperm $out5,$out5,$out5,$inpperm
+ stvx_u $out4,$x10,$out
+ le?vperm $out6,$out6,$out6,$inpperm
+ stvx_u $out5,$x20,$out
+ le?vperm $out7,$out7,$out7,$inpperm
+ stvx_u $out6,$x30,$out
+ stvx_u $out7,$x40,$out
+ addi $out,$out,0x50
+ b Lcbc_dec8x_done
+
+.align 5
+Lcbc_dec8x_four:
+ vncipherlast $out4,$out4,$ivec
+ vncipherlast $out5,$out5,$in4
+ vncipherlast $out6,$out6,$in5
+ vncipherlast $out7,$out7,$in6
+ vmr $ivec,$in7
+
+ le?vperm $out4,$out4,$out4,$inpperm
+ le?vperm $out5,$out5,$out5,$inpperm
+ stvx_u $out4,$x00,$out
+ le?vperm $out6,$out6,$out6,$inpperm
+ stvx_u $out5,$x10,$out
+ le?vperm $out7,$out7,$out7,$inpperm
+ stvx_u $out6,$x20,$out
+ stvx_u $out7,$x30,$out
+ addi $out,$out,0x40
+ b Lcbc_dec8x_done
+
+.align 5
+Lcbc_dec8x_three:
+ vncipherlast $out5,$out5,$ivec
+ vncipherlast $out6,$out6,$in5
+ vncipherlast $out7,$out7,$in6
+ vmr $ivec,$in7
+
+ le?vperm $out5,$out5,$out5,$inpperm
+ le?vperm $out6,$out6,$out6,$inpperm
+ stvx_u $out5,$x00,$out
+ le?vperm $out7,$out7,$out7,$inpperm
+ stvx_u $out6,$x10,$out
+ stvx_u $out7,$x20,$out
+ addi $out,$out,0x30
+ b Lcbc_dec8x_done
+
+.align 5
+Lcbc_dec8x_two:
+ vncipherlast $out6,$out6,$ivec
+ vncipherlast $out7,$out7,$in6
+ vmr $ivec,$in7
+
+ le?vperm $out6,$out6,$out6,$inpperm
+ le?vperm $out7,$out7,$out7,$inpperm
+ stvx_u $out6,$x00,$out
+ stvx_u $out7,$x10,$out
+ addi $out,$out,0x20
+ b Lcbc_dec8x_done
+
+.align 5
+Lcbc_dec8x_one:
+ vncipherlast $out7,$out7,$ivec
+ vmr $ivec,$in7
+
+ le?vperm $out7,$out7,$out7,$inpperm
+ stvx_u $out7,0,$out
+ addi $out,$out,0x10
+
+Lcbc_dec8x_done:
+ le?vperm $ivec,$ivec,$ivec,$inpperm
+ stvx_u $ivec,0,$ivp # write [unaligned] iv
+
+ li r10,`$FRAME+15`
+ li r11,`$FRAME+31`
+ stvx $inpperm,r10,$sp # wipe copies of round keys
+ addi r10,r10,32
+ stvx $inpperm,r11,$sp
+ addi r11,r11,32
+ stvx $inpperm,r10,$sp
+ addi r10,r10,32
+ stvx $inpperm,r11,$sp
+ addi r11,r11,32
+ stvx $inpperm,r10,$sp
+ addi r10,r10,32
+ stvx $inpperm,r11,$sp
+ addi r11,r11,32
+ stvx $inpperm,r10,$sp
+ addi r10,r10,32
+ stvx $inpperm,r11,$sp
+ addi r11,r11,32
+
+ mtspr 256,$vrsave
+ lvx v20,r10,$sp # ABI says so
+ addi r10,r10,32
+ lvx v21,r11,$sp
+ addi r11,r11,32
+ lvx v22,r10,$sp
+ addi r10,r10,32
+ lvx v23,r11,$sp
+ addi r11,r11,32
+ lvx v24,r10,$sp
+ addi r10,r10,32
+ lvx v25,r11,$sp
+ addi r11,r11,32
+ lvx v26,r10,$sp
+ addi r10,r10,32
+ lvx v27,r11,$sp
+ addi r11,r11,32
+ lvx v28,r10,$sp
+ addi r10,r10,32
+ lvx v29,r11,$sp
+ addi r11,r11,32
+ lvx v30,r10,$sp
+ lvx v31,r11,$sp
+ $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
+ $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
+ $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
+ $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
+ $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
+ $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
+ addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
+ blr
+ .long 0
+ .byte 0,12,0x04,0,0x80,6,6,0
+ .long 0
+.size .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
+___
+}} }}}
+
+#########################################################################
+{{{ # CTR procedure[s] #
+my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
+my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
+my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
+ map("v$_",(4..11));
+my $dat=$tmp;
+
+$code.=<<___;
+.globl .${prefix}_ctr32_encrypt_blocks
+.align 5
+.${prefix}_ctr32_encrypt_blocks:
+ ${UCMP}i $len,1
+ bltlr-
+
+ lis r0,0xfff0
+ mfspr $vrsave,256
+ mtspr 256,r0
+
+ li $idx,15
+ vxor $rndkey0,$rndkey0,$rndkey0
+ le?vspltisb $tmp,0x0f
+
+ lvx $ivec,0,$ivp # load [unaligned] iv
+ lvsl $inpperm,0,$ivp
+ lvx $inptail,$idx,$ivp
+ vspltisb $one,1
+ le?vxor $inpperm,$inpperm,$tmp
+ vperm $ivec,$ivec,$inptail,$inpperm
+ vsldoi $one,$rndkey0,$one,1
+
+ neg r11,$inp
+ ?lvsl $keyperm,0,$key # prepare for unaligned key
+ lwz $rounds,240($key)
+
+ lvsr $inpperm,0,r11 # prepare for unaligned load
+ lvx $inptail,0,$inp
+ addi $inp,$inp,15 # 15 is not typo
+ le?vxor $inpperm,$inpperm,$tmp
+
+ srwi $rounds,$rounds,1
+ li $idx,16
+ subi $rounds,$rounds,1
+
+ ${UCMP}i $len,8
+ bge _aesp8_ctr32_encrypt8x
+
+ ?lvsr $outperm,0,$out # prepare for unaligned store
+ vspltisb $outmask,-1
+ lvx $outhead,0,$out
+ ?vperm $outmask,$rndkey0,$outmask,$outperm
+ le?vxor $outperm,$outperm,$tmp
+
+ lvx $rndkey0,0,$key
+ mtctr $rounds
+ lvx $rndkey1,$idx,$key
+ addi $idx,$idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vxor $inout,$ivec,$rndkey0
+ lvx $rndkey0,$idx,$key
+ addi $idx,$idx,16
+ b Loop_ctr32_enc
+
+.align 5
+Loop_ctr32_enc:
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vcipher $inout,$inout,$rndkey1
+ lvx $rndkey1,$idx,$key
+ addi $idx,$idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vcipher $inout,$inout,$rndkey0
+ lvx $rndkey0,$idx,$key
+ addi $idx,$idx,16
+ bdnz Loop_ctr32_enc
+
+ vadduwm $ivec,$ivec,$one
+ vmr $dat,$inptail
+ lvx $inptail,0,$inp
+ addi $inp,$inp,16
+ subic. $len,$len,1 # blocks--
+
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vcipher $inout,$inout,$rndkey1
+ lvx $rndkey1,$idx,$key
+ vperm $dat,$dat,$inptail,$inpperm
+ li $idx,16
+ ?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm
+ lvx $rndkey0,0,$key
+ vxor $dat,$dat,$rndkey1 # last round key
+ vcipherlast $inout,$inout,$dat
+
+ lvx $rndkey1,$idx,$key
+ addi $idx,$idx,16
+ vperm $inout,$inout,$inout,$outperm
+ vsel $dat,$outhead,$inout,$outmask
+ mtctr $rounds
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vmr $outhead,$inout
+ vxor $inout,$ivec,$rndkey0
+ lvx $rndkey0,$idx,$key
+ addi $idx,$idx,16
+ stvx $dat,0,$out
+ addi $out,$out,16
+ bne Loop_ctr32_enc
+
+ addi $out,$out,-1
+ lvx $inout,0,$out # redundant in aligned case
+ vsel $inout,$outhead,$inout,$outmask
+ stvx $inout,0,$out
+
+ mtspr 256,$vrsave
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,6,0
+ .long 0
+___
+#########################################################################
+{{ # Optimized CTR procedure #
+my $key_="r11";
+my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
+ $x00=0 if ($flavour =~ /osx/);
+my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
+my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
+my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
+ # v26-v31 last 6 round keys
+my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
+my ($two,$three,$four)=($outhead,$outperm,$outmask);
+
+$code.=<<___;
+.align 5
+_aesp8_ctr32_encrypt8x:
+ $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
+ li r10,`$FRAME+8*16+15`
+ li r11,`$FRAME+8*16+31`
+ stvx v20,r10,$sp # ABI says so
+ addi r10,r10,32
+ stvx v21,r11,$sp
+ addi r11,r11,32
+ stvx v22,r10,$sp
+ addi r10,r10,32
+ stvx v23,r11,$sp
+ addi r11,r11,32
+ stvx v24,r10,$sp
+ addi r10,r10,32
+ stvx v25,r11,$sp
+ addi r11,r11,32
+ stvx v26,r10,$sp
+ addi r10,r10,32
+ stvx v27,r11,$sp
+ addi r11,r11,32
+ stvx v28,r10,$sp
+ addi r10,r10,32
+ stvx v29,r11,$sp
+ addi r11,r11,32
+ stvx v30,r10,$sp
+ stvx v31,r11,$sp
+ li r0,-1
+ stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
+ li $x10,0x10
+ $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
+ li $x20,0x20
+ $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
+ li $x30,0x30
+ $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
+ li $x40,0x40
+ $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
+ li $x50,0x50
+ $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
+ li $x60,0x60
+ $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
+ li $x70,0x70
+ mtspr 256,r0
+
+ subi $rounds,$rounds,3 # -4 in total
+
+ lvx $rndkey0,$x00,$key # load key schedule
+ lvx v30,$x10,$key
+ addi $key,$key,0x20
+ lvx v31,$x00,$key
+ ?vperm $rndkey0,$rndkey0,v30,$keyperm
+ addi $key_,$sp,$FRAME+15
+ mtctr $rounds
+
+Load_ctr32_enc_key:
+ ?vperm v24,v30,v31,$keyperm
+ lvx v30,$x10,$key
+ addi $key,$key,0x20
+ stvx v24,$x00,$key_ # off-load round[1]
+ ?vperm v25,v31,v30,$keyperm
+ lvx v31,$x00,$key
+ stvx v25,$x10,$key_ # off-load round[2]
+ addi $key_,$key_,0x20
+ bdnz Load_ctr32_enc_key
+
+ lvx v26,$x10,$key
+ ?vperm v24,v30,v31,$keyperm
+ lvx v27,$x20,$key
+ stvx v24,$x00,$key_ # off-load round[3]
+ ?vperm v25,v31,v26,$keyperm
+ lvx v28,$x30,$key
+ stvx v25,$x10,$key_ # off-load round[4]
+ addi $key_,$sp,$FRAME+15 # rewind $key_
+ ?vperm v26,v26,v27,$keyperm
+ lvx v29,$x40,$key
+ ?vperm v27,v27,v28,$keyperm
+ lvx v30,$x50,$key
+ ?vperm v28,v28,v29,$keyperm
+ lvx v31,$x60,$key
+ ?vperm v29,v29,v30,$keyperm
+ lvx $out0,$x70,$key # borrow $out0
+ ?vperm v30,v30,v31,$keyperm
+ lvx v24,$x00,$key_ # pre-load round[1]
+ ?vperm v31,v31,$out0,$keyperm
+ lvx v25,$x10,$key_ # pre-load round[2]
+
+ vadduwm $two,$one,$one
+ subi $inp,$inp,15 # undo "caller"
+ $SHL $len,$len,4
+
+ vadduwm $out1,$ivec,$one # counter values ...
+ vadduwm $out2,$ivec,$two
+ vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
+ le?li $idx,8
+ vadduwm $out3,$out1,$two
+ vxor $out1,$out1,$rndkey0
+ le?lvsl $inpperm,0,$idx
+ vadduwm $out4,$out2,$two
+ vxor $out2,$out2,$rndkey0
+ le?vspltisb $tmp,0x0f
+ vadduwm $out5,$out3,$two
+ vxor $out3,$out3,$rndkey0
+ le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
+ vadduwm $out6,$out4,$two
+ vxor $out4,$out4,$rndkey0
+ vadduwm $out7,$out5,$two
+ vxor $out5,$out5,$rndkey0
+ vadduwm $ivec,$out6,$two # next counter value
+ vxor $out6,$out6,$rndkey0
+ vxor $out7,$out7,$rndkey0
+
+ mtctr $rounds
+ b Loop_ctr32_enc8x
+.align 5
+Loop_ctr32_enc8x:
+ vcipher $out0,$out0,v24
+ vcipher $out1,$out1,v24
+ vcipher $out2,$out2,v24
+ vcipher $out3,$out3,v24
+ vcipher $out4,$out4,v24
+ vcipher $out5,$out5,v24
+ vcipher $out6,$out6,v24
+ vcipher $out7,$out7,v24
+Loop_ctr32_enc8x_middle:
+ lvx v24,$x20,$key_ # round[3]
+ addi $key_,$key_,0x20
+
+ vcipher $out0,$out0,v25
+ vcipher $out1,$out1,v25
+ vcipher $out2,$out2,v25
+ vcipher $out3,$out3,v25
+ vcipher $out4,$out4,v25
+ vcipher $out5,$out5,v25
+ vcipher $out6,$out6,v25
+ vcipher $out7,$out7,v25
+ lvx v25,$x10,$key_ # round[4]
+ bdnz Loop_ctr32_enc8x
+
+ subic r11,$len,256 # $len-256, borrow $key_
+ vcipher $out0,$out0,v24
+ vcipher $out1,$out1,v24
+ vcipher $out2,$out2,v24
+ vcipher $out3,$out3,v24
+ vcipher $out4,$out4,v24
+ vcipher $out5,$out5,v24
+ vcipher $out6,$out6,v24
+ vcipher $out7,$out7,v24
+
+ subfe r0,r0,r0 # borrow?-1:0
+ vcipher $out0,$out0,v25
+ vcipher $out1,$out1,v25
+ vcipher $out2,$out2,v25
+ vcipher $out3,$out3,v25
+ vcipher $out4,$out4,v25
+ vcipher $out5,$out5,v25
+ vcipher $out6,$out6,v25
+ vcipher $out7,$out7,v25
+
+ and r0,r0,r11
+ addi $key_,$sp,$FRAME+15 # rewind $key_
+ vcipher $out0,$out0,v26
+ vcipher $out1,$out1,v26
+ vcipher $out2,$out2,v26
+ vcipher $out3,$out3,v26
+ vcipher $out4,$out4,v26
+ vcipher $out5,$out5,v26
+ vcipher $out6,$out6,v26
+ vcipher $out7,$out7,v26
+ lvx v24,$x00,$key_ # re-pre-load round[1]
+
+ subic $len,$len,129 # $len-=129
+ vcipher $out0,$out0,v27
+ addi $len,$len,1 # $len-=128 really
+ vcipher $out1,$out1,v27
+ vcipher $out2,$out2,v27
+ vcipher $out3,$out3,v27
+ vcipher $out4,$out4,v27
+ vcipher $out5,$out5,v27
+ vcipher $out6,$out6,v27
+ vcipher $out7,$out7,v27
+ lvx v25,$x10,$key_ # re-pre-load round[2]
+
+ vcipher $out0,$out0,v28
+ lvx_u $in0,$x00,$inp # load input
+ vcipher $out1,$out1,v28
+ lvx_u $in1,$x10,$inp
+ vcipher $out2,$out2,v28
+ lvx_u $in2,$x20,$inp
+ vcipher $out3,$out3,v28
+ lvx_u $in3,$x30,$inp
+ vcipher $out4,$out4,v28
+ lvx_u $in4,$x40,$inp
+ vcipher $out5,$out5,v28
+ lvx_u $in5,$x50,$inp
+ vcipher $out6,$out6,v28
+ lvx_u $in6,$x60,$inp
+ vcipher $out7,$out7,v28
+ lvx_u $in7,$x70,$inp
+ addi $inp,$inp,0x80
+
+ vcipher $out0,$out0,v29
+ le?vperm $in0,$in0,$in0,$inpperm
+ vcipher $out1,$out1,v29
+ le?vperm $in1,$in1,$in1,$inpperm
+ vcipher $out2,$out2,v29
+ le?vperm $in2,$in2,$in2,$inpperm
+ vcipher $out3,$out3,v29
+ le?vperm $in3,$in3,$in3,$inpperm
+ vcipher $out4,$out4,v29
+ le?vperm $in4,$in4,$in4,$inpperm
+ vcipher $out5,$out5,v29
+ le?vperm $in5,$in5,$in5,$inpperm
+ vcipher $out6,$out6,v29
+ le?vperm $in6,$in6,$in6,$inpperm
+ vcipher $out7,$out7,v29
+ le?vperm $in7,$in7,$in7,$inpperm
+
+ add $inp,$inp,r0 # $inp is adjusted in such
+ # way that at exit from the
+ # loop inX-in7 are loaded
+ # with last "words"
+ subfe. r0,r0,r0 # borrow?-1:0
+ vcipher $out0,$out0,v30
+ vxor $in0,$in0,v31 # xor with last round key
+ vcipher $out1,$out1,v30
+ vxor $in1,$in1,v31
+ vcipher $out2,$out2,v30
+ vxor $in2,$in2,v31
+ vcipher $out3,$out3,v30
+ vxor $in3,$in3,v31
+ vcipher $out4,$out4,v30
+ vxor $in4,$in4,v31
+ vcipher $out5,$out5,v30
+ vxor $in5,$in5,v31
+ vcipher $out6,$out6,v30
+ vxor $in6,$in6,v31
+ vcipher $out7,$out7,v30
+ vxor $in7,$in7,v31
+
+ bne Lctr32_enc8x_break # did $len-129 borrow?
+
+ vcipherlast $in0,$out0,$in0
+ vcipherlast $in1,$out1,$in1
+ vadduwm $out1,$ivec,$one # counter values ...
+ vcipherlast $in2,$out2,$in2
+ vadduwm $out2,$ivec,$two
+ vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
+ vcipherlast $in3,$out3,$in3
+ vadduwm $out3,$out1,$two
+ vxor $out1,$out1,$rndkey0
+ vcipherlast $in4,$out4,$in4
+ vadduwm $out4,$out2,$two
+ vxor $out2,$out2,$rndkey0
+ vcipherlast $in5,$out5,$in5
+ vadduwm $out5,$out3,$two
+ vxor $out3,$out3,$rndkey0
+ vcipherlast $in6,$out6,$in6
+ vadduwm $out6,$out4,$two
+ vxor $out4,$out4,$rndkey0
+ vcipherlast $in7,$out7,$in7
+ vadduwm $out7,$out5,$two
+ vxor $out5,$out5,$rndkey0
+ le?vperm $in0,$in0,$in0,$inpperm
+ vadduwm $ivec,$out6,$two # next counter value
+ vxor $out6,$out6,$rndkey0
+ le?vperm $in1,$in1,$in1,$inpperm
+ vxor $out7,$out7,$rndkey0
+ mtctr $rounds
+
+ vcipher $out0,$out0,v24
+ stvx_u $in0,$x00,$out
+ le?vperm $in2,$in2,$in2,$inpperm
+ vcipher $out1,$out1,v24
+ stvx_u $in1,$x10,$out
+ le?vperm $in3,$in3,$in3,$inpperm
+ vcipher $out2,$out2,v24
+ stvx_u $in2,$x20,$out
+ le?vperm $in4,$in4,$in4,$inpperm
+ vcipher $out3,$out3,v24
+ stvx_u $in3,$x30,$out
+ le?vperm $in5,$in5,$in5,$inpperm
+ vcipher $out4,$out4,v24
+ stvx_u $in4,$x40,$out
+ le?vperm $in6,$in6,$in6,$inpperm
+ vcipher $out5,$out5,v24
+ stvx_u $in5,$x50,$out
+ le?vperm $in7,$in7,$in7,$inpperm
+ vcipher $out6,$out6,v24
+ stvx_u $in6,$x60,$out
+ vcipher $out7,$out7,v24
+ stvx_u $in7,$x70,$out
+ addi $out,$out,0x80
+
+ b Loop_ctr32_enc8x_middle
+
+.align 5
+Lctr32_enc8x_break:
+ cmpwi $len,-0x60
+ blt Lctr32_enc8x_one
+ nop
+ beq Lctr32_enc8x_two
+ cmpwi $len,-0x40
+ blt Lctr32_enc8x_three
+ nop
+ beq Lctr32_enc8x_four
+ cmpwi $len,-0x20
+ blt Lctr32_enc8x_five
+ nop
+ beq Lctr32_enc8x_six
+ cmpwi $len,0x00
+ blt Lctr32_enc8x_seven
+
+Lctr32_enc8x_eight:
+ vcipherlast $out0,$out0,$in0
+ vcipherlast $out1,$out1,$in1
+ vcipherlast $out2,$out2,$in2
+ vcipherlast $out3,$out3,$in3
+ vcipherlast $out4,$out4,$in4
+ vcipherlast $out5,$out5,$in5
+ vcipherlast $out6,$out6,$in6
+ vcipherlast $out7,$out7,$in7
+
+ le?vperm $out0,$out0,$out0,$inpperm
+ le?vperm $out1,$out1,$out1,$inpperm
+ stvx_u $out0,$x00,$out
+ le?vperm $out2,$out2,$out2,$inpperm
+ stvx_u $out1,$x10,$out
+ le?vperm $out3,$out3,$out3,$inpperm
+ stvx_u $out2,$x20,$out
+ le?vperm $out4,$out4,$out4,$inpperm
+ stvx_u $out3,$x30,$out
+ le?vperm $out5,$out5,$out5,$inpperm
+ stvx_u $out4,$x40,$out
+ le?vperm $out6,$out6,$out6,$inpperm
+ stvx_u $out5,$x50,$out
+ le?vperm $out7,$out7,$out7,$inpperm
+ stvx_u $out6,$x60,$out
+ stvx_u $out7,$x70,$out
+ addi $out,$out,0x80
+ b Lctr32_enc8x_done
+
+.align 5
+Lctr32_enc8x_seven:
+ vcipherlast $out0,$out0,$in1
+ vcipherlast $out1,$out1,$in2
+ vcipherlast $out2,$out2,$in3
+ vcipherlast $out3,$out3,$in4
+ vcipherlast $out4,$out4,$in5
+ vcipherlast $out5,$out5,$in6
+ vcipherlast $out6,$out6,$in7
+
+ le?vperm $out0,$out0,$out0,$inpperm
+ le?vperm $out1,$out1,$out1,$inpperm
+ stvx_u $out0,$x00,$out
+ le?vperm $out2,$out2,$out2,$inpperm
+ stvx_u $out1,$x10,$out
+ le?vperm $out3,$out3,$out3,$inpperm
+ stvx_u $out2,$x20,$out
+ le?vperm $out4,$out4,$out4,$inpperm
+ stvx_u $out3,$x30,$out
+ le?vperm $out5,$out5,$out5,$inpperm
+ stvx_u $out4,$x40,$out
+ le?vperm $out6,$out6,$out6,$inpperm
+ stvx_u $out5,$x50,$out
+ stvx_u $out6,$x60,$out
+ addi $out,$out,0x70
+ b Lctr32_enc8x_done
+
+.align 5
+Lctr32_enc8x_six:
+ vcipherlast $out0,$out0,$in2
+ vcipherlast $out1,$out1,$in3
+ vcipherlast $out2,$out2,$in4
+ vcipherlast $out3,$out3,$in5
+ vcipherlast $out4,$out4,$in6
+ vcipherlast $out5,$out5,$in7
+
+ le?vperm $out0,$out0,$out0,$inpperm
+ le?vperm $out1,$out1,$out1,$inpperm
+ stvx_u $out0,$x00,$out
+ le?vperm $out2,$out2,$out2,$inpperm
+ stvx_u $out1,$x10,$out
+ le?vperm $out3,$out3,$out3,$inpperm
+ stvx_u $out2,$x20,$out
+ le?vperm $out4,$out4,$out4,$inpperm
+ stvx_u $out3,$x30,$out
+ le?vperm $out5,$out5,$out5,$inpperm
+ stvx_u $out4,$x40,$out
+ stvx_u $out5,$x50,$out
+ addi $out,$out,0x60
+ b Lctr32_enc8x_done
+
+.align 5
+Lctr32_enc8x_five:
+ vcipherlast $out0,$out0,$in3
+ vcipherlast $out1,$out1,$in4
+ vcipherlast $out2,$out2,$in5
+ vcipherlast $out3,$out3,$in6
+ vcipherlast $out4,$out4,$in7
+
+ le?vperm $out0,$out0,$out0,$inpperm
+ le?vperm $out1,$out1,$out1,$inpperm
+ stvx_u $out0,$x00,$out
+ le?vperm $out2,$out2,$out2,$inpperm
+ stvx_u $out1,$x10,$out
+ le?vperm $out3,$out3,$out3,$inpperm
+ stvx_u $out2,$x20,$out
+ le?vperm $out4,$out4,$out4,$inpperm
+ stvx_u $out3,$x30,$out
+ stvx_u $out4,$x40,$out
+ addi $out,$out,0x50
+ b Lctr32_enc8x_done
+
+.align 5
+Lctr32_enc8x_four:
+ vcipherlast $out0,$out0,$in4
+ vcipherlast $out1,$out1,$in5
+ vcipherlast $out2,$out2,$in6
+ vcipherlast $out3,$out3,$in7
+
+ le?vperm $out0,$out0,$out0,$inpperm
+ le?vperm $out1,$out1,$out1,$inpperm
+ stvx_u $out0,$x00,$out
+ le?vperm $out2,$out2,$out2,$inpperm
+ stvx_u $out1,$x10,$out
+ le?vperm $out3,$out3,$out3,$inpperm
+ stvx_u $out2,$x20,$out
+ stvx_u $out3,$x30,$out
+ addi $out,$out,0x40
+ b Lctr32_enc8x_done
+
+.align 5
+Lctr32_enc8x_three:
+ vcipherlast $out0,$out0,$in5
+ vcipherlast $out1,$out1,$in6
+ vcipherlast $out2,$out2,$in7
+
+ le?vperm $out0,$out0,$out0,$inpperm
+ le?vperm $out1,$out1,$out1,$inpperm
+ stvx_u $out0,$x00,$out
+ le?vperm $out2,$out2,$out2,$inpperm
+ stvx_u $out1,$x10,$out
+ stvx_u $out2,$x20,$out
+ addi $out,$out,0x30
+ b Lcbc_dec8x_done
+
+.align 5
+Lctr32_enc8x_two:
+ vcipherlast $out0,$out0,$in6
+ vcipherlast $out1,$out1,$in7
+
+ le?vperm $out0,$out0,$out0,$inpperm
+ le?vperm $out1,$out1,$out1,$inpperm
+ stvx_u $out0,$x00,$out
+ stvx_u $out1,$x10,$out
+ addi $out,$out,0x20
+ b Lcbc_dec8x_done
+
+.align 5
+Lctr32_enc8x_one:
+ vcipherlast $out0,$out0,$in7
+
+ le?vperm $out0,$out0,$out0,$inpperm
+ stvx_u $out0,0,$out
+ addi $out,$out,0x10
+
+Lctr32_enc8x_done:
+ li r10,`$FRAME+15`
+ li r11,`$FRAME+31`
+ stvx $inpperm,r10,$sp # wipe copies of round keys
+ addi r10,r10,32
+ stvx $inpperm,r11,$sp
+ addi r11,r11,32
+ stvx $inpperm,r10,$sp
+ addi r10,r10,32
+ stvx $inpperm,r11,$sp
+ addi r11,r11,32
+ stvx $inpperm,r10,$sp
+ addi r10,r10,32
+ stvx $inpperm,r11,$sp
+ addi r11,r11,32
+ stvx $inpperm,r10,$sp
+ addi r10,r10,32
+ stvx $inpperm,r11,$sp
+ addi r11,r11,32
+
+ mtspr 256,$vrsave
+ lvx v20,r10,$sp # ABI says so
+ addi r10,r10,32
+ lvx v21,r11,$sp
+ addi r11,r11,32
+ lvx v22,r10,$sp
+ addi r10,r10,32
+ lvx v23,r11,$sp
+ addi r11,r11,32
+ lvx v24,r10,$sp
+ addi r10,r10,32
+ lvx v25,r11,$sp
+ addi r11,r11,32
+ lvx v26,r10,$sp
+ addi r10,r10,32
+ lvx v27,r11,$sp
+ addi r11,r11,32
+ lvx v28,r10,$sp
+ addi r10,r10,32
+ lvx v29,r11,$sp
+ addi r11,r11,32
+ lvx v30,r10,$sp
+ lvx v31,r11,$sp
+ $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
+ $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
+ $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
+ $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
+ $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
+ $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
+ addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
+ blr
+ .long 0
+ .byte 0,12,0x04,0,0x80,6,6,0
+ .long 0
+.size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
+___
+}} }}}
+
+#########################################################################
+{{{ # XTS procedures #
+# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len, #
+# const AES_KEY *key1, const AES_KEY *key2, #
+# [const] unsigned char iv[16]); #
+# If $key2 is NULL, then a "tweak chaining" mode is engaged, in which #
+# input tweak value is assumed to be encrypted already, and last tweak #
+# value, one suitable for consecutive call on same chunk of data, is #
+# written back to original buffer. In addition, in "tweak chaining" #
+# mode only complete input blocks are processed. #
+
+my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) = map("r$_",(3..10));
+my ($rndkey0,$rndkey1,$inout) = map("v$_",(0..2));
+my ($output,$inptail,$inpperm,$leperm,$keyperm) = map("v$_",(3..7));
+my ($tweak,$seven,$eighty7,$tmp,$tweak1) = map("v$_",(8..12));
+my $taillen = $key2;
+
+ ($inp,$idx) = ($idx,$inp); # reassign
+
+$code.=<<___;
+.globl .${prefix}_xts_encrypt
+.align 5
+.${prefix}_xts_encrypt:
+ mr $inp,r3 # reassign
+ li r3,-1
+ ${UCMP}i $len,16
+ bltlr-
+
+ lis r0,0xfff0
+ mfspr r12,256 # save vrsave
+ li r11,0
+ mtspr 256,r0
+
+ vspltisb $seven,0x07 # 0x070707..07
+ le?lvsl $leperm,r11,r11
+ le?vspltisb $tmp,0x0f
+ le?vxor $leperm,$leperm,$seven
+
+ li $idx,15
+ lvx $tweak,0,$ivp # load [unaligned] iv
+ lvsl $inpperm,0,$ivp
+ lvx $inptail,$idx,$ivp
+ le?vxor $inpperm,$inpperm,$tmp
+ vperm $tweak,$tweak,$inptail,$inpperm
+
+ neg r11,$inp
+ lvsr $inpperm,0,r11 # prepare for unaligned load
+ lvx $inout,0,$inp
+ addi $inp,$inp,15 # 15 is not typo
+ le?vxor $inpperm,$inpperm,$tmp
+
+ ${UCMP}i $key2,0 # key2==NULL?
+ beq Lxts_enc_no_key2
+
+ ?lvsl $keyperm,0,$key2 # prepare for unaligned key
+ lwz $rounds,240($key2)
+ srwi $rounds,$rounds,1
+ subi $rounds,$rounds,1
+ li $idx,16
+
+ lvx $rndkey0,0,$key2
+ lvx $rndkey1,$idx,$key2
+ addi $idx,$idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vxor $tweak,$tweak,$rndkey0
+ lvx $rndkey0,$idx,$key2
+ addi $idx,$idx,16
+ mtctr $rounds
+
+Ltweak_xts_enc:
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vcipher $tweak,$tweak,$rndkey1
+ lvx $rndkey1,$idx,$key2
+ addi $idx,$idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vcipher $tweak,$tweak,$rndkey0
+ lvx $rndkey0,$idx,$key2
+ addi $idx,$idx,16
+ bdnz Ltweak_xts_enc
+
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vcipher $tweak,$tweak,$rndkey1
+ lvx $rndkey1,$idx,$key2
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vcipherlast $tweak,$tweak,$rndkey0
+
+ li $ivp,0 # don't chain the tweak
+ b Lxts_enc
+
+Lxts_enc_no_key2:
+ li $idx,-16
+ and $len,$len,$idx # in "tweak chaining"
+ # mode only complete
+ # blocks are processed
+Lxts_enc:
+ lvx $inptail,0,$inp
+ addi $inp,$inp,16
+
+ ?lvsl $keyperm,0,$key1 # prepare for unaligned key
+ lwz $rounds,240($key1)
+ srwi $rounds,$rounds,1
+ subi $rounds,$rounds,1
+ li $idx,16
+
+ vslb $eighty7,$seven,$seven # 0x808080..80
+ vor $eighty7,$eighty7,$seven # 0x878787..87
+ vspltisb $tmp,1 # 0x010101..01
+ vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01
+
+ ${UCMP}i $len,96
+ bge _aesp8_xts_encrypt6x
+
+ andi. $taillen,$len,15
+ subic r0,$len,32
+ subi $taillen,$taillen,16
+ subfe r0,r0,r0
+ and r0,r0,$taillen
+ add $inp,$inp,r0
+
+ lvx $rndkey0,0,$key1
+ lvx $rndkey1,$idx,$key1
+ addi $idx,$idx,16
+ vperm $inout,$inout,$inptail,$inpperm
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vxor $inout,$inout,$tweak
+ vxor $inout,$inout,$rndkey0
+ lvx $rndkey0,$idx,$key1
+ addi $idx,$idx,16
+ mtctr $rounds
+ b Loop_xts_enc
+
+.align 5
+Loop_xts_enc:
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vcipher $inout,$inout,$rndkey1
+ lvx $rndkey1,$idx,$key1
+ addi $idx,$idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vcipher $inout,$inout,$rndkey0
+ lvx $rndkey0,$idx,$key1
+ addi $idx,$idx,16
+ bdnz Loop_xts_enc
+
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vcipher $inout,$inout,$rndkey1
+ lvx $rndkey1,$idx,$key1
+ li $idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vxor $rndkey0,$rndkey0,$tweak
+ vcipherlast $output,$inout,$rndkey0
+
+ le?vperm $tmp,$output,$output,$leperm
+ be?nop
+ le?stvx_u $tmp,0,$out
+ be?stvx_u $output,0,$out
+ addi $out,$out,16
+
+ subic. $len,$len,16
+ beq Lxts_enc_done
+
+ vmr $inout,$inptail
+ lvx $inptail,0,$inp
+ addi $inp,$inp,16
+ lvx $rndkey0,0,$key1
+ lvx $rndkey1,$idx,$key1
+ addi $idx,$idx,16
+
+ subic r0,$len,32
+ subfe r0,r0,r0
+ and r0,r0,$taillen
+ add $inp,$inp,r0
+
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ vand $tmp,$tmp,$eighty7
+ vxor $tweak,$tweak,$tmp
+
+ vperm $inout,$inout,$inptail,$inpperm
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vxor $inout,$inout,$tweak
+ vxor $output,$output,$rndkey0 # just in case $len<16
+ vxor $inout,$inout,$rndkey0
+ lvx $rndkey0,$idx,$key1
+ addi $idx,$idx,16
+
+ mtctr $rounds
+ ${UCMP}i $len,16
+ bge Loop_xts_enc
+
+ vxor $output,$output,$tweak
+ lvsr $inpperm,0,$len # $inpperm is no longer needed
+ vxor $inptail,$inptail,$inptail # $inptail is no longer needed
+ vspltisb $tmp,-1
+ vperm $inptail,$inptail,$tmp,$inpperm
+ vsel $inout,$inout,$output,$inptail
+
+ subi r11,$out,17
+ subi $out,$out,16
+ mtctr $len
+ li $len,16
+Loop_xts_enc_steal:
+ lbzu r0,1(r11)
+ stb r0,16(r11)
+ bdnz Loop_xts_enc_steal
+
+ mtctr $rounds
+ b Loop_xts_enc # one more time...
+
+Lxts_enc_done:
+ ${UCMP}i $ivp,0
+ beq Lxts_enc_ret
+
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ vand $tmp,$tmp,$eighty7
+ vxor $tweak,$tweak,$tmp
+
+ le?vperm $tweak,$tweak,$tweak,$leperm
+ stvx_u $tweak,0,$ivp
+
+Lxts_enc_ret:
+ mtspr 256,r12 # restore vrsave
+ li r3,0
+ blr
+ .long 0
+ .byte 0,12,0x04,0,0x80,6,6,0
+ .long 0
+.size .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt
+
+.globl .${prefix}_xts_decrypt
+.align 5
+.${prefix}_xts_decrypt:
+ mr $inp,r3 # reassign
+ li r3,-1
+ ${UCMP}i $len,16
+ bltlr-
+
+ lis r0,0xfff8
+ mfspr r12,256 # save vrsave
+ li r11,0
+ mtspr 256,r0
+
+ andi. r0,$len,15
+ neg r0,r0
+ andi. r0,r0,16
+ sub $len,$len,r0
+
+ vspltisb $seven,0x07 # 0x070707..07
+ le?lvsl $leperm,r11,r11
+ le?vspltisb $tmp,0x0f
+ le?vxor $leperm,$leperm,$seven
+
+ li $idx,15
+ lvx $tweak,0,$ivp # load [unaligned] iv
+ lvsl $inpperm,0,$ivp
+ lvx $inptail,$idx,$ivp
+ le?vxor $inpperm,$inpperm,$tmp
+ vperm $tweak,$tweak,$inptail,$inpperm
+
+ neg r11,$inp
+ lvsr $inpperm,0,r11 # prepare for unaligned load
+ lvx $inout,0,$inp
+ addi $inp,$inp,15 # 15 is not typo
+ le?vxor $inpperm,$inpperm,$tmp
+
+ ${UCMP}i $key2,0 # key2==NULL?
+ beq Lxts_dec_no_key2
+
+ ?lvsl $keyperm,0,$key2 # prepare for unaligned key
+ lwz $rounds,240($key2)
+ srwi $rounds,$rounds,1
+ subi $rounds,$rounds,1
+ li $idx,16
+
+ lvx $rndkey0,0,$key2
+ lvx $rndkey1,$idx,$key2
+ addi $idx,$idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vxor $tweak,$tweak,$rndkey0
+ lvx $rndkey0,$idx,$key2
+ addi $idx,$idx,16
+ mtctr $rounds
+
+Ltweak_xts_dec:
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vcipher $tweak,$tweak,$rndkey1
+ lvx $rndkey1,$idx,$key2
+ addi $idx,$idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vcipher $tweak,$tweak,$rndkey0
+ lvx $rndkey0,$idx,$key2
+ addi $idx,$idx,16
+ bdnz Ltweak_xts_dec
+
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vcipher $tweak,$tweak,$rndkey1
+ lvx $rndkey1,$idx,$key2
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vcipherlast $tweak,$tweak,$rndkey0
+
+ li $ivp,0 # don't chain the tweak
+ b Lxts_dec
+
+Lxts_dec_no_key2:
+ neg $idx,$len
+ andi. $idx,$idx,15
+ add $len,$len,$idx # in "tweak chaining"
+ # mode only complete
+ # blocks are processed
+Lxts_dec:
+ lvx $inptail,0,$inp
+ addi $inp,$inp,16
+
+ ?lvsl $keyperm,0,$key1 # prepare for unaligned key
+ lwz $rounds,240($key1)
+ srwi $rounds,$rounds,1
+ subi $rounds,$rounds,1
+ li $idx,16
+
+ vslb $eighty7,$seven,$seven # 0x808080..80
+ vor $eighty7,$eighty7,$seven # 0x878787..87
+ vspltisb $tmp,1 # 0x010101..01
+ vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01
+
+ ${UCMP}i $len,96
+ bge _aesp8_xts_decrypt6x
+
+ lvx $rndkey0,0,$key1
+ lvx $rndkey1,$idx,$key1
+ addi $idx,$idx,16
+ vperm $inout,$inout,$inptail,$inpperm
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vxor $inout,$inout,$tweak
+ vxor $inout,$inout,$rndkey0
+ lvx $rndkey0,$idx,$key1
+ addi $idx,$idx,16
+ mtctr $rounds
+
+ ${UCMP}i $len,16
+ blt Ltail_xts_dec
+ be?b Loop_xts_dec
+
+.align 5
+Loop_xts_dec:
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vncipher $inout,$inout,$rndkey1
+ lvx $rndkey1,$idx,$key1
+ addi $idx,$idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vncipher $inout,$inout,$rndkey0
+ lvx $rndkey0,$idx,$key1
+ addi $idx,$idx,16
+ bdnz Loop_xts_dec
+
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vncipher $inout,$inout,$rndkey1
+ lvx $rndkey1,$idx,$key1
+ li $idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vxor $rndkey0,$rndkey0,$tweak
+ vncipherlast $output,$inout,$rndkey0
+
+ le?vperm $tmp,$output,$output,$leperm
+ be?nop
+ le?stvx_u $tmp,0,$out
+ be?stvx_u $output,0,$out
+ addi $out,$out,16
+
+ subic. $len,$len,16
+ beq Lxts_dec_done
+
+ vmr $inout,$inptail
+ lvx $inptail,0,$inp
+ addi $inp,$inp,16
+ lvx $rndkey0,0,$key1
+ lvx $rndkey1,$idx,$key1
+ addi $idx,$idx,16
+
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ vand $tmp,$tmp,$eighty7
+ vxor $tweak,$tweak,$tmp
+
+ vperm $inout,$inout,$inptail,$inpperm
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vxor $inout,$inout,$tweak
+ vxor $inout,$inout,$rndkey0
+ lvx $rndkey0,$idx,$key1
+ addi $idx,$idx,16
+
+ mtctr $rounds
+ ${UCMP}i $len,16
+ bge Loop_xts_dec
+
+Ltail_xts_dec:
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak1,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ vand $tmp,$tmp,$eighty7
+ vxor $tweak1,$tweak1,$tmp
+
+ subi $inp,$inp,16
+ add $inp,$inp,$len
+
+ vxor $inout,$inout,$tweak # :-(
+ vxor $inout,$inout,$tweak1 # :-)
+
+Loop_xts_dec_short:
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vncipher $inout,$inout,$rndkey1
+ lvx $rndkey1,$idx,$key1
+ addi $idx,$idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vncipher $inout,$inout,$rndkey0
+ lvx $rndkey0,$idx,$key1
+ addi $idx,$idx,16
+ bdnz Loop_xts_dec_short
+
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vncipher $inout,$inout,$rndkey1
+ lvx $rndkey1,$idx,$key1
+ li $idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vxor $rndkey0,$rndkey0,$tweak1
+ vncipherlast $output,$inout,$rndkey0
+
+ le?vperm $tmp,$output,$output,$leperm
+ be?nop
+ le?stvx_u $tmp,0,$out
+ be?stvx_u $output,0,$out
+
+ vmr $inout,$inptail
+ lvx $inptail,0,$inp
+ #addi $inp,$inp,16
+ lvx $rndkey0,0,$key1
+ lvx $rndkey1,$idx,$key1
+ addi $idx,$idx,16
+ vperm $inout,$inout,$inptail,$inpperm
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+
+ lvsr $inpperm,0,$len # $inpperm is no longer needed
+ vxor $inptail,$inptail,$inptail # $inptail is no longer needed
+ vspltisb $tmp,-1
+ vperm $inptail,$inptail,$tmp,$inpperm
+ vsel $inout,$inout,$output,$inptail
+
+ vxor $rndkey0,$rndkey0,$tweak
+ vxor $inout,$inout,$rndkey0
+ lvx $rndkey0,$idx,$key1
+ addi $idx,$idx,16
+
+ subi r11,$out,1
+ mtctr $len
+ li $len,16
+Loop_xts_dec_steal:
+ lbzu r0,1(r11)
+ stb r0,16(r11)
+ bdnz Loop_xts_dec_steal
+
+ mtctr $rounds
+ b Loop_xts_dec # one more time...
+
+Lxts_dec_done:
+ ${UCMP}i $ivp,0
+ beq Lxts_dec_ret
+
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ vand $tmp,$tmp,$eighty7
+ vxor $tweak,$tweak,$tmp
+
+ le?vperm $tweak,$tweak,$tweak,$leperm
+ stvx_u $tweak,0,$ivp
+
+Lxts_dec_ret:
+ mtspr 256,r12 # restore vrsave
+ li r3,0
+ blr
+ .long 0
+ .byte 0,12,0x04,0,0x80,6,6,0
+ .long 0
+.size .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
+___
+#########################################################################
+{{ # Optimized XTS procedures #
+my $key_=$key2;
+my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
+ $x00=0 if ($flavour =~ /osx/);
+my ($in0, $in1, $in2, $in3, $in4, $in5 )=map("v$_",(0..5));
+my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
+my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
+my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
+ # v26-v31 last 6 round keys
+my ($keyperm)=($out0); # aliases with "caller", redundant assignment
+my $taillen=$x70;
+
+$code.=<<___;
+.align 5
+_aesp8_xts_encrypt6x:
+ $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
+ mflr r11
+ li r7,`$FRAME+8*16+15`
+ li r3,`$FRAME+8*16+31`
+ $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
+ stvx v20,r7,$sp # ABI says so
+ addi r7,r7,32
+ stvx v21,r3,$sp
+ addi r3,r3,32
+ stvx v22,r7,$sp
+ addi r7,r7,32
+ stvx v23,r3,$sp
+ addi r3,r3,32
+ stvx v24,r7,$sp
+ addi r7,r7,32
+ stvx v25,r3,$sp
+ addi r3,r3,32
+ stvx v26,r7,$sp
+ addi r7,r7,32
+ stvx v27,r3,$sp
+ addi r3,r3,32
+ stvx v28,r7,$sp
+ addi r7,r7,32
+ stvx v29,r3,$sp
+ addi r3,r3,32
+ stvx v30,r7,$sp
+ stvx v31,r3,$sp
+ li r0,-1
+ stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
+ li $x10,0x10
+ $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
+ li $x20,0x20
+ $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
+ li $x30,0x30
+ $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
+ li $x40,0x40
+ $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
+ li $x50,0x50
+ $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
+ li $x60,0x60
+ $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
+ li $x70,0x70
+ mtspr 256,r0
+
+ subi $rounds,$rounds,3 # -4 in total
+
+ lvx $rndkey0,$x00,$key1 # load key schedule
+ lvx v30,$x10,$key1
+ addi $key1,$key1,0x20
+ lvx v31,$x00,$key1
+ ?vperm $rndkey0,$rndkey0,v30,$keyperm
+ addi $key_,$sp,$FRAME+15
+ mtctr $rounds
+
+Load_xts_enc_key:
+ ?vperm v24,v30,v31,$keyperm
+ lvx v30,$x10,$key1
+ addi $key1,$key1,0x20
+ stvx v24,$x00,$key_ # off-load round[1]
+ ?vperm v25,v31,v30,$keyperm
+ lvx v31,$x00,$key1
+ stvx v25,$x10,$key_ # off-load round[2]
+ addi $key_,$key_,0x20
+ bdnz Load_xts_enc_key
+
+ lvx v26,$x10,$key1
+ ?vperm v24,v30,v31,$keyperm
+ lvx v27,$x20,$key1
+ stvx v24,$x00,$key_ # off-load round[3]
+ ?vperm v25,v31,v26,$keyperm
+ lvx v28,$x30,$key1
+ stvx v25,$x10,$key_ # off-load round[4]
+ addi $key_,$sp,$FRAME+15 # rewind $key_
+ ?vperm v26,v26,v27,$keyperm
+ lvx v29,$x40,$key1
+ ?vperm v27,v27,v28,$keyperm
+ lvx v30,$x50,$key1
+ ?vperm v28,v28,v29,$keyperm
+ lvx v31,$x60,$key1
+ ?vperm v29,v29,v30,$keyperm
+ lvx $twk5,$x70,$key1 # borrow $twk5
+ ?vperm v30,v30,v31,$keyperm
+ lvx v24,$x00,$key_ # pre-load round[1]
+ ?vperm v31,v31,$twk5,$keyperm
+ lvx v25,$x10,$key_ # pre-load round[2]
+
+ vperm $in0,$inout,$inptail,$inpperm
+ subi $inp,$inp,31 # undo "caller"
+ vxor $twk0,$tweak,$rndkey0
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ vand $tmp,$tmp,$eighty7
+ vxor $out0,$in0,$twk0
+ vxor $tweak,$tweak,$tmp
+
+ lvx_u $in1,$x10,$inp
+ vxor $twk1,$tweak,$rndkey0
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ le?vperm $in1,$in1,$in1,$leperm
+ vand $tmp,$tmp,$eighty7
+ vxor $out1,$in1,$twk1
+ vxor $tweak,$tweak,$tmp
+
+ lvx_u $in2,$x20,$inp
+ andi. $taillen,$len,15
+ vxor $twk2,$tweak,$rndkey0
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ le?vperm $in2,$in2,$in2,$leperm
+ vand $tmp,$tmp,$eighty7
+ vxor $out2,$in2,$twk2
+ vxor $tweak,$tweak,$tmp
+
+ lvx_u $in3,$x30,$inp
+ sub $len,$len,$taillen
+ vxor $twk3,$tweak,$rndkey0
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ le?vperm $in3,$in3,$in3,$leperm
+ vand $tmp,$tmp,$eighty7
+ vxor $out3,$in3,$twk3
+ vxor $tweak,$tweak,$tmp
+
+ lvx_u $in4,$x40,$inp
+ subi $len,$len,0x60
+ vxor $twk4,$tweak,$rndkey0
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ le?vperm $in4,$in4,$in4,$leperm
+ vand $tmp,$tmp,$eighty7
+ vxor $out4,$in4,$twk4
+ vxor $tweak,$tweak,$tmp
+
+ lvx_u $in5,$x50,$inp
+ addi $inp,$inp,0x60
+ vxor $twk5,$tweak,$rndkey0
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ le?vperm $in5,$in5,$in5,$leperm
+ vand $tmp,$tmp,$eighty7
+ vxor $out5,$in5,$twk5
+ vxor $tweak,$tweak,$tmp
+
+ vxor v31,v31,$rndkey0
+ mtctr $rounds
+ b Loop_xts_enc6x
+
+.align 5
+Loop_xts_enc6x:
+ vcipher $out0,$out0,v24
+ vcipher $out1,$out1,v24
+ vcipher $out2,$out2,v24
+ vcipher $out3,$out3,v24
+ vcipher $out4,$out4,v24
+ vcipher $out5,$out5,v24
+ lvx v24,$x20,$key_ # round[3]
+ addi $key_,$key_,0x20
+
+ vcipher $out0,$out0,v25
+ vcipher $out1,$out1,v25
+ vcipher $out2,$out2,v25
+ vcipher $out3,$out3,v25
+ vcipher $out4,$out4,v25
+ vcipher $out5,$out5,v25
+ lvx v25,$x10,$key_ # round[4]
+ bdnz Loop_xts_enc6x
+
+ subic $len,$len,96 # $len-=96
+ vxor $in0,$twk0,v31 # xor with last round key
+ vcipher $out0,$out0,v24
+ vcipher $out1,$out1,v24
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vxor $twk0,$tweak,$rndkey0
+ vaddubm $tweak,$tweak,$tweak
+ vcipher $out2,$out2,v24
+ vcipher $out3,$out3,v24
+ vsldoi $tmp,$tmp,$tmp,15
+ vcipher $out4,$out4,v24
+ vcipher $out5,$out5,v24
+
+ subfe. r0,r0,r0 # borrow?-1:0
+ vand $tmp,$tmp,$eighty7
+ vcipher $out0,$out0,v25
+ vcipher $out1,$out1,v25
+ vxor $tweak,$tweak,$tmp
+ vcipher $out2,$out2,v25
+ vcipher $out3,$out3,v25
+ vxor $in1,$twk1,v31
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vxor $twk1,$tweak,$rndkey0
+ vcipher $out4,$out4,v25
+ vcipher $out5,$out5,v25
+
+ and r0,r0,$len
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ vcipher $out0,$out0,v26
+ vcipher $out1,$out1,v26
+ vand $tmp,$tmp,$eighty7
+ vcipher $out2,$out2,v26
+ vcipher $out3,$out3,v26
+ vxor $tweak,$tweak,$tmp
+ vcipher $out4,$out4,v26
+ vcipher $out5,$out5,v26
+
+ add $inp,$inp,r0 # $inp is adjusted in such
+ # way that at exit from the
+ # loop inX-in5 are loaded
+ # with last "words"
+ vxor $in2,$twk2,v31
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vxor $twk2,$tweak,$rndkey0
+ vaddubm $tweak,$tweak,$tweak
+ vcipher $out0,$out0,v27
+ vcipher $out1,$out1,v27
+ vsldoi $tmp,$tmp,$tmp,15
+ vcipher $out2,$out2,v27
+ vcipher $out3,$out3,v27
+ vand $tmp,$tmp,$eighty7
+ vcipher $out4,$out4,v27
+ vcipher $out5,$out5,v27
+
+ addi $key_,$sp,$FRAME+15 # rewind $key_
+ vxor $tweak,$tweak,$tmp
+ vcipher $out0,$out0,v28
+ vcipher $out1,$out1,v28
+ vxor $in3,$twk3,v31
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vxor $twk3,$tweak,$rndkey0
+ vcipher $out2,$out2,v28
+ vcipher $out3,$out3,v28
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ vcipher $out4,$out4,v28
+ vcipher $out5,$out5,v28
+ lvx v24,$x00,$key_ # re-pre-load round[1]
+ vand $tmp,$tmp,$eighty7
+
+ vcipher $out0,$out0,v29
+ vcipher $out1,$out1,v29
+ vxor $tweak,$tweak,$tmp
+ vcipher $out2,$out2,v29
+ vcipher $out3,$out3,v29
+ vxor $in4,$twk4,v31
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vxor $twk4,$tweak,$rndkey0
+ vcipher $out4,$out4,v29
+ vcipher $out5,$out5,v29
+ lvx v25,$x10,$key_ # re-pre-load round[2]
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+
+ vcipher $out0,$out0,v30
+ vcipher $out1,$out1,v30
+ vand $tmp,$tmp,$eighty7
+ vcipher $out2,$out2,v30
+ vcipher $out3,$out3,v30
+ vxor $tweak,$tweak,$tmp
+ vcipher $out4,$out4,v30
+ vcipher $out5,$out5,v30
+ vxor $in5,$twk5,v31
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vxor $twk5,$tweak,$rndkey0
+
+ vcipherlast $out0,$out0,$in0
+ lvx_u $in0,$x00,$inp # load next input block
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ vcipherlast $out1,$out1,$in1
+ lvx_u $in1,$x10,$inp
+ vcipherlast $out2,$out2,$in2
+ le?vperm $in0,$in0,$in0,$leperm
+ lvx_u $in2,$x20,$inp
+ vand $tmp,$tmp,$eighty7
+ vcipherlast $out3,$out3,$in3
+ le?vperm $in1,$in1,$in1,$leperm
+ lvx_u $in3,$x30,$inp
+ vcipherlast $out4,$out4,$in4
+ le?vperm $in2,$in2,$in2,$leperm
+ lvx_u $in4,$x40,$inp
+ vxor $tweak,$tweak,$tmp
+ vcipherlast $tmp,$out5,$in5 # last block might be needed
+ # in stealing mode
+ le?vperm $in3,$in3,$in3,$leperm
+ lvx_u $in5,$x50,$inp
+ addi $inp,$inp,0x60
+ le?vperm $in4,$in4,$in4,$leperm
+ le?vperm $in5,$in5,$in5,$leperm
+
+ le?vperm $out0,$out0,$out0,$leperm
+ le?vperm $out1,$out1,$out1,$leperm
+ stvx_u $out0,$x00,$out # store output
+ vxor $out0,$in0,$twk0
+ le?vperm $out2,$out2,$out2,$leperm
+ stvx_u $out1,$x10,$out
+ vxor $out1,$in1,$twk1
+ le?vperm $out3,$out3,$out3,$leperm
+ stvx_u $out2,$x20,$out
+ vxor $out2,$in2,$twk2
+ le?vperm $out4,$out4,$out4,$leperm
+ stvx_u $out3,$x30,$out
+ vxor $out3,$in3,$twk3
+ le?vperm $out5,$tmp,$tmp,$leperm
+ stvx_u $out4,$x40,$out
+ vxor $out4,$in4,$twk4
+ le?stvx_u $out5,$x50,$out
+ be?stvx_u $tmp, $x50,$out
+ vxor $out5,$in5,$twk5
+ addi $out,$out,0x60
+
+ mtctr $rounds
+ beq Loop_xts_enc6x # did $len-=96 borrow?
+
+ addic. $len,$len,0x60
+ beq Lxts_enc6x_zero
+ cmpwi $len,0x20
+ blt Lxts_enc6x_one
+ nop
+ beq Lxts_enc6x_two
+ cmpwi $len,0x40
+ blt Lxts_enc6x_three
+ nop
+ beq Lxts_enc6x_four
+
+Lxts_enc6x_five:
+ vxor $out0,$in1,$twk0
+ vxor $out1,$in2,$twk1
+ vxor $out2,$in3,$twk2
+ vxor $out3,$in4,$twk3
+ vxor $out4,$in5,$twk4
+
+ bl _aesp8_xts_enc5x
+
+ le?vperm $out0,$out0,$out0,$leperm
+ vmr $twk0,$twk5 # unused tweak
+ le?vperm $out1,$out1,$out1,$leperm
+ stvx_u $out0,$x00,$out # store output
+ le?vperm $out2,$out2,$out2,$leperm
+ stvx_u $out1,$x10,$out
+ le?vperm $out3,$out3,$out3,$leperm
+ stvx_u $out2,$x20,$out
+ vxor $tmp,$out4,$twk5 # last block prep for stealing
+ le?vperm $out4,$out4,$out4,$leperm
+ stvx_u $out3,$x30,$out
+ stvx_u $out4,$x40,$out
+ addi $out,$out,0x50
+ bne Lxts_enc6x_steal
+ b Lxts_enc6x_done
+
+.align 4
+Lxts_enc6x_four:
+ vxor $out0,$in2,$twk0
+ vxor $out1,$in3,$twk1
+ vxor $out2,$in4,$twk2
+ vxor $out3,$in5,$twk3
+ vxor $out4,$out4,$out4
+
+ bl _aesp8_xts_enc5x
+
+ le?vperm $out0,$out0,$out0,$leperm
+ vmr $twk0,$twk4 # unused tweak
+ le?vperm $out1,$out1,$out1,$leperm
+ stvx_u $out0,$x00,$out # store output
+ le?vperm $out2,$out2,$out2,$leperm
+ stvx_u $out1,$x10,$out
+ vxor $tmp,$out3,$twk4 # last block prep for stealing
+ le?vperm $out3,$out3,$out3,$leperm
+ stvx_u $out2,$x20,$out
+ stvx_u $out3,$x30,$out
+ addi $out,$out,0x40
+ bne Lxts_enc6x_steal
+ b Lxts_enc6x_done
+
+.align 4
+Lxts_enc6x_three:
+ vxor $out0,$in3,$twk0
+ vxor $out1,$in4,$twk1
+ vxor $out2,$in5,$twk2
+ vxor $out3,$out3,$out3
+ vxor $out4,$out4,$out4
+
+ bl _aesp8_xts_enc5x
+
+ le?vperm $out0,$out0,$out0,$leperm
+ vmr $twk0,$twk3 # unused tweak
+ le?vperm $out1,$out1,$out1,$leperm
+ stvx_u $out0,$x00,$out # store output
+ vxor $tmp,$out2,$twk3 # last block prep for stealing
+ le?vperm $out2,$out2,$out2,$leperm
+ stvx_u $out1,$x10,$out
+ stvx_u $out2,$x20,$out
+ addi $out,$out,0x30
+ bne Lxts_enc6x_steal
+ b Lxts_enc6x_done
+
+.align 4
+Lxts_enc6x_two:
+ vxor $out0,$in4,$twk0
+ vxor $out1,$in5,$twk1
+ vxor $out2,$out2,$out2
+ vxor $out3,$out3,$out3
+ vxor $out4,$out4,$out4
+
+ bl _aesp8_xts_enc5x
+
+ le?vperm $out0,$out0,$out0,$leperm
+ vmr $twk0,$twk2 # unused tweak
+ vxor $tmp,$out1,$twk2 # last block prep for stealing
+ le?vperm $out1,$out1,$out1,$leperm
+ stvx_u $out0,$x00,$out # store output
+ stvx_u $out1,$x10,$out
+ addi $out,$out,0x20
+ bne Lxts_enc6x_steal
+ b Lxts_enc6x_done
+
+.align 4
+Lxts_enc6x_one:
+ vxor $out0,$in5,$twk0
+ nop
+Loop_xts_enc1x:
+ vcipher $out0,$out0,v24
+ lvx v24,$x20,$key_ # round[3]
+ addi $key_,$key_,0x20
+
+ vcipher $out0,$out0,v25
+ lvx v25,$x10,$key_ # round[4]
+ bdnz Loop_xts_enc1x
+
+ add $inp,$inp,$taillen
+ cmpwi $taillen,0
+ vcipher $out0,$out0,v24
+
+ subi $inp,$inp,16
+ vcipher $out0,$out0,v25
+
+ lvsr $inpperm,0,$taillen
+ vcipher $out0,$out0,v26
+
+ lvx_u $in0,0,$inp
+ vcipher $out0,$out0,v27
+
+ addi $key_,$sp,$FRAME+15 # rewind $key_
+ vcipher $out0,$out0,v28
+ lvx v24,$x00,$key_ # re-pre-load round[1]
+
+ vcipher $out0,$out0,v29
+ lvx v25,$x10,$key_ # re-pre-load round[2]
+ vxor $twk0,$twk0,v31
+
+ le?vperm $in0,$in0,$in0,$leperm
+ vcipher $out0,$out0,v30
+
+ vperm $in0,$in0,$in0,$inpperm
+ vcipherlast $out0,$out0,$twk0
+
+ vmr $twk0,$twk1 # unused tweak
+ vxor $tmp,$out0,$twk1 # last block prep for stealing
+ le?vperm $out0,$out0,$out0,$leperm
+ stvx_u $out0,$x00,$out # store output
+ addi $out,$out,0x10
+ bne Lxts_enc6x_steal
+ b Lxts_enc6x_done
+
+.align 4
+Lxts_enc6x_zero:
+ cmpwi $taillen,0
+ beq Lxts_enc6x_done
+
+ add $inp,$inp,$taillen
+ subi $inp,$inp,16
+ lvx_u $in0,0,$inp
+ lvsr $inpperm,0,$taillen # $in5 is no more
+ le?vperm $in0,$in0,$in0,$leperm
+ vperm $in0,$in0,$in0,$inpperm
+ vxor $tmp,$tmp,$twk0
+Lxts_enc6x_steal:
+ vxor $in0,$in0,$twk0
+ vxor $out0,$out0,$out0
+ vspltisb $out1,-1
+ vperm $out0,$out0,$out1,$inpperm
+ vsel $out0,$in0,$tmp,$out0 # $tmp is last block, remember?
+
+ subi r30,$out,17
+ subi $out,$out,16
+ mtctr $taillen
+Loop_xts_enc6x_steal:
+ lbzu r0,1(r30)
+ stb r0,16(r30)
+ bdnz Loop_xts_enc6x_steal
+
+ li $taillen,0
+ mtctr $rounds
+ b Loop_xts_enc1x # one more time...
+
+.align 4
+Lxts_enc6x_done:
+ ${UCMP}i $ivp,0
+ beq Lxts_enc6x_ret
+
+ vxor $tweak,$twk0,$rndkey0
+ le?vperm $tweak,$tweak,$tweak,$leperm
+ stvx_u $tweak,0,$ivp
+
+Lxts_enc6x_ret:
+ mtlr r11
+ li r10,`$FRAME+15`
+ li r11,`$FRAME+31`
+ stvx $seven,r10,$sp # wipe copies of round keys
+ addi r10,r10,32
+ stvx $seven,r11,$sp
+ addi r11,r11,32
+ stvx $seven,r10,$sp
+ addi r10,r10,32
+ stvx $seven,r11,$sp
+ addi r11,r11,32
+ stvx $seven,r10,$sp
+ addi r10,r10,32
+ stvx $seven,r11,$sp
+ addi r11,r11,32
+ stvx $seven,r10,$sp
+ addi r10,r10,32
+ stvx $seven,r11,$sp
+ addi r11,r11,32
+
+ mtspr 256,$vrsave
+ lvx v20,r10,$sp # ABI says so
+ addi r10,r10,32
+ lvx v21,r11,$sp
+ addi r11,r11,32
+ lvx v22,r10,$sp
+ addi r10,r10,32
+ lvx v23,r11,$sp
+ addi r11,r11,32
+ lvx v24,r10,$sp
+ addi r10,r10,32
+ lvx v25,r11,$sp
+ addi r11,r11,32
+ lvx v26,r10,$sp
+ addi r10,r10,32
+ lvx v27,r11,$sp
+ addi r11,r11,32
+ lvx v28,r10,$sp
+ addi r10,r10,32
+ lvx v29,r11,$sp
+ addi r11,r11,32
+ lvx v30,r10,$sp
+ lvx v31,r11,$sp
+ $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
+ $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
+ $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
+ $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
+ $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
+ $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
+ addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
+ blr
+ .long 0
+ .byte 0,12,0x04,1,0x80,6,6,0
+ .long 0
+
+.align 5
+_aesp8_xts_enc5x:
+ vcipher $out0,$out0,v24
+ vcipher $out1,$out1,v24
+ vcipher $out2,$out2,v24
+ vcipher $out3,$out3,v24
+ vcipher $out4,$out4,v24
+ lvx v24,$x20,$key_ # round[3]
+ addi $key_,$key_,0x20
+
+ vcipher $out0,$out0,v25
+ vcipher $out1,$out1,v25
+ vcipher $out2,$out2,v25
+ vcipher $out3,$out3,v25
+ vcipher $out4,$out4,v25
+ lvx v25,$x10,$key_ # round[4]
+ bdnz _aesp8_xts_enc5x
+
+ add $inp,$inp,$taillen
+ cmpwi $taillen,0
+ vcipher $out0,$out0,v24
+ vcipher $out1,$out1,v24
+ vcipher $out2,$out2,v24
+ vcipher $out3,$out3,v24
+ vcipher $out4,$out4,v24
+
+ subi $inp,$inp,16
+ vcipher $out0,$out0,v25
+ vcipher $out1,$out1,v25
+ vcipher $out2,$out2,v25
+ vcipher $out3,$out3,v25
+ vcipher $out4,$out4,v25
+ vxor $twk0,$twk0,v31
+
+ vcipher $out0,$out0,v26
+ lvsr $inpperm,r0,$taillen # $in5 is no more
+ vcipher $out1,$out1,v26
+ vcipher $out2,$out2,v26
+ vcipher $out3,$out3,v26
+ vcipher $out4,$out4,v26
+ vxor $in1,$twk1,v31
+
+ vcipher $out0,$out0,v27
+ lvx_u $in0,0,$inp
+ vcipher $out1,$out1,v27
+ vcipher $out2,$out2,v27
+ vcipher $out3,$out3,v27
+ vcipher $out4,$out4,v27
+ vxor $in2,$twk2,v31
+
+ addi $key_,$sp,$FRAME+15 # rewind $key_
+ vcipher $out0,$out0,v28
+ vcipher $out1,$out1,v28
+ vcipher $out2,$out2,v28
+ vcipher $out3,$out3,v28
+ vcipher $out4,$out4,v28
+ lvx v24,$x00,$key_ # re-pre-load round[1]
+ vxor $in3,$twk3,v31
+
+ vcipher $out0,$out0,v29
+ le?vperm $in0,$in0,$in0,$leperm
+ vcipher $out1,$out1,v29
+ vcipher $out2,$out2,v29
+ vcipher $out3,$out3,v29
+ vcipher $out4,$out4,v29
+ lvx v25,$x10,$key_ # re-pre-load round[2]
+ vxor $in4,$twk4,v31
+
+ vcipher $out0,$out0,v30
+ vperm $in0,$in0,$in0,$inpperm
+ vcipher $out1,$out1,v30
+ vcipher $out2,$out2,v30
+ vcipher $out3,$out3,v30
+ vcipher $out4,$out4,v30
+
+ vcipherlast $out0,$out0,$twk0
+ vcipherlast $out1,$out1,$in1
+ vcipherlast $out2,$out2,$in2
+ vcipherlast $out3,$out3,$in3
+ vcipherlast $out4,$out4,$in4
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,0,0
+
+.align 5
+_aesp8_xts_decrypt6x:
+ $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
+ mflr r11
+ li r7,`$FRAME+8*16+15`
+ li r3,`$FRAME+8*16+31`
+ $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
+ stvx v20,r7,$sp # ABI says so
+ addi r7,r7,32
+ stvx v21,r3,$sp
+ addi r3,r3,32
+ stvx v22,r7,$sp
+ addi r7,r7,32
+ stvx v23,r3,$sp
+ addi r3,r3,32
+ stvx v24,r7,$sp
+ addi r7,r7,32
+ stvx v25,r3,$sp
+ addi r3,r3,32
+ stvx v26,r7,$sp
+ addi r7,r7,32
+ stvx v27,r3,$sp
+ addi r3,r3,32
+ stvx v28,r7,$sp
+ addi r7,r7,32
+ stvx v29,r3,$sp
+ addi r3,r3,32
+ stvx v30,r7,$sp
+ stvx v31,r3,$sp
+ li r0,-1
+ stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
+ li $x10,0x10
+ $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
+ li $x20,0x20
+ $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
+ li $x30,0x30
+ $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
+ li $x40,0x40
+ $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
+ li $x50,0x50
+ $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
+ li $x60,0x60
+ $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
+ li $x70,0x70
+ mtspr 256,r0
+
+ subi $rounds,$rounds,3 # -4 in total
+
+ lvx $rndkey0,$x00,$key1 # load key schedule
+ lvx v30,$x10,$key1
+ addi $key1,$key1,0x20
+ lvx v31,$x00,$key1
+ ?vperm $rndkey0,$rndkey0,v30,$keyperm
+ addi $key_,$sp,$FRAME+15
+ mtctr $rounds
+
+Load_xts_dec_key:
+ ?vperm v24,v30,v31,$keyperm
+ lvx v30,$x10,$key1
+ addi $key1,$key1,0x20
+ stvx v24,$x00,$key_ # off-load round[1]
+ ?vperm v25,v31,v30,$keyperm
+ lvx v31,$x00,$key1
+ stvx v25,$x10,$key_ # off-load round[2]
+ addi $key_,$key_,0x20
+ bdnz Load_xts_dec_key
+
+ lvx v26,$x10,$key1
+ ?vperm v24,v30,v31,$keyperm
+ lvx v27,$x20,$key1
+ stvx v24,$x00,$key_ # off-load round[3]
+ ?vperm v25,v31,v26,$keyperm
+ lvx v28,$x30,$key1
+ stvx v25,$x10,$key_ # off-load round[4]
+ addi $key_,$sp,$FRAME+15 # rewind $key_
+ ?vperm v26,v26,v27,$keyperm
+ lvx v29,$x40,$key1
+ ?vperm v27,v27,v28,$keyperm
+ lvx v30,$x50,$key1
+ ?vperm v28,v28,v29,$keyperm
+ lvx v31,$x60,$key1
+ ?vperm v29,v29,v30,$keyperm
+ lvx $twk5,$x70,$key1 # borrow $twk5
+ ?vperm v30,v30,v31,$keyperm
+ lvx v24,$x00,$key_ # pre-load round[1]
+ ?vperm v31,v31,$twk5,$keyperm
+ lvx v25,$x10,$key_ # pre-load round[2]
+
+ vperm $in0,$inout,$inptail,$inpperm
+ subi $inp,$inp,31 # undo "caller"
+ vxor $twk0,$tweak,$rndkey0
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ vand $tmp,$tmp,$eighty7
+ vxor $out0,$in0,$twk0
+ vxor $tweak,$tweak,$tmp
+
+ lvx_u $in1,$x10,$inp
+ vxor $twk1,$tweak,$rndkey0
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ le?vperm $in1,$in1,$in1,$leperm
+ vand $tmp,$tmp,$eighty7
+ vxor $out1,$in1,$twk1
+ vxor $tweak,$tweak,$tmp
+
+ lvx_u $in2,$x20,$inp
+ andi. $taillen,$len,15
+ vxor $twk2,$tweak,$rndkey0
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ le?vperm $in2,$in2,$in2,$leperm
+ vand $tmp,$tmp,$eighty7
+ vxor $out2,$in2,$twk2
+ vxor $tweak,$tweak,$tmp
+
+ lvx_u $in3,$x30,$inp
+ sub $len,$len,$taillen
+ vxor $twk3,$tweak,$rndkey0
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ le?vperm $in3,$in3,$in3,$leperm
+ vand $tmp,$tmp,$eighty7
+ vxor $out3,$in3,$twk3
+ vxor $tweak,$tweak,$tmp
+
+ lvx_u $in4,$x40,$inp
+ subi $len,$len,0x60
+ vxor $twk4,$tweak,$rndkey0
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ le?vperm $in4,$in4,$in4,$leperm
+ vand $tmp,$tmp,$eighty7
+ vxor $out4,$in4,$twk4
+ vxor $tweak,$tweak,$tmp
+
+ lvx_u $in5,$x50,$inp
+ addi $inp,$inp,0x60
+ vxor $twk5,$tweak,$rndkey0
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ le?vperm $in5,$in5,$in5,$leperm
+ vand $tmp,$tmp,$eighty7
+ vxor $out5,$in5,$twk5
+ vxor $tweak,$tweak,$tmp
+
+ vxor v31,v31,$rndkey0
+ mtctr $rounds
+ b Loop_xts_dec6x
+
+.align 5
+Loop_xts_dec6x:
+ vncipher $out0,$out0,v24
+ vncipher $out1,$out1,v24
+ vncipher $out2,$out2,v24
+ vncipher $out3,$out3,v24
+ vncipher $out4,$out4,v24
+ vncipher $out5,$out5,v24
+ lvx v24,$x20,$key_ # round[3]
+ addi $key_,$key_,0x20
+
+ vncipher $out0,$out0,v25
+ vncipher $out1,$out1,v25
+ vncipher $out2,$out2,v25
+ vncipher $out3,$out3,v25
+ vncipher $out4,$out4,v25
+ vncipher $out5,$out5,v25
+ lvx v25,$x10,$key_ # round[4]
+ bdnz Loop_xts_dec6x
+
+ subic $len,$len,96 # $len-=96
+ vxor $in0,$twk0,v31 # xor with last round key
+ vncipher $out0,$out0,v24
+ vncipher $out1,$out1,v24
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vxor $twk0,$tweak,$rndkey0
+ vaddubm $tweak,$tweak,$tweak
+ vncipher $out2,$out2,v24
+ vncipher $out3,$out3,v24
+ vsldoi $tmp,$tmp,$tmp,15
+ vncipher $out4,$out4,v24
+ vncipher $out5,$out5,v24
+
+ subfe. r0,r0,r0 # borrow?-1:0
+ vand $tmp,$tmp,$eighty7
+ vncipher $out0,$out0,v25
+ vncipher $out1,$out1,v25
+ vxor $tweak,$tweak,$tmp
+ vncipher $out2,$out2,v25
+ vncipher $out3,$out3,v25
+ vxor $in1,$twk1,v31
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vxor $twk1,$tweak,$rndkey0
+ vncipher $out4,$out4,v25
+ vncipher $out5,$out5,v25
+
+ and r0,r0,$len
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ vncipher $out0,$out0,v26
+ vncipher $out1,$out1,v26
+ vand $tmp,$tmp,$eighty7
+ vncipher $out2,$out2,v26
+ vncipher $out3,$out3,v26
+ vxor $tweak,$tweak,$tmp
+ vncipher $out4,$out4,v26
+ vncipher $out5,$out5,v26
+
+ add $inp,$inp,r0 # $inp is adjusted in such
+ # way that at exit from the
+ # loop inX-in5 are loaded
+ # with last "words"
+ vxor $in2,$twk2,v31
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vxor $twk2,$tweak,$rndkey0
+ vaddubm $tweak,$tweak,$tweak
+ vncipher $out0,$out0,v27
+ vncipher $out1,$out1,v27
+ vsldoi $tmp,$tmp,$tmp,15
+ vncipher $out2,$out2,v27
+ vncipher $out3,$out3,v27
+ vand $tmp,$tmp,$eighty7
+ vncipher $out4,$out4,v27
+ vncipher $out5,$out5,v27
+
+ addi $key_,$sp,$FRAME+15 # rewind $key_
+ vxor $tweak,$tweak,$tmp
+ vncipher $out0,$out0,v28
+ vncipher $out1,$out1,v28
+ vxor $in3,$twk3,v31
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vxor $twk3,$tweak,$rndkey0
+ vncipher $out2,$out2,v28
+ vncipher $out3,$out3,v28
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ vncipher $out4,$out4,v28
+ vncipher $out5,$out5,v28
+ lvx v24,$x00,$key_ # re-pre-load round[1]
+ vand $tmp,$tmp,$eighty7
+
+ vncipher $out0,$out0,v29
+ vncipher $out1,$out1,v29
+ vxor $tweak,$tweak,$tmp
+ vncipher $out2,$out2,v29
+ vncipher $out3,$out3,v29
+ vxor $in4,$twk4,v31
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vxor $twk4,$tweak,$rndkey0
+ vncipher $out4,$out4,v29
+ vncipher $out5,$out5,v29
+ lvx v25,$x10,$key_ # re-pre-load round[2]
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+
+ vncipher $out0,$out0,v30
+ vncipher $out1,$out1,v30
+ vand $tmp,$tmp,$eighty7
+ vncipher $out2,$out2,v30
+ vncipher $out3,$out3,v30
+ vxor $tweak,$tweak,$tmp
+ vncipher $out4,$out4,v30
+ vncipher $out5,$out5,v30
+ vxor $in5,$twk5,v31
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vxor $twk5,$tweak,$rndkey0
+
+ vncipherlast $out0,$out0,$in0
+ lvx_u $in0,$x00,$inp # load next input block
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ vncipherlast $out1,$out1,$in1
+ lvx_u $in1,$x10,$inp
+ vncipherlast $out2,$out2,$in2
+ le?vperm $in0,$in0,$in0,$leperm
+ lvx_u $in2,$x20,$inp
+ vand $tmp,$tmp,$eighty7
+ vncipherlast $out3,$out3,$in3
+ le?vperm $in1,$in1,$in1,$leperm
+ lvx_u $in3,$x30,$inp
+ vncipherlast $out4,$out4,$in4
+ le?vperm $in2,$in2,$in2,$leperm
+ lvx_u $in4,$x40,$inp
+ vxor $tweak,$tweak,$tmp
+ vncipherlast $out5,$out5,$in5
+ le?vperm $in3,$in3,$in3,$leperm
+ lvx_u $in5,$x50,$inp
+ addi $inp,$inp,0x60
+ le?vperm $in4,$in4,$in4,$leperm
+ le?vperm $in5,$in5,$in5,$leperm
+
+ le?vperm $out0,$out0,$out0,$leperm
+ le?vperm $out1,$out1,$out1,$leperm
+ stvx_u $out0,$x00,$out # store output
+ vxor $out0,$in0,$twk0
+ le?vperm $out2,$out2,$out2,$leperm
+ stvx_u $out1,$x10,$out
+ vxor $out1,$in1,$twk1
+ le?vperm $out3,$out3,$out3,$leperm
+ stvx_u $out2,$x20,$out
+ vxor $out2,$in2,$twk2
+ le?vperm $out4,$out4,$out4,$leperm
+ stvx_u $out3,$x30,$out
+ vxor $out3,$in3,$twk3
+ le?vperm $out5,$out5,$out5,$leperm
+ stvx_u $out4,$x40,$out
+ vxor $out4,$in4,$twk4
+ stvx_u $out5,$x50,$out
+ vxor $out5,$in5,$twk5
+ addi $out,$out,0x60
+
+ mtctr $rounds
+ beq Loop_xts_dec6x # did $len-=96 borrow?
+
+ addic. $len,$len,0x60
+ beq Lxts_dec6x_zero
+ cmpwi $len,0x20
+ blt Lxts_dec6x_one
+ nop
+ beq Lxts_dec6x_two
+ cmpwi $len,0x40
+ blt Lxts_dec6x_three
+ nop
+ beq Lxts_dec6x_four
+
+Lxts_dec6x_five:
+ vxor $out0,$in1,$twk0
+ vxor $out1,$in2,$twk1
+ vxor $out2,$in3,$twk2
+ vxor $out3,$in4,$twk3
+ vxor $out4,$in5,$twk4
+
+ bl _aesp8_xts_dec5x
+
+ le?vperm $out0,$out0,$out0,$leperm
+ vmr $twk0,$twk5 # unused tweak
+ vxor $twk1,$tweak,$rndkey0
+ le?vperm $out1,$out1,$out1,$leperm
+ stvx_u $out0,$x00,$out # store output
+ vxor $out0,$in0,$twk1
+ le?vperm $out2,$out2,$out2,$leperm
+ stvx_u $out1,$x10,$out
+ le?vperm $out3,$out3,$out3,$leperm
+ stvx_u $out2,$x20,$out
+ le?vperm $out4,$out4,$out4,$leperm
+ stvx_u $out3,$x30,$out
+ stvx_u $out4,$x40,$out
+ addi $out,$out,0x50
+ bne Lxts_dec6x_steal
+ b Lxts_dec6x_done
+
+.align 4
+Lxts_dec6x_four:
+ vxor $out0,$in2,$twk0
+ vxor $out1,$in3,$twk1
+ vxor $out2,$in4,$twk2
+ vxor $out3,$in5,$twk3
+ vxor $out4,$out4,$out4
+
+ bl _aesp8_xts_dec5x
+
+ le?vperm $out0,$out0,$out0,$leperm
+ vmr $twk0,$twk4 # unused tweak
+ vmr $twk1,$twk5
+ le?vperm $out1,$out1,$out1,$leperm
+ stvx_u $out0,$x00,$out # store output
+ vxor $out0,$in0,$twk5
+ le?vperm $out2,$out2,$out2,$leperm
+ stvx_u $out1,$x10,$out
+ le?vperm $out3,$out3,$out3,$leperm
+ stvx_u $out2,$x20,$out
+ stvx_u $out3,$x30,$out
+ addi $out,$out,0x40
+ bne Lxts_dec6x_steal
+ b Lxts_dec6x_done
+
+.align 4
+Lxts_dec6x_three:
+ vxor $out0,$in3,$twk0
+ vxor $out1,$in4,$twk1
+ vxor $out2,$in5,$twk2
+ vxor $out3,$out3,$out3
+ vxor $out4,$out4,$out4
+
+ bl _aesp8_xts_dec5x
+
+ le?vperm $out0,$out0,$out0,$leperm
+ vmr $twk0,$twk3 # unused tweak
+ vmr $twk1,$twk4
+ le?vperm $out1,$out1,$out1,$leperm
+ stvx_u $out0,$x00,$out # store output
+ vxor $out0,$in0,$twk4
+ le?vperm $out2,$out2,$out2,$leperm
+ stvx_u $out1,$x10,$out
+ stvx_u $out2,$x20,$out
+ addi $out,$out,0x30
+ bne Lxts_dec6x_steal
+ b Lxts_dec6x_done
+
+.align 4
+Lxts_dec6x_two:
+ vxor $out0,$in4,$twk0
+ vxor $out1,$in5,$twk1
+ vxor $out2,$out2,$out2
+ vxor $out3,$out3,$out3
+ vxor $out4,$out4,$out4
+
+ bl _aesp8_xts_dec5x
+
+ le?vperm $out0,$out0,$out0,$leperm
+ vmr $twk0,$twk2 # unused tweak
+ vmr $twk1,$twk3
+ le?vperm $out1,$out1,$out1,$leperm
+ stvx_u $out0,$x00,$out # store output
+ vxor $out0,$in0,$twk3
+ stvx_u $out1,$x10,$out
+ addi $out,$out,0x20
+ bne Lxts_dec6x_steal
+ b Lxts_dec6x_done
+
+.align 4
+Lxts_dec6x_one:
+ vxor $out0,$in5,$twk0
+ nop
+Loop_xts_dec1x:
+ vncipher $out0,$out0,v24
+ lvx v24,$x20,$key_ # round[3]
+ addi $key_,$key_,0x20
+
+ vncipher $out0,$out0,v25
+ lvx v25,$x10,$key_ # round[4]
+ bdnz Loop_xts_dec1x
+
+ subi r0,$taillen,1
+ vncipher $out0,$out0,v24
+
+ andi. r0,r0,16
+ cmpwi $taillen,0
+ vncipher $out0,$out0,v25
+
+ sub $inp,$inp,r0
+ vncipher $out0,$out0,v26
+
+ lvx_u $in0,0,$inp
+ vncipher $out0,$out0,v27
+
+ addi $key_,$sp,$FRAME+15 # rewind $key_
+ vncipher $out0,$out0,v28
+ lvx v24,$x00,$key_ # re-pre-load round[1]
+
+ vncipher $out0,$out0,v29
+ lvx v25,$x10,$key_ # re-pre-load round[2]
+ vxor $twk0,$twk0,v31
+
+ le?vperm $in0,$in0,$in0,$leperm
+ vncipher $out0,$out0,v30
+
+ mtctr $rounds
+ vncipherlast $out0,$out0,$twk0
+
+ vmr $twk0,$twk1 # unused tweak
+ vmr $twk1,$twk2
+ le?vperm $out0,$out0,$out0,$leperm
+ stvx_u $out0,$x00,$out # store output
+ addi $out,$out,0x10
+ vxor $out0,$in0,$twk2
+ bne Lxts_dec6x_steal
+ b Lxts_dec6x_done
+
+.align 4
+Lxts_dec6x_zero:
+ cmpwi $taillen,0
+ beq Lxts_dec6x_done
+
+ lvx_u $in0,0,$inp
+ le?vperm $in0,$in0,$in0,$leperm
+ vxor $out0,$in0,$twk1
+Lxts_dec6x_steal:
+ vncipher $out0,$out0,v24
+ lvx v24,$x20,$key_ # round[3]
+ addi $key_,$key_,0x20
+
+ vncipher $out0,$out0,v25
+ lvx v25,$x10,$key_ # round[4]
+ bdnz Lxts_dec6x_steal
+
+ add $inp,$inp,$taillen
+ vncipher $out0,$out0,v24
+
+ cmpwi $taillen,0
+ vncipher $out0,$out0,v25
+
+ lvx_u $in0,0,$inp
+ vncipher $out0,$out0,v26
+
+ lvsr $inpperm,0,$taillen # $in5 is no more
+ vncipher $out0,$out0,v27
+
+ addi $key_,$sp,$FRAME+15 # rewind $key_
+ vncipher $out0,$out0,v28
+ lvx v24,$x00,$key_ # re-pre-load round[1]
+
+ vncipher $out0,$out0,v29
+ lvx v25,$x10,$key_ # re-pre-load round[2]
+ vxor $twk1,$twk1,v31
+
+ le?vperm $in0,$in0,$in0,$leperm
+ vncipher $out0,$out0,v30
+
+ vperm $in0,$in0,$in0,$inpperm
+ vncipherlast $tmp,$out0,$twk1
+
+ le?vperm $out0,$tmp,$tmp,$leperm
+ le?stvx_u $out0,0,$out
+ be?stvx_u $tmp,0,$out
+
+ vxor $out0,$out0,$out0
+ vspltisb $out1,-1
+ vperm $out0,$out0,$out1,$inpperm
+ vsel $out0,$in0,$tmp,$out0
+ vxor $out0,$out0,$twk0
+
+ subi r30,$out,1
+ mtctr $taillen
+Loop_xts_dec6x_steal:
+ lbzu r0,1(r30)
+ stb r0,16(r30)
+ bdnz Loop_xts_dec6x_steal
+
+ li $taillen,0
+ mtctr $rounds
+ b Loop_xts_dec1x # one more time...
+
+.align 4
+Lxts_dec6x_done:
+ ${UCMP}i $ivp,0
+ beq Lxts_dec6x_ret
+
+ vxor $tweak,$twk0,$rndkey0
+ le?vperm $tweak,$tweak,$tweak,$leperm
+ stvx_u $tweak,0,$ivp
+
+Lxts_dec6x_ret:
+ mtlr r11
+ li r10,`$FRAME+15`
+ li r11,`$FRAME+31`
+ stvx $seven,r10,$sp # wipe copies of round keys
+ addi r10,r10,32
+ stvx $seven,r11,$sp
+ addi r11,r11,32
+ stvx $seven,r10,$sp
+ addi r10,r10,32
+ stvx $seven,r11,$sp
+ addi r11,r11,32
+ stvx $seven,r10,$sp
+ addi r10,r10,32
+ stvx $seven,r11,$sp
+ addi r11,r11,32
+ stvx $seven,r10,$sp
+ addi r10,r10,32
+ stvx $seven,r11,$sp
+ addi r11,r11,32
+
+ mtspr 256,$vrsave
+ lvx v20,r10,$sp # ABI says so
+ addi r10,r10,32
+ lvx v21,r11,$sp
+ addi r11,r11,32
+ lvx v22,r10,$sp
+ addi r10,r10,32
+ lvx v23,r11,$sp
+ addi r11,r11,32
+ lvx v24,r10,$sp
+ addi r10,r10,32
+ lvx v25,r11,$sp
+ addi r11,r11,32
+ lvx v26,r10,$sp
+ addi r10,r10,32
+ lvx v27,r11,$sp
+ addi r11,r11,32
+ lvx v28,r10,$sp
+ addi r10,r10,32
+ lvx v29,r11,$sp
+ addi r11,r11,32
+ lvx v30,r10,$sp
+ lvx v31,r11,$sp
+ $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
+ $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
+ $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
+ $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
+ $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
+ $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
+ addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
+ blr
+ .long 0
+ .byte 0,12,0x04,1,0x80,6,6,0
+ .long 0
+
+.align 5
+_aesp8_xts_dec5x:
+ vncipher $out0,$out0,v24
+ vncipher $out1,$out1,v24
+ vncipher $out2,$out2,v24
+ vncipher $out3,$out3,v24
+ vncipher $out4,$out4,v24
+ lvx v24,$x20,$key_ # round[3]
+ addi $key_,$key_,0x20
+
+ vncipher $out0,$out0,v25
+ vncipher $out1,$out1,v25
+ vncipher $out2,$out2,v25
+ vncipher $out3,$out3,v25
+ vncipher $out4,$out4,v25
+ lvx v25,$x10,$key_ # round[4]
+ bdnz _aesp8_xts_dec5x
+
+ subi r0,$taillen,1
+ vncipher $out0,$out0,v24
+ vncipher $out1,$out1,v24
+ vncipher $out2,$out2,v24
+ vncipher $out3,$out3,v24
+ vncipher $out4,$out4,v24
+
+ andi. r0,r0,16
+ cmpwi $taillen,0
+ vncipher $out0,$out0,v25
+ vncipher $out1,$out1,v25
+ vncipher $out2,$out2,v25
+ vncipher $out3,$out3,v25
+ vncipher $out4,$out4,v25
+ vxor $twk0,$twk0,v31
+
+ sub $inp,$inp,r0
+ vncipher $out0,$out0,v26
+ vncipher $out1,$out1,v26
+ vncipher $out2,$out2,v26
+ vncipher $out3,$out3,v26
+ vncipher $out4,$out4,v26
+ vxor $in1,$twk1,v31
+
+ vncipher $out0,$out0,v27
+ lvx_u $in0,0,$inp
+ vncipher $out1,$out1,v27
+ vncipher $out2,$out2,v27
+ vncipher $out3,$out3,v27
+ vncipher $out4,$out4,v27
+ vxor $in2,$twk2,v31
+
+ addi $key_,$sp,$FRAME+15 # rewind $key_
+ vncipher $out0,$out0,v28
+ vncipher $out1,$out1,v28
+ vncipher $out2,$out2,v28
+ vncipher $out3,$out3,v28
+ vncipher $out4,$out4,v28
+ lvx v24,$x00,$key_ # re-pre-load round[1]
+ vxor $in3,$twk3,v31
+
+ vncipher $out0,$out0,v29
+ le?vperm $in0,$in0,$in0,$leperm
+ vncipher $out1,$out1,v29
+ vncipher $out2,$out2,v29
+ vncipher $out3,$out3,v29
+ vncipher $out4,$out4,v29
+ lvx v25,$x10,$key_ # re-pre-load round[2]
+ vxor $in4,$twk4,v31
+
+ vncipher $out0,$out0,v30
+ vncipher $out1,$out1,v30
+ vncipher $out2,$out2,v30
+ vncipher $out3,$out3,v30
+ vncipher $out4,$out4,v30
+
+ vncipherlast $out0,$out0,$twk0
+ vncipherlast $out1,$out1,$in1
+ vncipherlast $out2,$out2,$in2
+ vncipherlast $out3,$out3,$in3
+ vncipherlast $out4,$out4,$in4
+ mtctr $rounds
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,0,0
+___
+}} }}}
+
+my $consts=1;
+foreach(split("\n",$code)) {
+ s/\`([^\`]*)\`/eval($1)/geo;
+
+ # constants table endian-specific conversion
+ if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
+ my $conv=$3;
+ my @bytes=();
+
+ # convert to endian-agnostic format
+ if ($1 eq "long") {
+ foreach (split(/,\s*/,$2)) {
+ my $l = /^0/?oct:int;
+ push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
+ }
+ } else {
+ @bytes = map(/^0/?oct:int,split(/,\s*/,$2));
+ }
+
+ # little-endian conversion
+ if ($flavour =~ /le$/o) {
+ SWITCH: for($conv) {
+ /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; };
+ /\?rev/ && do { @bytes=reverse(@bytes); last; };
+ }
+ }
+
+ #emit
+ print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
+ next;
+ }
+ $consts=0 if (m/Lconsts:/o); # end of table
+
+ # instructions prefixed with '?' are endian-specific and need
+ # to be adjusted accordingly...
+ if ($flavour =~ /le$/o) { # little-endian
+ s/le\?//o or
+ s/be\?/#be#/o or
+ s/\?lvsr/lvsl/o or
+ s/\?lvsl/lvsr/o or
+ s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
+ s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
+ s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
+ } else { # big-endian
+ s/le\?/#le#/o or
+ s/be\?//o or
+ s/\?([a-z]+)/$1/o;
+ }
+
+ print $_,"\n";
+}
+
+close STDOUT;
diff --git a/src/crypto/aes/asm/aesv8-armx.pl b/src/crypto/aes/asm/aesv8-armx.pl
index a180426..f6d0dab 100644
--- a/src/crypto/aes/asm/aesv8-armx.pl
+++ b/src/crypto/aes/asm/aesv8-armx.pl
@@ -42,7 +42,7 @@
open OUT,"| \"$^X\" $xlate $flavour $output";
*STDOUT=*OUT;
-$prefix="aes_v8";
+$prefix="aes_hw";
$code=<<___;
#include <openssl/arm_arch.h>
diff --git a/src/crypto/aes/key_wrap.c b/src/crypto/aes/key_wrap.c
new file mode 100644
index 0000000..e955c47
--- /dev/null
+++ b/src/crypto/aes/key_wrap.c
@@ -0,0 +1,134 @@
+/* ====================================================================
+ * Copyright (c) 2001-2011 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ * openssl-core@openssl.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ==================================================================== */
+
+#include <openssl/aes.h>
+
+#include <limits.h>
+#include <string.h>
+
+#include <openssl/mem.h>
+
+
+/* kDefaultIV is the default IV value given in RFC 3394, 2.2.3.1. */
+static const uint8_t kDefaultIV[] = {
+ 0xa6, 0xa6, 0xa6, 0xa6, 0xa6, 0xa6, 0xa6, 0xa6,
+};
+
+int AES_wrap_key(const AES_KEY *key, const uint8_t *iv, uint8_t *out,
+ const uint8_t *in, size_t in_len) {
+ /* See RFC 3394, section 2.2.1. */
+
+ if (in_len > INT_MAX - 8 || in_len < 8 || in_len % 8 != 0) {
+ return -1;
+ }
+
+ if (iv == NULL) {
+ iv = kDefaultIV;
+ }
+
+ memmove(out + 8, in, in_len);
+ uint8_t A[AES_BLOCK_SIZE];
+ memcpy(A, iv, 8);
+
+ size_t n = in_len / 8;
+
+ for (unsigned j = 0; j < 6; j++) {
+ for (size_t i = 1; i <= n; i++) {
+ memcpy(A + 8, out + 8 * i, 8);
+ AES_encrypt(A, A, key);
+
+ uint32_t t = (uint32_t)(n * j + i);
+ A[7] ^= t & 0xff;
+ A[6] ^= (t >> 8) & 0xff;
+ A[5] ^= (t >> 16) & 0xff;
+ A[4] ^= (t >> 24) & 0xff;
+ memcpy(out + 8 * i, A + 8, 8);
+ }
+ }
+
+ memcpy(out, A, 8);
+ return (int)in_len + 8;
+}
+
+int AES_unwrap_key(const AES_KEY *key, const uint8_t *iv, uint8_t *out,
+ const uint8_t *in, size_t in_len) {
+ /* See RFC 3394, section 2.2.2. */
+
+ if (in_len > INT_MAX || in_len < 16 || in_len % 8 != 0) {
+ return -1;
+ }
+
+ if (iv == NULL) {
+ iv = kDefaultIV;
+ }
+
+ uint8_t A[AES_BLOCK_SIZE];
+ memcpy(A, in, 8);
+ memmove(out, in + 8, in_len - 8);
+
+ size_t n = (in_len / 8) - 1;
+
+ for (unsigned j = 5; j < 6; j--) {
+ for (size_t i = n; i > 0; i--) {
+ uint32_t t = (uint32_t)(n * j + i);
+ A[7] ^= t & 0xff;
+ A[6] ^= (t >> 8) & 0xff;
+ A[5] ^= (t >> 16) & 0xff;
+ A[4] ^= (t >> 24) & 0xff;
+ memcpy(A + 8, out + 8 * (i - 1), 8);
+ AES_decrypt(A, A, key);
+ memcpy(out + 8 * (i - 1), A + 8, 8);
+ }
+ }
+
+ if (CRYPTO_memcmp(A, iv, 8) != 0) {
+ return -1;
+ }
+
+ return (int)in_len - 8;
+}
diff --git a/src/crypto/bn/bn_test.cc b/src/crypto/bn/bn_test.cc
index 0a359bd..0488810 100644
--- a/src/crypto/bn/bn_test.cc
+++ b/src/crypto/bn/bn_test.cc
@@ -340,7 +340,7 @@
BN_zero(zero.get());
bssl::UniquePtr<BIGNUM> ret(BN_new()), remainder(BN_new());
- if (!ret ||
+ if (!ret || !remainder ||
!BN_sqr(ret.get(), a.get(), ctx) ||
!ExpectBIGNUMsEqual(t, "A^2", square.get(), ret.get()) ||
!BN_mul(ret.get(), a.get(), a.get(), ctx) ||
@@ -876,6 +876,10 @@
for (size_t i = 0; i < OPENSSL_ARRAY_SIZE(kMPITests); i++) {
const MPITest &test = kMPITests[i];
bssl::UniquePtr<BIGNUM> bn(ASCIIToBIGNUM(test.base10));
+ if (!bn) {
+ return false;
+ }
+
const size_t mpi_len = BN_bn2mpi(bn.get(), NULL);
if (mpi_len > sizeof(scratch)) {
fprintf(stderr, "MPI test #%u: MPI size is too large to test.\n",
diff --git a/src/crypto/bytestring/bytestring_test.cc b/src/crypto/bytestring/bytestring_test.cc
index 251aa37..563c6b0 100644
--- a/src/crypto/bytestring/bytestring_test.cc
+++ b/src/crypto/bytestring/bytestring_test.cc
@@ -231,6 +231,25 @@
return false;
}
+ unsigned tag;
+ CBS_init(&data, kData1, sizeof(kData1));
+ if (!CBS_get_any_asn1(&data, &contents, &tag) ||
+ tag != CBS_ASN1_SEQUENCE ||
+ CBS_len(&contents) != 2 ||
+ memcmp(CBS_data(&contents), "\x01\x02", 2) != 0) {
+ return false;
+ }
+
+ size_t header_len;
+ CBS_init(&data, kData1, sizeof(kData1));
+ if (!CBS_get_any_asn1_element(&data, &contents, &tag, &header_len) ||
+ tag != CBS_ASN1_SEQUENCE ||
+ header_len != 2 ||
+ CBS_len(&contents) != 4 ||
+ memcmp(CBS_data(&contents), "\x30\x02\x01\x02", 2) != 0) {
+ return false;
+ }
+
return true;
}
diff --git a/src/crypto/bytestring/cbs.c b/src/crypto/bytestring/cbs.c
index a7adc43..f702f06 100644
--- a/src/crypto/bytestring/cbs.c
+++ b/src/crypto/bytestring/cbs.c
@@ -262,6 +262,20 @@
return CBS_get_bytes(cbs, out, len);
}
+int CBS_get_any_asn1(CBS *cbs, CBS *out, unsigned *out_tag) {
+ size_t header_len;
+ if (!CBS_get_any_asn1_element(cbs, out, out_tag, &header_len)) {
+ return 0;
+ }
+
+ if (!CBS_skip(out, header_len)) {
+ assert(0);
+ return 0;
+ }
+
+ return 1;
+}
+
int CBS_get_any_asn1_element(CBS *cbs, CBS *out, unsigned *out_tag,
size_t *out_header_len) {
return cbs_get_any_asn1_element(cbs, out, out_tag, out_header_len,
diff --git a/src/crypto/cipher/aead_test.cc b/src/crypto/cipher/aead_test.cc
index 0dbb8c6..b33a36d 100644
--- a/src/crypto/cipher/aead_test.cc
+++ b/src/crypto/cipher/aead_test.cc
@@ -316,8 +316,6 @@
{ "aes-128-cbc-sha1-ssl3", EVP_aead_aes_128_cbc_sha1_ssl3, true },
{ "aes-256-cbc-sha1-ssl3", EVP_aead_aes_256_cbc_sha1_ssl3, true },
{ "des-ede3-cbc-sha1-ssl3", EVP_aead_des_ede3_cbc_sha1_ssl3, true },
- { "aes-128-key-wrap", EVP_aead_aes_128_key_wrap, true },
- { "aes-256-key-wrap", EVP_aead_aes_256_key_wrap, true },
{ "aes-128-ctr-hmac-sha256", EVP_aead_aes_128_ctr_hmac_sha256, false },
{ "aes-256-ctr-hmac-sha256", EVP_aead_aes_256_ctr_hmac_sha256, false },
{ "", NULL, false },
diff --git a/src/crypto/cipher/e_aes.c b/src/crypto/cipher/e_aes.c
index 6e9ea9f..9225d6a 100644
--- a/src/crypto/cipher/e_aes.c
+++ b/src/crypto/cipher/e_aes.c
@@ -125,18 +125,15 @@
return CRYPTO_is_ARMv8_AES_capable();
}
-int aes_v8_set_encrypt_key(const uint8_t *user_key, const int bits,
- AES_KEY *key);
-int aes_v8_set_decrypt_key(const uint8_t *user_key, const int bits,
- AES_KEY *key);
-void aes_v8_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key);
-void aes_v8_decrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key);
-void aes_v8_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t length,
- const AES_KEY *key, uint8_t *ivec, const int enc);
-void aes_v8_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out, size_t len,
- const AES_KEY *key, const uint8_t ivec[16]);
+#elif !defined(OPENSSL_NO_ASM) && defined(OPENSSL_PPC64LE)
-#endif /* OPENSSL_ARM */
+#define HWAES
+static int hwaes_capable(void) {
+ return CRYPTO_is_PPC64LE_vcrypto_capable();
+}
+
+#endif /* OPENSSL_PPC64LE */
+
#if defined(BSAES)
/* On platforms where BSAES gets defined (just above), then these functions are
@@ -202,39 +199,50 @@
}
#endif
-#if !defined(HWAES)
+#if defined(HWAES)
+int aes_hw_set_encrypt_key(const uint8_t *user_key, const int bits,
+ AES_KEY *key);
+int aes_hw_set_decrypt_key(const uint8_t *user_key, const int bits,
+ AES_KEY *key);
+void aes_hw_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key);
+void aes_hw_decrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key);
+void aes_hw_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t length,
+ const AES_KEY *key, uint8_t *ivec, const int enc);
+void aes_hw_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out, size_t len,
+ const AES_KEY *key, const uint8_t ivec[16]);
+#else
/* If HWAES isn't defined then we provide dummy functions for each of the hwaes
* functions. */
static int hwaes_capable(void) {
return 0;
}
-static int aes_v8_set_encrypt_key(const uint8_t *user_key, int bits,
+static int aes_hw_set_encrypt_key(const uint8_t *user_key, int bits,
AES_KEY *key) {
abort();
}
-static int aes_v8_set_decrypt_key(const uint8_t *user_key, int bits,
+static int aes_hw_set_decrypt_key(const uint8_t *user_key, int bits,
AES_KEY *key) {
abort();
}
-static void aes_v8_encrypt(const uint8_t *in, uint8_t *out,
+static void aes_hw_encrypt(const uint8_t *in, uint8_t *out,
const AES_KEY *key) {
abort();
}
-static void aes_v8_decrypt(const uint8_t *in, uint8_t *out,
+static void aes_hw_decrypt(const uint8_t *in, uint8_t *out,
const AES_KEY *key) {
abort();
}
-static void aes_v8_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t length,
+static void aes_hw_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t length,
const AES_KEY *key, uint8_t *ivec, int enc) {
abort();
}
-static void aes_v8_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out,
+static void aes_hw_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out,
size_t len, const AES_KEY *key,
const uint8_t ivec[16]) {
abort();
@@ -281,11 +289,11 @@
mode = ctx->cipher->flags & EVP_CIPH_MODE_MASK;
if ((mode == EVP_CIPH_ECB_MODE || mode == EVP_CIPH_CBC_MODE) && !enc) {
if (hwaes_capable()) {
- ret = aes_v8_set_decrypt_key(key, ctx->key_len * 8, &dat->ks.ks);
- dat->block = (block128_f)aes_v8_decrypt;
+ ret = aes_hw_set_decrypt_key(key, ctx->key_len * 8, &dat->ks.ks);
+ dat->block = (block128_f)aes_hw_decrypt;
dat->stream.cbc = NULL;
if (mode == EVP_CIPH_CBC_MODE) {
- dat->stream.cbc = (cbc128_f)aes_v8_cbc_encrypt;
+ dat->stream.cbc = (cbc128_f)aes_hw_cbc_encrypt;
}
} else if (bsaes_capable() && mode == EVP_CIPH_CBC_MODE) {
ret = AES_set_decrypt_key(key, ctx->key_len * 8, &dat->ks.ks);
@@ -303,13 +311,13 @@
mode == EVP_CIPH_CBC_MODE ? (cbc128_f)AES_cbc_encrypt : NULL;
}
} else if (hwaes_capable()) {
- ret = aes_v8_set_encrypt_key(key, ctx->key_len * 8, &dat->ks.ks);
- dat->block = (block128_f)aes_v8_encrypt;
+ ret = aes_hw_set_encrypt_key(key, ctx->key_len * 8, &dat->ks.ks);
+ dat->block = (block128_f)aes_hw_encrypt;
dat->stream.cbc = NULL;
if (mode == EVP_CIPH_CBC_MODE) {
- dat->stream.cbc = (cbc128_f)aes_v8_cbc_encrypt;
+ dat->stream.cbc = (cbc128_f)aes_hw_cbc_encrypt;
} else if (mode == EVP_CIPH_CTR_MODE) {
- dat->stream.ctr = (ctr128_f)aes_v8_ctr32_encrypt_blocks;
+ dat->stream.ctr = (ctr128_f)aes_hw_ctr32_encrypt_blocks;
}
} else if (bsaes_capable() && mode == EVP_CIPH_CTR_MODE) {
ret = AES_set_encrypt_key(key, ctx->key_len * 8, &dat->ks.ks);
@@ -406,14 +414,14 @@
}
if (hwaes_capable()) {
- aes_v8_set_encrypt_key(key, key_len * 8, aes_key);
+ aes_hw_set_encrypt_key(key, key_len * 8, aes_key);
if (gcm_ctx != NULL) {
- CRYPTO_gcm128_init(gcm_ctx, aes_key, (block128_f)aes_v8_encrypt);
+ CRYPTO_gcm128_init(gcm_ctx, aes_key, (block128_f)aes_hw_encrypt);
}
if (out_block) {
- *out_block = (block128_f) aes_v8_encrypt;
+ *out_block = (block128_f) aes_hw_encrypt;
}
- return (ctr128_f)aes_v8_ctr32_encrypt_blocks;
+ return (ctr128_f)aes_hw_ctr32_encrypt_blocks;
}
if (bsaes_capable()) {
@@ -1171,266 +1179,6 @@
const EVP_AEAD *EVP_aead_aes_256_gcm(void) { return &aead_aes_256_gcm; }
-/* AES Key Wrap is specified in
- * http://csrc.nist.gov/groups/ST/toolkit/documents/kms/key-wrap.pdf
- * or https://tools.ietf.org/html/rfc3394 */
-
-struct aead_aes_key_wrap_ctx {
- uint8_t key[32];
- unsigned key_bits;
-};
-
-static int aead_aes_key_wrap_init(EVP_AEAD_CTX *ctx, const uint8_t *key,
- size_t key_len, size_t tag_len) {
- struct aead_aes_key_wrap_ctx *kw_ctx;
- const size_t key_bits = key_len * 8;
-
- if (key_bits != 128 && key_bits != 256) {
- OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_BAD_KEY_LENGTH);
- return 0; /* EVP_AEAD_CTX_init should catch this. */
- }
-
- if (tag_len == EVP_AEAD_DEFAULT_TAG_LENGTH) {
- tag_len = 8;
- }
-
- if (tag_len != 8) {
- OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_UNSUPPORTED_TAG_SIZE);
- return 0;
- }
-
- kw_ctx = OPENSSL_malloc(sizeof(struct aead_aes_key_wrap_ctx));
- if (kw_ctx == NULL) {
- OPENSSL_PUT_ERROR(CIPHER, ERR_R_MALLOC_FAILURE);
- return 0;
- }
-
- memcpy(kw_ctx->key, key, key_len);
- kw_ctx->key_bits = key_bits;
-
- ctx->aead_state = kw_ctx;
- return 1;
-}
-
-static void aead_aes_key_wrap_cleanup(EVP_AEAD_CTX *ctx) {
- struct aead_aes_key_wrap_ctx *kw_ctx = ctx->aead_state;
- OPENSSL_cleanse(kw_ctx, sizeof(struct aead_aes_key_wrap_ctx));
- OPENSSL_free(kw_ctx);
-}
-
-/* kDefaultAESKeyWrapNonce is the default nonce value given in 2.2.3.1. */
-static const uint8_t kDefaultAESKeyWrapNonce[8] = {0xa6, 0xa6, 0xa6, 0xa6,
- 0xa6, 0xa6, 0xa6, 0xa6};
-
-
-static int aead_aes_key_wrap_seal(const EVP_AEAD_CTX *ctx, uint8_t *out,
- size_t *out_len, size_t max_out_len,
- const uint8_t *nonce, size_t nonce_len,
- const uint8_t *in, size_t in_len,
- const uint8_t *ad, size_t ad_len) {
- const struct aead_aes_key_wrap_ctx *kw_ctx = ctx->aead_state;
- union {
- double align;
- AES_KEY ks;
- } ks;
- /* Variables in this function match up with the variables in the second half
- * of section 2.2.1. */
- unsigned i, j, n;
- uint8_t A[AES_BLOCK_SIZE];
-
- if (ad_len != 0) {
- OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_UNSUPPORTED_AD_SIZE);
- return 0;
- }
-
- if (nonce_len == 0) {
- nonce = kDefaultAESKeyWrapNonce;
- nonce_len = sizeof(kDefaultAESKeyWrapNonce);
- }
-
- if (nonce_len != 8) {
- OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_UNSUPPORTED_NONCE_SIZE);
- return 0;
- }
-
- if (in_len % 8 != 0) {
- OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_UNSUPPORTED_INPUT_SIZE);
- return 0;
- }
-
- /* The code below only handles a 32-bit |t| thus 6*|n| must be less than
- * 2^32, where |n| is |in_len| / 8. So in_len < 4/3 * 2^32 and we
- * conservatively cap it to 2^32-16 to stop 32-bit platforms complaining that
- * a comparison is always true. */
- if (in_len > 0xfffffff0) {
- OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_TOO_LARGE);
- return 0;
- }
-
- n = in_len / 8;
-
- if (n < 2) {
- OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_UNSUPPORTED_INPUT_SIZE);
- return 0;
- }
-
- if (in_len + 8 < in_len) {
- OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_TOO_LARGE);
- return 0;
- }
-
- if (max_out_len < in_len + 8) {
- OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_BUFFER_TOO_SMALL);
- return 0;
- }
-
- if (AES_set_encrypt_key(kw_ctx->key, kw_ctx->key_bits, &ks.ks) < 0) {
- OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_AES_KEY_SETUP_FAILED);
- return 0;
- }
-
- memmove(out + 8, in, in_len);
- memcpy(A, nonce, 8);
-
- for (j = 0; j < 6; j++) {
- for (i = 1; i <= n; i++) {
- uint32_t t;
-
- memcpy(A + 8, out + 8 * i, 8);
- AES_encrypt(A, A, &ks.ks);
- t = n * j + i;
- A[7] ^= t & 0xff;
- A[6] ^= (t >> 8) & 0xff;
- A[5] ^= (t >> 16) & 0xff;
- A[4] ^= (t >> 24) & 0xff;
- memcpy(out + 8 * i, A + 8, 8);
- }
- }
-
- memcpy(out, A, 8);
- *out_len = in_len + 8;
- return 1;
-}
-
-static int aead_aes_key_wrap_open(const EVP_AEAD_CTX *ctx, uint8_t *out,
- size_t *out_len, size_t max_out_len,
- const uint8_t *nonce, size_t nonce_len,
- const uint8_t *in, size_t in_len,
- const uint8_t *ad, size_t ad_len) {
- const struct aead_aes_key_wrap_ctx *kw_ctx = ctx->aead_state;
- union {
- double align;
- AES_KEY ks;
- } ks;
- /* Variables in this function match up with the variables in the second half
- * of section 2.2.1. */
- unsigned i, j, n;
- uint8_t A[AES_BLOCK_SIZE];
-
- if (ad_len != 0) {
- OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_UNSUPPORTED_AD_SIZE);
- return 0;
- }
-
- if (nonce_len == 0) {
- nonce = kDefaultAESKeyWrapNonce;
- nonce_len = sizeof(kDefaultAESKeyWrapNonce);
- }
-
- if (nonce_len != 8) {
- OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_UNSUPPORTED_NONCE_SIZE);
- return 0;
- }
-
- if (in_len % 8 != 0) {
- OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_UNSUPPORTED_INPUT_SIZE);
- return 0;
- }
-
- /* The code below only handles a 32-bit |t| thus 6*|n| must be less than
- * 2^32, where |n| is |in_len| / 8. So in_len < 4/3 * 2^32 and we
- * conservatively cap it to 2^32-8 to stop 32-bit platforms complaining that
- * a comparison is always true. */
- if (in_len > 0xfffffff8) {
- OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_TOO_LARGE);
- return 0;
- }
-
- if (in_len < 24) {
- OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_BAD_DECRYPT);
- return 0;
- }
-
- n = (in_len / 8) - 1;
-
- if (max_out_len < in_len - 8) {
- OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_BUFFER_TOO_SMALL);
- return 0;
- }
-
- if (AES_set_decrypt_key(kw_ctx->key, kw_ctx->key_bits, &ks.ks) < 0) {
- OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_AES_KEY_SETUP_FAILED);
- return 0;
- }
-
- memcpy(A, in, 8);
- memmove(out, in + 8, in_len - 8);
-
- for (j = 5; j < 6; j--) {
- for (i = n; i > 0; i--) {
- uint32_t t;
-
- t = n * j + i;
- A[7] ^= t & 0xff;
- A[6] ^= (t >> 8) & 0xff;
- A[5] ^= (t >> 16) & 0xff;
- A[4] ^= (t >> 24) & 0xff;
- memcpy(A + 8, out + 8 * (i - 1), 8);
- AES_decrypt(A, A, &ks.ks);
- memcpy(out + 8 * (i - 1), A + 8, 8);
- }
- }
-
- if (CRYPTO_memcmp(A, nonce, 8) != 0) {
- OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_BAD_DECRYPT);
- return 0;
- }
-
- *out_len = in_len - 8;
- return 1;
-}
-
-static const EVP_AEAD aead_aes_128_key_wrap = {
- 16, /* key len */
- 8, /* nonce len */
- 8, /* overhead */
- 8, /* max tag length */
- aead_aes_key_wrap_init,
- NULL, /* init_with_direction */
- aead_aes_key_wrap_cleanup,
- aead_aes_key_wrap_seal,
- aead_aes_key_wrap_open,
- NULL, /* get_iv */
-};
-
-static const EVP_AEAD aead_aes_256_key_wrap = {
- 32, /* key len */
- 8, /* nonce len */
- 8, /* overhead */
- 8, /* max tag length */
- aead_aes_key_wrap_init,
- NULL, /* init_with_direction */
- aead_aes_key_wrap_cleanup,
- aead_aes_key_wrap_seal,
- aead_aes_key_wrap_open,
- NULL, /* get_iv */
-};
-
-const EVP_AEAD *EVP_aead_aes_128_key_wrap(void) { return &aead_aes_128_key_wrap; }
-
-const EVP_AEAD *EVP_aead_aes_256_key_wrap(void) { return &aead_aes_256_key_wrap; }
-
-
#define EVP_AEAD_AES_CTR_HMAC_SHA256_TAG_LEN SHA256_DIGEST_LENGTH
#define EVP_AEAD_AES_CTR_HMAC_SHA256_NONCE_LEN 12
diff --git a/src/crypto/cipher/test/aes_128_key_wrap_tests.txt b/src/crypto/cipher/test/aes_128_key_wrap_tests.txt
deleted file mode 100644
index 561ec90..0000000
--- a/src/crypto/cipher/test/aes_128_key_wrap_tests.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-# These test vectors have been taken from
-# http://csrc.nist.gov/groups/ST/toolkit/documents/kms/key-wrap.pdf
-
-KEY: 000102030405060708090A0B0C0D0E0F
-NONCE:
-IN: 00112233445566778899AABBCCDDEEFF
-AD:
-CT: 1FA68B0A8112B447AEF34BD8FB5A7B82
-TAG: 9D3E862371D2CFE5
diff --git a/src/crypto/cipher/test/aes_256_key_wrap_tests.txt b/src/crypto/cipher/test/aes_256_key_wrap_tests.txt
deleted file mode 100644
index 92d3a04..0000000
--- a/src/crypto/cipher/test/aes_256_key_wrap_tests.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-# These test vectors have been taken from
-# http://csrc.nist.gov/groups/ST/toolkit/documents/kms/key-wrap.pdf
-
-KEY: 000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F
-NONCE:
-IN: 00112233445566778899AABBCCDDEEFF
-AD:
-CT: 64E8C3F9CE0F5BA263E9777905818A2A
-TAG: 93C8191E7D6E8AE7
-
-KEY: 000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F
-NONCE:
-IN: 00112233445566778899AABBCCDDEEFF0001020304050607
-AD:
-CT: A8F9BC1612C68B3FF6E6F4FBE30E71E4769C8B80A32CB895
-TAG: 8CD5D17D6B254DA1
-
-KEY: 000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F
-NONCE:
-IN: 00112233445566778899AABBCCDDEEFF000102030405060708090A0B0C0D0E0F
-AD:
-CT: 28C9F404C4B810F4CBCCB35CFB87F8263F5786E2D80ED326CBC7F0E71A99F43B
-TAG: FB988B9B7A02DD21
diff --git a/src/crypto/cpu-ppc64le.c b/src/crypto/cpu-ppc64le.c
new file mode 100644
index 0000000..c431c81
--- /dev/null
+++ b/src/crypto/cpu-ppc64le.c
@@ -0,0 +1,40 @@
+/* Copyright (c) 2016, Google Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
+
+#include <openssl/cpu.h>
+
+#if defined(OPENSSL_PPC64LE)
+
+#include <sys/auxv.h>
+
+#include "internal.h"
+
+
+#if !defined(PPC_FEATURE2_HAS_VCRYPTO)
+/* PPC_FEATURE2_HAS_VCRYPTO was taken from section 4.1.2.3 of the “OpenPOWER
+ * ABI for Linux Supplement”. */
+#define PPC_FEATURE2_HAS_VCRYPTO 0x02000000
+#endif
+
+static unsigned long g_ppc64le_hwcap2 = 0;
+
+void OPENSSL_cpuid_setup(void) {
+ g_ppc64le_hwcap2 = getauxval(AT_HWCAP2);
+}
+
+int CRYPTO_is_PPC64LE_vcrypto_capable(void) {
+ return (g_ppc64le_hwcap2 & PPC_FEATURE2_HAS_VCRYPTO) != 0;
+}
+
+#endif /* OPENSSL_PPC64LE */
diff --git a/src/crypto/crypto.c b/src/crypto/crypto.c
index 2443126..c32f514 100644
--- a/src/crypto/crypto.c
+++ b/src/crypto/crypto.c
@@ -21,9 +21,11 @@
#if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_STATIC_ARMCAP) && \
(defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || \
- defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64))
-/* x86, x86_64 and the ARMs need to record the result of a cpuid call for the
- * asm to work correctly, unless compiled without asm code. */
+ defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64) || \
+ defined(OPENSSL_PPC64LE))
+/* x86, x86_64, the ARMs and ppc64le need to record the result of a
+ * cpuid/getauxval call for the asm to work correctly, unless compiled without
+ * asm code. */
#define NEED_CPUID
#else
diff --git a/src/crypto/ec/asm/p256-x86_64-asm.pl b/src/crypto/ec/asm/p256-x86_64-asm.pl
index 2a6e5e2..0a7d640 100755
--- a/src/crypto/ec/asm/p256-x86_64-asm.pl
+++ b/src/crypto/ec/asm/p256-x86_64-asm.pl
@@ -333,7 +333,7 @@
adc \$0, $acc0
########################################################################
- # Second reduction step
+ # Second reduction step
mov $acc1, $t1
shl \$32, $acc1
mulq $poly3
@@ -380,7 +380,7 @@
adc \$0, $acc1
########################################################################
- # Third reduction step
+ # Third reduction step
mov $acc2, $t1
shl \$32, $acc2
mulq $poly3
@@ -427,7 +427,7 @@
adc \$0, $acc2
########################################################################
- # Final reduction step
+ # Final reduction step
mov $acc3, $t1
shl \$32, $acc3
mulq $poly3
@@ -440,7 +440,7 @@
mov $acc5, $t1
adc \$0, $acc2
- ########################################################################
+ ########################################################################
# Branch-less conditional subtraction of P
sub \$-1, $acc4 # .Lpoly[0]
mov $acc0, $t2
@@ -1067,6 +1067,8 @@
mov $acc1, $in_ptr
adc \$0, %rdx
+ ###########################################
+ # Branch-less conditional subtraction
sub \$-1, $acc0
mov $acc2, %rax
sbb $t1, $acc1
@@ -1793,7 +1795,7 @@
movq %xmm1, $r_ptr
call __ecp_nistz256_sqr_mont$x # p256_sqr_mont(res_y, S);
___
-{
+{
######## ecp_nistz256_div_by_2(res_y, res_y); ##########################
# operate in 4-5-6-7 "name space" that matches squaring output
#
@@ -1882,7 +1884,7 @@
lea $M(%rsp), $b_ptr
mov $acc4, $acc6 # harmonize sub output and mul input
xor %ecx, %ecx
- mov $acc4, $S+8*0(%rsp) # have to save:-(
+ mov $acc4, $S+8*0(%rsp) # have to save:-(
mov $acc5, $acc2
mov $acc5, $S+8*1(%rsp)
cmovz $acc0, $acc3
diff --git a/src/crypto/err/ssl.errordata b/src/crypto/err/ssl.errordata
index e19b347..9045e9a 100644
--- a/src/crypto/err/ssl.errordata
+++ b/src/crypto/err/ssl.errordata
@@ -95,6 +95,7 @@
SSL,182,NO_RENEGOTIATION
SSL,183,NO_REQUIRED_DIGEST
SSL,184,NO_SHARED_CIPHER
+SSL,266,NO_SHARED_GROUP
SSL,185,NULL_SSL_CTX
SSL,186,NULL_SSL_METHOD_PASSED
SSL,187,OLD_SESSION_CIPHER_NOT_RETURNED
diff --git a/src/crypto/internal.h b/src/crypto/internal.h
index 8ac60cb..896cc3b 100644
--- a/src/crypto/internal.h
+++ b/src/crypto/internal.h
@@ -141,8 +141,8 @@
#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || defined(OPENSSL_ARM) || \
- defined(OPENSSL_AARCH64)
-/* OPENSSL_cpuid_setup initializes OPENSSL_ia32cap_P. */
+ defined(OPENSSL_AARCH64) || defined(OPENSSL_PPC64LE)
+/* OPENSSL_cpuid_setup initializes the platform-specific feature cache. */
void OPENSSL_cpuid_setup(void);
#endif
diff --git a/src/crypto/modes/CMakeLists.txt b/src/crypto/modes/CMakeLists.txt
index e8aa272..17faa15 100644
--- a/src/crypto/modes/CMakeLists.txt
+++ b/src/crypto/modes/CMakeLists.txt
@@ -34,6 +34,14 @@
)
endif()
+if (${ARCH} STREQUAL "ppc64le")
+ set(
+ MODES_ARCH_SOURCES
+
+ ghashp8-ppc.${ASM_EXT}
+ )
+endif()
+
add_library(
modes
@@ -53,6 +61,7 @@
perlasm(ghash-x86.${ASM_EXT} asm/ghash-x86.pl)
perlasm(ghash-armv4.${ASM_EXT} asm/ghash-armv4.pl)
perlasm(ghashv8-armx.${ASM_EXT} asm/ghashv8-armx.pl)
+perlasm(ghashp8-ppc.${ASM_EXT} asm/ghashp8-ppc.pl)
add_executable(
gcm_test
diff --git a/src/crypto/modes/asm/ghashp8-ppc.pl b/src/crypto/modes/asm/ghashp8-ppc.pl
new file mode 100644
index 0000000..f0598cb
--- /dev/null
+++ b/src/crypto/modes/asm/ghashp8-ppc.pl
@@ -0,0 +1,670 @@
+#! /usr/bin/env perl
+# Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+#
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+# GHASH for for PowerISA v2.07.
+#
+# July 2014
+#
+# Accurate performance measurements are problematic, because it's
+# always virtualized setup with possibly throttled processor.
+# Relative comparison is therefore more informative. This initial
+# version is ~2.1x slower than hardware-assisted AES-128-CTR, ~12x
+# faster than "4-bit" integer-only compiler-generated 64-bit code.
+# "Initial version" means that there is room for futher improvement.
+
+# May 2016
+#
+# 2x aggregated reduction improves performance by 50% (resulting
+# performance on POWER8 is 1 cycle per processed byte), and 4x
+# aggregated reduction - by 170% or 2.7x (resulting in 0.55 cpb).
+
+$flavour=shift;
+$output =shift;
+
+if ($flavour =~ /64/) {
+ $SIZE_T=8;
+ $LRSAVE=2*$SIZE_T;
+ $STU="stdu";
+ $POP="ld";
+ $PUSH="std";
+ $UCMP="cmpld";
+ $SHRI="srdi";
+} elsif ($flavour =~ /32/) {
+ $SIZE_T=4;
+ $LRSAVE=$SIZE_T;
+ $STU="stwu";
+ $POP="lwz";
+ $PUSH="stw";
+ $UCMP="cmplw";
+ $SHRI="srwi";
+} else { die "nonsense $flavour"; }
+
+$sp="r1";
+$FRAME=6*$SIZE_T+13*16; # 13*16 is for v20-v31 offload
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
+die "can't locate ppc-xlate.pl";
+
+open STDOUT,"| $^X $xlate $flavour $output" || die "can't call $xlate: $!";
+
+my ($Xip,$Htbl,$inp,$len)=map("r$_",(3..6)); # argument block
+
+my ($Xl,$Xm,$Xh,$IN)=map("v$_",(0..3));
+my ($zero,$t0,$t1,$t2,$xC2,$H,$Hh,$Hl,$lemask)=map("v$_",(4..12));
+my ($Xl1,$Xm1,$Xh1,$IN1,$H2,$H2h,$H2l)=map("v$_",(13..19));
+my $vrsave="r12";
+
+$code=<<___;
+.machine "any"
+
+.text
+
+.globl .gcm_init_p8
+.align 5
+.gcm_init_p8:
+ li r0,-4096
+ li r8,0x10
+ mfspr $vrsave,256
+ li r9,0x20
+ mtspr 256,r0
+ li r10,0x30
+ lvx_u $H,0,r4 # load H
+
+ vspltisb $xC2,-16 # 0xf0
+ vspltisb $t0,1 # one
+ vaddubm $xC2,$xC2,$xC2 # 0xe0
+ vxor $zero,$zero,$zero
+ vor $xC2,$xC2,$t0 # 0xe1
+ vsldoi $xC2,$xC2,$zero,15 # 0xe1...
+ vsldoi $t1,$zero,$t0,1 # ...1
+ vaddubm $xC2,$xC2,$xC2 # 0xc2...
+ vspltisb $t2,7
+ vor $xC2,$xC2,$t1 # 0xc2....01
+ vspltb $t1,$H,0 # most significant byte
+ vsl $H,$H,$t0 # H<<=1
+ vsrab $t1,$t1,$t2 # broadcast carry bit
+ vand $t1,$t1,$xC2
+ vxor $IN,$H,$t1 # twisted H
+
+ vsldoi $H,$IN,$IN,8 # twist even more ...
+ vsldoi $xC2,$zero,$xC2,8 # 0xc2.0
+ vsldoi $Hl,$zero,$H,8 # ... and split
+ vsldoi $Hh,$H,$zero,8
+
+ stvx_u $xC2,0,r3 # save pre-computed table
+ stvx_u $Hl,r8,r3
+ li r8,0x40
+ stvx_u $H, r9,r3
+ li r9,0x50
+ stvx_u $Hh,r10,r3
+ li r10,0x60
+
+ vpmsumd $Xl,$IN,$Hl # H.lo·H.lo
+ vpmsumd $Xm,$IN,$H # H.hi·H.lo+H.lo·H.hi
+ vpmsumd $Xh,$IN,$Hh # H.hi·H.hi
+
+ vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
+
+ vsldoi $t0,$Xm,$zero,8
+ vsldoi $t1,$zero,$Xm,8
+ vxor $Xl,$Xl,$t0
+ vxor $Xh,$Xh,$t1
+
+ vsldoi $Xl,$Xl,$Xl,8
+ vxor $Xl,$Xl,$t2
+
+ vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
+ vpmsumd $Xl,$Xl,$xC2
+ vxor $t1,$t1,$Xh
+ vxor $IN1,$Xl,$t1
+
+ vsldoi $H2,$IN1,$IN1,8
+ vsldoi $H2l,$zero,$H2,8
+ vsldoi $H2h,$H2,$zero,8
+
+ stvx_u $H2l,r8,r3 # save H^2
+ li r8,0x70
+ stvx_u $H2,r9,r3
+ li r9,0x80
+ stvx_u $H2h,r10,r3
+ li r10,0x90
+___
+{
+my ($t4,$t5,$t6) = ($Hl,$H,$Hh);
+$code.=<<___;
+ vpmsumd $Xl,$IN,$H2l # H.lo·H^2.lo
+ vpmsumd $Xl1,$IN1,$H2l # H^2.lo·H^2.lo
+ vpmsumd $Xm,$IN,$H2 # H.hi·H^2.lo+H.lo·H^2.hi
+ vpmsumd $Xm1,$IN1,$H2 # H^2.hi·H^2.lo+H^2.lo·H^2.hi
+ vpmsumd $Xh,$IN,$H2h # H.hi·H^2.hi
+ vpmsumd $Xh1,$IN1,$H2h # H^2.hi·H^2.hi
+
+ vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
+ vpmsumd $t6,$Xl1,$xC2 # 1st reduction phase
+
+ vsldoi $t0,$Xm,$zero,8
+ vsldoi $t1,$zero,$Xm,8
+ vsldoi $t4,$Xm1,$zero,8
+ vsldoi $t5,$zero,$Xm1,8
+ vxor $Xl,$Xl,$t0
+ vxor $Xh,$Xh,$t1
+ vxor $Xl1,$Xl1,$t4
+ vxor $Xh1,$Xh1,$t5
+
+ vsldoi $Xl,$Xl,$Xl,8
+ vsldoi $Xl1,$Xl1,$Xl1,8
+ vxor $Xl,$Xl,$t2
+ vxor $Xl1,$Xl1,$t6
+
+ vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
+ vsldoi $t5,$Xl1,$Xl1,8 # 2nd reduction phase
+ vpmsumd $Xl,$Xl,$xC2
+ vpmsumd $Xl1,$Xl1,$xC2
+ vxor $t1,$t1,$Xh
+ vxor $t5,$t5,$Xh1
+ vxor $Xl,$Xl,$t1
+ vxor $Xl1,$Xl1,$t5
+
+ vsldoi $H,$Xl,$Xl,8
+ vsldoi $H2,$Xl1,$Xl1,8
+ vsldoi $Hl,$zero,$H,8
+ vsldoi $Hh,$H,$zero,8
+ vsldoi $H2l,$zero,$H2,8
+ vsldoi $H2h,$H2,$zero,8
+
+ stvx_u $Hl,r8,r3 # save H^3
+ li r8,0xa0
+ stvx_u $H,r9,r3
+ li r9,0xb0
+ stvx_u $Hh,r10,r3
+ li r10,0xc0
+ stvx_u $H2l,r8,r3 # save H^4
+ stvx_u $H2,r9,r3
+ stvx_u $H2h,r10,r3
+
+ mtspr 256,$vrsave
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,2,0
+ .long 0
+.size .gcm_init_p8,.-.gcm_init_p8
+___
+}
+$code.=<<___;
+.globl .gcm_gmult_p8
+.align 5
+.gcm_gmult_p8:
+ lis r0,0xfff8
+ li r8,0x10
+ mfspr $vrsave,256
+ li r9,0x20
+ mtspr 256,r0
+ li r10,0x30
+ lvx_u $IN,0,$Xip # load Xi
+
+ lvx_u $Hl,r8,$Htbl # load pre-computed table
+ le?lvsl $lemask,r0,r0
+ lvx_u $H, r9,$Htbl
+ le?vspltisb $t0,0x07
+ lvx_u $Hh,r10,$Htbl
+ le?vxor $lemask,$lemask,$t0
+ lvx_u $xC2,0,$Htbl
+ le?vperm $IN,$IN,$IN,$lemask
+ vxor $zero,$zero,$zero
+
+ vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
+ vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
+ vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
+
+ vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
+
+ vsldoi $t0,$Xm,$zero,8
+ vsldoi $t1,$zero,$Xm,8
+ vxor $Xl,$Xl,$t0
+ vxor $Xh,$Xh,$t1
+
+ vsldoi $Xl,$Xl,$Xl,8
+ vxor $Xl,$Xl,$t2
+
+ vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
+ vpmsumd $Xl,$Xl,$xC2
+ vxor $t1,$t1,$Xh
+ vxor $Xl,$Xl,$t1
+
+ le?vperm $Xl,$Xl,$Xl,$lemask
+ stvx_u $Xl,0,$Xip # write out Xi
+
+ mtspr 256,$vrsave
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,2,0
+ .long 0
+.size .gcm_gmult_p8,.-.gcm_gmult_p8
+
+.globl .gcm_ghash_p8
+.align 5
+.gcm_ghash_p8:
+ li r0,-4096
+ li r8,0x10
+ mfspr $vrsave,256
+ li r9,0x20
+ mtspr 256,r0
+ li r10,0x30
+ lvx_u $Xl,0,$Xip # load Xi
+
+ lvx_u $Hl,r8,$Htbl # load pre-computed table
+ li r8,0x40
+ le?lvsl $lemask,r0,r0
+ lvx_u $H, r9,$Htbl
+ li r9,0x50
+ le?vspltisb $t0,0x07
+ lvx_u $Hh,r10,$Htbl
+ li r10,0x60
+ le?vxor $lemask,$lemask,$t0
+ lvx_u $xC2,0,$Htbl
+ le?vperm $Xl,$Xl,$Xl,$lemask
+ vxor $zero,$zero,$zero
+
+ ${UCMP}i $len,64
+ bge Lgcm_ghash_p8_4x
+
+ lvx_u $IN,0,$inp
+ addi $inp,$inp,16
+ subic. $len,$len,16
+ le?vperm $IN,$IN,$IN,$lemask
+ vxor $IN,$IN,$Xl
+ beq Lshort
+
+ lvx_u $H2l,r8,$Htbl # load H^2
+ li r8,16
+ lvx_u $H2, r9,$Htbl
+ add r9,$inp,$len # end of input
+ lvx_u $H2h,r10,$Htbl
+ be?b Loop_2x
+
+.align 5
+Loop_2x:
+ lvx_u $IN1,0,$inp
+ le?vperm $IN1,$IN1,$IN1,$lemask
+
+ subic $len,$len,32
+ vpmsumd $Xl,$IN,$H2l # H^2.lo·Xi.lo
+ vpmsumd $Xl1,$IN1,$Hl # H.lo·Xi+1.lo
+ subfe r0,r0,r0 # borrow?-1:0
+ vpmsumd $Xm,$IN,$H2 # H^2.hi·Xi.lo+H^2.lo·Xi.hi
+ vpmsumd $Xm1,$IN1,$H # H.hi·Xi+1.lo+H.lo·Xi+1.hi
+ and r0,r0,$len
+ vpmsumd $Xh,$IN,$H2h # H^2.hi·Xi.hi
+ vpmsumd $Xh1,$IN1,$Hh # H.hi·Xi+1.hi
+ add $inp,$inp,r0
+
+ vxor $Xl,$Xl,$Xl1
+ vxor $Xm,$Xm,$Xm1
+
+ vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
+
+ vsldoi $t0,$Xm,$zero,8
+ vsldoi $t1,$zero,$Xm,8
+ vxor $Xh,$Xh,$Xh1
+ vxor $Xl,$Xl,$t0
+ vxor $Xh,$Xh,$t1
+
+ vsldoi $Xl,$Xl,$Xl,8
+ vxor $Xl,$Xl,$t2
+ lvx_u $IN,r8,$inp
+ addi $inp,$inp,32
+
+ vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
+ vpmsumd $Xl,$Xl,$xC2
+ le?vperm $IN,$IN,$IN,$lemask
+ vxor $t1,$t1,$Xh
+ vxor $IN,$IN,$t1
+ vxor $IN,$IN,$Xl
+ $UCMP r9,$inp
+ bgt Loop_2x # done yet?
+
+ cmplwi $len,0
+ bne Leven
+
+Lshort:
+ vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
+ vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
+ vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
+
+ vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
+
+ vsldoi $t0,$Xm,$zero,8
+ vsldoi $t1,$zero,$Xm,8
+ vxor $Xl,$Xl,$t0
+ vxor $Xh,$Xh,$t1
+
+ vsldoi $Xl,$Xl,$Xl,8
+ vxor $Xl,$Xl,$t2
+
+ vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
+ vpmsumd $Xl,$Xl,$xC2
+ vxor $t1,$t1,$Xh
+
+Leven:
+ vxor $Xl,$Xl,$t1
+ le?vperm $Xl,$Xl,$Xl,$lemask
+ stvx_u $Xl,0,$Xip # write out Xi
+
+ mtspr 256,$vrsave
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,4,0
+ .long 0
+___
+{
+my ($Xl3,$Xm2,$IN2,$H3l,$H3,$H3h,
+ $Xh3,$Xm3,$IN3,$H4l,$H4,$H4h) = map("v$_",(20..31));
+my $IN0=$IN;
+my ($H21l,$H21h,$loperm,$hiperm) = ($Hl,$Hh,$H2l,$H2h);
+
+$code.=<<___;
+.align 5
+.gcm_ghash_p8_4x:
+Lgcm_ghash_p8_4x:
+ $STU $sp,-$FRAME($sp)
+ li r10,`15+6*$SIZE_T`
+ li r11,`31+6*$SIZE_T`
+ stvx v20,r10,$sp
+ addi r10,r10,32
+ stvx v21,r11,$sp
+ addi r11,r11,32
+ stvx v22,r10,$sp
+ addi r10,r10,32
+ stvx v23,r11,$sp
+ addi r11,r11,32
+ stvx v24,r10,$sp
+ addi r10,r10,32
+ stvx v25,r11,$sp
+ addi r11,r11,32
+ stvx v26,r10,$sp
+ addi r10,r10,32
+ stvx v27,r11,$sp
+ addi r11,r11,32
+ stvx v28,r10,$sp
+ addi r10,r10,32
+ stvx v29,r11,$sp
+ addi r11,r11,32
+ stvx v30,r10,$sp
+ li r10,0x60
+ stvx v31,r11,$sp
+ li r0,-1
+ stw $vrsave,`$FRAME-4`($sp) # save vrsave
+ mtspr 256,r0 # preserve all AltiVec registers
+
+ lvsl $t0,0,r8 # 0x0001..0e0f
+ #lvx_u $H2l,r8,$Htbl # load H^2
+ li r8,0x70
+ lvx_u $H2, r9,$Htbl
+ li r9,0x80
+ vspltisb $t1,8 # 0x0808..0808
+ #lvx_u $H2h,r10,$Htbl
+ li r10,0x90
+ lvx_u $H3l,r8,$Htbl # load H^3
+ li r8,0xa0
+ lvx_u $H3, r9,$Htbl
+ li r9,0xb0
+ lvx_u $H3h,r10,$Htbl
+ li r10,0xc0
+ lvx_u $H4l,r8,$Htbl # load H^4
+ li r8,0x10
+ lvx_u $H4, r9,$Htbl
+ li r9,0x20
+ lvx_u $H4h,r10,$Htbl
+ li r10,0x30
+
+ vsldoi $t2,$zero,$t1,8 # 0x0000..0808
+ vaddubm $hiperm,$t0,$t2 # 0x0001..1617
+ vaddubm $loperm,$t1,$hiperm # 0x0809..1e1f
+
+ $SHRI $len,$len,4 # this allows to use sign bit
+ # as carry
+ lvx_u $IN0,0,$inp # load input
+ lvx_u $IN1,r8,$inp
+ subic. $len,$len,8
+ lvx_u $IN2,r9,$inp
+ lvx_u $IN3,r10,$inp
+ addi $inp,$inp,0x40
+ le?vperm $IN0,$IN0,$IN0,$lemask
+ le?vperm $IN1,$IN1,$IN1,$lemask
+ le?vperm $IN2,$IN2,$IN2,$lemask
+ le?vperm $IN3,$IN3,$IN3,$lemask
+
+ vxor $Xh,$IN0,$Xl
+
+ vpmsumd $Xl1,$IN1,$H3l
+ vpmsumd $Xm1,$IN1,$H3
+ vpmsumd $Xh1,$IN1,$H3h
+
+ vperm $H21l,$H2,$H,$hiperm
+ vperm $t0,$IN2,$IN3,$loperm
+ vperm $H21h,$H2,$H,$loperm
+ vperm $t1,$IN2,$IN3,$hiperm
+ vpmsumd $Xm2,$IN2,$H2 # H^2.lo·Xi+2.hi+H^2.hi·Xi+2.lo
+ vpmsumd $Xl3,$t0,$H21l # H^2.lo·Xi+2.lo+H.lo·Xi+3.lo
+ vpmsumd $Xm3,$IN3,$H # H.hi·Xi+3.lo +H.lo·Xi+3.hi
+ vpmsumd $Xh3,$t1,$H21h # H^2.hi·Xi+2.hi+H.hi·Xi+3.hi
+
+ vxor $Xm2,$Xm2,$Xm1
+ vxor $Xl3,$Xl3,$Xl1
+ vxor $Xm3,$Xm3,$Xm2
+ vxor $Xh3,$Xh3,$Xh1
+
+ blt Ltail_4x
+
+Loop_4x:
+ lvx_u $IN0,0,$inp
+ lvx_u $IN1,r8,$inp
+ subic. $len,$len,4
+ lvx_u $IN2,r9,$inp
+ lvx_u $IN3,r10,$inp
+ addi $inp,$inp,0x40
+ le?vperm $IN1,$IN1,$IN1,$lemask
+ le?vperm $IN2,$IN2,$IN2,$lemask
+ le?vperm $IN3,$IN3,$IN3,$lemask
+ le?vperm $IN0,$IN0,$IN0,$lemask
+
+ vpmsumd $Xl,$Xh,$H4l # H^4.lo·Xi.lo
+ vpmsumd $Xm,$Xh,$H4 # H^4.hi·Xi.lo+H^4.lo·Xi.hi
+ vpmsumd $Xh,$Xh,$H4h # H^4.hi·Xi.hi
+ vpmsumd $Xl1,$IN1,$H3l
+ vpmsumd $Xm1,$IN1,$H3
+ vpmsumd $Xh1,$IN1,$H3h
+
+ vxor $Xl,$Xl,$Xl3
+ vxor $Xm,$Xm,$Xm3
+ vxor $Xh,$Xh,$Xh3
+ vperm $t0,$IN2,$IN3,$loperm
+ vperm $t1,$IN2,$IN3,$hiperm
+
+ vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
+ vpmsumd $Xl3,$t0,$H21l # H.lo·Xi+3.lo +H^2.lo·Xi+2.lo
+ vpmsumd $Xh3,$t1,$H21h # H.hi·Xi+3.hi +H^2.hi·Xi+2.hi
+
+ vsldoi $t0,$Xm,$zero,8
+ vsldoi $t1,$zero,$Xm,8
+ vxor $Xl,$Xl,$t0
+ vxor $Xh,$Xh,$t1
+
+ vsldoi $Xl,$Xl,$Xl,8
+ vxor $Xl,$Xl,$t2
+
+ vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
+ vpmsumd $Xm2,$IN2,$H2 # H^2.hi·Xi+2.lo+H^2.lo·Xi+2.hi
+ vpmsumd $Xm3,$IN3,$H # H.hi·Xi+3.lo +H.lo·Xi+3.hi
+ vpmsumd $Xl,$Xl,$xC2
+
+ vxor $Xl3,$Xl3,$Xl1
+ vxor $Xh3,$Xh3,$Xh1
+ vxor $Xh,$Xh,$IN0
+ vxor $Xm2,$Xm2,$Xm1
+ vxor $Xh,$Xh,$t1
+ vxor $Xm3,$Xm3,$Xm2
+ vxor $Xh,$Xh,$Xl
+ bge Loop_4x
+
+Ltail_4x:
+ vpmsumd $Xl,$Xh,$H4l # H^4.lo·Xi.lo
+ vpmsumd $Xm,$Xh,$H4 # H^4.hi·Xi.lo+H^4.lo·Xi.hi
+ vpmsumd $Xh,$Xh,$H4h # H^4.hi·Xi.hi
+
+ vxor $Xl,$Xl,$Xl3
+ vxor $Xm,$Xm,$Xm3
+
+ vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
+
+ vsldoi $t0,$Xm,$zero,8
+ vsldoi $t1,$zero,$Xm,8
+ vxor $Xh,$Xh,$Xh3
+ vxor $Xl,$Xl,$t0
+ vxor $Xh,$Xh,$t1
+
+ vsldoi $Xl,$Xl,$Xl,8
+ vxor $Xl,$Xl,$t2
+
+ vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
+ vpmsumd $Xl,$Xl,$xC2
+ vxor $t1,$t1,$Xh
+ vxor $Xl,$Xl,$t1
+
+ addic. $len,$len,4
+ beq Ldone_4x
+
+ lvx_u $IN0,0,$inp
+ ${UCMP}i $len,2
+ li $len,-4
+ blt Lone
+ lvx_u $IN1,r8,$inp
+ beq Ltwo
+
+Lthree:
+ lvx_u $IN2,r9,$inp
+ le?vperm $IN0,$IN0,$IN0,$lemask
+ le?vperm $IN1,$IN1,$IN1,$lemask
+ le?vperm $IN2,$IN2,$IN2,$lemask
+
+ vxor $Xh,$IN0,$Xl
+ vmr $H4l,$H3l
+ vmr $H4, $H3
+ vmr $H4h,$H3h
+
+ vperm $t0,$IN1,$IN2,$loperm
+ vperm $t1,$IN1,$IN2,$hiperm
+ vpmsumd $Xm2,$IN1,$H2 # H^2.lo·Xi+1.hi+H^2.hi·Xi+1.lo
+ vpmsumd $Xm3,$IN2,$H # H.hi·Xi+2.lo +H.lo·Xi+2.hi
+ vpmsumd $Xl3,$t0,$H21l # H^2.lo·Xi+1.lo+H.lo·Xi+2.lo
+ vpmsumd $Xh3,$t1,$H21h # H^2.hi·Xi+1.hi+H.hi·Xi+2.hi
+
+ vxor $Xm3,$Xm3,$Xm2
+ b Ltail_4x
+
+.align 4
+Ltwo:
+ le?vperm $IN0,$IN0,$IN0,$lemask
+ le?vperm $IN1,$IN1,$IN1,$lemask
+
+ vxor $Xh,$IN0,$Xl
+ vperm $t0,$zero,$IN1,$loperm
+ vperm $t1,$zero,$IN1,$hiperm
+
+ vsldoi $H4l,$zero,$H2,8
+ vmr $H4, $H2
+ vsldoi $H4h,$H2,$zero,8
+
+ vpmsumd $Xl3,$t0, $H21l # H.lo·Xi+1.lo
+ vpmsumd $Xm3,$IN1,$H # H.hi·Xi+1.lo+H.lo·Xi+2.hi
+ vpmsumd $Xh3,$t1, $H21h # H.hi·Xi+1.hi
+
+ b Ltail_4x
+
+.align 4
+Lone:
+ le?vperm $IN0,$IN0,$IN0,$lemask
+
+ vsldoi $H4l,$zero,$H,8
+ vmr $H4, $H
+ vsldoi $H4h,$H,$zero,8
+
+ vxor $Xh,$IN0,$Xl
+ vxor $Xl3,$Xl3,$Xl3
+ vxor $Xm3,$Xm3,$Xm3
+ vxor $Xh3,$Xh3,$Xh3
+
+ b Ltail_4x
+
+Ldone_4x:
+ le?vperm $Xl,$Xl,$Xl,$lemask
+ stvx_u $Xl,0,$Xip # write out Xi
+
+ li r10,`15+6*$SIZE_T`
+ li r11,`31+6*$SIZE_T`
+ mtspr 256,$vrsave
+ lvx v20,r10,$sp
+ addi r10,r10,32
+ lvx v21,r11,$sp
+ addi r11,r11,32
+ lvx v22,r10,$sp
+ addi r10,r10,32
+ lvx v23,r11,$sp
+ addi r11,r11,32
+ lvx v24,r10,$sp
+ addi r10,r10,32
+ lvx v25,r11,$sp
+ addi r11,r11,32
+ lvx v26,r10,$sp
+ addi r10,r10,32
+ lvx v27,r11,$sp
+ addi r11,r11,32
+ lvx v28,r10,$sp
+ addi r10,r10,32
+ lvx v29,r11,$sp
+ addi r11,r11,32
+ lvx v30,r10,$sp
+ lvx v31,r11,$sp
+ addi $sp,$sp,$FRAME
+ blr
+ .long 0
+ .byte 0,12,0x04,0,0x80,0,4,0
+ .long 0
+___
+}
+$code.=<<___;
+.size .gcm_ghash_p8,.-.gcm_ghash_p8
+
+.asciz "GHASH for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
+.align 2
+___
+
+foreach (split("\n",$code)) {
+ s/\`([^\`]*)\`/eval $1/geo;
+
+ if ($flavour =~ /le$/o) { # little-endian
+ s/le\?//o or
+ s/be\?/#be#/o;
+ } else {
+ s/le\?/#le#/o or
+ s/be\?//o;
+ }
+ print $_,"\n";
+}
+
+close STDOUT; # enforce flush
diff --git a/src/crypto/modes/gcm.c b/src/crypto/modes/gcm.c
index 1b43cd4..69bdfdc 100644
--- a/src/crypto/modes/gcm.c
+++ b/src/crypto/modes/gcm.c
@@ -60,7 +60,8 @@
#if !defined(OPENSSL_NO_ASM) && \
(defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || \
- defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64))
+ defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64) || \
+ defined(OPENSSL_PPC64LE))
#define GHASH_ASM
#endif
@@ -145,7 +146,7 @@
#endif
}
-#if !defined(GHASH_ASM) || defined(OPENSSL_AARCH64)
+#if !defined(GHASH_ASM) || defined(OPENSSL_AARCH64) || defined(OPENSSL_PPC64LE)
static const size_t rem_4bit[16] = {
PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
@@ -405,6 +406,13 @@
#endif
#endif
+#elif defined(OPENSSL_PPC64LE)
+#define GHASH_ASM_PPC64LE
+#define GCM_FUNCREF_4BIT
+void gcm_init_p8(u128 Htable[16], const uint64_t Xi[2]);
+void gcm_gmult_p8(uint64_t Xi[2], const u128 Htable[16]);
+void gcm_ghash_p8(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
+ size_t len);
#endif
#endif
@@ -484,6 +492,16 @@
ctx->gmult = gcm_gmult_4bit;
ctx->ghash = gcm_ghash_4bit;
}
+#elif defined(GHASH_ASM_PPC64LE)
+ if (CRYPTO_is_PPC64LE_vcrypto_capable()) {
+ gcm_init_p8(ctx->Htable, ctx->H.u);
+ ctx->gmult = gcm_gmult_p8;
+ ctx->ghash = gcm_ghash_p8;
+ } else {
+ gcm_init_4bit(ctx->Htable, ctx->H.u);
+ ctx->gmult = gcm_gmult_4bit;
+ ctx->ghash = gcm_ghash_4bit;
+ }
#else
gcm_init_4bit(ctx->Htable, ctx->H.u);
ctx->gmult = gcm_gmult_4bit;
diff --git a/src/crypto/perlasm/ppc-xlate.pl b/src/crypto/perlasm/ppc-xlate.pl
new file mode 100644
index 0000000..55b02bc
--- /dev/null
+++ b/src/crypto/perlasm/ppc-xlate.pl
@@ -0,0 +1,299 @@
+#! /usr/bin/env perl
+# Copyright 2006-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+my $flavour = shift;
+my $output = shift;
+open STDOUT,">$output" || die "can't open $output: $!";
+
+my %GLOBALS;
+my %TYPES;
+my $dotinlocallabels=($flavour=~/linux/)?1:0;
+
+################################################################
+# directives which need special treatment on different platforms
+################################################################
+my $type = sub {
+ my ($dir,$name,$type) = @_;
+
+ $TYPES{$name} = $type;
+ if ($flavour =~ /linux/) {
+ $name =~ s|^\.||;
+ ".type $name,$type";
+ } else {
+ "";
+ }
+};
+my $globl = sub {
+ my $junk = shift;
+ my $name = shift;
+ my $global = \$GLOBALS{$name};
+ my $type = \$TYPES{$name};
+ my $ret;
+
+ $name =~ s|^\.||;
+
+ SWITCH: for ($flavour) {
+ /aix/ && do { if (!$$type) {
+ $$type = "\@function";
+ }
+ if ($$type =~ /function/) {
+ $name = ".$name";
+ }
+ last;
+ };
+ /osx/ && do { $name = "_$name";
+ last;
+ };
+ /linux.*(32|64le)/
+ && do { $ret .= ".globl $name";
+ if (!$$type) {
+ $ret .= "\n.type $name,\@function";
+ $$type = "\@function";
+ }
+ last;
+ };
+ /linux.*64/ && do { $ret .= ".globl $name";
+ if (!$$type) {
+ $ret .= "\n.type $name,\@function";
+ $$type = "\@function";
+ }
+ if ($$type =~ /function/) {
+ $ret .= "\n.section \".opd\",\"aw\"";
+ $ret .= "\n.align 3";
+ $ret .= "\n$name:";
+ $ret .= "\n.quad .$name,.TOC.\@tocbase,0";
+ $ret .= "\n.previous";
+ $name = ".$name";
+ }
+ last;
+ };
+ }
+
+ $ret = ".globl $name" if (!$ret);
+ $$global = $name;
+ $ret;
+};
+my $text = sub {
+ my $ret = ($flavour =~ /aix/) ? ".csect\t.text[PR],7" : ".text";
+ $ret = ".abiversion 2\n".$ret if ($flavour =~ /linux.*64le/);
+ $ret;
+};
+my $machine = sub {
+ my $junk = shift;
+ my $arch = shift;
+ if ($flavour =~ /osx/)
+ { $arch =~ s/\"//g;
+ $arch = ($flavour=~/64/) ? "ppc970-64" : "ppc970" if ($arch eq "any");
+ }
+ ".machine $arch";
+};
+my $size = sub {
+ if ($flavour =~ /linux/)
+ { shift;
+ my $name = shift;
+ my $real = $GLOBALS{$name} ? \$GLOBALS{$name} : \$name;
+ my $ret = ".size $$real,.-$$real";
+ $name =~ s|^\.||;
+ if ($$real ne $name) {
+ $ret .= "\n.size $name,.-$$real";
+ }
+ $ret;
+ }
+ else
+ { ""; }
+};
+my $asciz = sub {
+ shift;
+ my $line = join(",",@_);
+ if ($line =~ /^"(.*)"$/)
+ { ".byte " . join(",",unpack("C*",$1),0) . "\n.align 2"; }
+ else
+ { ""; }
+};
+my $quad = sub {
+ shift;
+ my @ret;
+ my ($hi,$lo);
+ for (@_) {
+ if (/^0x([0-9a-f]*?)([0-9a-f]{1,8})$/io)
+ { $hi=$1?"0x$1":"0"; $lo="0x$2"; }
+ elsif (/^([0-9]+)$/o)
+ { $hi=$1>>32; $lo=$1&0xffffffff; } # error-prone with 32-bit perl
+ else
+ { $hi=undef; $lo=$_; }
+
+ if (defined($hi))
+ { push(@ret,$flavour=~/le$/o?".long\t$lo,$hi":".long\t$hi,$lo"); }
+ else
+ { push(@ret,".quad $lo"); }
+ }
+ join("\n",@ret);
+};
+
+################################################################
+# simplified mnemonics not handled by at least one assembler
+################################################################
+my $cmplw = sub {
+ my $f = shift;
+ my $cr = 0; $cr = shift if ($#_>1);
+ # Some out-of-date 32-bit GNU assembler just can't handle cmplw...
+ ($flavour =~ /linux.*32/) ?
+ " .long ".sprintf "0x%x",31<<26|$cr<<23|$_[0]<<16|$_[1]<<11|64 :
+ " cmplw ".join(',',$cr,@_);
+};
+my $bdnz = sub {
+ my $f = shift;
+ my $bo = $f=~/[\+\-]/ ? 16+9 : 16; # optional "to be taken" hint
+ " bc $bo,0,".shift;
+} if ($flavour!~/linux/);
+my $bltlr = sub {
+ my $f = shift;
+ my $bo = $f=~/\-/ ? 12+2 : 12; # optional "not to be taken" hint
+ ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints
+ " .long ".sprintf "0x%x",19<<26|$bo<<21|16<<1 :
+ " bclr $bo,0";
+};
+my $bnelr = sub {
+ my $f = shift;
+ my $bo = $f=~/\-/ ? 4+2 : 4; # optional "not to be taken" hint
+ ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints
+ " .long ".sprintf "0x%x",19<<26|$bo<<21|2<<16|16<<1 :
+ " bclr $bo,2";
+};
+my $beqlr = sub {
+ my $f = shift;
+ my $bo = $f=~/-/ ? 12+2 : 12; # optional "not to be taken" hint
+ ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints
+ " .long ".sprintf "0x%X",19<<26|$bo<<21|2<<16|16<<1 :
+ " bclr $bo,2";
+};
+# GNU assembler can't handle extrdi rA,rS,16,48, or when sum of last two
+# arguments is 64, with "operand out of range" error.
+my $extrdi = sub {
+ my ($f,$ra,$rs,$n,$b) = @_;
+ $b = ($b+$n)&63; $n = 64-$n;
+ " rldicl $ra,$rs,$b,$n";
+};
+my $vmr = sub {
+ my ($f,$vx,$vy) = @_;
+ " vor $vx,$vy,$vy";
+};
+
+# Some ABIs specify vrsave, special-purpose register #256, as reserved
+# for system use.
+my $no_vrsave = ($flavour =~ /aix|linux64le/);
+my $mtspr = sub {
+ my ($f,$idx,$ra) = @_;
+ if ($idx == 256 && $no_vrsave) {
+ " or $ra,$ra,$ra";
+ } else {
+ " mtspr $idx,$ra";
+ }
+};
+my $mfspr = sub {
+ my ($f,$rd,$idx) = @_;
+ if ($idx == 256 && $no_vrsave) {
+ " li $rd,-1";
+ } else {
+ " mfspr $rd,$idx";
+ }
+};
+
+# PowerISA 2.06 stuff
+sub vsxmem_op {
+ my ($f, $vrt, $ra, $rb, $op) = @_;
+ " .long ".sprintf "0x%X",(31<<26)|($vrt<<21)|($ra<<16)|($rb<<11)|($op*2+1);
+}
+# made-up unaligned memory reference AltiVec/VMX instructions
+my $lvx_u = sub { vsxmem_op(@_, 844); }; # lxvd2x
+my $stvx_u = sub { vsxmem_op(@_, 972); }; # stxvd2x
+my $lvdx_u = sub { vsxmem_op(@_, 588); }; # lxsdx
+my $stvdx_u = sub { vsxmem_op(@_, 716); }; # stxsdx
+my $lvx_4w = sub { vsxmem_op(@_, 780); }; # lxvw4x
+my $stvx_4w = sub { vsxmem_op(@_, 908); }; # stxvw4x
+
+# PowerISA 2.07 stuff
+sub vcrypto_op {
+ my ($f, $vrt, $vra, $vrb, $op) = @_;
+ " .long ".sprintf "0x%X",(4<<26)|($vrt<<21)|($vra<<16)|($vrb<<11)|$op;
+}
+my $vcipher = sub { vcrypto_op(@_, 1288); };
+my $vcipherlast = sub { vcrypto_op(@_, 1289); };
+my $vncipher = sub { vcrypto_op(@_, 1352); };
+my $vncipherlast= sub { vcrypto_op(@_, 1353); };
+my $vsbox = sub { vcrypto_op(@_, 0, 1480); };
+my $vshasigmad = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1730); };
+my $vshasigmaw = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1666); };
+my $vpmsumb = sub { vcrypto_op(@_, 1032); };
+my $vpmsumd = sub { vcrypto_op(@_, 1224); };
+my $vpmsubh = sub { vcrypto_op(@_, 1096); };
+my $vpmsumw = sub { vcrypto_op(@_, 1160); };
+my $vaddudm = sub { vcrypto_op(@_, 192); };
+
+my $mtsle = sub {
+ my ($f, $arg) = @_;
+ " .long ".sprintf "0x%X",(31<<26)|($arg<<21)|(147*2);
+};
+
+# PowerISA 3.0 stuff
+my $maddhdu = sub {
+ my ($f, $rt, $ra, $rb, $rc) = @_;
+ " .long ".sprintf "0x%X",(4<<26)|($rt<<21)|($ra<<16)|($rb<<11)|($rc<<6)|49;
+};
+my $maddld = sub {
+ my ($f, $rt, $ra, $rb, $rc) = @_;
+ " .long ".sprintf "0x%X",(4<<26)|($rt<<21)|($ra<<16)|($rb<<11)|($rc<<6)|51;
+};
+
+my $darn = sub {
+ my ($f, $rt, $l) = @_;
+ " .long ".sprintf "0x%X",(31<<26)|($rt<<21)|($l<<16)|(755<<1);
+};
+
+while($line=<>) {
+
+ $line =~ s|[#!;].*$||; # get rid of asm-style comments...
+ $line =~ s|/\*.*\*/||; # ... and C-style comments...
+ $line =~ s|^\s+||; # ... and skip white spaces in beginning...
+ $line =~ s|\s+$||; # ... and at the end
+
+ {
+ $line =~ s|\.L(\w+)|L$1|g; # common denominator for Locallabel
+ $line =~ s|\bL(\w+)|\.L$1|g if ($dotinlocallabels);
+ }
+
+ {
+ $line =~ s|(^[\.\w]+)\:\s*||;
+ my $label = $1;
+ if ($label) {
+ my $xlated = ($GLOBALS{$label} or $label);
+ print "$xlated:";
+ if ($flavour =~ /linux.*64le/) {
+ if ($TYPES{$label} =~ /function/) {
+ printf "\n.localentry %s,0\n",$xlated;
+ }
+ }
+ }
+ }
+
+ {
+ $line =~ s|^\s*(\.?)(\w+)([\.\+\-]?)\s*||;
+ my $c = $1; $c = "\t" if ($c eq "");
+ my $mnemonic = $2;
+ my $f = $3;
+ my $opcode = eval("\$$mnemonic");
+ $line =~ s/\b(c?[rf]|v|vs)([0-9]+)\b/$2/g if ($c ne "." and $flavour !~ /osx/);
+ if (ref($opcode) eq 'CODE') { $line = &$opcode($f,split(',',$line)); }
+ elsif ($mnemonic) { $line = $c.$mnemonic.$f."\t".$line; }
+ }
+
+ print $line if ($line);
+ print "\n";
+}
+
+close STDOUT;
diff --git a/src/crypto/test/malloc.cc b/src/crypto/test/malloc.cc
index 898f2a7..47ee6da 100644
--- a/src/crypto/test/malloc.cc
+++ b/src/crypto/test/malloc.cc
@@ -26,13 +26,9 @@
// This file isn't built on ARM or Aarch64 because we link statically in those
// builds and trying to override malloc in a static link doesn't work. It also
-// requires glibc. It's also disabled on ASan builds as this interferes with
-// ASan's malloc interceptor.
-//
-// TODO(davidben): See if this and ASan's and MSan's interceptors can be made to
-// coexist.
+// requires glibc.
#if defined(__linux__) && defined(OPENSSL_GLIBC) && !defined(OPENSSL_ARM) && \
- !defined(OPENSSL_AARCH64) && !defined(OPENSSL_ASAN)
+ !defined(OPENSSL_AARCH64)
#include <errno.h>
#include <signal.h>
@@ -58,10 +54,24 @@
static int in_call = 0;
extern "C" {
+
+#if defined(OPENSSL_ASAN)
+#define REAL_MALLOC __interceptor_malloc
+#define REAL_CALLOC __interceptor_calloc
+#define REAL_REALLOC __interceptor_realloc
+#define REAL_FREE __interceptor_free
+#else
+#define REAL_MALLOC __libc_malloc
+#define REAL_CALLOC __libc_calloc
+#define REAL_REALLOC __libc_realloc
+#define REAL_FREE __libc_free
+#endif
+
/* These are other names for the standard allocation functions. */
-extern void *__libc_malloc(size_t size);
-extern void *__libc_calloc(size_t num_elems, size_t size);
-extern void *__libc_realloc(void *ptr, size_t size);
+extern void *REAL_MALLOC(size_t size);
+extern void *REAL_CALLOC(size_t num_elems, size_t size);
+extern void *REAL_REALLOC(void *ptr, size_t size);
+extern void REAL_FREE(void *ptr);
}
static void exit_handler(void) {
@@ -124,7 +134,7 @@
return NULL;
}
- return __libc_malloc(size);
+ return REAL_MALLOC(size);
}
void *calloc(size_t num_elems, size_t size) {
@@ -133,7 +143,7 @@
return NULL;
}
- return __libc_calloc(num_elems, size);
+ return REAL_CALLOC(num_elems, size);
}
void *realloc(void *ptr, size_t size) {
@@ -142,7 +152,11 @@
return NULL;
}
- return __libc_realloc(ptr, size);
+ return REAL_REALLOC(ptr, size);
+}
+
+void free(void *ptr) {
+ REAL_FREE(ptr);
}
} // extern "C"
diff --git a/src/crypto/x509/x509_test.cc b/src/crypto/x509/x509_test.cc
index a62088d..dc5af13 100644
--- a/src/crypto/x509/x509_test.cc
+++ b/src/crypto/x509/x509_test.cc
@@ -222,6 +222,94 @@
"-----END RSA PRIVATE KEY-----\n";
+static const char kCRLTestRoot[] =
+"-----BEGIN CERTIFICATE-----\n"
+"MIIDbzCCAlegAwIBAgIJAODri7v0dDUFMA0GCSqGSIb3DQEBCwUAME4xCzAJBgNV\n"
+"BAYTAlVTMRMwEQYDVQQIDApDYWxpZm9ybmlhMRYwFAYDVQQHDA1Nb3VudGFpbiBW\n"
+"aWV3MRIwEAYDVQQKDAlCb3JpbmdTU0wwHhcNMTYwOTI2MTUwNjI2WhcNMjYwOTI0\n"
+"MTUwNjI2WjBOMQswCQYDVQQGEwJVUzETMBEGA1UECAwKQ2FsaWZvcm5pYTEWMBQG\n"
+"A1UEBwwNTW91bnRhaW4gVmlldzESMBAGA1UECgwJQm9yaW5nU1NMMIIBIjANBgkq\n"
+"hkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAo16WiLWZuaymsD8n5SKPmxV1y6jjgr3B\n"
+"S/dUBpbrzd1aeFzNlI8l2jfAnzUyp+I21RQ+nh/MhqjGElkTtK9xMn1Y+S9GMRh+\n"
+"5R/Du0iCb1tCZIPY07Tgrb0KMNWe0v2QKVVruuYSgxIWodBfxlKO64Z8AJ5IbnWp\n"
+"uRqO6rctN9qUoMlTIAB6dL4G0tDJ/PGFWOJYwOMEIX54bly2wgyYJVBKiRRt4f7n\n"
+"8H922qmvPNA9idmX9G1VAtgV6x97XXi7ULORIQvn9lVQF6nTYDBJhyuPB+mLThbL\n"
+"P2o9orxGx7aCtnnBZUIxUvHNOI0FaSaZH7Fi0xsZ/GkG2HZe7ImPJwIDAQABo1Aw\n"
+"TjAdBgNVHQ4EFgQUWPt3N5cZ/CRvubbrkqfBnAqhq94wHwYDVR0jBBgwFoAUWPt3\n"
+"N5cZ/CRvubbrkqfBnAqhq94wDAYDVR0TBAUwAwEB/zANBgkqhkiG9w0BAQsFAAOC\n"
+"AQEAORu6M0MOwXy+3VEBwNilfTxyqDfruQsc1jA4PT8Oe8zora1WxE1JB4q2FJOz\n"
+"EAuM3H/NXvEnBuN+ITvKZAJUfm4NKX97qmjMJwLKWe1gVv+VQTr63aR7mgWJReQN\n"
+"XdMztlVeZs2dppV6uEg3ia1X0G7LARxGpA9ETbMyCpb39XxlYuTClcbA5ftDN99B\n"
+"3Xg9KNdd++Ew22O3HWRDvdDpTO/JkzQfzi3sYwUtzMEonENhczJhGf7bQMmvL/w5\n"
+"24Wxj4Z7KzzWIHsNqE/RIs6RV3fcW61j/mRgW2XyoWnMVeBzvcJr9NXp4VQYmFPw\n"
+"amd8GKMZQvP0ufGnUn7D7uartA==\n"
+"-----END CERTIFICATE-----\n";
+
+static const char kCRLTestLeaf[] =
+"-----BEGIN CERTIFICATE-----\n"
+"MIIDkDCCAnigAwIBAgICEAAwDQYJKoZIhvcNAQELBQAwTjELMAkGA1UEBhMCVVMx\n"
+"EzARBgNVBAgMCkNhbGlmb3JuaWExFjAUBgNVBAcMDU1vdW50YWluIFZpZXcxEjAQ\n"
+"BgNVBAoMCUJvcmluZ1NTTDAeFw0xNjA5MjYxNTA4MzFaFw0xNzA5MjYxNTA4MzFa\n"
+"MEsxCzAJBgNVBAYTAlVTMRMwEQYDVQQIDApDYWxpZm9ybmlhMRIwEAYDVQQKDAlC\n"
+"b3JpbmdTU0wxEzARBgNVBAMMCmJvcmluZy5zc2wwggEiMA0GCSqGSIb3DQEBAQUA\n"
+"A4IBDwAwggEKAoIBAQDc5v1S1M0W+QWM+raWfO0LH8uvqEwuJQgODqMaGnSlWUx9\n"
+"8iQcnWfjyPja3lWg9K62hSOFDuSyEkysKHDxijz5R93CfLcfnVXjWQDJe7EJTTDP\n"
+"ozEvxN6RjAeYv7CF000euYr3QT5iyBjg76+bon1p0jHZBJeNPP1KqGYgyxp+hzpx\n"
+"e0gZmTlGAXd8JQK4v8kpdYwD6PPifFL/jpmQpqOtQmH/6zcLjY4ojmqpEdBqIKIX\n"
+"+saA29hMq0+NK3K+wgg31RU+cVWxu3tLOIiesETkeDgArjWRS1Vkzbi4v9SJxtNu\n"
+"OZuAxWiynRJw3JwH/OFHYZIvQqz68ZBoj96cepjPAgMBAAGjezB5MAkGA1UdEwQC\n"
+"MAAwLAYJYIZIAYb4QgENBB8WHU9wZW5TU0wgR2VuZXJhdGVkIENlcnRpZmljYXRl\n"
+"MB0GA1UdDgQWBBTGn0OVVh/aoYt0bvEKG+PIERqnDzAfBgNVHSMEGDAWgBRY+3c3\n"
+"lxn8JG+5tuuSp8GcCqGr3jANBgkqhkiG9w0BAQsFAAOCAQEAd2nM8gCQN2Dc8QJw\n"
+"XSZXyuI3DBGGCHcay/3iXu0JvTC3EiQo8J6Djv7WLI0N5KH8mkm40u89fJAB2lLZ\n"
+"ShuHVtcC182bOKnePgwp9CNwQ21p0rDEu/P3X46ZvFgdxx82E9xLa0tBB8PiPDWh\n"
+"lV16jbaKTgX5AZqjnsyjR5o9/mbZVupZJXx5Syq+XA8qiJfstSYJs4KyKK9UOjql\n"
+"ICkJVKpi2ahDBqX4MOH4SLfzVk8pqSpviS6yaA1RXqjpkxiN45WWaXDldVHMSkhC\n"
+"5CNXsXi4b1nAntu89crwSLA3rEwzCWeYj+BX7e1T9rr3oJdwOU/2KQtW1js1yQUG\n"
+"tjJMFw==\n"
+"-----END CERTIFICATE-----\n";
+
+static const char kBasicCRL[] =
+"-----BEGIN X509 CRL-----\n"
+"MIIBpzCBkAIBATANBgkqhkiG9w0BAQsFADBOMQswCQYDVQQGEwJVUzETMBEGA1UE\n"
+"CAwKQ2FsaWZvcm5pYTEWMBQGA1UEBwwNTW91bnRhaW4gVmlldzESMBAGA1UECgwJ\n"
+"Qm9yaW5nU1NMFw0xNjA5MjYxNTEwNTVaFw0xNjEwMjYxNTEwNTVaoA4wDDAKBgNV\n"
+"HRQEAwIBATANBgkqhkiG9w0BAQsFAAOCAQEAnrBKKgvd9x9zwK9rtUvVeFeJ7+LN\n"
+"ZEAc+a5oxpPNEsJx6hXoApYEbzXMxuWBQoCs5iEBycSGudct21L+MVf27M38KrWo\n"
+"eOkq0a2siqViQZO2Fb/SUFR0k9zb8xl86Zf65lgPplALun0bV/HT7MJcl04Tc4os\n"
+"dsAReBs5nqTGNEd5AlC1iKHvQZkM//MD51DspKnDpsDiUVi54h9C1SpfZmX8H2Vv\n"
+"diyu0fZ/bPAM3VAGawatf/SyWfBMyKpoPXEG39oAzmjjOj8en82psn7m474IGaho\n"
+"/vBbhl1ms5qQiLYPjm4YELtnXQoFyC72tBjbdFd/ZE9k4CNKDbxFUXFbkw==\n"
+"-----END X509 CRL-----\n";
+
+static const char kRevokedCRL[] =
+"-----BEGIN X509 CRL-----\n"
+"MIIBvjCBpwIBATANBgkqhkiG9w0BAQsFADBOMQswCQYDVQQGEwJVUzETMBEGA1UE\n"
+"CAwKQ2FsaWZvcm5pYTEWMBQGA1UEBwwNTW91bnRhaW4gVmlldzESMBAGA1UECgwJ\n"
+"Qm9yaW5nU1NMFw0xNjA5MjYxNTEyNDRaFw0xNjEwMjYxNTEyNDRaMBUwEwICEAAX\n"
+"DTE2MDkyNjE1MTIyNlqgDjAMMAoGA1UdFAQDAgECMA0GCSqGSIb3DQEBCwUAA4IB\n"
+"AQCUGaM4DcWzlQKrcZvI8TMeR8BpsvQeo5BoI/XZu2a8h//PyRyMwYeaOM+3zl0d\n"
+"sjgCT8b3C1FPgT+P2Lkowv7rJ+FHJRNQkogr+RuqCSPTq65ha4WKlRGWkMFybzVH\n"
+"NloxC+aU3lgp/NlX9yUtfqYmJek1CDrOOGPrAEAwj1l/BUeYKNGqfBWYJQtPJu+5\n"
+"OaSvIYGpETCZJscUWODmLEb/O3DM438vLvxonwGqXqS0KX37+CHpUlyhnSovxXxp\n"
+"Pz4aF+L7OtczxL0GYtD2fR9B7TDMqsNmHXgQrixvvOY7MUdLGbd4RfJL3yA53hyO\n"
+"xzfKY2TzxLiOmctG0hXFkH5J\n"
+"-----END X509 CRL-----\n";
+
+static const char kBadIssuerCRL[] =
+"-----BEGIN X509 CRL-----\n"
+"MIIBwjCBqwIBATANBgkqhkiG9w0BAQsFADBSMQswCQYDVQQGEwJVUzETMBEGA1UE\n"
+"CAwKQ2FsaWZvcm5pYTEWMBQGA1UEBwwNTW91bnRhaW4gVmlldzEWMBQGA1UECgwN\n"
+"Tm90IEJvcmluZ1NTTBcNMTYwOTI2MTUxMjQ0WhcNMTYxMDI2MTUxMjQ0WjAVMBMC\n"
+"AhAAFw0xNjA5MjYxNTEyMjZaoA4wDDAKBgNVHRQEAwIBAjANBgkqhkiG9w0BAQsF\n"
+"AAOCAQEAlBmjOA3Fs5UCq3GbyPEzHkfAabL0HqOQaCP12btmvIf/z8kcjMGHmjjP\n"
+"t85dHbI4Ak/G9wtRT4E/j9i5KML+6yfhRyUTUJKIK/kbqgkj06uuYWuFipURlpDB\n"
+"cm81RzZaMQvmlN5YKfzZV/clLX6mJiXpNQg6zjhj6wBAMI9ZfwVHmCjRqnwVmCUL\n"
+"TybvuTmkryGBqREwmSbHFFjg5ixG/ztwzON/Ly78aJ8Bql6ktCl9+/gh6VJcoZ0q\n"
+"L8V8aT8+Ghfi+zrXM8S9BmLQ9n0fQe0wzKrDZh14EK4sb7zmOzFHSxm3eEXyS98g\n"
+"Od4cjsc3ymNk88S4jpnLRtIVxZB+SQ==\n"
+"-----END X509 CRL-----\n";
+
// CertFromPEM parses the given, NUL-terminated pem block and returns an
// |X509*|.
static bssl::UniquePtr<X509> CertFromPEM(const char *pem) {
@@ -230,6 +318,14 @@
PEM_read_bio_X509(bio.get(), nullptr, nullptr, nullptr));
}
+// CRLFromPEM parses the given, NUL-terminated pem block and returns an
+// |X509_CRL*|.
+static bssl::UniquePtr<X509_CRL> CRLFromPEM(const char *pem) {
+ bssl::UniquePtr<BIO> bio(BIO_new_mem_buf(pem, strlen(pem)));
+ return bssl::UniquePtr<X509_CRL>(
+ PEM_read_bio_X509_CRL(bio.get(), nullptr, nullptr, nullptr));
+}
+
// PrivateKeyFromPEM parses the given, NUL-terminated pem block and returns an
// |EVP_PKEY*|.
static bssl::UniquePtr<EVP_PKEY> PrivateKeyFromPEM(const char *pem) {
@@ -256,34 +352,58 @@
return stack.release();
}
-static bool Verify(X509 *leaf, const std::vector<X509 *> &roots,
+// CRLsToStack converts a vector of |X509_CRL*| to an OpenSSL STACK_OF(X509_CRL*),
+// bumping the reference counts for each CRL in question.
+static STACK_OF(X509_CRL)* CRLsToStack(const std::vector<X509_CRL*> &crls) {
+ bssl::UniquePtr<STACK_OF(X509_CRL)> stack(sk_X509_CRL_new_null());
+ if (!stack) {
+ return nullptr;
+ }
+ for (auto crl : crls) {
+ if (!sk_X509_CRL_push(stack.get(), crl)) {
+ return nullptr;
+ }
+ X509_CRL_up_ref(crl);
+ }
+
+ return stack.release();
+}
+
+static int Verify(X509 *leaf, const std::vector<X509 *> &roots,
const std::vector<X509 *> &intermediates,
+ const std::vector<X509_CRL *> &crls,
unsigned long flags = 0) {
bssl::UniquePtr<STACK_OF(X509)> roots_stack(CertsToStack(roots));
bssl::UniquePtr<STACK_OF(X509)> intermediates_stack(
CertsToStack(intermediates));
+ bssl::UniquePtr<STACK_OF(X509_CRL)> crls_stack(CRLsToStack(crls));
if (!roots_stack ||
- !intermediates_stack) {
- return false;
+ !intermediates_stack ||
+ !crls_stack) {
+ return X509_V_ERR_UNSPECIFIED;
}
bssl::UniquePtr<X509_STORE_CTX> ctx(X509_STORE_CTX_new());
- if (!ctx) {
- return false;
+ bssl::UniquePtr<X509_STORE> store(X509_STORE_new());
+ if (!ctx ||
+ !store) {
+ return X509_V_ERR_UNSPECIFIED;
}
- if (!X509_STORE_CTX_init(ctx.get(), nullptr /* no X509_STORE */, leaf,
+
+ if (!X509_STORE_CTX_init(ctx.get(), store.get(), leaf,
intermediates_stack.get())) {
- return false;
+ return X509_V_ERR_UNSPECIFIED;
}
X509_STORE_CTX_trusted_stack(ctx.get(), roots_stack.get());
+ X509_STORE_CTX_set0_crls(ctx.get(), crls_stack.get());
X509_VERIFY_PARAM *param = X509_VERIFY_PARAM_new();
if (param == nullptr) {
- return false;
+ return X509_V_ERR_UNSPECIFIED;
}
- X509_VERIFY_PARAM_set_time(param, 1452807555 /* Jan 14th, 2016 */);
+ X509_VERIFY_PARAM_set_time(param, 1474934400 /* Sep 27th, 2016 */);
X509_VERIFY_PARAM_set_depth(param, 16);
if (flags) {
X509_VERIFY_PARAM_set_flags(param, flags);
@@ -291,7 +411,11 @@
X509_STORE_CTX_set0_param(ctx.get(), param);
ERR_clear_error();
- return X509_verify_cert(ctx.get()) == 1;
+ if (X509_verify_cert(ctx.get()) != 1) {
+ return X509_STORE_CTX_get_error(ctx.get());
+ }
+
+ return X509_V_OK;
}
static bool TestVerify() {
@@ -318,31 +442,37 @@
}
std::vector<X509*> empty;
- if (Verify(leaf.get(), empty, empty)) {
+ std::vector<X509_CRL*> empty_crls;
+ if (Verify(leaf.get(), empty, empty, empty_crls) !=
+ X509_V_ERR_UNABLE_TO_GET_ISSUER_CERT_LOCALLY) {
fprintf(stderr, "Leaf verified with no roots!\n");
return false;
}
- if (Verify(leaf.get(), empty, {intermediate.get()})) {
+ if (Verify(leaf.get(), empty, {intermediate.get()}, empty_crls) !=
+ X509_V_ERR_UNABLE_TO_GET_ISSUER_CERT_LOCALLY) {
fprintf(stderr, "Leaf verified with no roots!\n");
return false;
}
- if (!Verify(leaf.get(), {root.get()}, {intermediate.get()})) {
+ if (Verify(leaf.get(), {root.get()}, {intermediate.get()}, empty_crls) !=
+ X509_V_OK) {
ERR_print_errors_fp(stderr);
fprintf(stderr, "Basic chain didn't verify.\n");
return false;
}
- if (!Verify(leaf.get(), {cross_signing_root.get()},
- {intermediate.get(), root_cross_signed.get()})) {
+ if (Verify(leaf.get(), {cross_signing_root.get()},
+ {intermediate.get(), root_cross_signed.get()},
+ empty_crls) != X509_V_OK) {
ERR_print_errors_fp(stderr);
fprintf(stderr, "Cross-signed chain didn't verify.\n");
return false;
}
- if (!Verify(leaf.get(), {cross_signing_root.get(), root.get()},
- {intermediate.get(), root_cross_signed.get()})) {
+ if (Verify(leaf.get(), {cross_signing_root.get(), root.get()},
+ {intermediate.get(), root_cross_signed.get()},
+ empty_crls) != X509_V_OK) {
ERR_print_errors_fp(stderr);
fprintf(stderr, "Cross-signed chain with root didn't verify.\n");
return false;
@@ -350,22 +480,25 @@
/* This is the “altchains” test – we remove the cross-signing CA but include
* the cross-sign in the intermediates. */
- if (!Verify(leaf.get(), {root.get()},
- {intermediate.get(), root_cross_signed.get()})) {
+ if (Verify(leaf.get(), {root.get()},
+ {intermediate.get(), root_cross_signed.get()},
+ empty_crls) != X509_V_OK) {
ERR_print_errors_fp(stderr);
fprintf(stderr, "Chain with cross-sign didn't backtrack to find root.\n");
return false;
}
if (Verify(leaf.get(), {root.get()},
- {intermediate.get(), root_cross_signed.get()},
- X509_V_FLAG_NO_ALT_CHAINS)) {
+ {intermediate.get(), root_cross_signed.get()}, empty_crls,
+ X509_V_FLAG_NO_ALT_CHAINS) !=
+ X509_V_ERR_UNABLE_TO_GET_ISSUER_CERT_LOCALLY) {
fprintf(stderr, "Altchains test still passed when disabled.\n");
return false;
}
if (Verify(forgery.get(), {intermediate_self_signed.get()},
- {leaf_no_key_usage.get()})) {
+ {leaf_no_key_usage.get()},
+ empty_crls) != X509_V_ERR_INVALID_CA) {
fprintf(stderr, "Basic constraints weren't checked.\n");
return false;
}
@@ -374,7 +507,8 @@
* of roots and intermediates. This is a regression test for CVE-2015-1793. */
if (Verify(forgery.get(),
{intermediate_self_signed.get(), root_cross_signed.get()},
- {leaf_no_key_usage.get(), intermediate.get()})) {
+ {leaf_no_key_usage.get(), intermediate.get()},
+ empty_crls) != X509_V_ERR_INVALID_CA) {
fprintf(stderr, "Basic constraints weren't checked.\n");
return false;
}
@@ -382,6 +516,51 @@
return true;
}
+static bool TestCRL() {
+ bssl::UniquePtr<X509> root(CertFromPEM(kCRLTestRoot));
+ bssl::UniquePtr<X509> leaf(CertFromPEM(kCRLTestLeaf));
+ bssl::UniquePtr<X509_CRL> basic_crl(CRLFromPEM(kBasicCRL));
+ bssl::UniquePtr<X509_CRL> revoked_crl(CRLFromPEM(kRevokedCRL));
+ bssl::UniquePtr<X509_CRL> bad_issuer_crl(CRLFromPEM(kBadIssuerCRL));
+
+ if (!root ||
+ !leaf ||
+ !basic_crl ||
+ !revoked_crl ||
+ !bad_issuer_crl) {
+ fprintf(stderr, "Failed to parse certificates\n");
+ return false;
+ }
+
+ if (Verify(leaf.get(), {root.get()}, {root.get()}, {basic_crl.get()},
+ X509_V_FLAG_CRL_CHECK) != X509_V_OK) {
+ fprintf(stderr, "Cert with CRL didn't verify.\n");
+ return false;
+ }
+
+ if (Verify(leaf.get(), {root.get()}, {root.get()},
+ {basic_crl.get(), revoked_crl.get()},
+ X509_V_FLAG_CRL_CHECK) != X509_V_ERR_CERT_REVOKED) {
+ fprintf(stderr, "Revoked CRL wasn't checked.\n");
+ return false;
+ }
+
+ std::vector<X509_CRL *> empty_crls;
+ if (Verify(leaf.get(), {root.get()}, {root.get()}, empty_crls,
+ X509_V_FLAG_CRL_CHECK) != X509_V_ERR_UNABLE_TO_GET_CRL) {
+ fprintf(stderr, "CRLs were not required.\n");
+ return false;
+ }
+
+ if (Verify(leaf.get(), {root.get()}, {root.get()}, {bad_issuer_crl.get()},
+ X509_V_FLAG_CRL_CHECK) != X509_V_ERR_UNABLE_TO_GET_CRL) {
+ fprintf(stderr, "Bad CRL issuer was unnoticed.\n");
+ return false;
+ }
+
+ return true;
+}
+
static bool TestPSS() {
bssl::UniquePtr<X509> cert(CertFromPEM(kExamplePSSCert));
if (!cert) {
@@ -464,6 +643,7 @@
CRYPTO_library_init();
if (!TestVerify() ||
+ !TestCRL() ||
!TestPSS() ||
!TestBadPSSParameters() ||
!TestSignCtx()) {
diff --git a/src/crypto/x509/x509_vfy.c b/src/crypto/x509/x509_vfy.c
index 8d51703..fa6a5c5 100644
--- a/src/crypto/x509/x509_vfy.c
+++ b/src/crypto/x509/x509_vfy.c
@@ -1001,10 +1001,10 @@
crl = sk_X509_CRL_value(crls, i);
reasons = *preasons;
crl_score = get_crl_score(ctx, &crl_issuer, &reasons, crl, x);
- if (crl_score < best_score)
+ if (crl_score < best_score || crl_score == 0)
continue;
/* If current CRL is equivalent use it if it is newer */
- if (crl_score == best_score) {
+ if (crl_score == best_score && best_crl != NULL) {
int day, sec;
if (ASN1_TIME_diff(&day, &sec, X509_CRL_get_lastUpdate(best_crl),
X509_CRL_get_lastUpdate(crl)) == 0)
diff --git a/src/crypto/x509/x509_vpm.c b/src/crypto/x509/x509_vpm.c
index b51bc17..9e9dbf5 100644
--- a/src/crypto/x509/x509_vpm.c
+++ b/src/crypto/x509/x509_vpm.c
@@ -192,25 +192,36 @@
OPENSSL_free(param);
}
-/*
+/*-
* This function determines how parameters are "inherited" from one structure
- * to another. There are several different ways this can happen. 1. If a
- * child structure needs to have its values initialized from a parent they are
- * simply copied across. For example SSL_CTX copied to SSL. 2. If the
- * structure should take on values only if they are currently unset. For
- * example the values in an SSL structure will take appropriate value for SSL
- * servers or clients but only if the application has not set new ones. The
- * "inh_flags" field determines how this function behaves. Normally any
- * values which are set in the default are not copied from the destination and
- * verify flags are ORed together. If X509_VP_FLAG_DEFAULT is set then
- * anything set in the source is copied to the destination. Effectively the
- * values in "to" become default values which will be used only if nothing new
- * is set in "from". If X509_VP_FLAG_OVERWRITE is set then all value are
- * copied across whether they are set or not. Flags is still Ored though. If
- * X509_VP_FLAG_RESET_FLAGS is set then the flags value is copied instead of
- * ORed. If X509_VP_FLAG_LOCKED is set then no values are copied. If
- * X509_VP_FLAG_ONCE is set then the current inh_flags setting is zeroed after
- * the next call.
+ * to another. There are several different ways this can happen.
+ *
+ * 1. If a child structure needs to have its values initialized from a parent
+ * they are simply copied across. For example SSL_CTX copied to SSL.
+ * 2. If the structure should take on values only if they are currently unset.
+ * For example the values in an SSL structure will take appropriate value
+ * for SSL servers or clients but only if the application has not set new
+ * ones.
+ *
+ * The "inh_flags" field determines how this function behaves.
+ *
+ * Normally any values which are set in the default are not copied from the
+ * destination and verify flags are ORed together.
+ *
+ * If X509_VP_FLAG_DEFAULT is set then anything set in the source is copied
+ * to the destination. Effectively the values in "to" become default values
+ * which will be used only if nothing new is set in "from".
+ *
+ * If X509_VP_FLAG_OVERWRITE is set then all value are copied across whether
+ * they are set or not. Flags is still Ored though.
+ *
+ * If X509_VP_FLAG_RESET_FLAGS is set then the flags value is copied instead
+ * of ORed.
+ *
+ * If X509_VP_FLAG_LOCKED is set then no values are copied.
+ *
+ * If X509_VP_FLAG_ONCE is set then the current inh_flags setting is zeroed
+ * after the next call.
*/
/* Macro to test if a field should be copied from src to dest */
diff --git a/src/crypto/x509/x_crl.c b/src/crypto/x509/x_crl.c
index 934571d..89cd5d1 100644
--- a/src/crypto/x509/x_crl.c
+++ b/src/crypto/x509/x_crl.c
@@ -299,7 +299,9 @@
break;
case ASN1_OP_FREE_POST:
- if (crl->meth->crl_free) {
+ /* |crl->meth| may be NULL if constructing the object failed before
+ * |ASN1_OP_NEW_POST| was run. */
+ if (crl->meth && crl->meth->crl_free) {
if (!crl->meth->crl_free(crl))
return 0;
}