openssl-1.0.1 upgrade
Bug: 6168278
Change-Id: I648f9172828120df5d19a14425e9ceec92647921
diff --git a/patches/README b/patches/README
index f70135c..f7d886d 100644
--- a/patches/README
+++ b/patches/README
@@ -33,7 +33,3 @@
sha1_armv4_large.patch
This patch eliminates memory stores to addresses below SP.
-
-mips_asm.patch
-
-MIPS assembly routines (AES, BN, SHA1, SHA256)
diff --git a/patches/apps_Android.mk b/patches/apps_Android.mk
index c2dc2d7..9110490 100644
--- a/patches/apps_Android.mk
+++ b/patches/apps_Android.mk
@@ -48,6 +48,7 @@
smime.c \
speed.c \
spkac.c \
+ srp.c \
verify.c \
version.c \
x509.c
diff --git a/patches/crypto_Android.mk b/patches/crypto_Android.mk
index 8090c12..fb599ce 100644
--- a/patches/crypto_Android.mk
+++ b/patches/crypto_Android.mk
@@ -169,7 +169,11 @@
bn/bn_sqrt.c \
bn/bn_word.c \
buffer/buf_err.c \
+ buffer/buf_str.c \
buffer/buffer.c \
+ cmac/cm_ameth.c \
+ cmac/cm_pmeth.c \
+ cmac/cmac.c \
comp/c_rle.c \
comp/c_zlib.c \
comp/comp_err.c \
@@ -235,6 +239,7 @@
dso/dso_null.c \
dso/dso_openssl.c \
ec/ec2_mult.c \
+ ec/ec2_oct.c \
ec/ec2_smpl.c \
ec/ec_ameth.c \
ec/ec_asn1.c \
@@ -245,11 +250,13 @@
ec/ec_key.c \
ec/ec_lib.c \
ec/ec_mult.c \
+ ec/ec_oct.c \
ec/ec_pmeth.c \
ec/ec_print.c \
ec/eck_prn.c \
ec/ecp_mont.c \
ec/ecp_nist.c \
+ ec/ecp_oct.c \
ec/ecp_smpl.c \
ecdh/ech_err.c \
ecdh/ech_key.c \
@@ -295,6 +302,7 @@
evp/c_alld.c \
evp/digest.c \
evp/e_aes.c \
+ evp/e_aes_cbc_hmac_sha1.c \
evp/e_bf.c \
evp/e_des.c \
evp/e_des3.c \
@@ -302,6 +310,7 @@
evp/e_old.c \
evp/e_rc2.c \
evp/e_rc4.c \
+ evp/e_rc4_hmac_md5.c \
evp/e_rc5.c \
evp/e_xcbc_d.c \
evp/encode.c \
@@ -347,9 +356,13 @@
md5/md5_dgst.c \
md5/md5_one.c \
modes/cbc128.c \
+ modes/ccm128.c \
modes/cfb128.c \
modes/ctr128.c \
+ modes/gcm128.c \
modes/ofb128.c \
+ modes/xts128.c \
+ o_init.c \
objects/o_names.c \
objects/obj_dat.c \
objects/obj_err.c \
@@ -398,6 +411,7 @@
pkcs7/pk7_mime.c \
pkcs7/pk7_smime.c \
pkcs7/pkcs7err.c \
+ pqueue/pqueue.c \
rand/md_rand.c \
rand/rand_egd.c \
rand/rand_err.c \
@@ -411,11 +425,13 @@
rc2/rc2ofb64.c \
rc4/rc4_enc.c \
rc4/rc4_skey.c \
+ rc4/rc4_utl.c \
ripemd/rmd_dgst.c \
ripemd/rmd_one.c \
rsa/rsa_ameth.c \
rsa/rsa_asn1.c \
rsa/rsa_chk.c \
+ rsa/rsa_crpt.c \
rsa/rsa_eay.c \
rsa/rsa_err.c \
rsa/rsa_gen.c \
@@ -436,6 +452,8 @@
sha/sha256.c \
sha/sha512.c \
sha/sha_dgst.c \
+ srp/srp_lib.c \
+ srp/srp_vfy.c \
stack/stack.c \
ts/ts_err.c \
txt_db/txt_db.c \
@@ -507,12 +525,15 @@
external/openssl \
external/openssl/crypto/asn1 \
external/openssl/crypto/evp \
+ external/openssl/crypto/modes \
external/openssl/include \
external/openssl/include/openssl \
external/zlib
local_c_flags := -DNO_WINDOWS_BRAINDEATH
+local_as_flags := -x assembler-with-cpp
+
#######################################
# target static library
include $(CLEAR_VARS)
@@ -525,6 +546,7 @@
LOCAL_SRC_FILES += $(local_src_files)
LOCAL_CFLAGS += $(local_c_flags)
+LOCAL_ASFLAGS += $(local_as_flags)
LOCAL_C_INCLUDES += $(local_c_includes)
ifeq ($(TARGET_ARCH),arm)
LOCAL_SRC_FILES += $(arm_src_files)
@@ -561,6 +583,7 @@
LOCAL_SRC_FILES += $(local_src_files)
LOCAL_CFLAGS += $(local_c_flags)
+LOCAL_ASFLAGS += $(local_as_flags)
LOCAL_C_INCLUDES += $(local_c_includes)
ifeq ($(TARGET_ARCH),arm)
LOCAL_SRC_FILES += $(arm_src_files)
@@ -587,6 +610,7 @@
include $(LOCAL_PATH)/../android-config.mk
LOCAL_SRC_FILES += $(local_src_files)
LOCAL_CFLAGS += $(local_c_flags) -DPURIFY
+LOCAL_ASFLAGS += $(local_as_flags)
LOCAL_C_INCLUDES += $(local_c_includes)
LOCAL_SRC_FILES += $(other_arch_src_files)
LOCAL_STATIC_LIBRARIES += libz
@@ -602,6 +626,7 @@
include $(LOCAL_PATH)/../android-config.mk
LOCAL_SRC_FILES += $(local_src_files)
LOCAL_CFLAGS += $(local_c_flags) -DPURIFY
+LOCAL_ASFLAGS += $(local_as_flags)
LOCAL_C_INCLUDES += $(local_c_includes)
LOCAL_SRC_FILES += $(other_arch_src_files)
LOCAL_STATIC_LIBRARIES += libz
diff --git a/patches/handshake_cutthrough.patch b/patches/handshake_cutthrough.patch
index 4f29839..57c4c78 100644
--- a/patches/handshake_cutthrough.patch
+++ b/patches/handshake_cutthrough.patch
@@ -6,9 +6,9 @@
BIO_printf(bio_err," -status - request certificate status from server\n");
BIO_printf(bio_err," -no_ticket - disable use of RFC4507bis session tickets\n");
+ BIO_printf(bio_err," -cutthrough - enable 1-RTT full-handshake for strong ciphers\n");
- #endif
- }
-
+ # if !defined(OPENSSL_NO_NEXTPROTONEG)
+ BIO_printf(bio_err," -nextprotoneg arg - enable NPN extension, considering named protocols supported (comma-separated list)\n");
+ # endif
@@ -304,6 +305,7 @@ int MAIN(int argc, char **argv)
EVP_PKEY *key = NULL;
char *CApath=NULL,*CAfile=NULL,*cipher=NULL;
@@ -191,9 +191,9 @@
/* extra state */
#define SSL3_ST_CW_FLUSH (0x100|SSL_ST_CONNECT)
+#define SSL3_ST_CUTTHROUGH_COMPLETE (0x101|SSL_ST_CONNECT)
- /* write to server */
- #define SSL3_ST_CW_CLNT_HELLO_A (0x110|SSL_ST_CONNECT)
- #define SSL3_ST_CW_CLNT_HELLO_B (0x111|SSL_ST_CONNECT)
+ #ifndef OPENSSL_NO_SCTP
+ #define DTLS1_SCTP_ST_CW_WRITE_SOCK (0x310|SSL_ST_CONNECT)
+ #define DTLS1_SCTP_ST_CR_READ_SOCK (0x320|SSL_ST_CONNECT)
diff -uarp openssl-1.0.0.orig/ssl/ssl_lib.c openssl-1.0.0/ssl/ssl_lib.c
--- openssl-1.0.0.orig/ssl/ssl_lib.c 2010-02-17 14:43:46.000000000 -0500
+++ openssl-1.0.0/ssl/ssl_lib.c 2010-04-21 17:02:45.000000000 -0400
diff --git a/patches/jsse.patch b/patches/jsse.patch
index 249fb5b..80e5357 100644
--- a/patches/jsse.patch
+++ b/patches/jsse.patch
@@ -10,14 +10,6 @@
/* Default generate session ID callback. */
GEN_SESSION_CB generate_session_id;
-@@ -1546,6 +1549,7 @@ const SSL_CIPHER *SSL_get_current_cipher
- int SSL_CIPHER_get_bits(const SSL_CIPHER *c,int *alg_bits);
- char * SSL_CIPHER_get_version(const SSL_CIPHER *c);
- const char * SSL_CIPHER_get_name(const SSL_CIPHER *c);
-+const char * SSL_CIPHER_authentication_method(const SSL_CIPHER *c);
-
- int SSL_get_fd(const SSL *s);
- int SSL_get_rfd(const SSL *s);
@@ -1554,6 +1558,7 @@ const char * SSL_get_cipher_list(const
char * SSL_get_shared_ciphers(const SSL *s, char *buf, int len);
int SSL_get_read_ahead(const SSL * s);
@@ -48,9 +40,9 @@
const unsigned char *SSL_SESSION_get_id(const SSL_SESSION *s,
unsigned int *len);
+const char * SSL_SESSION_get_version(const SSL_SESSION *s);
+ unsigned int SSL_SESSION_get_compress_id(const SSL_SESSION *s);
#ifndef OPENSSL_NO_FP_API
int SSL_SESSION_print_fp(FILE *fp,const SSL_SESSION *ses);
- #endif
@@ -1624,6 +1633,7 @@ int SSL_SESSION_print(BIO *fp,const SSL_
void SSL_SESSION_free(SSL_SESSION *ses);
int i2d_SSL_SESSION(SSL_SESSION *in,unsigned char **pp);
@@ -296,13 +288,19 @@
/* works well for SSLv2, not so good for SSLv3 */
char *SSL_get_shared_ciphers(const SSL *s,char *buf,int len)
{
-@@ -2551,18 +2578,45 @@ SSL_METHOD *ssl_bad_method(int ver)
+@@ -2551,22 +2578,45 @@ SSL_METHOD *ssl_bad_method(int ver)
return(NULL);
}
-const char *SSL_get_version(const SSL *s)
+static const char *ssl_get_version(int version)
{
+- if (s->version == TLS1_2_VERSION)
++ if (version == TLS1_2_VERSION)
+ return("TLSv1.2");
+- else if (s->version == TLS1_1_VERSION)
++ else if (version == TLS1_1_VERSION)
+ return("TLSv1.1");
- if (s->version == TLS1_VERSION)
+ if (version == TLS1_VERSION)
return("TLSv1");
@@ -334,12 +332,8 @@
+ {
+ case SSL2_VERSION:
+ return SSL_TXT_RSA;
-+ case SSL3_VERSION:
-+ case TLS1_VERSION:
-+ case DTLS1_VERSION:
-+ return SSL_CIPHER_authentication_method(ssl->s3->tmp.new_cipher);
+ default:
-+ return "UNKNOWN";
++ return SSL_CIPHER_authentication_method(ssl->s3->tmp.new_cipher);
+ }
+ }
+
diff --git a/patches/mips_asm.patch b/patches/mips_asm.patch
deleted file mode 100644
index 68a80f1..0000000
--- a/patches/mips_asm.patch
+++ /dev/null
@@ -1,5461 +0,0 @@
-diff --git a/crypto/aes/asm/aes-mips.pl b/crypto/aes/asm/aes-mips.pl
-new file mode 100644
-index 0000000..2ce6def
---- /dev/null
-+++ b/crypto/aes/asm/aes-mips.pl
-@@ -0,0 +1,1611 @@
-+#!/usr/bin/env perl
-+
-+# ====================================================================
-+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
-+# project. The module is, however, dual licensed under OpenSSL and
-+# CRYPTOGAMS licenses depending on where you obtain it. For further
-+# details see http://www.openssl.org/~appro/cryptogams/.
-+# ====================================================================
-+
-+# AES for MIPS
-+
-+# October 2010
-+#
-+# Code uses 1K[+256B] S-box and on single-issue core [such as R5000]
-+# spends ~68 cycles per byte processed with 128-bit key. This is ~16%
-+# faster than gcc-generated code, which is not very impressive. But
-+# recall that compressed S-box requires extra processing, namely
-+# additional rotations. Rotations are implemented with lwl/lwr pairs,
-+# which is normally used for loading unaligned data. Another cool
-+# thing about this module is its endian neutrality, which means that
-+# it processes data without ever changing byte order...
-+
-+######################################################################
-+# There is a number of MIPS ABI in use, O32 and N32/64 are most
-+# widely used. Then there is a new contender: NUBI. It appears that if
-+# one picks the latter, it's possible to arrange code in ABI neutral
-+# manner. Therefore let's stick to NUBI register layout:
-+#
-+($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25));
-+($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
-+($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23));
-+($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31));
-+#
-+# The return value is placed in $a0. Following coding rules facilitate
-+# interoperability:
-+#
-+# - never ever touch $tp, "thread pointer", former $gp;
-+# - copy return value to $t0, former $v0 [or to $a0 if you're adapting
-+# old code];
-+# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
-+#
-+# For reference here is register layout for N32/64 MIPS ABIs:
-+#
-+# ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
-+# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
-+# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
-+# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
-+# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
-+#
-+$flavour = shift; # supported flavours are o32,n32,64,nubi32,nubi64
-+
-+if ($flavour =~ /64|n32/i) {
-+ $PTR_ADD="dadd"; # incidentally works even on n32
-+ $PTR_SUB="dsub"; # incidentally works even on n32
-+ $REG_S="sd";
-+ $REG_L="ld";
-+ $PTR_SLL="dsll"; # incidentally works even on n32
-+ $SZREG=8;
-+} else {
-+ $PTR_ADD="add";
-+ $PTR_SUB="sub";
-+ $REG_S="sw";
-+ $REG_L="lw";
-+ $PTR_SLL="sll";
-+ $SZREG=4;
-+}
-+$pf = ($flavour =~ /nubi/i) ? $t0 : $t2;
-+#
-+# <appro@openssl.org>
-+#
-+######################################################################
-+
-+$big_endian=(`echo MIPSEL | $ENV{CC} -E -P -`=~/MIPSEL/)?1:0;
-+
-+for (@ARGV) { $output=$_ if (/^\w[\w\-]*\.\w+$/); }
-+open STDOUT,">$output";
-+
-+if (!defined($big_endian))
-+{ $big_endian=(unpack('L',pack('N',1))==1); }
-+
-+while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
-+open STDOUT,">$output";
-+
-+my ($MSB,$LSB)=(0,3); # automatically converted to little-endian
-+
-+$code.=<<___;
-+.text
-+#ifdef OPENSSL_FIPSCANISTER
-+# include <openssl/fipssyms.h>
-+#endif
-+
-+#if !defined(__vxworks) || defined(__pic__)
-+.option pic2
-+#endif
-+.set noat
-+___
-+
-+{{{
-+my $FRAMESIZE=16*$SZREG;
-+my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc0fff008 : 0xc0ff0000;
-+
-+my ($inp,$out,$key,$Tbl,$s0,$s1,$s2,$s3)=($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7);
-+my ($i0,$i1,$i2,$i3)=($at,$t0,$t1,$t2);
-+my ($t0,$t1,$t2,$t3,$t4,$t5,$t6,$t7,$t8,$t9,$t10,$t11) = map("\$$_",(12..23));
-+my ($key0,$cnt)=($gp,$fp);
-+
-+# instuction ordering is "stolen" from output from MIPSpro assembler
-+# invoked with -mips3 -O3 arguments...
-+$code.=<<___;
-+.align 5
-+.ent _mips_AES_encrypt
-+_mips_AES_encrypt:
-+ .frame $sp,0,$ra
-+ .set reorder
-+ lw $t0,0($key)
-+ lw $t1,4($key)
-+ lw $t2,8($key)
-+ lw $t3,12($key)
-+ lw $cnt,240($key)
-+ $PTR_ADD $key0,$key,16
-+
-+ xor $s0,$t0
-+ xor $s1,$t1
-+ xor $s2,$t2
-+ xor $s3,$t3
-+
-+ sub $cnt,1
-+ _xtr $i0,$s1,16-2
-+.Loop_enc:
-+ _xtr $i1,$s2,16-2
-+ _xtr $i2,$s3,16-2
-+ _xtr $i3,$s0,16-2
-+ and $i0,0x3fc
-+ and $i1,0x3fc
-+ and $i2,0x3fc
-+ and $i3,0x3fc
-+ $PTR_ADD $i0,$Tbl
-+ $PTR_ADD $i1,$Tbl
-+ $PTR_ADD $i2,$Tbl
-+ $PTR_ADD $i3,$Tbl
-+ lwl $t0,3($i0) # Te1[s1>>16]
-+ lwl $t1,3($i1) # Te1[s2>>16]
-+ lwl $t2,3($i2) # Te1[s3>>16]
-+ lwl $t3,3($i3) # Te1[s0>>16]
-+ lwr $t0,2($i0) # Te1[s1>>16]
-+ lwr $t1,2($i1) # Te1[s2>>16]
-+ lwr $t2,2($i2) # Te1[s3>>16]
-+ lwr $t3,2($i3) # Te1[s0>>16]
-+
-+ _xtr $i0,$s2,8-2
-+ _xtr $i1,$s3,8-2
-+ _xtr $i2,$s0,8-2
-+ _xtr $i3,$s1,8-2
-+ and $i0,0x3fc
-+ and $i1,0x3fc
-+ and $i2,0x3fc
-+ and $i3,0x3fc
-+ $PTR_ADD $i0,$Tbl
-+ $PTR_ADD $i1,$Tbl
-+ $PTR_ADD $i2,$Tbl
-+ $PTR_ADD $i3,$Tbl
-+ lwl $t4,2($i0) # Te2[s2>>8]
-+ lwl $t5,2($i1) # Te2[s3>>8]
-+ lwl $t6,2($i2) # Te2[s0>>8]
-+ lwl $t7,2($i3) # Te2[s1>>8]
-+ lwr $t4,1($i0) # Te2[s2>>8]
-+ lwr $t5,1($i1) # Te2[s3>>8]
-+ lwr $t6,1($i2) # Te2[s0>>8]
-+ lwr $t7,1($i3) # Te2[s1>>8]
-+
-+ _xtr $i0,$s3,0-2
-+ _xtr $i1,$s0,0-2
-+ _xtr $i2,$s1,0-2
-+ _xtr $i3,$s2,0-2
-+ and $i0,0x3fc
-+ and $i1,0x3fc
-+ and $i2,0x3fc
-+ and $i3,0x3fc
-+ $PTR_ADD $i0,$Tbl
-+ $PTR_ADD $i1,$Tbl
-+ $PTR_ADD $i2,$Tbl
-+ $PTR_ADD $i3,$Tbl
-+ lwl $t8,1($i0) # Te3[s3]
-+ lwl $t9,1($i1) # Te3[s0]
-+ lwl $t10,1($i2) # Te3[s1]
-+ lwl $t11,1($i3) # Te3[s2]
-+ lwr $t8,0($i0) # Te3[s3]
-+ lwr $t9,0($i1) # Te3[s0]
-+ lwr $t10,0($i2) # Te3[s1]
-+ lwr $t11,0($i3) # Te3[s2]
-+
-+ _xtr $i0,$s0,24-2
-+ _xtr $i1,$s1,24-2
-+ _xtr $i2,$s2,24-2
-+ _xtr $i3,$s3,24-2
-+ and $i0,0x3fc
-+ and $i1,0x3fc
-+ and $i2,0x3fc
-+ and $i3,0x3fc
-+ $PTR_ADD $i0,$Tbl
-+ $PTR_ADD $i1,$Tbl
-+ $PTR_ADD $i2,$Tbl
-+ $PTR_ADD $i3,$Tbl
-+ xor $t0,$t4
-+ xor $t1,$t5
-+ xor $t2,$t6
-+ xor $t3,$t7
-+ lw $t4,0($i0) # Te0[s0>>24]
-+ lw $t5,0($i1) # Te0[s1>>24]
-+ lw $t6,0($i2) # Te0[s2>>24]
-+ lw $t7,0($i3) # Te0[s3>>24]
-+
-+ lw $s0,0($key0)
-+ lw $s1,4($key0)
-+ lw $s2,8($key0)
-+ lw $s3,12($key0)
-+
-+ xor $t0,$t8
-+ xor $t1,$t9
-+ xor $t2,$t10
-+ xor $t3,$t11
-+
-+ xor $t0,$t4
-+ xor $t1,$t5
-+ xor $t2,$t6
-+ xor $t3,$t7
-+
-+ sub $cnt,1
-+ $PTR_ADD $key0,16
-+ xor $s0,$t0
-+ xor $s1,$t1
-+ xor $s2,$t2
-+ xor $s3,$t3
-+ .set noreorder
-+ bnez $cnt,.Loop_enc
-+ _xtr $i0,$s1,16-2
-+
-+ .set reorder
-+ _xtr $i1,$s2,16-2
-+ _xtr $i2,$s3,16-2
-+ _xtr $i3,$s0,16-2
-+ and $i0,0x3fc
-+ and $i1,0x3fc
-+ and $i2,0x3fc
-+ and $i3,0x3fc
-+ $PTR_ADD $i0,$Tbl
-+ $PTR_ADD $i1,$Tbl
-+ $PTR_ADD $i2,$Tbl
-+ $PTR_ADD $i3,$Tbl
-+ lbu $t0,2($i0) # Te4[s1>>16]
-+ lbu $t1,2($i1) # Te4[s2>>16]
-+ lbu $t2,2($i2) # Te4[s3>>16]
-+ lbu $t3,2($i3) # Te4[s0>>16]
-+
-+ _xtr $i0,$s2,8-2
-+ _xtr $i1,$s3,8-2
-+ _xtr $i2,$s0,8-2
-+ _xtr $i3,$s1,8-2
-+ and $i0,0x3fc
-+ and $i1,0x3fc
-+ and $i2,0x3fc
-+ and $i3,0x3fc
-+ $PTR_ADD $i0,$Tbl
-+ $PTR_ADD $i1,$Tbl
-+ $PTR_ADD $i2,$Tbl
-+ $PTR_ADD $i3,$Tbl
-+ lbu $t4,2($i0) # Te4[s2>>8]
-+ lbu $t5,2($i1) # Te4[s3>>8]
-+ lbu $t6,2($i2) # Te4[s0>>8]
-+ lbu $t7,2($i3) # Te4[s1>>8]
-+
-+ _xtr $i0,$s0,24-2
-+ _xtr $i1,$s1,24-2
-+ _xtr $i2,$s2,24-2
-+ _xtr $i3,$s3,24-2
-+ and $i0,0x3fc
-+ and $i1,0x3fc
-+ and $i2,0x3fc
-+ and $i3,0x3fc
-+ $PTR_ADD $i0,$Tbl
-+ $PTR_ADD $i1,$Tbl
-+ $PTR_ADD $i2,$Tbl
-+ $PTR_ADD $i3,$Tbl
-+ lbu $t8,2($i0) # Te4[s0>>24]
-+ lbu $t9,2($i1) # Te4[s1>>24]
-+ lbu $t10,2($i2) # Te4[s2>>24]
-+ lbu $t11,2($i3) # Te4[s3>>24]
-+
-+ _xtr $i0,$s3,0-2
-+ _xtr $i1,$s0,0-2
-+ _xtr $i2,$s1,0-2
-+ _xtr $i3,$s2,0-2
-+ and $i0,0x3fc
-+ and $i1,0x3fc
-+ and $i2,0x3fc
-+ and $i3,0x3fc
-+
-+ _ins $t0,16
-+ _ins $t1,16
-+ _ins $t2,16
-+ _ins $t3,16
-+
-+ _ins $t4,8
-+ _ins $t5,8
-+ _ins $t6,8
-+ _ins $t7,8
-+
-+ xor $t0,$t4
-+ xor $t1,$t5
-+ xor $t2,$t6
-+ xor $t3,$t7
-+
-+ $PTR_ADD $i0,$Tbl
-+ $PTR_ADD $i1,$Tbl
-+ $PTR_ADD $i2,$Tbl
-+ $PTR_ADD $i3,$Tbl
-+ lbu $t4,2($i0) # Te4[s3]
-+ lbu $t5,2($i1) # Te4[s0]
-+ lbu $t6,2($i2) # Te4[s1]
-+ lbu $t7,2($i3) # Te4[s2]
-+
-+ _ins $t8,24
-+ _ins $t9,24
-+ _ins $t10,24
-+ _ins $t11,24
-+
-+ lw $s0,0($key0)
-+ lw $s1,4($key0)
-+ lw $s2,8($key0)
-+ lw $s3,12($key0)
-+
-+ xor $t0,$t8
-+ xor $t1,$t9
-+ xor $t2,$t10
-+ xor $t3,$t11
-+
-+ _ins $t4,0
-+ _ins $t5,0
-+ _ins $t6,0
-+ _ins $t7,0
-+
-+ xor $t0,$t4
-+ xor $t1,$t5
-+ xor $t2,$t6
-+ xor $t3,$t7
-+
-+ xor $s0,$t0
-+ xor $s1,$t1
-+ xor $s2,$t2
-+ xor $s3,$t3
-+
-+ jr $ra
-+.end _mips_AES_encrypt
-+
-+.align 5
-+.globl AES_encrypt
-+.ent AES_encrypt
-+AES_encrypt:
-+ .frame $sp,$FRAMESIZE,$ra
-+ .mask $SAVED_REGS_MASK,-$SZREG
-+ .set noreorder
-+___
-+$code.=<<___ if ($flavour =~ /o32/i); # o32 PIC-ification
-+ .cpload $pf
-+___
-+$code.=<<___;
-+ $PTR_SUB $sp,$FRAMESIZE
-+ $REG_S $ra,$FRAMESIZE-1*$SZREG($sp)
-+ $REG_S $fp,$FRAMESIZE-2*$SZREG($sp)
-+ $REG_S $s11,$FRAMESIZE-3*$SZREG($sp)
-+ $REG_S $s10,$FRAMESIZE-4*$SZREG($sp)
-+ $REG_S $s9,$FRAMESIZE-5*$SZREG($sp)
-+ $REG_S $s8,$FRAMESIZE-6*$SZREG($sp)
-+ $REG_S $s7,$FRAMESIZE-7*$SZREG($sp)
-+ $REG_S $s6,$FRAMESIZE-8*$SZREG($sp)
-+ $REG_S $s5,$FRAMESIZE-9*$SZREG($sp)
-+ $REG_S $s4,$FRAMESIZE-10*$SZREG($sp)
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
-+ $REG_S \$15,$FRAMESIZE-11*$SZREG($sp)
-+ $REG_S \$14,$FRAMESIZE-12*$SZREG($sp)
-+ $REG_S \$13,$FRAMESIZE-13*$SZREG($sp)
-+ $REG_S \$12,$FRAMESIZE-14*$SZREG($sp)
-+ $REG_S $gp,$FRAMESIZE-15*$SZREG($sp)
-+___
-+$code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification
-+ .cplocal $Tbl
-+ .cpsetup $pf,$zero,AES_encrypt
-+___
-+$code.=<<___;
-+ .set reorder
-+ la $Tbl,AES_Te # PIC-ified 'load address'
-+
-+ lwl $s0,0+$MSB($inp)
-+ lwl $s1,4+$MSB($inp)
-+ lwl $s2,8+$MSB($inp)
-+ lwl $s3,12+$MSB($inp)
-+ lwr $s0,0+$LSB($inp)
-+ lwr $s1,4+$LSB($inp)
-+ lwr $s2,8+$LSB($inp)
-+ lwr $s3,12+$LSB($inp)
-+
-+ bal _mips_AES_encrypt
-+
-+ swr $s0,0+$LSB($out)
-+ swr $s1,4+$LSB($out)
-+ swr $s2,8+$LSB($out)
-+ swr $s3,12+$LSB($out)
-+ swl $s0,0+$MSB($out)
-+ swl $s1,4+$MSB($out)
-+ swl $s2,8+$MSB($out)
-+ swl $s3,12+$MSB($out)
-+
-+ .set noreorder
-+ $REG_L $ra,$FRAMESIZE-1*$SZREG($sp)
-+ $REG_L $fp,$FRAMESIZE-2*$SZREG($sp)
-+ $REG_L $s11,$FRAMESIZE-3*$SZREG($sp)
-+ $REG_L $s10,$FRAMESIZE-4*$SZREG($sp)
-+ $REG_L $s9,$FRAMESIZE-5*$SZREG($sp)
-+ $REG_L $s8,$FRAMESIZE-6*$SZREG($sp)
-+ $REG_L $s7,$FRAMESIZE-7*$SZREG($sp)
-+ $REG_L $s6,$FRAMESIZE-8*$SZREG($sp)
-+ $REG_L $s5,$FRAMESIZE-9*$SZREG($sp)
-+ $REG_L $s4,$FRAMESIZE-10*$SZREG($sp)
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+ $REG_L \$15,$FRAMESIZE-11*$SZREG($sp)
-+ $REG_L \$14,$FRAMESIZE-12*$SZREG($sp)
-+ $REG_L \$13,$FRAMESIZE-13*$SZREG($sp)
-+ $REG_L \$12,$FRAMESIZE-14*$SZREG($sp)
-+ $REG_L $gp,$FRAMESIZE-15*$SZREG($sp)
-+___
-+$code.=<<___;
-+ jr $ra
-+ $PTR_ADD $sp,$FRAMESIZE
-+.end AES_encrypt
-+___
-+
-+$code.=<<___;
-+.align 5
-+.ent _mips_AES_decrypt
-+_mips_AES_decrypt:
-+ .frame $sp,0,$ra
-+ .set reorder
-+ lw $t0,0($key)
-+ lw $t1,4($key)
-+ lw $t2,8($key)
-+ lw $t3,12($key)
-+ lw $cnt,240($key)
-+ $PTR_ADD $key0,$key,16
-+
-+ xor $s0,$t0
-+ xor $s1,$t1
-+ xor $s2,$t2
-+ xor $s3,$t3
-+
-+ sub $cnt,1
-+ _xtr $i0,$s3,16-2
-+.Loop_dec:
-+ _xtr $i1,$s0,16-2
-+ _xtr $i2,$s1,16-2
-+ _xtr $i3,$s2,16-2
-+ and $i0,0x3fc
-+ and $i1,0x3fc
-+ and $i2,0x3fc
-+ and $i3,0x3fc
-+ $PTR_ADD $i0,$Tbl
-+ $PTR_ADD $i1,$Tbl
-+ $PTR_ADD $i2,$Tbl
-+ $PTR_ADD $i3,$Tbl
-+ lwl $t0,3($i0) # Td1[s3>>16]
-+ lwl $t1,3($i1) # Td1[s0>>16]
-+ lwl $t2,3($i2) # Td1[s1>>16]
-+ lwl $t3,3($i3) # Td1[s2>>16]
-+ lwr $t0,2($i0) # Td1[s3>>16]
-+ lwr $t1,2($i1) # Td1[s0>>16]
-+ lwr $t2,2($i2) # Td1[s1>>16]
-+ lwr $t3,2($i3) # Td1[s2>>16]
-+
-+ _xtr $i0,$s2,8-2
-+ _xtr $i1,$s3,8-2
-+ _xtr $i2,$s0,8-2
-+ _xtr $i3,$s1,8-2
-+ and $i0,0x3fc
-+ and $i1,0x3fc
-+ and $i2,0x3fc
-+ and $i3,0x3fc
-+ $PTR_ADD $i0,$Tbl
-+ $PTR_ADD $i1,$Tbl
-+ $PTR_ADD $i2,$Tbl
-+ $PTR_ADD $i3,$Tbl
-+ lwl $t4,2($i0) # Td2[s2>>8]
-+ lwl $t5,2($i1) # Td2[s3>>8]
-+ lwl $t6,2($i2) # Td2[s0>>8]
-+ lwl $t7,2($i3) # Td2[s1>>8]
-+ lwr $t4,1($i0) # Td2[s2>>8]
-+ lwr $t5,1($i1) # Td2[s3>>8]
-+ lwr $t6,1($i2) # Td2[s0>>8]
-+ lwr $t7,1($i3) # Td2[s1>>8]
-+
-+ _xtr $i0,$s1,0-2
-+ _xtr $i1,$s2,0-2
-+ _xtr $i2,$s3,0-2
-+ _xtr $i3,$s0,0-2
-+ and $i0,0x3fc
-+ and $i1,0x3fc
-+ and $i2,0x3fc
-+ and $i3,0x3fc
-+ $PTR_ADD $i0,$Tbl
-+ $PTR_ADD $i1,$Tbl
-+ $PTR_ADD $i2,$Tbl
-+ $PTR_ADD $i3,$Tbl
-+ lwl $t8,1($i0) # Td3[s1]
-+ lwl $t9,1($i1) # Td3[s2]
-+ lwl $t10,1($i2) # Td3[s3]
-+ lwl $t11,1($i3) # Td3[s0]
-+ lwr $t8,0($i0) # Td3[s1]
-+ lwr $t9,0($i1) # Td3[s2]
-+ lwr $t10,0($i2) # Td3[s3]
-+ lwr $t11,0($i3) # Td3[s0]
-+
-+ _xtr $i0,$s0,24-2
-+ _xtr $i1,$s1,24-2
-+ _xtr $i2,$s2,24-2
-+ _xtr $i3,$s3,24-2
-+ and $i0,0x3fc
-+ and $i1,0x3fc
-+ and $i2,0x3fc
-+ and $i3,0x3fc
-+ $PTR_ADD $i0,$Tbl
-+ $PTR_ADD $i1,$Tbl
-+ $PTR_ADD $i2,$Tbl
-+ $PTR_ADD $i3,$Tbl
-+
-+ xor $t0,$t4
-+ xor $t1,$t5
-+ xor $t2,$t6
-+ xor $t3,$t7
-+
-+
-+ lw $t4,0($i0) # Td0[s0>>24]
-+ lw $t5,0($i1) # Td0[s1>>24]
-+ lw $t6,0($i2) # Td0[s2>>24]
-+ lw $t7,0($i3) # Td0[s3>>24]
-+
-+ lw $s0,0($key0)
-+ lw $s1,4($key0)
-+ lw $s2,8($key0)
-+ lw $s3,12($key0)
-+
-+ xor $t0,$t8
-+ xor $t1,$t9
-+ xor $t2,$t10
-+ xor $t3,$t11
-+
-+ xor $t0,$t4
-+ xor $t1,$t5
-+ xor $t2,$t6
-+ xor $t3,$t7
-+
-+ sub $cnt,1
-+ $PTR_ADD $key0,16
-+ xor $s0,$t0
-+ xor $s1,$t1
-+ xor $s2,$t2
-+ xor $s3,$t3
-+ .set noreorder
-+ bnez $cnt,.Loop_dec
-+ _xtr $i0,$s3,16-2
-+
-+ .set reorder
-+ lw $t4,1024($Tbl) # prefetch Td4
-+ lw $t5,1024+32($Tbl)
-+ lw $t6,1024+64($Tbl)
-+ lw $t7,1024+96($Tbl)
-+ lw $t8,1024+128($Tbl)
-+ lw $t9,1024+160($Tbl)
-+ lw $t10,1024+192($Tbl)
-+ lw $t11,1024+224($Tbl)
-+
-+ _xtr $i0,$s3,16
-+ _xtr $i1,$s0,16
-+ _xtr $i2,$s1,16
-+ _xtr $i3,$s2,16
-+ and $i0,0xff
-+ and $i1,0xff
-+ and $i2,0xff
-+ and $i3,0xff
-+ $PTR_ADD $i0,$Tbl
-+ $PTR_ADD $i1,$Tbl
-+ $PTR_ADD $i2,$Tbl
-+ $PTR_ADD $i3,$Tbl
-+ lbu $t0,1024($i0) # Td4[s3>>16]
-+ lbu $t1,1024($i1) # Td4[s0>>16]
-+ lbu $t2,1024($i2) # Td4[s1>>16]
-+ lbu $t3,1024($i3) # Td4[s2>>16]
-+
-+ _xtr $i0,$s2,8
-+ _xtr $i1,$s3,8
-+ _xtr $i2,$s0,8
-+ _xtr $i3,$s1,8
-+ and $i0,0xff
-+ and $i1,0xff
-+ and $i2,0xff
-+ and $i3,0xff
-+ $PTR_ADD $i0,$Tbl
-+ $PTR_ADD $i1,$Tbl
-+ $PTR_ADD $i2,$Tbl
-+ $PTR_ADD $i3,$Tbl
-+ lbu $t4,1024($i0) # Td4[s2>>8]
-+ lbu $t5,1024($i1) # Td4[s3>>8]
-+ lbu $t6,1024($i2) # Td4[s0>>8]
-+ lbu $t7,1024($i3) # Td4[s1>>8]
-+
-+ _xtr $i0,$s0,24
-+ _xtr $i1,$s1,24
-+ _xtr $i2,$s2,24
-+ _xtr $i3,$s3,24
-+ $PTR_ADD $i0,$Tbl
-+ $PTR_ADD $i1,$Tbl
-+ $PTR_ADD $i2,$Tbl
-+ $PTR_ADD $i3,$Tbl
-+ lbu $t8,1024($i0) # Td4[s0>>24]
-+ lbu $t9,1024($i1) # Td4[s1>>24]
-+ lbu $t10,1024($i2) # Td4[s2>>24]
-+ lbu $t11,1024($i3) # Td4[s3>>24]
-+
-+ _xtr $i0,$s1,0
-+ _xtr $i1,$s2,0
-+ _xtr $i2,$s3,0
-+ _xtr $i3,$s0,0
-+
-+ _ins $t0,16
-+ _ins $t1,16
-+ _ins $t2,16
-+ _ins $t3,16
-+
-+ _ins $t4,8
-+ _ins $t5,8
-+ _ins $t6,8
-+ _ins $t7,8
-+
-+ xor $t0,$t4
-+ xor $t1,$t5
-+ xor $t2,$t6
-+ xor $t3,$t7
-+
-+ $PTR_ADD $i0,$Tbl
-+ $PTR_ADD $i1,$Tbl
-+ $PTR_ADD $i2,$Tbl
-+ $PTR_ADD $i3,$Tbl
-+ lbu $t4,1024($i0) # Td4[s1]
-+ lbu $t5,1024($i1) # Td4[s2]
-+ lbu $t6,1024($i2) # Td4[s3]
-+ lbu $t7,1024($i3) # Td4[s0]
-+
-+ _ins $t8,24
-+ _ins $t9,24
-+ _ins $t10,24
-+ _ins $t11,24
-+
-+ lw $s0,0($key0)
-+ lw $s1,4($key0)
-+ lw $s2,8($key0)
-+ lw $s3,12($key0)
-+
-+ _ins $t4,0
-+ _ins $t5,0
-+ _ins $t6,0
-+ _ins $t7,0
-+
-+
-+ xor $t0,$t8
-+ xor $t1,$t9
-+ xor $t2,$t10
-+ xor $t3,$t11
-+
-+ xor $t0,$t4
-+ xor $t1,$t5
-+ xor $t2,$t6
-+ xor $t3,$t7
-+
-+ xor $s0,$t0
-+ xor $s1,$t1
-+ xor $s2,$t2
-+ xor $s3,$t3
-+
-+ jr $ra
-+.end _mips_AES_decrypt
-+
-+.align 5
-+.globl AES_decrypt
-+.ent AES_decrypt
-+AES_decrypt:
-+ .frame $sp,$FRAMESIZE,$ra
-+ .mask $SAVED_REGS_MASK,-$SZREG
-+ .set noreorder
-+___
-+$code.=<<___ if ($flavour =~ /o32/i); # o32 PIC-ification
-+ .cpload $pf
-+___
-+$code.=<<___;
-+ $PTR_SUB $sp,$FRAMESIZE
-+ $REG_S $ra,$FRAMESIZE-1*$SZREG($sp)
-+ $REG_S $fp,$FRAMESIZE-2*$SZREG($sp)
-+ $REG_S $s11,$FRAMESIZE-3*$SZREG($sp)
-+ $REG_S $s10,$FRAMESIZE-4*$SZREG($sp)
-+ $REG_S $s9,$FRAMESIZE-5*$SZREG($sp)
-+ $REG_S $s8,$FRAMESIZE-6*$SZREG($sp)
-+ $REG_S $s7,$FRAMESIZE-7*$SZREG($sp)
-+ $REG_S $s6,$FRAMESIZE-8*$SZREG($sp)
-+ $REG_S $s5,$FRAMESIZE-9*$SZREG($sp)
-+ $REG_S $s4,$FRAMESIZE-10*$SZREG($sp)
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
-+ $REG_S \$15,$FRAMESIZE-11*$SZREG($sp)
-+ $REG_S \$14,$FRAMESIZE-12*$SZREG($sp)
-+ $REG_S \$13,$FRAMESIZE-13*$SZREG($sp)
-+ $REG_S \$12,$FRAMESIZE-14*$SZREG($sp)
-+ $REG_S $gp,$FRAMESIZE-15*$SZREG($sp)
-+___
-+$code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification
-+ .cplocal $Tbl
-+ .cpsetup $pf,$zero,AES_decrypt
-+___
-+$code.=<<___;
-+ .set reorder
-+ la $Tbl,AES_Td # PIC-ified 'load address'
-+
-+ lwl $s0,0+$MSB($inp)
-+ lwl $s1,4+$MSB($inp)
-+ lwl $s2,8+$MSB($inp)
-+ lwl $s3,12+$MSB($inp)
-+ lwr $s0,0+$LSB($inp)
-+ lwr $s1,4+$LSB($inp)
-+ lwr $s2,8+$LSB($inp)
-+ lwr $s3,12+$LSB($inp)
-+
-+ bal _mips_AES_decrypt
-+
-+ swr $s0,0+$LSB($out)
-+ swr $s1,4+$LSB($out)
-+ swr $s2,8+$LSB($out)
-+ swr $s3,12+$LSB($out)
-+ swl $s0,0+$MSB($out)
-+ swl $s1,4+$MSB($out)
-+ swl $s2,8+$MSB($out)
-+ swl $s3,12+$MSB($out)
-+
-+ .set noreorder
-+ $REG_L $ra,$FRAMESIZE-1*$SZREG($sp)
-+ $REG_L $fp,$FRAMESIZE-2*$SZREG($sp)
-+ $REG_L $s11,$FRAMESIZE-3*$SZREG($sp)
-+ $REG_L $s10,$FRAMESIZE-4*$SZREG($sp)
-+ $REG_L $s9,$FRAMESIZE-5*$SZREG($sp)
-+ $REG_L $s8,$FRAMESIZE-6*$SZREG($sp)
-+ $REG_L $s7,$FRAMESIZE-7*$SZREG($sp)
-+ $REG_L $s6,$FRAMESIZE-8*$SZREG($sp)
-+ $REG_L $s5,$FRAMESIZE-9*$SZREG($sp)
-+ $REG_L $s4,$FRAMESIZE-10*$SZREG($sp)
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+ $REG_L \$15,$FRAMESIZE-11*$SZREG($sp)
-+ $REG_L \$14,$FRAMESIZE-12*$SZREG($sp)
-+ $REG_L \$13,$FRAMESIZE-13*$SZREG($sp)
-+ $REG_L \$12,$FRAMESIZE-14*$SZREG($sp)
-+ $REG_L $gp,$FRAMESIZE-15*$SZREG($sp)
-+___
-+$code.=<<___;
-+ jr $ra
-+ $PTR_ADD $sp,$FRAMESIZE
-+.end AES_decrypt
-+___
-+}}}
-+
-+{{{
-+my $FRAMESIZE=8*$SZREG;
-+my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc000f008 : 0xc0000000;
-+
-+my ($inp,$bits,$key,$Tbl)=($a0,$a1,$a2,$a3);
-+my ($rk0,$rk1,$rk2,$rk3,$rk4,$rk5,$rk6,$rk7)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3);
-+my ($i0,$i1,$i2,$i3)=($at,$t0,$t1,$t2);
-+my ($rcon,$cnt)=($gp,$fp);
-+
-+$code.=<<___;
-+.align 5
-+.ent _mips_AES_set_encrypt_key
-+_mips_AES_set_encrypt_key:
-+ .frame $sp,0,$ra
-+ .set noreorder
-+ beqz $inp,.Lekey_done
-+ li $t0,-1
-+ beqz $key,.Lekey_done
-+ $PTR_ADD $rcon,$Tbl,1024+256
-+
-+ .set reorder
-+ lwl $rk0,0+$MSB($inp) # load 128 bits
-+ lwl $rk1,4+$MSB($inp)
-+ lwl $rk2,8+$MSB($inp)
-+ lwl $rk3,12+$MSB($inp)
-+ li $at,128
-+ lwr $rk0,0+$LSB($inp)
-+ lwr $rk1,4+$LSB($inp)
-+ lwr $rk2,8+$LSB($inp)
-+ lwr $rk3,12+$LSB($inp)
-+ .set noreorder
-+ beq $bits,$at,.L128bits
-+ li $cnt,10
-+
-+ .set reorder
-+ lwl $rk4,16+$MSB($inp) # load 192 bits
-+ lwl $rk5,20+$MSB($inp)
-+ li $at,192
-+ lwr $rk4,16+$LSB($inp)
-+ lwr $rk5,20+$LSB($inp)
-+ .set noreorder
-+ beq $bits,$at,.L192bits
-+ li $cnt,8
-+
-+ .set reorder
-+ lwl $rk6,24+$MSB($inp) # load 256 bits
-+ lwl $rk7,28+$MSB($inp)
-+ li $at,256
-+ lwr $rk6,24+$LSB($inp)
-+ lwr $rk7,28+$LSB($inp)
-+ .set noreorder
-+ beq $bits,$at,.L256bits
-+ li $cnt,7
-+
-+ b .Lekey_done
-+ li $t0,-2
-+
-+.align 4
-+.L128bits:
-+ .set reorder
-+ srl $i0,$rk3,16
-+ srl $i1,$rk3,8
-+ and $i0,0xff
-+ and $i1,0xff
-+ and $i2,$rk3,0xff
-+ srl $i3,$rk3,24
-+ $PTR_ADD $i0,$Tbl
-+ $PTR_ADD $i1,$Tbl
-+ $PTR_ADD $i2,$Tbl
-+ $PTR_ADD $i3,$Tbl
-+ lbu $i0,1024($i0)
-+ lbu $i1,1024($i1)
-+ lbu $i2,1024($i2)
-+ lbu $i3,1024($i3)
-+
-+ sw $rk0,0($key)
-+ sw $rk1,4($key)
-+ sw $rk2,8($key)
-+ sw $rk3,12($key)
-+ sub $cnt,1
-+ $PTR_ADD $key,16
-+
-+ _bias $i0,24
-+ _bias $i1,16
-+ _bias $i2,8
-+ _bias $i3,0
-+
-+ xor $rk0,$i0
-+ lw $i0,0($rcon)
-+ xor $rk0,$i1
-+ xor $rk0,$i2
-+ xor $rk0,$i3
-+ xor $rk0,$i0
-+
-+ xor $rk1,$rk0
-+ xor $rk2,$rk1
-+ xor $rk3,$rk2
-+
-+ .set noreorder
-+ bnez $cnt,.L128bits
-+ $PTR_ADD $rcon,4
-+
-+ sw $rk0,0($key)
-+ sw $rk1,4($key)
-+ sw $rk2,8($key)
-+ li $cnt,10
-+ sw $rk3,12($key)
-+ li $t0,0
-+ sw $cnt,80($key)
-+ b .Lekey_done
-+ $PTR_SUB $key,10*16
-+
-+.align 4
-+.L192bits:
-+ .set reorder
-+ srl $i0,$rk5,16
-+ srl $i1,$rk5,8
-+ and $i0,0xff
-+ and $i1,0xff
-+ and $i2,$rk5,0xff
-+ srl $i3,$rk5,24
-+ $PTR_ADD $i0,$Tbl
-+ $PTR_ADD $i1,$Tbl
-+ $PTR_ADD $i2,$Tbl
-+ $PTR_ADD $i3,$Tbl
-+ lbu $i0,1024($i0)
-+ lbu $i1,1024($i1)
-+ lbu $i2,1024($i2)
-+ lbu $i3,1024($i3)
-+
-+ sw $rk0,0($key)
-+ sw $rk1,4($key)
-+ sw $rk2,8($key)
-+ sw $rk3,12($key)
-+ sw $rk4,16($key)
-+ sw $rk5,20($key)
-+ sub $cnt,1
-+ $PTR_ADD $key,24
-+
-+ _bias $i0,24
-+ _bias $i1,16
-+ _bias $i2,8
-+ _bias $i3,0
-+
-+ xor $rk0,$i0
-+ lw $i0,0($rcon)
-+ xor $rk0,$i1
-+ xor $rk0,$i2
-+ xor $rk0,$i3
-+ xor $rk0,$i0
-+
-+ xor $rk1,$rk0
-+ xor $rk2,$rk1
-+ xor $rk3,$rk2
-+ xor $rk4,$rk3
-+ xor $rk5,$rk4
-+
-+ .set noreorder
-+ bnez $cnt,.L192bits
-+ $PTR_ADD $rcon,4
-+
-+ sw $rk0,0($key)
-+ sw $rk1,4($key)
-+ sw $rk2,8($key)
-+ li $cnt,12
-+ sw $rk3,12($key)
-+ li $t0,0
-+ sw $cnt,48($key)
-+ b .Lekey_done
-+ $PTR_SUB $key,12*16
-+
-+.align 4
-+.L256bits:
-+ .set reorder
-+ srl $i0,$rk7,16
-+ srl $i1,$rk7,8
-+ and $i0,0xff
-+ and $i1,0xff
-+ and $i2,$rk7,0xff
-+ srl $i3,$rk7,24
-+ $PTR_ADD $i0,$Tbl
-+ $PTR_ADD $i1,$Tbl
-+ $PTR_ADD $i2,$Tbl
-+ $PTR_ADD $i3,$Tbl
-+ lbu $i0,1024($i0)
-+ lbu $i1,1024($i1)
-+ lbu $i2,1024($i2)
-+ lbu $i3,1024($i3)
-+
-+ sw $rk0,0($key)
-+ sw $rk1,4($key)
-+ sw $rk2,8($key)
-+ sw $rk3,12($key)
-+ sw $rk4,16($key)
-+ sw $rk5,20($key)
-+ sw $rk6,24($key)
-+ sw $rk7,28($key)
-+ sub $cnt,1
-+
-+ _bias $i0,24
-+ _bias $i1,16
-+ _bias $i2,8
-+ _bias $i3,0
-+
-+ xor $rk0,$i0
-+ lw $i0,0($rcon)
-+ xor $rk0,$i1
-+ xor $rk0,$i2
-+ xor $rk0,$i3
-+ xor $rk0,$i0
-+
-+ xor $rk1,$rk0
-+ xor $rk2,$rk1
-+ xor $rk3,$rk2
-+ beqz $cnt,.L256bits_done
-+
-+ srl $i0,$rk3,24
-+ srl $i1,$rk3,16
-+ srl $i2,$rk3,8
-+ and $i3,$rk3,0xff
-+ and $i1,0xff
-+ and $i2,0xff
-+ $PTR_ADD $i0,$Tbl
-+ $PTR_ADD $i1,$Tbl
-+ $PTR_ADD $i2,$Tbl
-+ $PTR_ADD $i3,$Tbl
-+ lbu $i0,1024($i0)
-+ lbu $i1,1024($i1)
-+ lbu $i2,1024($i2)
-+ lbu $i3,1024($i3)
-+ sll $i0,24
-+ sll $i1,16
-+ sll $i2,8
-+
-+ xor $rk4,$i0
-+ xor $rk4,$i1
-+ xor $rk4,$i2
-+ xor $rk4,$i3
-+
-+ xor $rk5,$rk4
-+ xor $rk6,$rk5
-+ xor $rk7,$rk6
-+
-+ $PTR_ADD $key,32
-+ .set noreorder
-+ b .L256bits
-+ $PTR_ADD $rcon,4
-+
-+.L256bits_done:
-+ sw $rk0,32($key)
-+ sw $rk1,36($key)
-+ sw $rk2,40($key)
-+ li $cnt,14
-+ sw $rk3,44($key)
-+ li $t0,0
-+ sw $cnt,48($key)
-+ $PTR_SUB $key,12*16
-+
-+.Lekey_done:
-+ jr $ra
-+ nop
-+.end _mips_AES_set_encrypt_key
-+
-+.globl AES_set_encrypt_key
-+.ent AES_set_encrypt_key
-+AES_set_encrypt_key:
-+ .frame $sp,$FRAMESIZE,$ra
-+ .mask $SAVED_REGS_MASK,-$SZREG
-+ .set noreorder
-+___
-+$code.=<<___ if ($flavour =~ /o32/i); # o32 PIC-ification
-+ .cpload $pf
-+___
-+$code.=<<___;
-+ $PTR_SUB $sp,$FRAMESIZE
-+ $REG_S $ra,$FRAMESIZE-1*$SZREG($sp)
-+ $REG_S $fp,$FRAMESIZE-2*$SZREG($sp)
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
-+ $REG_S $s3,$FRAMESIZE-3*$SZREG($sp)
-+ $REG_S $s2,$FRAMESIZE-4*$SZREG($sp)
-+ $REG_S $s1,$FRAMESIZE-5*$SZREG($sp)
-+ $REG_S $s0,$FRAMESIZE-6*$SZREG($sp)
-+ $REG_S $gp,$FRAMESIZE-7*$SZREG($sp)
-+___
-+$code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification
-+ .cplocal $Tbl
-+ .cpsetup $pf,$zero,AES_set_encrypt_key
-+___
-+$code.=<<___;
-+ .set reorder
-+ la $Tbl,AES_Te # PIC-ified 'load address'
-+
-+ bal _mips_AES_set_encrypt_key
-+
-+ .set noreorder
-+ move $a0,$t0
-+ $REG_L $ra,$FRAMESIZE-1*$SZREG($sp)
-+ $REG_L $fp,$FRAMESIZE-2*$SZREG($sp)
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+ $REG_L $s3,$FRAMESIZE-11*$SZREG($sp)
-+ $REG_L $s2,$FRAMESIZE-12*$SZREG($sp)
-+ $REG_L $s1,$FRAMESIZE-13*$SZREG($sp)
-+ $REG_L $s0,$FRAMESIZE-14*$SZREG($sp)
-+ $REG_L $gp,$FRAMESIZE-15*$SZREG($sp)
-+___
-+$code.=<<___;
-+ jr $ra
-+ $PTR_ADD $sp,$FRAMESIZE
-+.end AES_set_encrypt_key
-+___
-+
-+my ($head,$tail)=($inp,$bits);
-+my ($tp1,$tp2,$tp4,$tp8,$tp9,$tpb,$tpd,$tpe)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3);
-+my ($m,$x80808080,$x7f7f7f7f,$x1b1b1b1b)=($at,$t0,$t1,$t2);
-+$code.=<<___;
-+.align 5
-+.globl AES_set_decrypt_key
-+.ent AES_set_decrypt_key
-+AES_set_decrypt_key:
-+ .frame $sp,$FRAMESIZE,$ra
-+ .mask $SAVED_REGS_MASK,-$SZREG
-+ .set noreorder
-+___
-+$code.=<<___ if ($flavour =~ /o32/i); # o32 PIC-ification
-+ .cpload $pf
-+___
-+$code.=<<___;
-+ $PTR_SUB $sp,$FRAMESIZE
-+ $REG_S $ra,$FRAMESIZE-1*$SZREG($sp)
-+ $REG_S $fp,$FRAMESIZE-2*$SZREG($sp)
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
-+ $REG_S $s3,$FRAMESIZE-3*$SZREG($sp)
-+ $REG_S $s2,$FRAMESIZE-4*$SZREG($sp)
-+ $REG_S $s1,$FRAMESIZE-5*$SZREG($sp)
-+ $REG_S $s0,$FRAMESIZE-6*$SZREG($sp)
-+ $REG_S $gp,$FRAMESIZE-7*$SZREG($sp)
-+___
-+$code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification
-+ .cplocal $Tbl
-+ .cpsetup $pf,$zero,AES_set_decrypt_key
-+___
-+$code.=<<___;
-+ .set reorder
-+ la $Tbl,AES_Te # PIC-ified 'load address'
-+
-+ bal _mips_AES_set_encrypt_key
-+
-+ bltz $t0,.Ldkey_done
-+
-+ sll $at,$cnt,4
-+ $PTR_ADD $head,$key,0
-+ $PTR_ADD $tail,$key,$at
-+.align 4
-+.Lswap:
-+ lw $rk0,0($head)
-+ lw $rk1,4($head)
-+ lw $rk2,8($head)
-+ lw $rk3,12($head)
-+ lw $rk4,0($tail)
-+ lw $rk5,4($tail)
-+ lw $rk6,8($tail)
-+ lw $rk7,12($tail)
-+ sw $rk0,0($tail)
-+ sw $rk1,4($tail)
-+ sw $rk2,8($tail)
-+ sw $rk3,12($tail)
-+ $PTR_ADD $head,16
-+ $PTR_SUB $tail,16
-+ sw $rk4,-16($head)
-+ sw $rk5,-12($head)
-+ sw $rk6,-8($head)
-+ sw $rk7,-4($head)
-+ bne $head,$tail,.Lswap
-+
-+ lw $tp1,16($key) # modulo-scheduled
-+ lui $x80808080,0x8080
-+ sub $cnt,1
-+ or $x80808080,0x8080
-+ sll $cnt,2
-+ $PTR_ADD $key,16
-+ lui $x1b1b1b1b,0x1b1b
-+ nor $x7f7f7f7f,$zero,$x80808080
-+ or $x1b1b1b1b,0x1b1b
-+.align 4
-+.Lmix:
-+ and $m,$tp1,$x80808080
-+ and $tp2,$tp1,$x7f7f7f7f
-+ srl $tp4,$m,7
-+ addu $tp2,$tp2 # tp2<<1
-+ subu $m,$tp4
-+ and $m,$x1b1b1b1b
-+ xor $tp2,$m
-+
-+ and $m,$tp2,$x80808080
-+ and $tp4,$tp2,$x7f7f7f7f
-+ srl $tp8,$m,7
-+ addu $tp4,$tp4 # tp4<<1
-+ subu $m,$tp8
-+ and $m,$x1b1b1b1b
-+ xor $tp4,$m
-+
-+ and $m,$tp4,$x80808080
-+ and $tp8,$tp4,$x7f7f7f7f
-+ srl $tp9,$m,7
-+ addu $tp8,$tp8 # tp8<<1
-+ subu $m,$tp9
-+ and $m,$x1b1b1b1b
-+ xor $tp8,$m
-+
-+ xor $tp9,$tp8,$tp1
-+ xor $tpe,$tp8,$tp4
-+ xor $tpb,$tp9,$tp2
-+ xor $tpd,$tp9,$tp4
-+
-+ _ror $tp1,$tpd,16
-+ xor $tpe,$tp2
-+ _ror $tp2,$tpd,-16
-+ xor $tpe,$tp1
-+ _ror $tp1,$tp9,8
-+ xor $tpe,$tp2
-+ _ror $tp2,$tp9,-24
-+ xor $tpe,$tp1
-+ _ror $tp1,$tpb,24
-+ xor $tpe,$tp2
-+ _ror $tp2,$tpb,-8
-+ xor $tpe,$tp1
-+ lw $tp1,4($key) # modulo-scheduled
-+ xor $tpe,$tp2
-+ sub $cnt,1
-+ sw $tpe,0($key)
-+ $PTR_ADD $key,4
-+ bnez $cnt,.Lmix
-+
-+ li $t0,0
-+.Ldkey_done:
-+ .set noreorder
-+ move $a0,$t0
-+ $REG_L $ra,$FRAMESIZE-1*$SZREG($sp)
-+ $REG_L $fp,$FRAMESIZE-2*$SZREG($sp)
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+ $REG_L $s3,$FRAMESIZE-11*$SZREG($sp)
-+ $REG_L $s2,$FRAMESIZE-12*$SZREG($sp)
-+ $REG_L $s1,$FRAMESIZE-13*$SZREG($sp)
-+ $REG_L $s0,$FRAMESIZE-14*$SZREG($sp)
-+ $REG_L $gp,$FRAMESIZE-15*$SZREG($sp)
-+___
-+$code.=<<___;
-+ jr $ra
-+ $PTR_ADD $sp,$FRAMESIZE
-+.end AES_set_decrypt_key
-+___
-+}}}
-+
-+######################################################################
-+# Tables are kept in endian-neutral manner
-+$code.=<<___;
-+.rdata
-+.align 6
-+AES_Te:
-+.byte 0xc6,0x63,0x63,0xa5, 0xf8,0x7c,0x7c,0x84 # Te0
-+.byte 0xee,0x77,0x77,0x99, 0xf6,0x7b,0x7b,0x8d
-+.byte 0xff,0xf2,0xf2,0x0d, 0xd6,0x6b,0x6b,0xbd
-+.byte 0xde,0x6f,0x6f,0xb1, 0x91,0xc5,0xc5,0x54
-+.byte 0x60,0x30,0x30,0x50, 0x02,0x01,0x01,0x03
-+.byte 0xce,0x67,0x67,0xa9, 0x56,0x2b,0x2b,0x7d
-+.byte 0xe7,0xfe,0xfe,0x19, 0xb5,0xd7,0xd7,0x62
-+.byte 0x4d,0xab,0xab,0xe6, 0xec,0x76,0x76,0x9a
-+.byte 0x8f,0xca,0xca,0x45, 0x1f,0x82,0x82,0x9d
-+.byte 0x89,0xc9,0xc9,0x40, 0xfa,0x7d,0x7d,0x87
-+.byte 0xef,0xfa,0xfa,0x15, 0xb2,0x59,0x59,0xeb
-+.byte 0x8e,0x47,0x47,0xc9, 0xfb,0xf0,0xf0,0x0b
-+.byte 0x41,0xad,0xad,0xec, 0xb3,0xd4,0xd4,0x67
-+.byte 0x5f,0xa2,0xa2,0xfd, 0x45,0xaf,0xaf,0xea
-+.byte 0x23,0x9c,0x9c,0xbf, 0x53,0xa4,0xa4,0xf7
-+.byte 0xe4,0x72,0x72,0x96, 0x9b,0xc0,0xc0,0x5b
-+.byte 0x75,0xb7,0xb7,0xc2, 0xe1,0xfd,0xfd,0x1c
-+.byte 0x3d,0x93,0x93,0xae, 0x4c,0x26,0x26,0x6a
-+.byte 0x6c,0x36,0x36,0x5a, 0x7e,0x3f,0x3f,0x41
-+.byte 0xf5,0xf7,0xf7,0x02, 0x83,0xcc,0xcc,0x4f
-+.byte 0x68,0x34,0x34,0x5c, 0x51,0xa5,0xa5,0xf4
-+.byte 0xd1,0xe5,0xe5,0x34, 0xf9,0xf1,0xf1,0x08
-+.byte 0xe2,0x71,0x71,0x93, 0xab,0xd8,0xd8,0x73
-+.byte 0x62,0x31,0x31,0x53, 0x2a,0x15,0x15,0x3f
-+.byte 0x08,0x04,0x04,0x0c, 0x95,0xc7,0xc7,0x52
-+.byte 0x46,0x23,0x23,0x65, 0x9d,0xc3,0xc3,0x5e
-+.byte 0x30,0x18,0x18,0x28, 0x37,0x96,0x96,0xa1
-+.byte 0x0a,0x05,0x05,0x0f, 0x2f,0x9a,0x9a,0xb5
-+.byte 0x0e,0x07,0x07,0x09, 0x24,0x12,0x12,0x36
-+.byte 0x1b,0x80,0x80,0x9b, 0xdf,0xe2,0xe2,0x3d
-+.byte 0xcd,0xeb,0xeb,0x26, 0x4e,0x27,0x27,0x69
-+.byte 0x7f,0xb2,0xb2,0xcd, 0xea,0x75,0x75,0x9f
-+.byte 0x12,0x09,0x09,0x1b, 0x1d,0x83,0x83,0x9e
-+.byte 0x58,0x2c,0x2c,0x74, 0x34,0x1a,0x1a,0x2e
-+.byte 0x36,0x1b,0x1b,0x2d, 0xdc,0x6e,0x6e,0xb2
-+.byte 0xb4,0x5a,0x5a,0xee, 0x5b,0xa0,0xa0,0xfb
-+.byte 0xa4,0x52,0x52,0xf6, 0x76,0x3b,0x3b,0x4d
-+.byte 0xb7,0xd6,0xd6,0x61, 0x7d,0xb3,0xb3,0xce
-+.byte 0x52,0x29,0x29,0x7b, 0xdd,0xe3,0xe3,0x3e
-+.byte 0x5e,0x2f,0x2f,0x71, 0x13,0x84,0x84,0x97
-+.byte 0xa6,0x53,0x53,0xf5, 0xb9,0xd1,0xd1,0x68
-+.byte 0x00,0x00,0x00,0x00, 0xc1,0xed,0xed,0x2c
-+.byte 0x40,0x20,0x20,0x60, 0xe3,0xfc,0xfc,0x1f
-+.byte 0x79,0xb1,0xb1,0xc8, 0xb6,0x5b,0x5b,0xed
-+.byte 0xd4,0x6a,0x6a,0xbe, 0x8d,0xcb,0xcb,0x46
-+.byte 0x67,0xbe,0xbe,0xd9, 0x72,0x39,0x39,0x4b
-+.byte 0x94,0x4a,0x4a,0xde, 0x98,0x4c,0x4c,0xd4
-+.byte 0xb0,0x58,0x58,0xe8, 0x85,0xcf,0xcf,0x4a
-+.byte 0xbb,0xd0,0xd0,0x6b, 0xc5,0xef,0xef,0x2a
-+.byte 0x4f,0xaa,0xaa,0xe5, 0xed,0xfb,0xfb,0x16
-+.byte 0x86,0x43,0x43,0xc5, 0x9a,0x4d,0x4d,0xd7
-+.byte 0x66,0x33,0x33,0x55, 0x11,0x85,0x85,0x94
-+.byte 0x8a,0x45,0x45,0xcf, 0xe9,0xf9,0xf9,0x10
-+.byte 0x04,0x02,0x02,0x06, 0xfe,0x7f,0x7f,0x81
-+.byte 0xa0,0x50,0x50,0xf0, 0x78,0x3c,0x3c,0x44
-+.byte 0x25,0x9f,0x9f,0xba, 0x4b,0xa8,0xa8,0xe3
-+.byte 0xa2,0x51,0x51,0xf3, 0x5d,0xa3,0xa3,0xfe
-+.byte 0x80,0x40,0x40,0xc0, 0x05,0x8f,0x8f,0x8a
-+.byte 0x3f,0x92,0x92,0xad, 0x21,0x9d,0x9d,0xbc
-+.byte 0x70,0x38,0x38,0x48, 0xf1,0xf5,0xf5,0x04
-+.byte 0x63,0xbc,0xbc,0xdf, 0x77,0xb6,0xb6,0xc1
-+.byte 0xaf,0xda,0xda,0x75, 0x42,0x21,0x21,0x63
-+.byte 0x20,0x10,0x10,0x30, 0xe5,0xff,0xff,0x1a
-+.byte 0xfd,0xf3,0xf3,0x0e, 0xbf,0xd2,0xd2,0x6d
-+.byte 0x81,0xcd,0xcd,0x4c, 0x18,0x0c,0x0c,0x14
-+.byte 0x26,0x13,0x13,0x35, 0xc3,0xec,0xec,0x2f
-+.byte 0xbe,0x5f,0x5f,0xe1, 0x35,0x97,0x97,0xa2
-+.byte 0x88,0x44,0x44,0xcc, 0x2e,0x17,0x17,0x39
-+.byte 0x93,0xc4,0xc4,0x57, 0x55,0xa7,0xa7,0xf2
-+.byte 0xfc,0x7e,0x7e,0x82, 0x7a,0x3d,0x3d,0x47
-+.byte 0xc8,0x64,0x64,0xac, 0xba,0x5d,0x5d,0xe7
-+.byte 0x32,0x19,0x19,0x2b, 0xe6,0x73,0x73,0x95
-+.byte 0xc0,0x60,0x60,0xa0, 0x19,0x81,0x81,0x98
-+.byte 0x9e,0x4f,0x4f,0xd1, 0xa3,0xdc,0xdc,0x7f
-+.byte 0x44,0x22,0x22,0x66, 0x54,0x2a,0x2a,0x7e
-+.byte 0x3b,0x90,0x90,0xab, 0x0b,0x88,0x88,0x83
-+.byte 0x8c,0x46,0x46,0xca, 0xc7,0xee,0xee,0x29
-+.byte 0x6b,0xb8,0xb8,0xd3, 0x28,0x14,0x14,0x3c
-+.byte 0xa7,0xde,0xde,0x79, 0xbc,0x5e,0x5e,0xe2
-+.byte 0x16,0x0b,0x0b,0x1d, 0xad,0xdb,0xdb,0x76
-+.byte 0xdb,0xe0,0xe0,0x3b, 0x64,0x32,0x32,0x56
-+.byte 0x74,0x3a,0x3a,0x4e, 0x14,0x0a,0x0a,0x1e
-+.byte 0x92,0x49,0x49,0xdb, 0x0c,0x06,0x06,0x0a
-+.byte 0x48,0x24,0x24,0x6c, 0xb8,0x5c,0x5c,0xe4
-+.byte 0x9f,0xc2,0xc2,0x5d, 0xbd,0xd3,0xd3,0x6e
-+.byte 0x43,0xac,0xac,0xef, 0xc4,0x62,0x62,0xa6
-+.byte 0x39,0x91,0x91,0xa8, 0x31,0x95,0x95,0xa4
-+.byte 0xd3,0xe4,0xe4,0x37, 0xf2,0x79,0x79,0x8b
-+.byte 0xd5,0xe7,0xe7,0x32, 0x8b,0xc8,0xc8,0x43
-+.byte 0x6e,0x37,0x37,0x59, 0xda,0x6d,0x6d,0xb7
-+.byte 0x01,0x8d,0x8d,0x8c, 0xb1,0xd5,0xd5,0x64
-+.byte 0x9c,0x4e,0x4e,0xd2, 0x49,0xa9,0xa9,0xe0
-+.byte 0xd8,0x6c,0x6c,0xb4, 0xac,0x56,0x56,0xfa
-+.byte 0xf3,0xf4,0xf4,0x07, 0xcf,0xea,0xea,0x25
-+.byte 0xca,0x65,0x65,0xaf, 0xf4,0x7a,0x7a,0x8e
-+.byte 0x47,0xae,0xae,0xe9, 0x10,0x08,0x08,0x18
-+.byte 0x6f,0xba,0xba,0xd5, 0xf0,0x78,0x78,0x88
-+.byte 0x4a,0x25,0x25,0x6f, 0x5c,0x2e,0x2e,0x72
-+.byte 0x38,0x1c,0x1c,0x24, 0x57,0xa6,0xa6,0xf1
-+.byte 0x73,0xb4,0xb4,0xc7, 0x97,0xc6,0xc6,0x51
-+.byte 0xcb,0xe8,0xe8,0x23, 0xa1,0xdd,0xdd,0x7c
-+.byte 0xe8,0x74,0x74,0x9c, 0x3e,0x1f,0x1f,0x21
-+.byte 0x96,0x4b,0x4b,0xdd, 0x61,0xbd,0xbd,0xdc
-+.byte 0x0d,0x8b,0x8b,0x86, 0x0f,0x8a,0x8a,0x85
-+.byte 0xe0,0x70,0x70,0x90, 0x7c,0x3e,0x3e,0x42
-+.byte 0x71,0xb5,0xb5,0xc4, 0xcc,0x66,0x66,0xaa
-+.byte 0x90,0x48,0x48,0xd8, 0x06,0x03,0x03,0x05
-+.byte 0xf7,0xf6,0xf6,0x01, 0x1c,0x0e,0x0e,0x12
-+.byte 0xc2,0x61,0x61,0xa3, 0x6a,0x35,0x35,0x5f
-+.byte 0xae,0x57,0x57,0xf9, 0x69,0xb9,0xb9,0xd0
-+.byte 0x17,0x86,0x86,0x91, 0x99,0xc1,0xc1,0x58
-+.byte 0x3a,0x1d,0x1d,0x27, 0x27,0x9e,0x9e,0xb9
-+.byte 0xd9,0xe1,0xe1,0x38, 0xeb,0xf8,0xf8,0x13
-+.byte 0x2b,0x98,0x98,0xb3, 0x22,0x11,0x11,0x33
-+.byte 0xd2,0x69,0x69,0xbb, 0xa9,0xd9,0xd9,0x70
-+.byte 0x07,0x8e,0x8e,0x89, 0x33,0x94,0x94,0xa7
-+.byte 0x2d,0x9b,0x9b,0xb6, 0x3c,0x1e,0x1e,0x22
-+.byte 0x15,0x87,0x87,0x92, 0xc9,0xe9,0xe9,0x20
-+.byte 0x87,0xce,0xce,0x49, 0xaa,0x55,0x55,0xff
-+.byte 0x50,0x28,0x28,0x78, 0xa5,0xdf,0xdf,0x7a
-+.byte 0x03,0x8c,0x8c,0x8f, 0x59,0xa1,0xa1,0xf8
-+.byte 0x09,0x89,0x89,0x80, 0x1a,0x0d,0x0d,0x17
-+.byte 0x65,0xbf,0xbf,0xda, 0xd7,0xe6,0xe6,0x31
-+.byte 0x84,0x42,0x42,0xc6, 0xd0,0x68,0x68,0xb8
-+.byte 0x82,0x41,0x41,0xc3, 0x29,0x99,0x99,0xb0
-+.byte 0x5a,0x2d,0x2d,0x77, 0x1e,0x0f,0x0f,0x11
-+.byte 0x7b,0xb0,0xb0,0xcb, 0xa8,0x54,0x54,0xfc
-+.byte 0x6d,0xbb,0xbb,0xd6, 0x2c,0x16,0x16,0x3a
-+
-+.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 # Te4
-+.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
-+.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
-+.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
-+.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
-+.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
-+.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
-+.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
-+.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
-+.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
-+.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
-+.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
-+.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
-+.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
-+.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
-+.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
-+.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
-+.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
-+.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
-+.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
-+.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
-+.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
-+.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
-+.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
-+.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
-+.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
-+.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
-+.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
-+.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
-+.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
-+.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
-+.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
-+
-+.byte 0x01,0x00,0x00,0x00, 0x02,0x00,0x00,0x00 # rcon
-+.byte 0x04,0x00,0x00,0x00, 0x08,0x00,0x00,0x00
-+.byte 0x10,0x00,0x00,0x00, 0x20,0x00,0x00,0x00
-+.byte 0x40,0x00,0x00,0x00, 0x80,0x00,0x00,0x00
-+.byte 0x1B,0x00,0x00,0x00, 0x36,0x00,0x00,0x00
-+
-+.align 6
-+AES_Td:
-+.byte 0x51,0xf4,0xa7,0x50, 0x7e,0x41,0x65,0x53 # Td0
-+.byte 0x1a,0x17,0xa4,0xc3, 0x3a,0x27,0x5e,0x96
-+.byte 0x3b,0xab,0x6b,0xcb, 0x1f,0x9d,0x45,0xf1
-+.byte 0xac,0xfa,0x58,0xab, 0x4b,0xe3,0x03,0x93
-+.byte 0x20,0x30,0xfa,0x55, 0xad,0x76,0x6d,0xf6
-+.byte 0x88,0xcc,0x76,0x91, 0xf5,0x02,0x4c,0x25
-+.byte 0x4f,0xe5,0xd7,0xfc, 0xc5,0x2a,0xcb,0xd7
-+.byte 0x26,0x35,0x44,0x80, 0xb5,0x62,0xa3,0x8f
-+.byte 0xde,0xb1,0x5a,0x49, 0x25,0xba,0x1b,0x67
-+.byte 0x45,0xea,0x0e,0x98, 0x5d,0xfe,0xc0,0xe1
-+.byte 0xc3,0x2f,0x75,0x02, 0x81,0x4c,0xf0,0x12
-+.byte 0x8d,0x46,0x97,0xa3, 0x6b,0xd3,0xf9,0xc6
-+.byte 0x03,0x8f,0x5f,0xe7, 0x15,0x92,0x9c,0x95
-+.byte 0xbf,0x6d,0x7a,0xeb, 0x95,0x52,0x59,0xda
-+.byte 0xd4,0xbe,0x83,0x2d, 0x58,0x74,0x21,0xd3
-+.byte 0x49,0xe0,0x69,0x29, 0x8e,0xc9,0xc8,0x44
-+.byte 0x75,0xc2,0x89,0x6a, 0xf4,0x8e,0x79,0x78
-+.byte 0x99,0x58,0x3e,0x6b, 0x27,0xb9,0x71,0xdd
-+.byte 0xbe,0xe1,0x4f,0xb6, 0xf0,0x88,0xad,0x17
-+.byte 0xc9,0x20,0xac,0x66, 0x7d,0xce,0x3a,0xb4
-+.byte 0x63,0xdf,0x4a,0x18, 0xe5,0x1a,0x31,0x82
-+.byte 0x97,0x51,0x33,0x60, 0x62,0x53,0x7f,0x45
-+.byte 0xb1,0x64,0x77,0xe0, 0xbb,0x6b,0xae,0x84
-+.byte 0xfe,0x81,0xa0,0x1c, 0xf9,0x08,0x2b,0x94
-+.byte 0x70,0x48,0x68,0x58, 0x8f,0x45,0xfd,0x19
-+.byte 0x94,0xde,0x6c,0x87, 0x52,0x7b,0xf8,0xb7
-+.byte 0xab,0x73,0xd3,0x23, 0x72,0x4b,0x02,0xe2
-+.byte 0xe3,0x1f,0x8f,0x57, 0x66,0x55,0xab,0x2a
-+.byte 0xb2,0xeb,0x28,0x07, 0x2f,0xb5,0xc2,0x03
-+.byte 0x86,0xc5,0x7b,0x9a, 0xd3,0x37,0x08,0xa5
-+.byte 0x30,0x28,0x87,0xf2, 0x23,0xbf,0xa5,0xb2
-+.byte 0x02,0x03,0x6a,0xba, 0xed,0x16,0x82,0x5c
-+.byte 0x8a,0xcf,0x1c,0x2b, 0xa7,0x79,0xb4,0x92
-+.byte 0xf3,0x07,0xf2,0xf0, 0x4e,0x69,0xe2,0xa1
-+.byte 0x65,0xda,0xf4,0xcd, 0x06,0x05,0xbe,0xd5
-+.byte 0xd1,0x34,0x62,0x1f, 0xc4,0xa6,0xfe,0x8a
-+.byte 0x34,0x2e,0x53,0x9d, 0xa2,0xf3,0x55,0xa0
-+.byte 0x05,0x8a,0xe1,0x32, 0xa4,0xf6,0xeb,0x75
-+.byte 0x0b,0x83,0xec,0x39, 0x40,0x60,0xef,0xaa
-+.byte 0x5e,0x71,0x9f,0x06, 0xbd,0x6e,0x10,0x51
-+.byte 0x3e,0x21,0x8a,0xf9, 0x96,0xdd,0x06,0x3d
-+.byte 0xdd,0x3e,0x05,0xae, 0x4d,0xe6,0xbd,0x46
-+.byte 0x91,0x54,0x8d,0xb5, 0x71,0xc4,0x5d,0x05
-+.byte 0x04,0x06,0xd4,0x6f, 0x60,0x50,0x15,0xff
-+.byte 0x19,0x98,0xfb,0x24, 0xd6,0xbd,0xe9,0x97
-+.byte 0x89,0x40,0x43,0xcc, 0x67,0xd9,0x9e,0x77
-+.byte 0xb0,0xe8,0x42,0xbd, 0x07,0x89,0x8b,0x88
-+.byte 0xe7,0x19,0x5b,0x38, 0x79,0xc8,0xee,0xdb
-+.byte 0xa1,0x7c,0x0a,0x47, 0x7c,0x42,0x0f,0xe9
-+.byte 0xf8,0x84,0x1e,0xc9, 0x00,0x00,0x00,0x00
-+.byte 0x09,0x80,0x86,0x83, 0x32,0x2b,0xed,0x48
-+.byte 0x1e,0x11,0x70,0xac, 0x6c,0x5a,0x72,0x4e
-+.byte 0xfd,0x0e,0xff,0xfb, 0x0f,0x85,0x38,0x56
-+.byte 0x3d,0xae,0xd5,0x1e, 0x36,0x2d,0x39,0x27
-+.byte 0x0a,0x0f,0xd9,0x64, 0x68,0x5c,0xa6,0x21
-+.byte 0x9b,0x5b,0x54,0xd1, 0x24,0x36,0x2e,0x3a
-+.byte 0x0c,0x0a,0x67,0xb1, 0x93,0x57,0xe7,0x0f
-+.byte 0xb4,0xee,0x96,0xd2, 0x1b,0x9b,0x91,0x9e
-+.byte 0x80,0xc0,0xc5,0x4f, 0x61,0xdc,0x20,0xa2
-+.byte 0x5a,0x77,0x4b,0x69, 0x1c,0x12,0x1a,0x16
-+.byte 0xe2,0x93,0xba,0x0a, 0xc0,0xa0,0x2a,0xe5
-+.byte 0x3c,0x22,0xe0,0x43, 0x12,0x1b,0x17,0x1d
-+.byte 0x0e,0x09,0x0d,0x0b, 0xf2,0x8b,0xc7,0xad
-+.byte 0x2d,0xb6,0xa8,0xb9, 0x14,0x1e,0xa9,0xc8
-+.byte 0x57,0xf1,0x19,0x85, 0xaf,0x75,0x07,0x4c
-+.byte 0xee,0x99,0xdd,0xbb, 0xa3,0x7f,0x60,0xfd
-+.byte 0xf7,0x01,0x26,0x9f, 0x5c,0x72,0xf5,0xbc
-+.byte 0x44,0x66,0x3b,0xc5, 0x5b,0xfb,0x7e,0x34
-+.byte 0x8b,0x43,0x29,0x76, 0xcb,0x23,0xc6,0xdc
-+.byte 0xb6,0xed,0xfc,0x68, 0xb8,0xe4,0xf1,0x63
-+.byte 0xd7,0x31,0xdc,0xca, 0x42,0x63,0x85,0x10
-+.byte 0x13,0x97,0x22,0x40, 0x84,0xc6,0x11,0x20
-+.byte 0x85,0x4a,0x24,0x7d, 0xd2,0xbb,0x3d,0xf8
-+.byte 0xae,0xf9,0x32,0x11, 0xc7,0x29,0xa1,0x6d
-+.byte 0x1d,0x9e,0x2f,0x4b, 0xdc,0xb2,0x30,0xf3
-+.byte 0x0d,0x86,0x52,0xec, 0x77,0xc1,0xe3,0xd0
-+.byte 0x2b,0xb3,0x16,0x6c, 0xa9,0x70,0xb9,0x99
-+.byte 0x11,0x94,0x48,0xfa, 0x47,0xe9,0x64,0x22
-+.byte 0xa8,0xfc,0x8c,0xc4, 0xa0,0xf0,0x3f,0x1a
-+.byte 0x56,0x7d,0x2c,0xd8, 0x22,0x33,0x90,0xef
-+.byte 0x87,0x49,0x4e,0xc7, 0xd9,0x38,0xd1,0xc1
-+.byte 0x8c,0xca,0xa2,0xfe, 0x98,0xd4,0x0b,0x36
-+.byte 0xa6,0xf5,0x81,0xcf, 0xa5,0x7a,0xde,0x28
-+.byte 0xda,0xb7,0x8e,0x26, 0x3f,0xad,0xbf,0xa4
-+.byte 0x2c,0x3a,0x9d,0xe4, 0x50,0x78,0x92,0x0d
-+.byte 0x6a,0x5f,0xcc,0x9b, 0x54,0x7e,0x46,0x62
-+.byte 0xf6,0x8d,0x13,0xc2, 0x90,0xd8,0xb8,0xe8
-+.byte 0x2e,0x39,0xf7,0x5e, 0x82,0xc3,0xaf,0xf5
-+.byte 0x9f,0x5d,0x80,0xbe, 0x69,0xd0,0x93,0x7c
-+.byte 0x6f,0xd5,0x2d,0xa9, 0xcf,0x25,0x12,0xb3
-+.byte 0xc8,0xac,0x99,0x3b, 0x10,0x18,0x7d,0xa7
-+.byte 0xe8,0x9c,0x63,0x6e, 0xdb,0x3b,0xbb,0x7b
-+.byte 0xcd,0x26,0x78,0x09, 0x6e,0x59,0x18,0xf4
-+.byte 0xec,0x9a,0xb7,0x01, 0x83,0x4f,0x9a,0xa8
-+.byte 0xe6,0x95,0x6e,0x65, 0xaa,0xff,0xe6,0x7e
-+.byte 0x21,0xbc,0xcf,0x08, 0xef,0x15,0xe8,0xe6
-+.byte 0xba,0xe7,0x9b,0xd9, 0x4a,0x6f,0x36,0xce
-+.byte 0xea,0x9f,0x09,0xd4, 0x29,0xb0,0x7c,0xd6
-+.byte 0x31,0xa4,0xb2,0xaf, 0x2a,0x3f,0x23,0x31
-+.byte 0xc6,0xa5,0x94,0x30, 0x35,0xa2,0x66,0xc0
-+.byte 0x74,0x4e,0xbc,0x37, 0xfc,0x82,0xca,0xa6
-+.byte 0xe0,0x90,0xd0,0xb0, 0x33,0xa7,0xd8,0x15
-+.byte 0xf1,0x04,0x98,0x4a, 0x41,0xec,0xda,0xf7
-+.byte 0x7f,0xcd,0x50,0x0e, 0x17,0x91,0xf6,0x2f
-+.byte 0x76,0x4d,0xd6,0x8d, 0x43,0xef,0xb0,0x4d
-+.byte 0xcc,0xaa,0x4d,0x54, 0xe4,0x96,0x04,0xdf
-+.byte 0x9e,0xd1,0xb5,0xe3, 0x4c,0x6a,0x88,0x1b
-+.byte 0xc1,0x2c,0x1f,0xb8, 0x46,0x65,0x51,0x7f
-+.byte 0x9d,0x5e,0xea,0x04, 0x01,0x8c,0x35,0x5d
-+.byte 0xfa,0x87,0x74,0x73, 0xfb,0x0b,0x41,0x2e
-+.byte 0xb3,0x67,0x1d,0x5a, 0x92,0xdb,0xd2,0x52
-+.byte 0xe9,0x10,0x56,0x33, 0x6d,0xd6,0x47,0x13
-+.byte 0x9a,0xd7,0x61,0x8c, 0x37,0xa1,0x0c,0x7a
-+.byte 0x59,0xf8,0x14,0x8e, 0xeb,0x13,0x3c,0x89
-+.byte 0xce,0xa9,0x27,0xee, 0xb7,0x61,0xc9,0x35
-+.byte 0xe1,0x1c,0xe5,0xed, 0x7a,0x47,0xb1,0x3c
-+.byte 0x9c,0xd2,0xdf,0x59, 0x55,0xf2,0x73,0x3f
-+.byte 0x18,0x14,0xce,0x79, 0x73,0xc7,0x37,0xbf
-+.byte 0x53,0xf7,0xcd,0xea, 0x5f,0xfd,0xaa,0x5b
-+.byte 0xdf,0x3d,0x6f,0x14, 0x78,0x44,0xdb,0x86
-+.byte 0xca,0xaf,0xf3,0x81, 0xb9,0x68,0xc4,0x3e
-+.byte 0x38,0x24,0x34,0x2c, 0xc2,0xa3,0x40,0x5f
-+.byte 0x16,0x1d,0xc3,0x72, 0xbc,0xe2,0x25,0x0c
-+.byte 0x28,0x3c,0x49,0x8b, 0xff,0x0d,0x95,0x41
-+.byte 0x39,0xa8,0x01,0x71, 0x08,0x0c,0xb3,0xde
-+.byte 0xd8,0xb4,0xe4,0x9c, 0x64,0x56,0xc1,0x90
-+.byte 0x7b,0xcb,0x84,0x61, 0xd5,0x32,0xb6,0x70
-+.byte 0x48,0x6c,0x5c,0x74, 0xd0,0xb8,0x57,0x42
-+
-+.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 # Td4
-+.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
-+.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
-+.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
-+.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
-+.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
-+.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
-+.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
-+.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
-+.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
-+.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
-+.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
-+.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
-+.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
-+.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
-+.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
-+.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
-+.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
-+.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
-+.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
-+.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
-+.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
-+.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
-+.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
-+.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
-+.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
-+.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
-+.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
-+.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
-+.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
-+.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
-+.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
-+___
-+
-+foreach (split("\n",$code)) {
-+ s/\`([^\`]*)\`/eval $1/ge;
-+
-+ # made-up _instructions, _xtr, _ins, _ror and _bias, cope
-+ # with byte order dependencies...
-+ if (/^\s+_/) {
-+ s/(_[a-z]+\s+)(\$[0-9]+),([^,]+)(#.*)*$/$1$2,$2,$3/;
-+
-+ s/_xtr\s+(\$[0-9]+),(\$[0-9]+),([0-9]+(\-2)*)/
-+ sprintf("srl\t$1,$2,%d",$big_endian ? eval($3)
-+ : eval("24-$3"))/e or
-+ s/_ins\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
-+ sprintf("sll\t$1,$2,%d",$big_endian ? eval($3)
-+ : eval("24-$3"))/e or
-+ s/_ror\s+(\$[0-9]+),(\$[0-9]+),(\-?[0-9]+)/
-+ sprintf("srl\t$1,$2,%d",$big_endian ? eval($3)
-+ : eval("$3*-1"))/e or
-+ s/_bias\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
-+ sprintf("sll\t$1,$2,%d",$big_endian ? eval($3)
-+ : eval("($3-16)&31"))/e;
-+
-+ s/srl\s+(\$[0-9]+),(\$[0-9]+),\-([0-9]+)/
-+ sprintf("sll\t$1,$2,$3")/e or
-+ s/srl\s+(\$[0-9]+),(\$[0-9]+),0/
-+ sprintf("and\t$1,$2,0xff")/e or
-+ s/(sll\s+\$[0-9]+,\$[0-9]+,0)/#$1/;
-+ }
-+
-+ # convert lwl/lwr and swr/swl to little-endian order
-+ if (!$big_endian && /^\s+[sl]w[lr]\s+/) {
-+ s/([sl]wl.*)([0-9]+)\((\$[0-9]+)\)/
-+ sprintf("$1%d($3)",eval("$2-$2%4+($2%4-1)&3"))/e or
-+ s/([sl]wr.*)([0-9]+)\((\$[0-9]+)\)/
-+ sprintf("$1%d($3)",eval("$2-$2%4+($2%4+1)&3"))/e;
-+ }
-+
-+ print $_,"\n";
-+}
-+
-+close STDOUT;
-diff --git a/crypto/bn/asm/mips-mont.pl b/crypto/bn/asm/mips-mont.pl
-new file mode 100644
-index 0000000..b944a12
---- /dev/null
-+++ b/crypto/bn/asm/mips-mont.pl
-@@ -0,0 +1,426 @@
-+#!/usr/bin/env perl
-+#
-+# ====================================================================
-+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
-+# project. The module is, however, dual licensed under OpenSSL and
-+# CRYPTOGAMS licenses depending on where you obtain it. For further
-+# details see http://www.openssl.org/~appro/cryptogams/.
-+# ====================================================================
-+
-+# This module doesn't present direct interest for OpenSSL, because it
-+# doesn't provide better performance for longer keys, at least not on
-+# in-order-execution cores. While 512-bit RSA sign operations can be
-+# 65% faster in 64-bit mode, 1024-bit ones are only 15% faster, and
-+# 4096-bit ones are up to 15% slower. In 32-bit mode it varies from
-+# 16% improvement for 512-bit RSA sign to -33% for 4096-bit RSA
-+# verify:-( All comparisons are against bn_mul_mont-free assembler.
-+# The module might be of interest to embedded system developers, as
-+# the code is smaller than 1KB, yet offers >3x improvement on MIPS64
-+# and 75-30% [less for longer keys] on MIPS32 over compiler-generated
-+# code.
-+
-+######################################################################
-+# There is a number of MIPS ABI in use, O32 and N32/64 are most
-+# widely used. Then there is a new contender: NUBI. It appears that if
-+# one picks the latter, it's possible to arrange code in ABI neutral
-+# manner. Therefore let's stick to NUBI register layout:
-+#
-+($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25));
-+($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
-+($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23));
-+($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31));
-+#
-+# The return value is placed in $a0. Following coding rules facilitate
-+# interoperability:
-+#
-+# - never ever touch $tp, "thread pointer", former $gp;
-+# - copy return value to $t0, former $v0 [or to $a0 if you're adapting
-+# old code];
-+# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
-+#
-+# For reference here is register layout for N32/64 MIPS ABIs:
-+#
-+# ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
-+# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
-+# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
-+# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
-+# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
-+#
-+$flavour = shift; # supported flavours are o32,n32,64,nubi32,nubi64
-+
-+if ($flavour =~ /64|n32/i) {
-+ $PTR_ADD="dadd"; # incidentally works even on n32
-+ $PTR_SUB="dsub"; # incidentally works even on n32
-+ $REG_S="sd";
-+ $REG_L="ld";
-+ $SZREG=8;
-+} else {
-+ $PTR_ADD="add";
-+ $PTR_SUB="sub";
-+ $REG_S="sw";
-+ $REG_L="lw";
-+ $SZREG=4;
-+}
-+$SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0x00fff000 : 0x00ff0000;
-+#
-+# <appro@openssl.org>
-+#
-+######################################################################
-+
-+while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
-+open STDOUT,">$output";
-+
-+if ($flavour =~ /64|n32/i) {
-+ $LD="ld";
-+ $ST="sd";
-+ $MULTU="dmultu";
-+ $ADDU="daddu";
-+ $SUBU="dsubu";
-+ $BNSZ=8;
-+} else {
-+ $LD="lw";
-+ $ST="sw";
-+ $MULTU="multu";
-+ $ADDU="addu";
-+ $SUBU="subu";
-+ $BNSZ=4;
-+}
-+
-+# int bn_mul_mont(
-+$rp=$a0; # BN_ULONG *rp,
-+$ap=$a1; # const BN_ULONG *ap,
-+$bp=$a2; # const BN_ULONG *bp,
-+$np=$a3; # const BN_ULONG *np,
-+$n0=$a4; # const BN_ULONG *n0,
-+$num=$a5; # int num);
-+
-+$lo0=$a6;
-+$hi0=$a7;
-+$lo1=$t1;
-+$hi1=$t2;
-+$aj=$s0;
-+$bi=$s1;
-+$nj=$s2;
-+$tp=$s3;
-+$alo=$s4;
-+$ahi=$s5;
-+$nlo=$s6;
-+$nhi=$s7;
-+$tj=$s8;
-+$i=$s9;
-+$j=$s10;
-+$m1=$s11;
-+
-+$FRAMESIZE=14;
-+
-+$code=<<___;
-+.text
-+
-+.set noat
-+.set noreorder
-+
-+.align 5
-+.globl bn_mul_mont
-+.ent bn_mul_mont
-+bn_mul_mont:
-+___
-+$code.=<<___ if ($flavour =~ /o32/i);
-+ lw $n0,16($sp)
-+ lw $num,20($sp)
-+___
-+$code.=<<___;
-+ slt $at,$num,4
-+ bnez $at,1f
-+ li $t0,0
-+ slt $at,$num,17 # on in-order CPU
-+ bnezl $at,bn_mul_mont_internal
-+ nop
-+1: jr $ra
-+ li $a0,0
-+.end bn_mul_mont
-+
-+.align 5
-+.ent bn_mul_mont_internal
-+bn_mul_mont_internal:
-+ .frame $fp,$FRAMESIZE*$SZREG,$ra
-+ .mask 0x40000000|$SAVED_REGS_MASK,-$SZREG
-+ $PTR_SUB $sp,$FRAMESIZE*$SZREG
-+ $REG_S $fp,($FRAMESIZE-1)*$SZREG($sp)
-+ $REG_S $s11,($FRAMESIZE-2)*$SZREG($sp)
-+ $REG_S $s10,($FRAMESIZE-3)*$SZREG($sp)
-+ $REG_S $s9,($FRAMESIZE-4)*$SZREG($sp)
-+ $REG_S $s8,($FRAMESIZE-5)*$SZREG($sp)
-+ $REG_S $s7,($FRAMESIZE-6)*$SZREG($sp)
-+ $REG_S $s6,($FRAMESIZE-7)*$SZREG($sp)
-+ $REG_S $s5,($FRAMESIZE-8)*$SZREG($sp)
-+ $REG_S $s4,($FRAMESIZE-9)*$SZREG($sp)
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+ $REG_S $s3,($FRAMESIZE-10)*$SZREG($sp)
-+ $REG_S $s2,($FRAMESIZE-11)*$SZREG($sp)
-+ $REG_S $s1,($FRAMESIZE-12)*$SZREG($sp)
-+ $REG_S $s0,($FRAMESIZE-13)*$SZREG($sp)
-+___
-+$code.=<<___;
-+ move $fp,$sp
-+
-+ .set reorder
-+ $LD $n0,0($n0)
-+ $LD $bi,0($bp) # bp[0]
-+ $LD $aj,0($ap) # ap[0]
-+ $LD $nj,0($np) # np[0]
-+
-+ $PTR_SUB $sp,2*$BNSZ # place for two extra words
-+ sll $num,`log($BNSZ)/log(2)`
-+ li $at,-4096
-+ $PTR_SUB $sp,$num
-+ and $sp,$at
-+
-+ $MULTU $aj,$bi
-+ $LD $alo,$BNSZ($ap)
-+ $LD $nlo,$BNSZ($np)
-+ mflo $lo0
-+ mfhi $hi0
-+ $MULTU $lo0,$n0
-+ mflo $m1
-+
-+ $MULTU $alo,$bi
-+ mflo $alo
-+ mfhi $ahi
-+
-+ $MULTU $nj,$m1
-+ mflo $lo1
-+ mfhi $hi1
-+ $MULTU $nlo,$m1
-+ $ADDU $lo1,$lo0
-+ sltu $at,$lo1,$lo0
-+ $ADDU $hi1,$at
-+ mflo $nlo
-+ mfhi $nhi
-+
-+ move $tp,$sp
-+ li $j,2*$BNSZ
-+.align 4
-+.L1st:
-+ .set noreorder
-+ $PTR_ADD $aj,$ap,$j
-+ $PTR_ADD $nj,$np,$j
-+ $LD $aj,($aj)
-+ $LD $nj,($nj)
-+
-+ $MULTU $aj,$bi
-+ $ADDU $lo0,$alo,$hi0
-+ $ADDU $lo1,$nlo,$hi1
-+ sltu $at,$lo0,$hi0
-+ sltu $t0,$lo1,$hi1
-+ $ADDU $hi0,$ahi,$at
-+ $ADDU $hi1,$nhi,$t0
-+ mflo $alo
-+ mfhi $ahi
-+
-+ $ADDU $lo1,$lo0
-+ sltu $at,$lo1,$lo0
-+ $MULTU $nj,$m1
-+ $ADDU $hi1,$at
-+ addu $j,$BNSZ
-+ $ST $lo1,($tp)
-+ sltu $t0,$j,$num
-+ mflo $nlo
-+ mfhi $nhi
-+
-+ bnez $t0,.L1st
-+ $PTR_ADD $tp,$BNSZ
-+ .set reorder
-+
-+ $ADDU $lo0,$alo,$hi0
-+ sltu $at,$lo0,$hi0
-+ $ADDU $hi0,$ahi,$at
-+
-+ $ADDU $lo1,$nlo,$hi1
-+ sltu $t0,$lo1,$hi1
-+ $ADDU $hi1,$nhi,$t0
-+ $ADDU $lo1,$lo0
-+ sltu $at,$lo1,$lo0
-+ $ADDU $hi1,$at
-+
-+ $ST $lo1,($tp)
-+
-+ $ADDU $hi1,$hi0
-+ sltu $at,$hi1,$hi0
-+ $ST $hi1,$BNSZ($tp)
-+ $ST $at,2*$BNSZ($tp)
-+
-+ li $i,$BNSZ
-+.align 4
-+.Louter:
-+ $PTR_ADD $bi,$bp,$i
-+ $LD $bi,($bi)
-+ $LD $aj,($ap)
-+ $LD $alo,$BNSZ($ap)
-+ $LD $tj,($sp)
-+
-+ $MULTU $aj,$bi
-+ $LD $nj,($np)
-+ $LD $nlo,$BNSZ($np)
-+ mflo $lo0
-+ mfhi $hi0
-+ $ADDU $lo0,$tj
-+ $MULTU $lo0,$n0
-+ sltu $at,$lo0,$tj
-+ $ADDU $hi0,$at
-+ mflo $m1
-+
-+ $MULTU $alo,$bi
-+ mflo $alo
-+ mfhi $ahi
-+
-+ $MULTU $nj,$m1
-+ mflo $lo1
-+ mfhi $hi1
-+
-+ $MULTU $nlo,$m1
-+ $ADDU $lo1,$lo0
-+ sltu $at,$lo1,$lo0
-+ $ADDU $hi1,$at
-+ mflo $nlo
-+ mfhi $nhi
-+
-+ move $tp,$sp
-+ li $j,2*$BNSZ
-+ $LD $tj,$BNSZ($tp)
-+.align 4
-+.Linner:
-+ .set noreorder
-+ $PTR_ADD $aj,$ap,$j
-+ $PTR_ADD $nj,$np,$j
-+ $LD $aj,($aj)
-+ $LD $nj,($nj)
-+
-+ $MULTU $aj,$bi
-+ $ADDU $lo0,$alo,$hi0
-+ $ADDU $lo1,$nlo,$hi1
-+ sltu $at,$lo0,$hi0
-+ sltu $t0,$lo1,$hi1
-+ $ADDU $hi0,$ahi,$at
-+ $ADDU $hi1,$nhi,$t0
-+ mflo $alo
-+ mfhi $ahi
-+
-+ $ADDU $lo0,$tj
-+ addu $j,$BNSZ
-+ $MULTU $nj,$m1
-+ sltu $at,$lo0,$tj
-+ $ADDU $lo1,$lo0
-+ $ADDU $hi0,$at
-+ sltu $t0,$lo1,$lo0
-+ $LD $tj,2*$BNSZ($tp)
-+ $ADDU $hi1,$t0
-+ sltu $at,$j,$num
-+ mflo $nlo
-+ mfhi $nhi
-+ $ST $lo1,($tp)
-+ bnez $at,.Linner
-+ $PTR_ADD $tp,$BNSZ
-+ .set reorder
-+
-+ $ADDU $lo0,$alo,$hi0
-+ sltu $at,$lo0,$hi0
-+ $ADDU $hi0,$ahi,$at
-+ $ADDU $lo0,$tj
-+ sltu $t0,$lo0,$tj
-+ $ADDU $hi0,$t0
-+
-+ $LD $tj,2*$BNSZ($tp)
-+ $ADDU $lo1,$nlo,$hi1
-+ sltu $at,$lo1,$hi1
-+ $ADDU $hi1,$nhi,$at
-+ $ADDU $lo1,$lo0
-+ sltu $t0,$lo1,$lo0
-+ $ADDU $hi1,$t0
-+ $ST $lo1,($tp)
-+
-+ $ADDU $lo1,$hi1,$hi0
-+ sltu $hi1,$lo1,$hi0
-+ $ADDU $lo1,$tj
-+ sltu $at,$lo1,$tj
-+ $ADDU $hi1,$at
-+ $ST $lo1,$BNSZ($tp)
-+ $ST $hi1,2*$BNSZ($tp)
-+
-+ addu $i,$BNSZ
-+ sltu $t0,$i,$num
-+ bnez $t0,.Louter
-+
-+ .set noreorder
-+ $PTR_ADD $tj,$sp,$num # &tp[num]
-+ move $tp,$sp
-+ move $ap,$sp
-+ li $hi0,0 # clear borrow bit
-+
-+.align 4
-+.Lsub: $LD $lo0,($tp)
-+ $LD $lo1,($np)
-+ $PTR_ADD $tp,$BNSZ
-+ $PTR_ADD $np,$BNSZ
-+ $SUBU $lo1,$lo0,$lo1 # tp[i]-np[i]
-+ sgtu $at,$lo1,$lo0
-+ $SUBU $lo0,$lo1,$hi0
-+ sgtu $hi0,$lo0,$lo1
-+ $ST $lo0,($rp)
-+ or $hi0,$at
-+ sltu $at,$tp,$tj
-+ bnez $at,.Lsub
-+ $PTR_ADD $rp,$BNSZ
-+
-+ $SUBU $hi0,$hi1,$hi0 # handle upmost overflow bit
-+ move $tp,$sp
-+ $PTR_SUB $rp,$num # restore rp
-+ not $hi1,$hi0
-+
-+ and $ap,$hi0,$sp
-+ and $bp,$hi1,$rp
-+ or $ap,$ap,$bp # ap=borrow?tp:rp
-+
-+.align 4
-+.Lcopy: $LD $aj,($ap)
-+ $PTR_ADD $ap,$BNSZ
-+ $ST $zero,($tp)
-+ $PTR_ADD $tp,$BNSZ
-+ sltu $at,$tp,$tj
-+ $ST $aj,($rp)
-+ bnez $at,.Lcopy
-+ $PTR_ADD $rp,$BNSZ
-+
-+ li $a0,1
-+ li $t0,1
-+
-+ .set noreorder
-+ move $sp,$fp
-+ $REG_L $fp,($FRAMESIZE-1)*$SZREG($sp)
-+ $REG_L $s11,($FRAMESIZE-2)*$SZREG($sp)
-+ $REG_L $s10,($FRAMESIZE-3)*$SZREG($sp)
-+ $REG_L $s9,($FRAMESIZE-4)*$SZREG($sp)
-+ $REG_L $s8,($FRAMESIZE-5)*$SZREG($sp)
-+ $REG_L $s7,($FRAMESIZE-6)*$SZREG($sp)
-+ $REG_L $s6,($FRAMESIZE-7)*$SZREG($sp)
-+ $REG_L $s5,($FRAMESIZE-8)*$SZREG($sp)
-+ $REG_L $s4,($FRAMESIZE-9)*$SZREG($sp)
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+ $REG_L $s3,($FRAMESIZE-10)*$SZREG($sp)
-+ $REG_L $s2,($FRAMESIZE-11)*$SZREG($sp)
-+ $REG_L $s1,($FRAMESIZE-12)*$SZREG($sp)
-+ $REG_L $s0,($FRAMESIZE-13)*$SZREG($sp)
-+___
-+$code.=<<___;
-+ jr $ra
-+ $PTR_ADD $sp,$FRAMESIZE*$SZREG
-+.end bn_mul_mont_internal
-+.rdata
-+.asciiz "Montgomery Multiplication for MIPS, CRYPTOGAMS by <appro\@openssl.org>"
-+___
-+
-+$code =~ s/\`([^\`]*)\`/eval $1/gem;
-+
-+print $code;
-+close STDOUT;
-diff --git a/crypto/bn/asm/mips.pl b/crypto/bn/asm/mips.pl
-new file mode 100644
-index 0000000..f04b3b9
---- /dev/null
-+++ b/crypto/bn/asm/mips.pl
-@@ -0,0 +1,2585 @@
-+#!/usr/bin/env perl
-+#
-+# ====================================================================
-+# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
-+# project.
-+#
-+# Rights for redistribution and usage in source and binary forms are
-+# granted according to the OpenSSL license. Warranty of any kind is
-+# disclaimed.
-+# ====================================================================
-+
-+
-+# July 1999
-+#
-+# This is drop-in MIPS III/IV ISA replacement for crypto/bn/bn_asm.c.
-+#
-+# The module is designed to work with either of the "new" MIPS ABI(5),
-+# namely N32 or N64, offered by IRIX 6.x. It's not ment to work under
-+# IRIX 5.x not only because it doesn't support new ABIs but also
-+# because 5.x kernels put R4x00 CPU into 32-bit mode and all those
-+# 64-bit instructions (daddu, dmultu, etc.) found below gonna only
-+# cause illegal instruction exception:-(
-+#
-+# In addition the code depends on preprocessor flags set up by MIPSpro
-+# compiler driver (either as or cc) and therefore (probably?) can't be
-+# compiled by the GNU assembler. GNU C driver manages fine though...
-+# I mean as long as -mmips-as is specified or is the default option,
-+# because then it simply invokes /usr/bin/as which in turn takes
-+# perfect care of the preprocessor definitions. Another neat feature
-+# offered by the MIPSpro assembler is an optimization pass. This gave
-+# me the opportunity to have the code looking more regular as all those
-+# architecture dependent instruction rescheduling details were left to
-+# the assembler. Cool, huh?
-+#
-+# Performance improvement is astonishing! 'apps/openssl speed rsa dsa'
-+# goes way over 3 times faster!
-+#
-+# <appro@fy.chalmers.se>
-+
-+# October 2010
-+#
-+# Adapt the module even for 32-bit ABIs and other OSes. The former was
-+# achieved by mechanical replacement of 64-bit arithmetic instructions
-+# such as dmultu, daddu, etc. with their 32-bit counterparts and
-+# adjusting offsets denoting multiples of BN_ULONG. Above mentioned
-+# >3x performance improvement naturally does not apply to 32-bit code
-+# [because there is no instruction 32-bit compiler can't use], one
-+# has to content with 40-85% improvement depending on benchmark and
-+# key length, more for longer keys.
-+
-+$flavour = shift;
-+while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
-+open STDOUT,">$output";
-+
-+if ($flavour =~ /64|n32/i) {
-+ $LD="ld";
-+ $ST="sd";
-+ $MULTU="dmultu";
-+ $DIVU="ddivu";
-+ $ADDU="daddu";
-+ $SUBU="dsubu";
-+ $SRL="dsrl";
-+ $SLL="dsll";
-+ $BNSZ=8;
-+ $PTR_ADD="daddu";
-+ $PTR_SUB="dsubu";
-+ $SZREG=8;
-+ $REG_S="sd";
-+ $REG_L="ld";
-+} else {
-+ $LD="lw";
-+ $ST="sw";
-+ $MULTU="multu";
-+ $DIVU="divu";
-+ $ADDU="addu";
-+ $SUBU="subu";
-+ $SRL="srl";
-+ $SLL="sll";
-+ $BNSZ=4;
-+ $PTR_ADD="addu";
-+ $PTR_SUB="subu";
-+ $SZREG=4;
-+ $REG_S="sw";
-+ $REG_L="lw";
-+ $code=".set mips2\n";
-+}
-+
-+# Below is N32/64 register layout used in the original module.
-+#
-+($zero,$at,$v0,$v1)=map("\$$_",(0..3));
-+($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
-+($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
-+($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
-+($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
-+($ta0,$ta1,$ta2,$ta3)=($a4,$a5,$a6,$a7);
-+#
-+# No special adaptation is required for O32. NUBI on the other hand
-+# is treated by saving/restoring ($v1,$t0..$t3).
-+
-+$gp=$v1 if ($flavour =~ /nubi/i);
-+
-+$minus4=$v1;
-+
-+$code.=<<___;
-+.rdata
-+.asciiz "mips3.s, Version 1.2"
-+.asciiz "MIPS II/III/IV ISA artwork by Andy Polyakov <appro\@fy.chalmers.se>"
-+
-+.text
-+.set noat
-+
-+.align 5
-+.globl bn_mul_add_words
-+.ent bn_mul_add_words
-+bn_mul_add_words:
-+ .set noreorder
-+ bgtz $a2,bn_mul_add_words_internal
-+ move $v0,$zero
-+ jr $ra
-+ move $a0,$v0
-+.end bn_mul_add_words
-+
-+.align 5
-+.ent bn_mul_add_words_internal
-+bn_mul_add_words_internal:
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+ .frame $sp,6*$SZREG,$ra
-+ .mask 0x8000f008,-$SZREG
-+ .set noreorder
-+ $PTR_SUB $sp,6*$SZREG
-+ $REG_S $ra,5*$SZREG($sp)
-+ $REG_S $t3,4*$SZREG($sp)
-+ $REG_S $t2,3*$SZREG($sp)
-+ $REG_S $t1,2*$SZREG($sp)
-+ $REG_S $t0,1*$SZREG($sp)
-+ $REG_S $gp,0*$SZREG($sp)
-+___
-+$code.=<<___;
-+ .set reorder
-+ li $minus4,-4
-+ and $ta0,$a2,$minus4
-+ $LD $t0,0($a1)
-+ beqz $ta0,.L_bn_mul_add_words_tail
-+
-+.L_bn_mul_add_words_loop:
-+ $MULTU $t0,$a3
-+ $LD $t1,0($a0)
-+ $LD $t2,$BNSZ($a1)
-+ $LD $t3,$BNSZ($a0)
-+ $LD $ta0,2*$BNSZ($a1)
-+ $LD $ta1,2*$BNSZ($a0)
-+ $ADDU $t1,$v0
-+ sltu $v0,$t1,$v0 # All manuals say it "compares 32-bit
-+ # values", but it seems to work fine
-+ # even on 64-bit registers.
-+ mflo $at
-+ mfhi $t0
-+ $ADDU $t1,$at
-+ $ADDU $v0,$t0
-+ $MULTU $t2,$a3
-+ sltu $at,$t1,$at
-+ $ST $t1,0($a0)
-+ $ADDU $v0,$at
-+
-+ $LD $ta2,3*$BNSZ($a1)
-+ $LD $ta3,3*$BNSZ($a0)
-+ $ADDU $t3,$v0
-+ sltu $v0,$t3,$v0
-+ mflo $at
-+ mfhi $t2
-+ $ADDU $t3,$at
-+ $ADDU $v0,$t2
-+ $MULTU $ta0,$a3
-+ sltu $at,$t3,$at
-+ $ST $t3,$BNSZ($a0)
-+ $ADDU $v0,$at
-+
-+ subu $a2,4
-+ $PTR_ADD $a0,4*$BNSZ
-+ $PTR_ADD $a1,4*$BNSZ
-+ $ADDU $ta1,$v0
-+ sltu $v0,$ta1,$v0
-+ mflo $at
-+ mfhi $ta0
-+ $ADDU $ta1,$at
-+ $ADDU $v0,$ta0
-+ $MULTU $ta2,$a3
-+ sltu $at,$ta1,$at
-+ $ST $ta1,-2*$BNSZ($a0)
-+ $ADDU $v0,$at
-+
-+
-+ and $ta0,$a2,$minus4
-+ $ADDU $ta3,$v0
-+ sltu $v0,$ta3,$v0
-+ mflo $at
-+ mfhi $ta2
-+ $ADDU $ta3,$at
-+ $ADDU $v0,$ta2
-+ sltu $at,$ta3,$at
-+ $ST $ta3,-$BNSZ($a0)
-+ $ADDU $v0,$at
-+ .set noreorder
-+ bgtzl $ta0,.L_bn_mul_add_words_loop
-+ $LD $t0,0($a1)
-+
-+ beqz $a2,.L_bn_mul_add_words_return
-+ nop
-+
-+.L_bn_mul_add_words_tail:
-+ .set reorder
-+ $LD $t0,0($a1)
-+ $MULTU $t0,$a3
-+ $LD $t1,0($a0)
-+ subu $a2,1
-+ $ADDU $t1,$v0
-+ sltu $v0,$t1,$v0
-+ mflo $at
-+ mfhi $t0
-+ $ADDU $t1,$at
-+ $ADDU $v0,$t0
-+ sltu $at,$t1,$at
-+ $ST $t1,0($a0)
-+ $ADDU $v0,$at
-+ beqz $a2,.L_bn_mul_add_words_return
-+
-+ $LD $t0,$BNSZ($a1)
-+ $MULTU $t0,$a3
-+ $LD $t1,$BNSZ($a0)
-+ subu $a2,1
-+ $ADDU $t1,$v0
-+ sltu $v0,$t1,$v0
-+ mflo $at
-+ mfhi $t0
-+ $ADDU $t1,$at
-+ $ADDU $v0,$t0
-+ sltu $at,$t1,$at
-+ $ST $t1,$BNSZ($a0)
-+ $ADDU $v0,$at
-+ beqz $a2,.L_bn_mul_add_words_return
-+
-+ $LD $t0,2*$BNSZ($a1)
-+ $MULTU $t0,$a3
-+ $LD $t1,2*$BNSZ($a0)
-+ $ADDU $t1,$v0
-+ sltu $v0,$t1,$v0
-+ mflo $at
-+ mfhi $t0
-+ $ADDU $t1,$at
-+ $ADDU $v0,$t0
-+ sltu $at,$t1,$at
-+ $ST $t1,2*$BNSZ($a0)
-+ $ADDU $v0,$at
-+
-+.L_bn_mul_add_words_return:
-+ .set noreorder
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+ $REG_L $t3,4*$SZREG($sp)
-+ $REG_L $t2,3*$SZREG($sp)
-+ $REG_L $t1,2*$SZREG($sp)
-+ $REG_L $t0,1*$SZREG($sp)
-+ $REG_L $gp,0*$SZREG($sp)
-+ $PTR_ADD $sp,6*$SZREG
-+___
-+$code.=<<___;
-+ jr $ra
-+ move $a0,$v0
-+.end bn_mul_add_words_internal
-+
-+.align 5
-+.globl bn_mul_words
-+.ent bn_mul_words
-+bn_mul_words:
-+ .set noreorder
-+ bgtz $a2,bn_mul_words_internal
-+ move $v0,$zero
-+ jr $ra
-+ move $a0,$v0
-+.end bn_mul_words
-+
-+.align 5
-+.ent bn_mul_words_internal
-+bn_mul_words_internal:
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+ .frame $sp,6*$SZREG,$ra
-+ .mask 0x8000f008,-$SZREG
-+ .set noreorder
-+ $PTR_SUB $sp,6*$SZREG
-+ $REG_S $ra,5*$SZREG($sp)
-+ $REG_S $t3,4*$SZREG($sp)
-+ $REG_S $t2,3*$SZREG($sp)
-+ $REG_S $t1,2*$SZREG($sp)
-+ $REG_S $t0,1*$SZREG($sp)
-+ $REG_S $gp,0*$SZREG($sp)
-+___
-+$code.=<<___;
-+ .set reorder
-+ li $minus4,-4
-+ and $ta0,$a2,$minus4
-+ $LD $t0,0($a1)
-+ beqz $ta0,.L_bn_mul_words_tail
-+
-+.L_bn_mul_words_loop:
-+ $MULTU $t0,$a3
-+ $LD $t2,$BNSZ($a1)
-+ $LD $ta0,2*$BNSZ($a1)
-+ $LD $ta2,3*$BNSZ($a1)
-+ mflo $at
-+ mfhi $t0
-+ $ADDU $v0,$at
-+ sltu $t1,$v0,$at
-+ $MULTU $t2,$a3
-+ $ST $v0,0($a0)
-+ $ADDU $v0,$t1,$t0
-+
-+ subu $a2,4
-+ $PTR_ADD $a0,4*$BNSZ
-+ $PTR_ADD $a1,4*$BNSZ
-+ mflo $at
-+ mfhi $t2
-+ $ADDU $v0,$at
-+ sltu $t3,$v0,$at
-+ $MULTU $ta0,$a3
-+ $ST $v0,-3*$BNSZ($a0)
-+ $ADDU $v0,$t3,$t2
-+
-+ mflo $at
-+ mfhi $ta0
-+ $ADDU $v0,$at
-+ sltu $ta1,$v0,$at
-+ $MULTU $ta2,$a3
-+ $ST $v0,-2*$BNSZ($a0)
-+ $ADDU $v0,$ta1,$ta0
-+
-+ and $ta0,$a2,$minus4
-+ mflo $at
-+ mfhi $ta2
-+ $ADDU $v0,$at
-+ sltu $ta3,$v0,$at
-+ $ST $v0,-$BNSZ($a0)
-+ $ADDU $v0,$ta3,$ta2
-+ .set noreorder
-+ bgtzl $ta0,.L_bn_mul_words_loop
-+ $LD $t0,0($a1)
-+
-+ beqz $a2,.L_bn_mul_words_return
-+ nop
-+
-+.L_bn_mul_words_tail:
-+ .set reorder
-+ $LD $t0,0($a1)
-+ $MULTU $t0,$a3
-+ subu $a2,1
-+ mflo $at
-+ mfhi $t0
-+ $ADDU $v0,$at
-+ sltu $t1,$v0,$at
-+ $ST $v0,0($a0)
-+ $ADDU $v0,$t1,$t0
-+ beqz $a2,.L_bn_mul_words_return
-+
-+ $LD $t0,$BNSZ($a1)
-+ $MULTU $t0,$a3
-+ subu $a2,1
-+ mflo $at
-+ mfhi $t0
-+ $ADDU $v0,$at
-+ sltu $t1,$v0,$at
-+ $ST $v0,$BNSZ($a0)
-+ $ADDU $v0,$t1,$t0
-+ beqz $a2,.L_bn_mul_words_return
-+
-+ $LD $t0,2*$BNSZ($a1)
-+ $MULTU $t0,$a3
-+ mflo $at
-+ mfhi $t0
-+ $ADDU $v0,$at
-+ sltu $t1,$v0,$at
-+ $ST $v0,2*$BNSZ($a0)
-+ $ADDU $v0,$t1,$t0
-+
-+.L_bn_mul_words_return:
-+ .set noreorder
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+ $REG_L $t3,4*$SZREG($sp)
-+ $REG_L $t2,3*$SZREG($sp)
-+ $REG_L $t1,2*$SZREG($sp)
-+ $REG_L $t0,1*$SZREG($sp)
-+ $REG_L $gp,0*$SZREG($sp)
-+ $PTR_ADD $sp,6*$SZREG
-+___
-+$code.=<<___;
-+ jr $ra
-+ move $a0,$v0
-+.end bn_mul_words_internal
-+
-+.align 5
-+.globl bn_sqr_words
-+.ent bn_sqr_words
-+bn_sqr_words:
-+ .set noreorder
-+ bgtz $a2,bn_sqr_words_internal
-+ move $v0,$zero
-+ jr $ra
-+ move $a0,$v0
-+.end bn_sqr_words
-+
-+.align 5
-+.ent bn_sqr_words_internal
-+bn_sqr_words_internal:
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+ .frame $sp,6*$SZREG,$ra
-+ .mask 0x8000f008,-$SZREG
-+ .set noreorder
-+ $PTR_SUB $sp,6*$SZREG
-+ $REG_S $ra,5*$SZREG($sp)
-+ $REG_S $t3,4*$SZREG($sp)
-+ $REG_S $t2,3*$SZREG($sp)
-+ $REG_S $t1,2*$SZREG($sp)
-+ $REG_S $t0,1*$SZREG($sp)
-+ $REG_S $gp,0*$SZREG($sp)
-+___
-+$code.=<<___;
-+ .set reorder
-+ li $minus4,-4
-+ and $ta0,$a2,$minus4
-+ $LD $t0,0($a1)
-+ beqz $ta0,.L_bn_sqr_words_tail
-+
-+.L_bn_sqr_words_loop:
-+ $MULTU $t0,$t0
-+ $LD $t2,$BNSZ($a1)
-+ $LD $ta0,2*$BNSZ($a1)
-+ $LD $ta2,3*$BNSZ($a1)
-+ mflo $t1
-+ mfhi $t0
-+ $ST $t1,0($a0)
-+ $ST $t0,$BNSZ($a0)
-+
-+ $MULTU $t2,$t2
-+ subu $a2,4
-+ $PTR_ADD $a0,8*$BNSZ
-+ $PTR_ADD $a1,4*$BNSZ
-+ mflo $t3
-+ mfhi $t2
-+ $ST $t3,-6*$BNSZ($a0)
-+ $ST $t2,-5*$BNSZ($a0)
-+
-+ $MULTU $ta0,$ta0
-+ mflo $ta1
-+ mfhi $ta0
-+ $ST $ta1,-4*$BNSZ($a0)
-+ $ST $ta0,-3*$BNSZ($a0)
-+
-+
-+ $MULTU $ta2,$ta2
-+ and $ta0,$a2,$minus4
-+ mflo $ta3
-+ mfhi $ta2
-+ $ST $ta3,-2*$BNSZ($a0)
-+ $ST $ta2,-$BNSZ($a0)
-+
-+ .set noreorder
-+ bgtzl $ta0,.L_bn_sqr_words_loop
-+ $LD $t0,0($a1)
-+
-+ beqz $a2,.L_bn_sqr_words_return
-+ nop
-+
-+.L_bn_sqr_words_tail:
-+ .set reorder
-+ $LD $t0,0($a1)
-+ $MULTU $t0,$t0
-+ subu $a2,1
-+ mflo $t1
-+ mfhi $t0
-+ $ST $t1,0($a0)
-+ $ST $t0,$BNSZ($a0)
-+ beqz $a2,.L_bn_sqr_words_return
-+
-+ $LD $t0,$BNSZ($a1)
-+ $MULTU $t0,$t0
-+ subu $a2,1
-+ mflo $t1
-+ mfhi $t0
-+ $ST $t1,2*$BNSZ($a0)
-+ $ST $t0,3*$BNSZ($a0)
-+ beqz $a2,.L_bn_sqr_words_return
-+
-+ $LD $t0,2*$BNSZ($a1)
-+ $MULTU $t0,$t0
-+ mflo $t1
-+ mfhi $t0
-+ $ST $t1,4*$BNSZ($a0)
-+ $ST $t0,5*$BNSZ($a0)
-+
-+.L_bn_sqr_words_return:
-+ .set noreorder
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+ $REG_L $t3,4*$SZREG($sp)
-+ $REG_L $t2,3*$SZREG($sp)
-+ $REG_L $t1,2*$SZREG($sp)
-+ $REG_L $t0,1*$SZREG($sp)
-+ $REG_L $gp,0*$SZREG($sp)
-+ $PTR_ADD $sp,6*$SZREG
-+___
-+$code.=<<___;
-+ jr $ra
-+ move $a0,$v0
-+
-+.end bn_sqr_words_internal
-+
-+.align 5
-+.globl bn_add_words
-+.ent bn_add_words
-+bn_add_words:
-+ .set noreorder
-+ bgtz $a3,bn_add_words_internal
-+ move $v0,$zero
-+ jr $ra
-+ move $a0,$v0
-+.end bn_add_words
-+
-+.align 5
-+.ent bn_add_words_internal
-+bn_add_words_internal:
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+ .frame $sp,6*$SZREG,$ra
-+ .mask 0x8000f008,-$SZREG
-+ .set noreorder
-+ $PTR_SUB $sp,6*$SZREG
-+ $REG_S $ra,5*$SZREG($sp)
-+ $REG_S $t3,4*$SZREG($sp)
-+ $REG_S $t2,3*$SZREG($sp)
-+ $REG_S $t1,2*$SZREG($sp)
-+ $REG_S $t0,1*$SZREG($sp)
-+ $REG_S $gp,0*$SZREG($sp)
-+___
-+$code.=<<___;
-+ .set reorder
-+ li $minus4,-4
-+ and $at,$a3,$minus4
-+ $LD $t0,0($a1)
-+ beqz $at,.L_bn_add_words_tail
-+
-+.L_bn_add_words_loop:
-+ $LD $ta0,0($a2)
-+ subu $a3,4
-+ $LD $t1,$BNSZ($a1)
-+ and $at,$a3,$minus4
-+ $LD $t2,2*$BNSZ($a1)
-+ $PTR_ADD $a2,4*$BNSZ
-+ $LD $t3,3*$BNSZ($a1)
-+ $PTR_ADD $a0,4*$BNSZ
-+ $LD $ta1,-3*$BNSZ($a2)
-+ $PTR_ADD $a1,4*$BNSZ
-+ $LD $ta2,-2*$BNSZ($a2)
-+ $LD $ta3,-$BNSZ($a2)
-+ $ADDU $ta0,$t0
-+ sltu $t8,$ta0,$t0
-+ $ADDU $t0,$ta0,$v0
-+ sltu $v0,$t0,$ta0
-+ $ST $t0,-4*$BNSZ($a0)
-+ $ADDU $v0,$t8
-+
-+ $ADDU $ta1,$t1
-+ sltu $t9,$ta1,$t1
-+ $ADDU $t1,$ta1,$v0
-+ sltu $v0,$t1,$ta1
-+ $ST $t1,-3*$BNSZ($a0)
-+ $ADDU $v0,$t9
-+
-+ $ADDU $ta2,$t2
-+ sltu $t8,$ta2,$t2
-+ $ADDU $t2,$ta2,$v0
-+ sltu $v0,$t2,$ta2
-+ $ST $t2,-2*$BNSZ($a0)
-+ $ADDU $v0,$t8
-+
-+ $ADDU $ta3,$t3
-+ sltu $t9,$ta3,$t3
-+ $ADDU $t3,$ta3,$v0
-+ sltu $v0,$t3,$ta3
-+ $ST $t3,-$BNSZ($a0)
-+ $ADDU $v0,$t9
-+
-+ .set noreorder
-+ bgtzl $at,.L_bn_add_words_loop
-+ $LD $t0,0($a1)
-+
-+ beqz $a3,.L_bn_add_words_return
-+ nop
-+
-+.L_bn_add_words_tail:
-+ .set reorder
-+ $LD $t0,0($a1)
-+ $LD $ta0,0($a2)
-+ $ADDU $ta0,$t0
-+ subu $a3,1
-+ sltu $t8,$ta0,$t0
-+ $ADDU $t0,$ta0,$v0
-+ sltu $v0,$t0,$ta0
-+ $ST $t0,0($a0)
-+ $ADDU $v0,$t8
-+ beqz $a3,.L_bn_add_words_return
-+
-+ $LD $t1,$BNSZ($a1)
-+ $LD $ta1,$BNSZ($a2)
-+ $ADDU $ta1,$t1
-+ subu $a3,1
-+ sltu $t9,$ta1,$t1
-+ $ADDU $t1,$ta1,$v0
-+ sltu $v0,$t1,$ta1
-+ $ST $t1,$BNSZ($a0)
-+ $ADDU $v0,$t9
-+ beqz $a3,.L_bn_add_words_return
-+
-+ $LD $t2,2*$BNSZ($a1)
-+ $LD $ta2,2*$BNSZ($a2)
-+ $ADDU $ta2,$t2
-+ sltu $t8,$ta2,$t2
-+ $ADDU $t2,$ta2,$v0
-+ sltu $v0,$t2,$ta2
-+ $ST $t2,2*$BNSZ($a0)
-+ $ADDU $v0,$t8
-+
-+.L_bn_add_words_return:
-+ .set noreorder
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+ $REG_L $t3,4*$SZREG($sp)
-+ $REG_L $t2,3*$SZREG($sp)
-+ $REG_L $t1,2*$SZREG($sp)
-+ $REG_L $t0,1*$SZREG($sp)
-+ $REG_L $gp,0*$SZREG($sp)
-+ $PTR_ADD $sp,6*$SZREG
-+___
-+$code.=<<___;
-+ jr $ra
-+ move $a0,$v0
-+
-+.end bn_add_words_internal
-+
-+.align 5
-+.globl bn_sub_words
-+.ent bn_sub_words
-+bn_sub_words:
-+ .set noreorder
-+ bgtz $a3,bn_sub_words_internal
-+ move $v0,$zero
-+ jr $ra
-+ move $a0,$zero
-+.end bn_sub_words
-+
-+.align 5
-+.ent bn_sub_words_internal
-+bn_sub_words_internal:
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+ .frame $sp,6*$SZREG,$ra
-+ .mask 0x8000f008,-$SZREG
-+ .set noreorder
-+ $PTR_SUB $sp,6*$SZREG
-+ $REG_S $ra,5*$SZREG($sp)
-+ $REG_S $t3,4*$SZREG($sp)
-+ $REG_S $t2,3*$SZREG($sp)
-+ $REG_S $t1,2*$SZREG($sp)
-+ $REG_S $t0,1*$SZREG($sp)
-+ $REG_S $gp,0*$SZREG($sp)
-+___
-+$code.=<<___;
-+ .set reorder
-+ li $minus4,-4
-+ and $at,$a3,$minus4
-+ $LD $t0,0($a1)
-+ beqz $at,.L_bn_sub_words_tail
-+
-+.L_bn_sub_words_loop:
-+ $LD $ta0,0($a2)
-+ subu $a3,4
-+ $LD $t1,$BNSZ($a1)
-+ and $at,$a3,$minus4
-+ $LD $t2,2*$BNSZ($a1)
-+ $PTR_ADD $a2,4*$BNSZ
-+ $LD $t3,3*$BNSZ($a1)
-+ $PTR_ADD $a0,4*$BNSZ
-+ $LD $ta1,-3*$BNSZ($a2)
-+ $PTR_ADD $a1,4*$BNSZ
-+ $LD $ta2,-2*$BNSZ($a2)
-+ $LD $ta3,-$BNSZ($a2)
-+ sltu $t8,$t0,$ta0
-+ $SUBU $ta0,$t0,$ta0
-+ $SUBU $t0,$ta0,$v0
-+ sgtu $v0,$t0,$ta0
-+ $ST $t0,-4*$BNSZ($a0)
-+ $ADDU $v0,$t8
-+
-+ sltu $t9,$t1,$ta1
-+ $SUBU $ta1,$t1,$ta1
-+ $SUBU $t1,$ta1,$v0
-+ sgtu $v0,$t1,$ta1
-+ $ST $t1,-3*$BNSZ($a0)
-+ $ADDU $v0,$t9
-+
-+
-+ sltu $t8,$t2,$ta2
-+ $SUBU $ta2,$t2,$ta2
-+ $SUBU $t2,$ta2,$v0
-+ sgtu $v0,$t2,$ta2
-+ $ST $t2,-2*$BNSZ($a0)
-+ $ADDU $v0,$t8
-+
-+ sltu $t9,$t3,$ta3
-+ $SUBU $ta3,$t3,$ta3
-+ $SUBU $t3,$ta3,$v0
-+ sgtu $v0,$t3,$ta3
-+ $ST $t3,-$BNSZ($a0)
-+ $ADDU $v0,$t9
-+
-+ .set noreorder
-+ bgtzl $at,.L_bn_sub_words_loop
-+ $LD $t0,0($a1)
-+
-+ beqz $a3,.L_bn_sub_words_return
-+ nop
-+
-+.L_bn_sub_words_tail:
-+ .set reorder
-+ $LD $t0,0($a1)
-+ $LD $ta0,0($a2)
-+ subu $a3,1
-+ sltu $t8,$t0,$ta0
-+ $SUBU $ta0,$t0,$ta0
-+ $SUBU $t0,$ta0,$v0
-+ sgtu $v0,$t0,$ta0
-+ $ST $t0,0($a0)
-+ $ADDU $v0,$t8
-+ beqz $a3,.L_bn_sub_words_return
-+
-+ $LD $t1,$BNSZ($a1)
-+ subu $a3,1
-+ $LD $ta1,$BNSZ($a2)
-+ sltu $t9,$t1,$ta1
-+ $SUBU $ta1,$t1,$ta1
-+ $SUBU $t1,$ta1,$v0
-+ sgtu $v0,$t1,$ta1
-+ $ST $t1,$BNSZ($a0)
-+ $ADDU $v0,$t9
-+ beqz $a3,.L_bn_sub_words_return
-+
-+ $LD $t2,2*$BNSZ($a1)
-+ $LD $ta2,2*$BNSZ($a2)
-+ sltu $t8,$t2,$ta2
-+ $SUBU $ta2,$t2,$ta2
-+ $SUBU $t2,$ta2,$v0
-+ sgtu $v0,$t2,$ta2
-+ $ST $t2,2*$BNSZ($a0)
-+ $ADDU $v0,$t8
-+
-+.L_bn_sub_words_return:
-+ .set noreorder
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+ $REG_L $t3,4*$SZREG($sp)
-+ $REG_L $t2,3*$SZREG($sp)
-+ $REG_L $t1,2*$SZREG($sp)
-+ $REG_L $t0,1*$SZREG($sp)
-+ $REG_L $gp,0*$SZREG($sp)
-+ $PTR_ADD $sp,6*$SZREG
-+___
-+$code.=<<___;
-+ jr $ra
-+ move $a0,$v0
-+.end bn_sub_words_internal
-+
-+.align 5
-+.globl bn_div_3_words
-+.ent bn_div_3_words
-+bn_div_3_words:
-+ .set noreorder
-+ move $a3,$a0 # we know that bn_div_words does not
-+ # touch $a3, $ta2, $ta3 and preserves $a2
-+ # so that we can save two arguments
-+ # and return address in registers
-+ # instead of stack:-)
-+
-+ $LD $a0,($a3)
-+ move $ta2,$a1
-+ bne $a0,$a2,bn_div_3_words_internal
-+ $LD $a1,-$BNSZ($a3)
-+ li $v0,-1
-+ jr $ra
-+ move $a0,$v0
-+.end bn_div_3_words
-+
-+.align 5
-+.ent bn_div_3_words_internal
-+bn_div_3_words_internal:
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+ .frame $sp,6*$SZREG,$ra
-+ .mask 0x8000f008,-$SZREG
-+ .set noreorder
-+ $PTR_SUB $sp,6*$SZREG
-+ $REG_S $ra,5*$SZREG($sp)
-+ $REG_S $t3,4*$SZREG($sp)
-+ $REG_S $t2,3*$SZREG($sp)
-+ $REG_S $t1,2*$SZREG($sp)
-+ $REG_S $t0,1*$SZREG($sp)
-+ $REG_S $gp,0*$SZREG($sp)
-+___
-+$code.=<<___;
-+ .set reorder
-+ move $ta3,$ra
-+ bal bn_div_words
-+ move $ra,$ta3
-+ $MULTU $ta2,$v0
-+ $LD $t2,-2*$BNSZ($a3)
-+ move $ta0,$zero
-+ mfhi $t1
-+ mflo $t0
-+ sltu $t8,$t1,$a1
-+.L_bn_div_3_words_inner_loop:
-+ bnez $t8,.L_bn_div_3_words_inner_loop_done
-+ sgeu $at,$t2,$t0
-+ seq $t9,$t1,$a1
-+ and $at,$t9
-+ sltu $t3,$t0,$ta2
-+ $ADDU $a1,$a2
-+ $SUBU $t1,$t3
-+ $SUBU $t0,$ta2
-+ sltu $t8,$t1,$a1
-+ sltu $ta0,$a1,$a2
-+ or $t8,$ta0
-+ .set noreorder
-+ beqzl $at,.L_bn_div_3_words_inner_loop
-+ $SUBU $v0,1
-+ .set reorder
-+.L_bn_div_3_words_inner_loop_done:
-+ .set noreorder
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+ $REG_L $t3,4*$SZREG($sp)
-+ $REG_L $t2,3*$SZREG($sp)
-+ $REG_L $t1,2*$SZREG($sp)
-+ $REG_L $t0,1*$SZREG($sp)
-+ $REG_L $gp,0*$SZREG($sp)
-+ $PTR_ADD $sp,6*$SZREG
-+___
-+$code.=<<___;
-+ jr $ra
-+ move $a0,$v0
-+.end bn_div_3_words_internal
-+
-+.align 5
-+.globl bn_div_words
-+.ent bn_div_words
-+bn_div_words:
-+ .set noreorder
-+ bnez $a2,bn_div_words_internal
-+ li $v0,-1 # I would rather signal div-by-zero
-+ # which can be done with 'break 7'
-+ jr $ra
-+ move $a0,$v0
-+.end bn_div_words
-+
-+.align 5
-+.ent bn_div_words_internal
-+bn_div_words_internal:
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+ .frame $sp,6*$SZREG,$ra
-+ .mask 0x8000f008,-$SZREG
-+ .set noreorder
-+ $PTR_SUB $sp,6*$SZREG
-+ $REG_S $ra,5*$SZREG($sp)
-+ $REG_S $t3,4*$SZREG($sp)
-+ $REG_S $t2,3*$SZREG($sp)
-+ $REG_S $t1,2*$SZREG($sp)
-+ $REG_S $t0,1*$SZREG($sp)
-+ $REG_S $gp,0*$SZREG($sp)
-+___
-+$code.=<<___;
-+ move $v1,$zero
-+ bltz $a2,.L_bn_div_words_body
-+ move $t9,$v1
-+ $SLL $a2,1
-+ bgtz $a2,.-4
-+ addu $t9,1
-+
-+ .set reorder
-+ negu $t1,$t9
-+ li $t2,-1
-+ $SLL $t2,$t1
-+ and $t2,$a0
-+ $SRL $at,$a1,$t1
-+ .set noreorder
-+ bnezl $t2,.+8
-+ break 6 # signal overflow
-+ .set reorder
-+ $SLL $a0,$t9
-+ $SLL $a1,$t9
-+ or $a0,$at
-+___
-+$QT=$ta0;
-+$HH=$ta1;
-+$DH=$v1;
-+$code.=<<___;
-+.L_bn_div_words_body:
-+ $SRL $DH,$a2,4*$BNSZ # bits
-+ sgeu $at,$a0,$a2
-+ .set noreorder
-+ bnezl $at,.+8
-+ $SUBU $a0,$a2
-+ .set reorder
-+
-+ li $QT,-1
-+ $SRL $HH,$a0,4*$BNSZ # bits
-+ $SRL $QT,4*$BNSZ # q=0xffffffff
-+ beq $DH,$HH,.L_bn_div_words_skip_div1
-+ $DIVU $zero,$a0,$DH
-+ mflo $QT
-+.L_bn_div_words_skip_div1:
-+ $MULTU $a2,$QT
-+ $SLL $t3,$a0,4*$BNSZ # bits
-+ $SRL $at,$a1,4*$BNSZ # bits
-+ or $t3,$at
-+ mflo $t0
-+ mfhi $t1
-+.L_bn_div_words_inner_loop1:
-+ sltu $t2,$t3,$t0
-+ seq $t8,$HH,$t1
-+ sltu $at,$HH,$t1
-+ and $t2,$t8
-+ sltu $v0,$t0,$a2
-+ or $at,$t2
-+ .set noreorder
-+ beqz $at,.L_bn_div_words_inner_loop1_done
-+ $SUBU $t1,$v0
-+ $SUBU $t0,$a2
-+ b .L_bn_div_words_inner_loop1
-+ $SUBU $QT,1
-+ .set reorder
-+.L_bn_div_words_inner_loop1_done:
-+
-+ $SLL $a1,4*$BNSZ # bits
-+ $SUBU $a0,$t3,$t0
-+ $SLL $v0,$QT,4*$BNSZ # bits
-+
-+ li $QT,-1
-+ $SRL $HH,$a0,4*$BNSZ # bits
-+ $SRL $QT,4*$BNSZ # q=0xffffffff
-+ beq $DH,$HH,.L_bn_div_words_skip_div2
-+ $DIVU $zero,$a0,$DH
-+ mflo $QT
-+.L_bn_div_words_skip_div2:
-+ $MULTU $a2,$QT
-+ $SLL $t3,$a0,4*$BNSZ # bits
-+ $SRL $at,$a1,4*$BNSZ # bits
-+ or $t3,$at
-+ mflo $t0
-+ mfhi $t1
-+.L_bn_div_words_inner_loop2:
-+ sltu $t2,$t3,$t0
-+ seq $t8,$HH,$t1
-+ sltu $at,$HH,$t1
-+ and $t2,$t8
-+ sltu $v1,$t0,$a2
-+ or $at,$t2
-+ .set noreorder
-+ beqz $at,.L_bn_div_words_inner_loop2_done
-+ $SUBU $t1,$v1
-+ $SUBU $t0,$a2
-+ b .L_bn_div_words_inner_loop2
-+ $SUBU $QT,1
-+ .set reorder
-+.L_bn_div_words_inner_loop2_done:
-+
-+ $SUBU $a0,$t3,$t0
-+ or $v0,$QT
-+ $SRL $v1,$a0,$t9 # $v1 contains remainder if anybody wants it
-+ $SRL $a2,$t9 # restore $a2
-+
-+ .set noreorder
-+ move $a1,$v1
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+ $REG_L $t3,4*$SZREG($sp)
-+ $REG_L $t2,3*$SZREG($sp)
-+ $REG_L $t1,2*$SZREG($sp)
-+ $REG_L $t0,1*$SZREG($sp)
-+ $REG_L $gp,0*$SZREG($sp)
-+ $PTR_ADD $sp,6*$SZREG
-+___
-+$code.=<<___;
-+ jr $ra
-+ move $a0,$v0
-+.end bn_div_words_internal
-+___
-+undef $HH; undef $QT; undef $DH;
-+
-+($a_0,$a_1,$a_2,$a_3)=($t0,$t1,$t2,$t3);
-+($b_0,$b_1,$b_2,$b_3)=($ta0,$ta1,$ta2,$ta3);
-+
-+($a_4,$a_5,$a_6,$a_7)=($s0,$s2,$s4,$a1); # once we load a[7], no use for $a1
-+($b_4,$b_5,$b_6,$b_7)=($s1,$s3,$s5,$a2); # once we load b[7], no use for $a2
-+
-+($t_1,$t_2,$c_1,$c_2,$c_3)=($t8,$t9,$v0,$v1,$a3);
-+
-+$code.=<<___;
-+
-+.align 5
-+.globl bn_mul_comba8
-+.ent bn_mul_comba8
-+bn_mul_comba8:
-+ .set noreorder
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+ .frame $sp,12*$SZREG,$ra
-+ .mask 0x803ff008,-$SZREG
-+ $PTR_SUB $sp,12*$SZREG
-+ $REG_S $ra,11*$SZREG($sp)
-+ $REG_S $s5,10*$SZREG($sp)
-+ $REG_S $s4,9*$SZREG($sp)
-+ $REG_S $s3,8*$SZREG($sp)
-+ $REG_S $s2,7*$SZREG($sp)
-+ $REG_S $s1,6*$SZREG($sp)
-+ $REG_S $s0,5*$SZREG($sp)
-+ $REG_S $t3,4*$SZREG($sp)
-+ $REG_S $t2,3*$SZREG($sp)
-+ $REG_S $t1,2*$SZREG($sp)
-+ $REG_S $t0,1*$SZREG($sp)
-+ $REG_S $gp,0*$SZREG($sp)
-+___
-+$code.=<<___ if ($flavour !~ /nubi/i);
-+ .frame $sp,6*$SZREG,$ra
-+ .mask 0x003f0000,-$SZREG
-+ $PTR_SUB $sp,6*$SZREG
-+ $REG_S $s5,5*$SZREG($sp)
-+ $REG_S $s4,4*$SZREG($sp)
-+ $REG_S $s3,3*$SZREG($sp)
-+ $REG_S $s2,2*$SZREG($sp)
-+ $REG_S $s1,1*$SZREG($sp)
-+ $REG_S $s0,0*$SZREG($sp)
-+___
-+$code.=<<___;
-+
-+ .set reorder
-+ $LD $a_0,0($a1) # If compiled with -mips3 option on
-+ # R5000 box assembler barks on this
-+ # 1ine with "should not have mult/div
-+ # as last instruction in bb (R10K
-+ # bug)" warning. If anybody out there
-+ # has a clue about how to circumvent
-+ # this do send me a note.
-+ # <appro\@fy.chalmers.se>
-+
-+ $LD $b_0,0($a2)
-+ $LD $a_1,$BNSZ($a1)
-+ $LD $a_2,2*$BNSZ($a1)
-+ $MULTU $a_0,$b_0 # mul_add_c(a[0],b[0],c1,c2,c3);
-+ $LD $a_3,3*$BNSZ($a1)
-+ $LD $b_1,$BNSZ($a2)
-+ $LD $b_2,2*$BNSZ($a2)
-+ $LD $b_3,3*$BNSZ($a2)
-+ mflo $c_1
-+ mfhi $c_2
-+
-+ $LD $a_4,4*$BNSZ($a1)
-+ $LD $a_5,5*$BNSZ($a1)
-+ $MULTU $a_0,$b_1 # mul_add_c(a[0],b[1],c2,c3,c1);
-+ $LD $a_6,6*$BNSZ($a1)
-+ $LD $a_7,7*$BNSZ($a1)
-+ $LD $b_4,4*$BNSZ($a2)
-+ $LD $b_5,5*$BNSZ($a2)
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_2,$t_1
-+ sltu $at,$c_2,$t_1
-+ $MULTU $a_1,$b_0 # mul_add_c(a[1],b[0],c2,c3,c1);
-+ $ADDU $c_3,$t_2,$at
-+ $LD $b_6,6*$BNSZ($a2)
-+ $LD $b_7,7*$BNSZ($a2)
-+ $ST $c_1,0($a0) # r[0]=c1;
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_2,$t_1
-+ sltu $at,$c_2,$t_1
-+ $MULTU $a_2,$b_0 # mul_add_c(a[2],b[0],c3,c1,c2);
-+ $ADDU $t_2,$at
-+ $ADDU $c_3,$t_2
-+ sltu $c_1,$c_3,$t_2
-+ $ST $c_2,$BNSZ($a0) # r[1]=c2;
-+
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_3,$t_1
-+ sltu $at,$c_3,$t_1
-+ $MULTU $a_1,$b_1 # mul_add_c(a[1],b[1],c3,c1,c2);
-+ $ADDU $t_2,$at
-+ $ADDU $c_1,$t_2
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_3,$t_1
-+ sltu $at,$c_3,$t_1
-+ $MULTU $a_0,$b_2 # mul_add_c(a[0],b[2],c3,c1,c2);
-+ $ADDU $t_2,$at
-+ $ADDU $c_1,$t_2
-+ sltu $c_2,$c_1,$t_2
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_3,$t_1
-+ sltu $at,$c_3,$t_1
-+ $MULTU $a_0,$b_3 # mul_add_c(a[0],b[3],c1,c2,c3);
-+ $ADDU $t_2,$at
-+ $ADDU $c_1,$t_2
-+ sltu $at,$c_1,$t_2
-+ $ADDU $c_2,$at
-+ $ST $c_3,2*$BNSZ($a0) # r[2]=c3;
-+
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_1,$t_1
-+ sltu $at,$c_1,$t_1
-+ $MULTU $a_1,$b_2 # mul_add_c(a[1],b[2],c1,c2,c3);
-+ $ADDU $t_2,$at
-+ $ADDU $c_2,$t_2
-+ sltu $c_3,$c_2,$t_2
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_1,$t_1
-+ sltu $at,$c_1,$t_1
-+ $MULTU $a_2,$b_1 # mul_add_c(a[2],b[1],c1,c2,c3);
-+ $ADDU $t_2,$at
-+ $ADDU $c_2,$t_2
-+ sltu $at,$c_2,$t_2
-+ $ADDU $c_3,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_1,$t_1
-+ sltu $at,$c_1,$t_1
-+ $MULTU $a_3,$b_0 # mul_add_c(a[3],b[0],c1,c2,c3);
-+ $ADDU $t_2,$at
-+ $ADDU $c_2,$t_2
-+ sltu $at,$c_2,$t_2
-+ $ADDU $c_3,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_1,$t_1
-+ sltu $at,$c_1,$t_1
-+ $MULTU $a_4,$b_0 # mul_add_c(a[4],b[0],c2,c3,c1);
-+ $ADDU $t_2,$at
-+ $ADDU $c_2,$t_2
-+ sltu $at,$c_2,$t_2
-+ $ADDU $c_3,$at
-+ $ST $c_1,3*$BNSZ($a0) # r[3]=c1;
-+
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_2,$t_1
-+ sltu $at,$c_2,$t_1
-+ $MULTU $a_3,$b_1 # mul_add_c(a[3],b[1],c2,c3,c1);
-+ $ADDU $t_2,$at
-+ $ADDU $c_3,$t_2
-+ sltu $c_1,$c_3,$t_2
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_2,$t_1
-+ sltu $at,$c_2,$t_1
-+ $MULTU $a_2,$b_2 # mul_add_c(a[2],b[2],c2,c3,c1);
-+ $ADDU $t_2,$at
-+ $ADDU $c_3,$t_2
-+ sltu $at,$c_3,$t_2
-+ $ADDU $c_1,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_2,$t_1
-+ sltu $at,$c_2,$t_1
-+ $MULTU $a_1,$b_3 # mul_add_c(a[1],b[3],c2,c3,c1);
-+ $ADDU $t_2,$at
-+ $ADDU $c_3,$t_2
-+ sltu $at,$c_3,$t_2
-+ $ADDU $c_1,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_2,$t_1
-+ sltu $at,$c_2,$t_1
-+ $MULTU $a_0,$b_4 # mul_add_c(a[0],b[4],c2,c3,c1);
-+ $ADDU $t_2,$at
-+ $ADDU $c_3,$t_2
-+ sltu $at,$c_3,$t_2
-+ $ADDU $c_1,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_2,$t_1
-+ sltu $at,$c_2,$t_1
-+ $MULTU $a_0,$b_5 # mul_add_c(a[0],b[5],c3,c1,c2);
-+ $ADDU $t_2,$at
-+ $ADDU $c_3,$t_2
-+ sltu $at,$c_3,$t_2
-+ $ADDU $c_1,$at
-+ $ST $c_2,4*$BNSZ($a0) # r[4]=c2;
-+
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_3,$t_1
-+ sltu $at,$c_3,$t_1
-+ $MULTU $a_1,$b_4 # mul_add_c(a[1],b[4],c3,c1,c2);
-+ $ADDU $t_2,$at
-+ $ADDU $c_1,$t_2
-+ sltu $c_2,$c_1,$t_2
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_3,$t_1
-+ sltu $at,$c_3,$t_1
-+ $MULTU $a_2,$b_3 # mul_add_c(a[2],b[3],c3,c1,c2);
-+ $ADDU $t_2,$at
-+ $ADDU $c_1,$t_2
-+ sltu $at,$c_1,$t_2
-+ $ADDU $c_2,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_3,$t_1
-+ sltu $at,$c_3,$t_1
-+ $MULTU $a_3,$b_2 # mul_add_c(a[3],b[2],c3,c1,c2);
-+ $ADDU $t_2,$at
-+ $ADDU $c_1,$t_2
-+ sltu $at,$c_1,$t_2
-+ $ADDU $c_2,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_3,$t_1
-+ sltu $at,$c_3,$t_1
-+ $MULTU $a_4,$b_1 # mul_add_c(a[4],b[1],c3,c1,c2);
-+ $ADDU $t_2,$at
-+ $ADDU $c_1,$t_2
-+ sltu $at,$c_1,$t_2
-+ $ADDU $c_2,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_3,$t_1
-+ sltu $at,$c_3,$t_1
-+ $MULTU $a_5,$b_0 # mul_add_c(a[5],b[0],c3,c1,c2);
-+ $ADDU $t_2,$at
-+ $ADDU $c_1,$t_2
-+ sltu $at,$c_1,$t_2
-+ $ADDU $c_2,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_3,$t_1
-+ sltu $at,$c_3,$t_1
-+ $MULTU $a_6,$b_0 # mul_add_c(a[6],b[0],c1,c2,c3);
-+ $ADDU $t_2,$at
-+ $ADDU $c_1,$t_2
-+ sltu $at,$c_1,$t_2
-+ $ADDU $c_2,$at
-+ $ST $c_3,5*$BNSZ($a0) # r[5]=c3;
-+
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_1,$t_1
-+ sltu $at,$c_1,$t_1
-+ $MULTU $a_5,$b_1 # mul_add_c(a[5],b[1],c1,c2,c3);
-+ $ADDU $t_2,$at
-+ $ADDU $c_2,$t_2
-+ sltu $c_3,$c_2,$t_2
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_1,$t_1
-+ sltu $at,$c_1,$t_1
-+ $MULTU $a_4,$b_2 # mul_add_c(a[4],b[2],c1,c2,c3);
-+ $ADDU $t_2,$at
-+ $ADDU $c_2,$t_2
-+ sltu $at,$c_2,$t_2
-+ $ADDU $c_3,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_1,$t_1
-+ sltu $at,$c_1,$t_1
-+ $MULTU $a_3,$b_3 # mul_add_c(a[3],b[3],c1,c2,c3);
-+ $ADDU $t_2,$at
-+ $ADDU $c_2,$t_2
-+ sltu $at,$c_2,$t_2
-+ $ADDU $c_3,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_1,$t_1
-+ sltu $at,$c_1,$t_1
-+ $MULTU $a_2,$b_4 # mul_add_c(a[2],b[4],c1,c2,c3);
-+ $ADDU $t_2,$at
-+ $ADDU $c_2,$t_2
-+ sltu $at,$c_2,$t_2
-+ $ADDU $c_3,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_1,$t_1
-+ sltu $at,$c_1,$t_1
-+ $MULTU $a_1,$b_5 # mul_add_c(a[1],b[5],c1,c2,c3);
-+ $ADDU $t_2,$at
-+ $ADDU $c_2,$t_2
-+ sltu $at,$c_2,$t_2
-+ $ADDU $c_3,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_1,$t_1
-+ sltu $at,$c_1,$t_1
-+ $MULTU $a_0,$b_6 # mul_add_c(a[0],b[6],c1,c2,c3);
-+ $ADDU $t_2,$at
-+ $ADDU $c_2,$t_2
-+ sltu $at,$c_2,$t_2
-+ $ADDU $c_3,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_1,$t_1
-+ sltu $at,$c_1,$t_1
-+ $MULTU $a_0,$b_7 # mul_add_c(a[0],b[7],c2,c3,c1);
-+ $ADDU $t_2,$at
-+ $ADDU $c_2,$t_2
-+ sltu $at,$c_2,$t_2
-+ $ADDU $c_3,$at
-+ $ST $c_1,6*$BNSZ($a0) # r[6]=c1;
-+
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_2,$t_1
-+ sltu $at,$c_2,$t_1
-+ $MULTU $a_1,$b_6 # mul_add_c(a[1],b[6],c2,c3,c1);
-+ $ADDU $t_2,$at
-+ $ADDU $c_3,$t_2
-+ sltu $c_1,$c_3,$t_2
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_2,$t_1
-+ sltu $at,$c_2,$t_1
-+ $MULTU $a_2,$b_5 # mul_add_c(a[2],b[5],c2,c3,c1);
-+ $ADDU $t_2,$at
-+ $ADDU $c_3,$t_2
-+ sltu $at,$c_3,$t_2
-+ $ADDU $c_1,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_2,$t_1
-+ sltu $at,$c_2,$t_1
-+ $MULTU $a_3,$b_4 # mul_add_c(a[3],b[4],c2,c3,c1);
-+ $ADDU $t_2,$at
-+ $ADDU $c_3,$t_2
-+ sltu $at,$c_3,$t_2
-+ $ADDU $c_1,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_2,$t_1
-+ sltu $at,$c_2,$t_1
-+ $MULTU $a_4,$b_3 # mul_add_c(a[4],b[3],c2,c3,c1);
-+ $ADDU $t_2,$at
-+ $ADDU $c_3,$t_2
-+ sltu $at,$c_3,$t_2
-+ $ADDU $c_1,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_2,$t_1
-+ sltu $at,$c_2,$t_1
-+ $MULTU $a_5,$b_2 # mul_add_c(a[5],b[2],c2,c3,c1);
-+ $ADDU $t_2,$at
-+ $ADDU $c_3,$t_2
-+ sltu $at,$c_3,$t_2
-+ $ADDU $c_1,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_2,$t_1
-+ sltu $at,$c_2,$t_1
-+ $MULTU $a_6,$b_1 # mul_add_c(a[6],b[1],c2,c3,c1);
-+ $ADDU $t_2,$at
-+ $ADDU $c_3,$t_2
-+ sltu $at,$c_3,$t_2
-+ $ADDU $c_1,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_2,$t_1
-+ sltu $at,$c_2,$t_1
-+ $MULTU $a_7,$b_0 # mul_add_c(a[7],b[0],c2,c3,c1);
-+ $ADDU $t_2,$at
-+ $ADDU $c_3,$t_2
-+ sltu $at,$c_3,$t_2
-+ $ADDU $c_1,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_2,$t_1
-+ sltu $at,$c_2,$t_1
-+ $MULTU $a_7,$b_1 # mul_add_c(a[7],b[1],c3,c1,c2);
-+ $ADDU $t_2,$at
-+ $ADDU $c_3,$t_2
-+ sltu $at,$c_3,$t_2
-+ $ADDU $c_1,$at
-+ $ST $c_2,7*$BNSZ($a0) # r[7]=c2;
-+
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_3,$t_1
-+ sltu $at,$c_3,$t_1
-+ $MULTU $a_6,$b_2 # mul_add_c(a[6],b[2],c3,c1,c2);
-+ $ADDU $t_2,$at
-+ $ADDU $c_1,$t_2
-+ sltu $c_2,$c_1,$t_2
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_3,$t_1
-+ sltu $at,$c_3,$t_1
-+ $MULTU $a_5,$b_3 # mul_add_c(a[5],b[3],c3,c1,c2);
-+ $ADDU $t_2,$at
-+ $ADDU $c_1,$t_2
-+ sltu $at,$c_1,$t_2
-+ $ADDU $c_2,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_3,$t_1
-+ sltu $at,$c_3,$t_1
-+ $MULTU $a_4,$b_4 # mul_add_c(a[4],b[4],c3,c1,c2);
-+ $ADDU $t_2,$at
-+ $ADDU $c_1,$t_2
-+ sltu $at,$c_1,$t_2
-+ $ADDU $c_2,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_3,$t_1
-+ sltu $at,$c_3,$t_1
-+ $MULTU $a_3,$b_5 # mul_add_c(a[3],b[5],c3,c1,c2);
-+ $ADDU $t_2,$at
-+ $ADDU $c_1,$t_2
-+ sltu $at,$c_1,$t_2
-+ $ADDU $c_2,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_3,$t_1
-+ sltu $at,$c_3,$t_1
-+ $MULTU $a_2,$b_6 # mul_add_c(a[2],b[6],c3,c1,c2);
-+ $ADDU $t_2,$at
-+ $ADDU $c_1,$t_2
-+ sltu $at,$c_1,$t_2
-+ $ADDU $c_2,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_3,$t_1
-+ sltu $at,$c_3,$t_1
-+ $MULTU $a_1,$b_7 # mul_add_c(a[1],b[7],c3,c1,c2);
-+ $ADDU $t_2,$at
-+ $ADDU $c_1,$t_2
-+ sltu $at,$c_1,$t_2
-+ $ADDU $c_2,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_3,$t_1
-+ sltu $at,$c_3,$t_1
-+ $MULTU $a_2,$b_7 # mul_add_c(a[2],b[7],c1,c2,c3);
-+ $ADDU $t_2,$at
-+ $ADDU $c_1,$t_2
-+ sltu $at,$c_1,$t_2
-+ $ADDU $c_2,$at
-+ $ST $c_3,8*$BNSZ($a0) # r[8]=c3;
-+
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_1,$t_1
-+ sltu $at,$c_1,$t_1
-+ $MULTU $a_3,$b_6 # mul_add_c(a[3],b[6],c1,c2,c3);
-+ $ADDU $t_2,$at
-+ $ADDU $c_2,$t_2
-+ sltu $c_3,$c_2,$t_2
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_1,$t_1
-+ sltu $at,$c_1,$t_1
-+ $MULTU $a_4,$b_5 # mul_add_c(a[4],b[5],c1,c2,c3);
-+ $ADDU $t_2,$at
-+ $ADDU $c_2,$t_2
-+ sltu $at,$c_2,$t_2
-+ $ADDU $c_3,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_1,$t_1
-+ sltu $at,$c_1,$t_1
-+ $MULTU $a_5,$b_4 # mul_add_c(a[5],b[4],c1,c2,c3);
-+ $ADDU $t_2,$at
-+ $ADDU $c_2,$t_2
-+ sltu $at,$c_2,$t_2
-+ $ADDU $c_3,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_1,$t_1
-+ sltu $at,$c_1,$t_1
-+ $MULTU $a_6,$b_3 # mul_add_c(a[6],b[3],c1,c2,c3);
-+ $ADDU $t_2,$at
-+ $ADDU $c_2,$t_2
-+ sltu $at,$c_2,$t_2
-+ $ADDU $c_3,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_1,$t_1
-+ sltu $at,$c_1,$t_1
-+ $MULTU $a_7,$b_2 # mul_add_c(a[7],b[2],c1,c2,c3);
-+ $ADDU $t_2,$at
-+ $ADDU $c_2,$t_2
-+ sltu $at,$c_2,$t_2
-+ $ADDU $c_3,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_1,$t_1
-+ sltu $at,$c_1,$t_1
-+ $MULTU $a_7,$b_3 # mul_add_c(a[7],b[3],c2,c3,c1);
-+ $ADDU $t_2,$at
-+ $ADDU $c_2,$t_2
-+ sltu $at,$c_2,$t_2
-+ $ADDU $c_3,$at
-+ $ST $c_1,9*$BNSZ($a0) # r[9]=c1;
-+
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_2,$t_1
-+ sltu $at,$c_2,$t_1
-+ $MULTU $a_6,$b_4 # mul_add_c(a[6],b[4],c2,c3,c1);
-+ $ADDU $t_2,$at
-+ $ADDU $c_3,$t_2
-+ sltu $c_1,$c_3,$t_2
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_2,$t_1
-+ sltu $at,$c_2,$t_1
-+ $MULTU $a_5,$b_5 # mul_add_c(a[5],b[5],c2,c3,c1);
-+ $ADDU $t_2,$at
-+ $ADDU $c_3,$t_2
-+ sltu $at,$c_3,$t_2
-+ $ADDU $c_1,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_2,$t_1
-+ sltu $at,$c_2,$t_1
-+ $MULTU $a_4,$b_6 # mul_add_c(a[4],b[6],c2,c3,c1);
-+ $ADDU $t_2,$at
-+ $ADDU $c_3,$t_2
-+ sltu $at,$c_3,$t_2
-+ $ADDU $c_1,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_2,$t_1
-+ sltu $at,$c_2,$t_1
-+ $MULTU $a_3,$b_7 # mul_add_c(a[3],b[7],c2,c3,c1);
-+ $ADDU $t_2,$at
-+ $ADDU $c_3,$t_2
-+ sltu $at,$c_3,$t_2
-+ $ADDU $c_1,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_2,$t_1
-+ sltu $at,$c_2,$t_1
-+ $MULTU $a_4,$b_7 # mul_add_c(a[4],b[7],c3,c1,c2);
-+ $ADDU $t_2,$at
-+ $ADDU $c_3,$t_2
-+ sltu $at,$c_3,$t_2
-+ $ADDU $c_1,$at
-+ $ST $c_2,10*$BNSZ($a0) # r[10]=c2;
-+
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_3,$t_1
-+ sltu $at,$c_3,$t_1
-+ $MULTU $a_5,$b_6 # mul_add_c(a[5],b[6],c3,c1,c2);
-+ $ADDU $t_2,$at
-+ $ADDU $c_1,$t_2
-+ sltu $c_2,$c_1,$t_2
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_3,$t_1
-+ sltu $at,$c_3,$t_1
-+ $MULTU $a_6,$b_5 # mul_add_c(a[6],b[5],c3,c1,c2);
-+ $ADDU $t_2,$at
-+ $ADDU $c_1,$t_2
-+ sltu $at,$c_1,$t_2
-+ $ADDU $c_2,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_3,$t_1
-+ sltu $at,$c_3,$t_1
-+ $MULTU $a_7,$b_4 # mul_add_c(a[7],b[4],c3,c1,c2);
-+ $ADDU $t_2,$at
-+ $ADDU $c_1,$t_2
-+ sltu $at,$c_1,$t_2
-+ $ADDU $c_2,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_3,$t_1
-+ sltu $at,$c_3,$t_1
-+ $MULTU $a_7,$b_5 # mul_add_c(a[7],b[5],c1,c2,c3);
-+ $ADDU $t_2,$at
-+ $ADDU $c_1,$t_2
-+ sltu $at,$c_1,$t_2
-+ $ADDU $c_2,$at
-+ $ST $c_3,11*$BNSZ($a0) # r[11]=c3;
-+
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_1,$t_1
-+ sltu $at,$c_1,$t_1
-+ $MULTU $a_6,$b_6 # mul_add_c(a[6],b[6],c1,c2,c3);
-+ $ADDU $t_2,$at
-+ $ADDU $c_2,$t_2
-+ sltu $c_3,$c_2,$t_2
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_1,$t_1
-+ sltu $at,$c_1,$t_1
-+ $MULTU $a_5,$b_7 # mul_add_c(a[5],b[7],c1,c2,c3);
-+ $ADDU $t_2,$at
-+ $ADDU $c_2,$t_2
-+ sltu $at,$c_2,$t_2
-+ $ADDU $c_3,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_1,$t_1
-+ sltu $at,$c_1,$t_1
-+ $MULTU $a_6,$b_7 # mul_add_c(a[6],b[7],c2,c3,c1);
-+ $ADDU $t_2,$at
-+ $ADDU $c_2,$t_2
-+ sltu $at,$c_2,$t_2
-+ $ADDU $c_3,$at
-+ $ST $c_1,12*$BNSZ($a0) # r[12]=c1;
-+
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_2,$t_1
-+ sltu $at,$c_2,$t_1
-+ $MULTU $a_7,$b_6 # mul_add_c(a[7],b[6],c2,c3,c1);
-+ $ADDU $t_2,$at
-+ $ADDU $c_3,$t_2
-+ sltu $c_1,$c_3,$t_2
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_2,$t_1
-+ sltu $at,$c_2,$t_1
-+ $MULTU $a_7,$b_7 # mul_add_c(a[7],b[7],c3,c1,c2);
-+ $ADDU $t_2,$at
-+ $ADDU $c_3,$t_2
-+ sltu $at,$c_3,$t_2
-+ $ADDU $c_1,$at
-+ $ST $c_2,13*$BNSZ($a0) # r[13]=c2;
-+
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_3,$t_1
-+ sltu $at,$c_3,$t_1
-+ $ADDU $t_2,$at
-+ $ADDU $c_1,$t_2
-+ $ST $c_3,14*$BNSZ($a0) # r[14]=c3;
-+ $ST $c_1,15*$BNSZ($a0) # r[15]=c1;
-+
-+ .set noreorder
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+ $REG_L $s5,10*$SZREG($sp)
-+ $REG_L $s4,9*$SZREG($sp)
-+ $REG_L $s3,8*$SZREG($sp)
-+ $REG_L $s2,7*$SZREG($sp)
-+ $REG_L $s1,6*$SZREG($sp)
-+ $REG_L $s0,5*$SZREG($sp)
-+ $REG_L $t3,4*$SZREG($sp)
-+ $REG_L $t2,3*$SZREG($sp)
-+ $REG_L $t1,2*$SZREG($sp)
-+ $REG_L $t0,1*$SZREG($sp)
-+ $REG_L $gp,0*$SZREG($sp)
-+ jr $ra
-+ $PTR_ADD $sp,12*$SZREG
-+___
-+$code.=<<___ if ($flavour !~ /nubi/i);
-+ $REG_L $s5,5*$SZREG($sp)
-+ $REG_L $s4,4*$SZREG($sp)
-+ $REG_L $s3,3*$SZREG($sp)
-+ $REG_L $s2,2*$SZREG($sp)
-+ $REG_L $s1,1*$SZREG($sp)
-+ $REG_L $s0,0*$SZREG($sp)
-+ jr $ra
-+ $PTR_ADD $sp,6*$SZREG
-+___
-+$code.=<<___;
-+.end bn_mul_comba8
-+
-+.align 5
-+.globl bn_mul_comba4
-+.ent bn_mul_comba4
-+bn_mul_comba4:
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+ .frame $sp,6*$SZREG,$ra
-+ .mask 0x8000f008,-$SZREG
-+ .set noreorder
-+ $PTR_SUB $sp,6*$SZREG
-+ $REG_S $ra,5*$SZREG($sp)
-+ $REG_S $t3,4*$SZREG($sp)
-+ $REG_S $t2,3*$SZREG($sp)
-+ $REG_S $t1,2*$SZREG($sp)
-+ $REG_S $t0,1*$SZREG($sp)
-+ $REG_S $gp,0*$SZREG($sp)
-+___
-+$code.=<<___;
-+ .set reorder
-+ $LD $a_0,0($a1)
-+ $LD $b_0,0($a2)
-+ $LD $a_1,$BNSZ($a1)
-+ $LD $a_2,2*$BNSZ($a1)
-+ $MULTU $a_0,$b_0 # mul_add_c(a[0],b[0],c1,c2,c3);
-+ $LD $a_3,3*$BNSZ($a1)
-+ $LD $b_1,$BNSZ($a2)
-+ $LD $b_2,2*$BNSZ($a2)
-+ $LD $b_3,3*$BNSZ($a2)
-+ mflo $c_1
-+ mfhi $c_2
-+ $ST $c_1,0($a0)
-+
-+ $MULTU $a_0,$b_1 # mul_add_c(a[0],b[1],c2,c3,c1);
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_2,$t_1
-+ sltu $at,$c_2,$t_1
-+ $MULTU $a_1,$b_0 # mul_add_c(a[1],b[0],c2,c3,c1);
-+ $ADDU $c_3,$t_2,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_2,$t_1
-+ sltu $at,$c_2,$t_1
-+ $MULTU $a_2,$b_0 # mul_add_c(a[2],b[0],c3,c1,c2);
-+ $ADDU $t_2,$at
-+ $ADDU $c_3,$t_2
-+ sltu $c_1,$c_3,$t_2
-+ $ST $c_2,$BNSZ($a0)
-+
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_3,$t_1
-+ sltu $at,$c_3,$t_1
-+ $MULTU $a_1,$b_1 # mul_add_c(a[1],b[1],c3,c1,c2);
-+ $ADDU $t_2,$at
-+ $ADDU $c_1,$t_2
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_3,$t_1
-+ sltu $at,$c_3,$t_1
-+ $MULTU $a_0,$b_2 # mul_add_c(a[0],b[2],c3,c1,c2);
-+ $ADDU $t_2,$at
-+ $ADDU $c_1,$t_2
-+ sltu $c_2,$c_1,$t_2
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_3,$t_1
-+ sltu $at,$c_3,$t_1
-+ $MULTU $a_0,$b_3 # mul_add_c(a[0],b[3],c1,c2,c3);
-+ $ADDU $t_2,$at
-+ $ADDU $c_1,$t_2
-+ sltu $at,$c_1,$t_2
-+ $ADDU $c_2,$at
-+ $ST $c_3,2*$BNSZ($a0)
-+
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_1,$t_1
-+ sltu $at,$c_1,$t_1
-+ $MULTU $a_1,$b_2 # mul_add_c(a[1],b[2],c1,c2,c3);
-+ $ADDU $t_2,$at
-+ $ADDU $c_2,$t_2
-+ sltu $c_3,$c_2,$t_2
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_1,$t_1
-+ sltu $at,$c_1,$t_1
-+ $MULTU $a_2,$b_1 # mul_add_c(a[2],b[1],c1,c2,c3);
-+ $ADDU $t_2,$at
-+ $ADDU $c_2,$t_2
-+ sltu $at,$c_2,$t_2
-+ $ADDU $c_3,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_1,$t_1
-+ sltu $at,$c_1,$t_1
-+ $MULTU $a_3,$b_0 # mul_add_c(a[3],b[0],c1,c2,c3);
-+ $ADDU $t_2,$at
-+ $ADDU $c_2,$t_2
-+ sltu $at,$c_2,$t_2
-+ $ADDU $c_3,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_1,$t_1
-+ sltu $at,$c_1,$t_1
-+ $MULTU $a_3,$b_1 # mul_add_c(a[3],b[1],c2,c3,c1);
-+ $ADDU $t_2,$at
-+ $ADDU $c_2,$t_2
-+ sltu $at,$c_2,$t_2
-+ $ADDU $c_3,$at
-+ $ST $c_1,3*$BNSZ($a0)
-+
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_2,$t_1
-+ sltu $at,$c_2,$t_1
-+ $MULTU $a_2,$b_2 # mul_add_c(a[2],b[2],c2,c3,c1);
-+ $ADDU $t_2,$at
-+ $ADDU $c_3,$t_2
-+ sltu $c_1,$c_3,$t_2
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_2,$t_1
-+ sltu $at,$c_2,$t_1
-+ $MULTU $a_1,$b_3 # mul_add_c(a[1],b[3],c2,c3,c1);
-+ $ADDU $t_2,$at
-+ $ADDU $c_3,$t_2
-+ sltu $at,$c_3,$t_2
-+ $ADDU $c_1,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_2,$t_1
-+ sltu $at,$c_2,$t_1
-+ $MULTU $a_2,$b_3 # mul_add_c(a[2],b[3],c3,c1,c2);
-+ $ADDU $t_2,$at
-+ $ADDU $c_3,$t_2
-+ sltu $at,$c_3,$t_2
-+ $ADDU $c_1,$at
-+ $ST $c_2,4*$BNSZ($a0)
-+
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_3,$t_1
-+ sltu $at,$c_3,$t_1
-+ $MULTU $a_3,$b_2 # mul_add_c(a[3],b[2],c3,c1,c2);
-+ $ADDU $t_2,$at
-+ $ADDU $c_1,$t_2
-+ sltu $c_2,$c_1,$t_2
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_3,$t_1
-+ sltu $at,$c_3,$t_1
-+ $MULTU $a_3,$b_3 # mul_add_c(a[3],b[3],c1,c2,c3);
-+ $ADDU $t_2,$at
-+ $ADDU $c_1,$t_2
-+ sltu $at,$c_1,$t_2
-+ $ADDU $c_2,$at
-+ $ST $c_3,5*$BNSZ($a0)
-+
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_1,$t_1
-+ sltu $at,$c_1,$t_1
-+ $ADDU $t_2,$at
-+ $ADDU $c_2,$t_2
-+ $ST $c_1,6*$BNSZ($a0)
-+ $ST $c_2,7*$BNSZ($a0)
-+
-+ .set noreorder
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+ $REG_L $t3,4*$SZREG($sp)
-+ $REG_L $t2,3*$SZREG($sp)
-+ $REG_L $t1,2*$SZREG($sp)
-+ $REG_L $t0,1*$SZREG($sp)
-+ $REG_L $gp,0*$SZREG($sp)
-+ $PTR_ADD $sp,6*$SZREG
-+___
-+$code.=<<___;
-+ jr $ra
-+ nop
-+.end bn_mul_comba4
-+___
-+
-+($a_4,$a_5,$a_6,$a_7)=($b_0,$b_1,$b_2,$b_3);
-+
-+$code.=<<___;
-+
-+.align 5
-+.globl bn_sqr_comba8
-+.ent bn_sqr_comba8
-+bn_sqr_comba8:
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+ .frame $sp,6*$SZREG,$ra
-+ .mask 0x8000f008,-$SZREG
-+ .set noreorder
-+ $PTR_SUB $sp,6*$SZREG
-+ $REG_S $ra,5*$SZREG($sp)
-+ $REG_S $t3,4*$SZREG($sp)
-+ $REG_S $t2,3*$SZREG($sp)
-+ $REG_S $t1,2*$SZREG($sp)
-+ $REG_S $t0,1*$SZREG($sp)
-+ $REG_S $gp,0*$SZREG($sp)
-+___
-+$code.=<<___;
-+ .set reorder
-+ $LD $a_0,0($a1)
-+ $LD $a_1,$BNSZ($a1)
-+ $LD $a_2,2*$BNSZ($a1)
-+ $LD $a_3,3*$BNSZ($a1)
-+
-+ $MULTU $a_0,$a_0 # mul_add_c(a[0],b[0],c1,c2,c3);
-+ $LD $a_4,4*$BNSZ($a1)
-+ $LD $a_5,5*$BNSZ($a1)
-+ $LD $a_6,6*$BNSZ($a1)
-+ $LD $a_7,7*$BNSZ($a1)
-+ mflo $c_1
-+ mfhi $c_2
-+ $ST $c_1,0($a0)
-+
-+ $MULTU $a_0,$a_1 # mul_add_c2(a[0],b[1],c2,c3,c1);
-+ mflo $t_1
-+ mfhi $t_2
-+ slt $c_1,$t_2,$zero
-+ $SLL $t_2,1
-+ $MULTU $a_2,$a_0 # mul_add_c2(a[2],b[0],c3,c1,c2);
-+ slt $a2,$t_1,$zero
-+ $ADDU $t_2,$a2
-+ $SLL $t_1,1
-+ $ADDU $c_2,$t_1
-+ sltu $at,$c_2,$t_1
-+ $ADDU $c_3,$t_2,$at
-+ $ST $c_2,$BNSZ($a0)
-+
-+ mflo $t_1
-+ mfhi $t_2
-+ slt $c_2,$t_2,$zero
-+ $SLL $t_2,1
-+ $MULTU $a_1,$a_1 # mul_add_c(a[1],b[1],c3,c1,c2);
-+ slt $a2,$t_1,$zero
-+ $ADDU $t_2,$a2
-+ $SLL $t_1,1
-+ $ADDU $c_3,$t_1
-+ sltu $at,$c_3,$t_1
-+ $ADDU $t_2,$at
-+ $ADDU $c_1,$t_2
-+ sltu $at,$c_1,$t_2
-+ $ADDU $c_2,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_3,$t_1
-+ sltu $at,$c_3,$t_1
-+ $MULTU $a_0,$a_3 # mul_add_c2(a[0],b[3],c1,c2,c3);
-+ $ADDU $t_2,$at
-+ $ADDU $c_1,$t_2
-+ sltu $at,$c_1,$t_2
-+ $ADDU $c_2,$at
-+ $ST $c_3,2*$BNSZ($a0)
-+
-+ mflo $t_1
-+ mfhi $t_2
-+ slt $c_3,$t_2,$zero
-+ $SLL $t_2,1
-+ $MULTU $a_1,$a_2 # mul_add_c2(a[1],b[2],c1,c2,c3);
-+ slt $a2,$t_1,$zero
-+ $ADDU $t_2,$a2
-+ $SLL $t_1,1
-+ $ADDU $c_1,$t_1
-+ sltu $at,$c_1,$t_1
-+ $ADDU $t_2,$at
-+ $ADDU $c_2,$t_2
-+ sltu $at,$c_2,$t_2
-+ $ADDU $c_3,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ slt $at,$t_2,$zero
-+ $ADDU $c_3,$at
-+ $MULTU $a_4,$a_0 # mul_add_c2(a[4],b[0],c2,c3,c1);
-+ $SLL $t_2,1
-+ slt $a2,$t_1,$zero
-+ $ADDU $t_2,$a2
-+ $SLL $t_1,1
-+ $ADDU $c_1,$t_1
-+ sltu $at,$c_1,$t_1
-+ $ADDU $t_2,$at
-+ $ADDU $c_2,$t_2
-+ sltu $at,$c_2,$t_2
-+ $ADDU $c_3,$at
-+ $ST $c_1,3*$BNSZ($a0)
-+
-+ mflo $t_1
-+ mfhi $t_2
-+ slt $c_1,$t_2,$zero
-+ $SLL $t_2,1
-+ $MULTU $a_3,$a_1 # mul_add_c2(a[3],b[1],c2,c3,c1);
-+ slt $a2,$t_1,$zero
-+ $ADDU $t_2,$a2
-+ $SLL $t_1,1
-+ $ADDU $c_2,$t_1
-+ sltu $at,$c_2,$t_1
-+ $ADDU $t_2,$at
-+ $ADDU $c_3,$t_2
-+ sltu $at,$c_3,$t_2
-+ $ADDU $c_1,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ slt $at,$t_2,$zero
-+ $ADDU $c_1,$at
-+ $MULTU $a_2,$a_2 # mul_add_c(a[2],b[2],c2,c3,c1);
-+ $SLL $t_2,1
-+ slt $a2,$t_1,$zero
-+ $ADDU $t_2,$a2
-+ $SLL $t_1,1
-+ $ADDU $c_2,$t_1
-+ sltu $at,$c_2,$t_1
-+ $ADDU $t_2,$at
-+ $ADDU $c_3,$t_2
-+ sltu $at,$c_3,$t_2
-+ $ADDU $c_1,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_2,$t_1
-+ sltu $at,$c_2,$t_1
-+ $MULTU $a_0,$a_5 # mul_add_c2(a[0],b[5],c3,c1,c2);
-+ $ADDU $t_2,$at
-+ $ADDU $c_3,$t_2
-+ sltu $at,$c_3,$t_2
-+ $ADDU $c_1,$at
-+ $ST $c_2,4*$BNSZ($a0)
-+
-+ mflo $t_1
-+ mfhi $t_2
-+ slt $c_2,$t_2,$zero
-+ $SLL $t_2,1
-+ $MULTU $a_1,$a_4 # mul_add_c2(a[1],b[4],c3,c1,c2);
-+ slt $a2,$t_1,$zero
-+ $ADDU $t_2,$a2
-+ $SLL $t_1,1
-+ $ADDU $c_3,$t_1
-+ sltu $at,$c_3,$t_1
-+ $ADDU $t_2,$at
-+ $ADDU $c_1,$t_2
-+ sltu $at,$c_1,$t_2
-+ $ADDU $c_2,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ slt $at,$t_2,$zero
-+ $ADDU $c_2,$at
-+ $MULTU $a_2,$a_3 # mul_add_c2(a[2],b[3],c3,c1,c2);
-+ $SLL $t_2,1
-+ slt $a2,$t_1,$zero
-+ $ADDU $t_2,$a2
-+ $SLL $t_1,1
-+ $ADDU $c_3,$t_1
-+ sltu $at,$c_3,$t_1
-+ $ADDU $t_2,$at
-+ $ADDU $c_1,$t_2
-+ sltu $at,$c_1,$t_2
-+ $ADDU $c_2,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ slt $at,$t_2,$zero
-+ $MULTU $a_6,$a_0 # mul_add_c2(a[6],b[0],c1,c2,c3);
-+ $ADDU $c_2,$at
-+ $SLL $t_2,1
-+ slt $a2,$t_1,$zero
-+ $ADDU $t_2,$a2
-+ $SLL $t_1,1
-+ $ADDU $c_3,$t_1
-+ sltu $at,$c_3,$t_1
-+ $ADDU $t_2,$at
-+ $ADDU $c_1,$t_2
-+ sltu $at,$c_1,$t_2
-+ $ADDU $c_2,$at
-+ $ST $c_3,5*$BNSZ($a0)
-+
-+ mflo $t_1
-+ mfhi $t_2
-+ slt $c_3,$t_2,$zero
-+ $SLL $t_2,1
-+ $MULTU $a_5,$a_1 # mul_add_c2(a[5],b[1],c1,c2,c3);
-+ slt $a2,$t_1,$zero
-+ $ADDU $t_2,$a2
-+ $SLL $t_1,1
-+ $ADDU $c_1,$t_1
-+ sltu $at,$c_1,$t_1
-+ $ADDU $t_2,$at
-+ $ADDU $c_2,$t_2
-+ sltu $at,$c_2,$t_2
-+ $ADDU $c_3,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ slt $at,$t_2,$zero
-+ $ADDU $c_3,$at
-+ $MULTU $a_4,$a_2 # mul_add_c2(a[4],b[2],c1,c2,c3);
-+ $SLL $t_2,1
-+ slt $a2,$t_1,$zero
-+ $ADDU $t_2,$a2
-+ $SLL $t_1,1
-+ $ADDU $c_1,$t_1
-+ sltu $at,$c_1,$t_1
-+ $ADDU $t_2,$at
-+ $ADDU $c_2,$t_2
-+ sltu $at,$c_2,$t_2
-+ $ADDU $c_3,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ slt $at,$t_2,$zero
-+ $ADDU $c_3,$at
-+ $MULTU $a_3,$a_3 # mul_add_c(a[3],b[3],c1,c2,c3);
-+ $SLL $t_2,1
-+ slt $a2,$t_1,$zero
-+ $ADDU $t_2,$a2
-+ $SLL $t_1,1
-+ $ADDU $c_1,$t_1
-+ sltu $at,$c_1,$t_1
-+ $ADDU $t_2,$at
-+ $ADDU $c_2,$t_2
-+ sltu $at,$c_2,$t_2
-+ $ADDU $c_3,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_1,$t_1
-+ sltu $at,$c_1,$t_1
-+ $MULTU $a_0,$a_7 # mul_add_c2(a[0],b[7],c2,c3,c1);
-+ $ADDU $t_2,$at
-+ $ADDU $c_2,$t_2
-+ sltu $at,$c_2,$t_2
-+ $ADDU $c_3,$at
-+ $ST $c_1,6*$BNSZ($a0)
-+
-+ mflo $t_1
-+ mfhi $t_2
-+ slt $c_1,$t_2,$zero
-+ $SLL $t_2,1
-+ $MULTU $a_1,$a_6 # mul_add_c2(a[1],b[6],c2,c3,c1);
-+ slt $a2,$t_1,$zero
-+ $ADDU $t_2,$a2
-+ $SLL $t_1,1
-+ $ADDU $c_2,$t_1
-+ sltu $at,$c_2,$t_1
-+ $ADDU $t_2,$at
-+ $ADDU $c_3,$t_2
-+ sltu $at,$c_3,$t_2
-+ $ADDU $c_1,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ slt $at,$t_2,$zero
-+ $ADDU $c_1,$at
-+ $MULTU $a_2,$a_5 # mul_add_c2(a[2],b[5],c2,c3,c1);
-+ $SLL $t_2,1
-+ slt $a2,$t_1,$zero
-+ $ADDU $t_2,$a2
-+ $SLL $t_1,1
-+ $ADDU $c_2,$t_1
-+ sltu $at,$c_2,$t_1
-+ $ADDU $t_2,$at
-+ $ADDU $c_3,$t_2
-+ sltu $at,$c_3,$t_2
-+ $ADDU $c_1,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ slt $at,$t_2,$zero
-+ $ADDU $c_1,$at
-+ $MULTU $a_3,$a_4 # mul_add_c2(a[3],b[4],c2,c3,c1);
-+ $SLL $t_2,1
-+ slt $a2,$t_1,$zero
-+ $ADDU $t_2,$a2
-+ $SLL $t_1,1
-+ $ADDU $c_2,$t_1
-+ sltu $at,$c_2,$t_1
-+ $ADDU $t_2,$at
-+ $ADDU $c_3,$t_2
-+ sltu $at,$c_3,$t_2
-+ $ADDU $c_1,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ slt $at,$t_2,$zero
-+ $ADDU $c_1,$at
-+ $MULTU $a_7,$a_1 # mul_add_c2(a[7],b[1],c3,c1,c2);
-+ $SLL $t_2,1
-+ slt $a2,$t_1,$zero
-+ $ADDU $t_2,$a2
-+ $SLL $t_1,1
-+ $ADDU $c_2,$t_1
-+ sltu $at,$c_2,$t_1
-+ $ADDU $t_2,$at
-+ $ADDU $c_3,$t_2
-+ sltu $at,$c_3,$t_2
-+ $ADDU $c_1,$at
-+ $ST $c_2,7*$BNSZ($a0)
-+
-+ mflo $t_1
-+ mfhi $t_2
-+ slt $c_2,$t_2,$zero
-+ $SLL $t_2,1
-+ $MULTU $a_6,$a_2 # mul_add_c2(a[6],b[2],c3,c1,c2);
-+ slt $a2,$t_1,$zero
-+ $ADDU $t_2,$a2
-+ $SLL $t_1,1
-+ $ADDU $c_3,$t_1
-+ sltu $at,$c_3,$t_1
-+ $ADDU $t_2,$at
-+ $ADDU $c_1,$t_2
-+ sltu $at,$c_1,$t_2
-+ $ADDU $c_2,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ slt $at,$t_2,$zero
-+ $ADDU $c_2,$at
-+ $MULTU $a_5,$a_3 # mul_add_c2(a[5],b[3],c3,c1,c2);
-+ $SLL $t_2,1
-+ slt $a2,$t_1,$zero
-+ $ADDU $t_2,$a2
-+ $SLL $t_1,1
-+ $ADDU $c_3,$t_1
-+ sltu $at,$c_3,$t_1
-+ $ADDU $t_2,$at
-+ $ADDU $c_1,$t_2
-+ sltu $at,$c_1,$t_2
-+ $ADDU $c_2,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ slt $at,$t_2,$zero
-+ $ADDU $c_2,$at
-+ $MULTU $a_4,$a_4 # mul_add_c(a[4],b[4],c3,c1,c2);
-+ $SLL $t_2,1
-+ slt $a2,$t_1,$zero
-+ $ADDU $t_2,$a2
-+ $SLL $t_1,1
-+ $ADDU $c_3,$t_1
-+ sltu $at,$c_3,$t_1
-+ $ADDU $t_2,$at
-+ $ADDU $c_1,$t_2
-+ sltu $at,$c_1,$t_2
-+ $ADDU $c_2,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_3,$t_1
-+ sltu $at,$c_3,$t_1
-+ $MULTU $a_2,$a_7 # mul_add_c2(a[2],b[7],c1,c2,c3);
-+ $ADDU $t_2,$at
-+ $ADDU $c_1,$t_2
-+ sltu $at,$c_1,$t_2
-+ $ADDU $c_2,$at
-+ $ST $c_3,8*$BNSZ($a0)
-+
-+ mflo $t_1
-+ mfhi $t_2
-+ slt $c_3,$t_2,$zero
-+ $SLL $t_2,1
-+ $MULTU $a_3,$a_6 # mul_add_c2(a[3],b[6],c1,c2,c3);
-+ slt $a2,$t_1,$zero
-+ $ADDU $t_2,$a2
-+ $SLL $t_1,1
-+ $ADDU $c_1,$t_1
-+ sltu $at,$c_1,$t_1
-+ $ADDU $t_2,$at
-+ $ADDU $c_2,$t_2
-+ sltu $at,$c_2,$t_2
-+ $ADDU $c_3,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ slt $at,$t_2,$zero
-+ $ADDU $c_3,$at
-+ $MULTU $a_4,$a_5 # mul_add_c2(a[4],b[5],c1,c2,c3);
-+ $SLL $t_2,1
-+ slt $a2,$t_1,$zero
-+ $ADDU $t_2,$a2
-+ $SLL $t_1,1
-+ $ADDU $c_1,$t_1
-+ sltu $at,$c_1,$t_1
-+ $ADDU $t_2,$at
-+ $ADDU $c_2,$t_2
-+ sltu $at,$c_2,$t_2
-+ $ADDU $c_3,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ slt $at,$t_2,$zero
-+ $ADDU $c_3,$at
-+ $MULTU $a_7,$a_3 # mul_add_c2(a[7],b[3],c2,c3,c1);
-+ $SLL $t_2,1
-+ slt $a2,$t_1,$zero
-+ $ADDU $t_2,$a2
-+ $SLL $t_1,1
-+ $ADDU $c_1,$t_1
-+ sltu $at,$c_1,$t_1
-+ $ADDU $t_2,$at
-+ $ADDU $c_2,$t_2
-+ sltu $at,$c_2,$t_2
-+ $ADDU $c_3,$at
-+ $ST $c_1,9*$BNSZ($a0)
-+
-+ mflo $t_1
-+ mfhi $t_2
-+ slt $c_1,$t_2,$zero
-+ $SLL $t_2,1
-+ $MULTU $a_6,$a_4 # mul_add_c2(a[6],b[4],c2,c3,c1);
-+ slt $a2,$t_1,$zero
-+ $ADDU $t_2,$a2
-+ $SLL $t_1,1
-+ $ADDU $c_2,$t_1
-+ sltu $at,$c_2,$t_1
-+ $ADDU $t_2,$at
-+ $ADDU $c_3,$t_2
-+ sltu $at,$c_3,$t_2
-+ $ADDU $c_1,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ slt $at,$t_2,$zero
-+ $ADDU $c_1,$at
-+ $MULTU $a_5,$a_5 # mul_add_c(a[5],b[5],c2,c3,c1);
-+ $SLL $t_2,1
-+ slt $a2,$t_1,$zero
-+ $ADDU $t_2,$a2
-+ $SLL $t_1,1
-+ $ADDU $c_2,$t_1
-+ sltu $at,$c_2,$t_1
-+ $ADDU $t_2,$at
-+ $ADDU $c_3,$t_2
-+ sltu $at,$c_3,$t_2
-+ $ADDU $c_1,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_2,$t_1
-+ sltu $at,$c_2,$t_1
-+ $MULTU $a_4,$a_7 # mul_add_c2(a[4],b[7],c3,c1,c2);
-+ $ADDU $t_2,$at
-+ $ADDU $c_3,$t_2
-+ sltu $at,$c_3,$t_2
-+ $ADDU $c_1,$at
-+ $ST $c_2,10*$BNSZ($a0)
-+
-+ mflo $t_1
-+ mfhi $t_2
-+ slt $c_2,$t_2,$zero
-+ $SLL $t_2,1
-+ $MULTU $a_5,$a_6 # mul_add_c2(a[5],b[6],c3,c1,c2);
-+ slt $a2,$t_1,$zero
-+ $ADDU $t_2,$a2
-+ $SLL $t_1,1
-+ $ADDU $c_3,$t_1
-+ sltu $at,$c_3,$t_1
-+ $ADDU $t_2,$at
-+ $ADDU $c_1,$t_2
-+ sltu $at,$c_1,$t_2
-+ $ADDU $c_2,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ slt $at,$t_2,$zero
-+ $ADDU $c_2,$at
-+ $MULTU $a_7,$a_5 # mul_add_c2(a[7],b[5],c1,c2,c3);
-+ $SLL $t_2,1
-+ slt $a2,$t_1,$zero
-+ $ADDU $t_2,$a2
-+ $SLL $t_1,1
-+ $ADDU $c_3,$t_1
-+ sltu $at,$c_3,$t_1
-+ $ADDU $t_2,$at
-+ $ADDU $c_1,$t_2
-+ sltu $at,$c_1,$t_2
-+ $ADDU $c_2,$at
-+ $ST $c_3,11*$BNSZ($a0)
-+
-+ mflo $t_1
-+ mfhi $t_2
-+ slt $c_3,$t_2,$zero
-+ $SLL $t_2,1
-+ $MULTU $a_6,$a_6 # mul_add_c(a[6],b[6],c1,c2,c3);
-+ slt $a2,$t_1,$zero
-+ $ADDU $t_2,$a2
-+ $SLL $t_1,1
-+ $ADDU $c_1,$t_1
-+ sltu $at,$c_1,$t_1
-+ $ADDU $t_2,$at
-+ $ADDU $c_2,$t_2
-+ sltu $at,$c_2,$t_2
-+ $ADDU $c_3,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_1,$t_1
-+ sltu $at,$c_1,$t_1
-+ $MULTU $a_6,$a_7 # mul_add_c2(a[6],b[7],c2,c3,c1);
-+ $ADDU $t_2,$at
-+ $ADDU $c_2,$t_2
-+ sltu $at,$c_2,$t_2
-+ $ADDU $c_3,$at
-+ $ST $c_1,12*$BNSZ($a0)
-+
-+ mflo $t_1
-+ mfhi $t_2
-+ slt $c_1,$t_2,$zero
-+ $SLL $t_2,1
-+ $MULTU $a_7,$a_7 # mul_add_c(a[7],b[7],c3,c1,c2);
-+ slt $a2,$t_1,$zero
-+ $ADDU $t_2,$a2
-+ $SLL $t_1,1
-+ $ADDU $c_2,$t_1
-+ sltu $at,$c_2,$t_1
-+ $ADDU $t_2,$at
-+ $ADDU $c_3,$t_2
-+ sltu $at,$c_3,$t_2
-+ $ADDU $c_1,$at
-+ $ST $c_2,13*$BNSZ($a0)
-+
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_3,$t_1
-+ sltu $at,$c_3,$t_1
-+ $ADDU $t_2,$at
-+ $ADDU $c_1,$t_2
-+ $ST $c_3,14*$BNSZ($a0)
-+ $ST $c_1,15*$BNSZ($a0)
-+
-+ .set noreorder
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+ $REG_L $t3,4*$SZREG($sp)
-+ $REG_L $t2,3*$SZREG($sp)
-+ $REG_L $t1,2*$SZREG($sp)
-+ $REG_L $t0,1*$SZREG($sp)
-+ $REG_L $gp,0*$SZREG($sp)
-+ $PTR_ADD $sp,6*$SZREG
-+___
-+$code.=<<___;
-+ jr $ra
-+ nop
-+.end bn_sqr_comba8
-+
-+.align 5
-+.globl bn_sqr_comba4
-+.ent bn_sqr_comba4
-+bn_sqr_comba4:
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+ .frame $sp,6*$SZREG,$ra
-+ .mask 0x8000f008,-$SZREG
-+ .set noreorder
-+ $PTR_SUB $sp,6*$SZREG
-+ $REG_S $ra,5*$SZREG($sp)
-+ $REG_S $t3,4*$SZREG($sp)
-+ $REG_S $t2,3*$SZREG($sp)
-+ $REG_S $t1,2*$SZREG($sp)
-+ $REG_S $t0,1*$SZREG($sp)
-+ $REG_S $gp,0*$SZREG($sp)
-+___
-+$code.=<<___;
-+ .set reorder
-+ $LD $a_0,0($a1)
-+ $LD $a_1,$BNSZ($a1)
-+ $MULTU $a_0,$a_0 # mul_add_c(a[0],b[0],c1,c2,c3);
-+ $LD $a_2,2*$BNSZ($a1)
-+ $LD $a_3,3*$BNSZ($a1)
-+ mflo $c_1
-+ mfhi $c_2
-+ $ST $c_1,0($a0)
-+
-+ $MULTU $a_0,$a_1 # mul_add_c2(a[0],b[1],c2,c3,c1);
-+ mflo $t_1
-+ mfhi $t_2
-+ slt $c_1,$t_2,$zero
-+ $SLL $t_2,1
-+ $MULTU $a_2,$a_0 # mul_add_c2(a[2],b[0],c3,c1,c2);
-+ slt $a2,$t_1,$zero
-+ $ADDU $t_2,$a2
-+ $SLL $t_1,1
-+ $ADDU $c_2,$t_1
-+ sltu $at,$c_2,$t_1
-+ $ADDU $c_3,$t_2,$at
-+ $ST $c_2,$BNSZ($a0)
-+
-+ mflo $t_1
-+ mfhi $t_2
-+ slt $c_2,$t_2,$zero
-+ $SLL $t_2,1
-+ $MULTU $a_1,$a_1 # mul_add_c(a[1],b[1],c3,c1,c2);
-+ slt $a2,$t_1,$zero
-+ $ADDU $t_2,$a2
-+ $SLL $t_1,1
-+ $ADDU $c_3,$t_1
-+ sltu $at,$c_3,$t_1
-+ $ADDU $t_2,$at
-+ $ADDU $c_1,$t_2
-+ sltu $at,$c_1,$t_2
-+ $ADDU $c_2,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_3,$t_1
-+ sltu $at,$c_3,$t_1
-+ $MULTU $a_0,$a_3 # mul_add_c2(a[0],b[3],c1,c2,c3);
-+ $ADDU $t_2,$at
-+ $ADDU $c_1,$t_2
-+ sltu $at,$c_1,$t_2
-+ $ADDU $c_2,$at
-+ $ST $c_3,2*$BNSZ($a0)
-+
-+ mflo $t_1
-+ mfhi $t_2
-+ slt $c_3,$t_2,$zero
-+ $SLL $t_2,1
-+ $MULTU $a_1,$a_2 # mul_add_c(a2[1],b[2],c1,c2,c3);
-+ slt $a2,$t_1,$zero
-+ $ADDU $t_2,$a2
-+ $SLL $t_1,1
-+ $ADDU $c_1,$t_1
-+ sltu $at,$c_1,$t_1
-+ $ADDU $t_2,$at
-+ $ADDU $c_2,$t_2
-+ sltu $at,$c_2,$t_2
-+ $ADDU $c_3,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ slt $at,$t_2,$zero
-+ $ADDU $c_3,$at
-+ $MULTU $a_3,$a_1 # mul_add_c2(a[3],b[1],c2,c3,c1);
-+ $SLL $t_2,1
-+ slt $a2,$t_1,$zero
-+ $ADDU $t_2,$a2
-+ $SLL $t_1,1
-+ $ADDU $c_1,$t_1
-+ sltu $at,$c_1,$t_1
-+ $ADDU $t_2,$at
-+ $ADDU $c_2,$t_2
-+ sltu $at,$c_2,$t_2
-+ $ADDU $c_3,$at
-+ $ST $c_1,3*$BNSZ($a0)
-+
-+ mflo $t_1
-+ mfhi $t_2
-+ slt $c_1,$t_2,$zero
-+ $SLL $t_2,1
-+ $MULTU $a_2,$a_2 # mul_add_c(a[2],b[2],c2,c3,c1);
-+ slt $a2,$t_1,$zero
-+ $ADDU $t_2,$a2
-+ $SLL $t_1,1
-+ $ADDU $c_2,$t_1
-+ sltu $at,$c_2,$t_1
-+ $ADDU $t_2,$at
-+ $ADDU $c_3,$t_2
-+ sltu $at,$c_3,$t_2
-+ $ADDU $c_1,$at
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_2,$t_1
-+ sltu $at,$c_2,$t_1
-+ $MULTU $a_2,$a_3 # mul_add_c2(a[2],b[3],c3,c1,c2);
-+ $ADDU $t_2,$at
-+ $ADDU $c_3,$t_2
-+ sltu $at,$c_3,$t_2
-+ $ADDU $c_1,$at
-+ $ST $c_2,4*$BNSZ($a0)
-+
-+ mflo $t_1
-+ mfhi $t_2
-+ slt $c_2,$t_2,$zero
-+ $SLL $t_2,1
-+ $MULTU $a_3,$a_3 # mul_add_c(a[3],b[3],c1,c2,c3);
-+ slt $a2,$t_1,$zero
-+ $ADDU $t_2,$a2
-+ $SLL $t_1,1
-+ $ADDU $c_3,$t_1
-+ sltu $at,$c_3,$t_1
-+ $ADDU $t_2,$at
-+ $ADDU $c_1,$t_2
-+ sltu $at,$c_1,$t_2
-+ $ADDU $c_2,$at
-+ $ST $c_3,5*$BNSZ($a0)
-+
-+ mflo $t_1
-+ mfhi $t_2
-+ $ADDU $c_1,$t_1
-+ sltu $at,$c_1,$t_1
-+ $ADDU $t_2,$at
-+ $ADDU $c_2,$t_2
-+ $ST $c_1,6*$BNSZ($a0)
-+ $ST $c_2,7*$BNSZ($a0)
-+
-+ .set noreorder
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+ $REG_L $t3,4*$SZREG($sp)
-+ $REG_L $t2,3*$SZREG($sp)
-+ $REG_L $t1,2*$SZREG($sp)
-+ $REG_L $t0,1*$SZREG($sp)
-+ $REG_L $gp,0*$SZREG($sp)
-+ $PTR_ADD $sp,6*$SZREG
-+___
-+$code.=<<___;
-+ jr $ra
-+ nop
-+.end bn_sqr_comba4
-+___
-+print $code;
-+close STDOUT;
-diff --git a/crypto/sha/asm/sha1-mips.pl b/crypto/sha/asm/sha1-mips.pl
-new file mode 100644
-index 0000000..f1a702f
---- /dev/null
-+++ b/crypto/sha/asm/sha1-mips.pl
-@@ -0,0 +1,354 @@
-+#!/usr/bin/env perl
-+
-+# ====================================================================
-+# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
-+# project. The module is, however, dual licensed under OpenSSL and
-+# CRYPTOGAMS licenses depending on where you obtain it. For further
-+# details see http://www.openssl.org/~appro/cryptogams/.
-+# ====================================================================
-+
-+# SHA1 block procedure for MIPS.
-+
-+# Performance improvement is 30% on unaligned input. The "secret" is
-+# to deploy lwl/lwr pair to load unaligned input. One could have
-+# vectorized Xupdate on MIPSIII/IV, but the goal was to code MIPS32-
-+# compatible subroutine. There is room for minor optimization on
-+# little-endian platforms...
-+
-+######################################################################
-+# There is a number of MIPS ABI in use, O32 and N32/64 are most
-+# widely used. Then there is a new contender: NUBI. It appears that if
-+# one picks the latter, it's possible to arrange code in ABI neutral
-+# manner. Therefore let's stick to NUBI register layout:
-+#
-+($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25));
-+($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
-+($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23));
-+($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31));
-+#
-+# The return value is placed in $a0. Following coding rules facilitate
-+# interoperability:
-+#
-+# - never ever touch $tp, "thread pointer", former $gp;
-+# - copy return value to $t0, former $v0 [or to $a0 if you're adapting
-+# old code];
-+# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
-+#
-+# For reference here is register layout for N32/64 MIPS ABIs:
-+#
-+# ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
-+# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
-+# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
-+# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
-+# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
-+#
-+$flavour = shift; # supported flavours are o32,n32,64,nubi32,nubi64
-+
-+if ($flavour =~ /64|n32/i) {
-+ $PTR_ADD="dadd"; # incidentally works even on n32
-+ $PTR_SUB="dsub"; # incidentally works even on n32
-+ $REG_S="sd";
-+ $REG_L="ld";
-+ $PTR_SLL="dsll"; # incidentally works even on n32
-+ $SZREG=8;
-+} else {
-+ $PTR_ADD="add";
-+ $PTR_SUB="sub";
-+ $REG_S="sw";
-+ $REG_L="lw";
-+ $PTR_SLL="sll";
-+ $SZREG=4;
-+}
-+#
-+# <appro@openssl.org>
-+#
-+######################################################################
-+
-+$big_endian=(`echo MIPSEL | $ENV{CC} -E -P -`=~/MIPSEL/)?1:0;
-+
-+for (@ARGV) { $output=$_ if (/^\w[\w\-]*\.\w+$/); }
-+open STDOUT,">$output";
-+
-+if (!defined($big_endian))
-+ { $big_endian=(unpack('L',pack('N',1))==1); }
-+
-+# offsets of the Most and Least Significant Bytes
-+$MSB=$big_endian?0:3;
-+$LSB=3&~$MSB;
-+
-+@X=map("\$$_",(8..23)); # a4-a7,s0-s11
-+
-+$ctx=$a0;
-+$inp=$a1;
-+$num=$a2;
-+$A="\$1";
-+$B="\$2";
-+$C="\$3";
-+$D="\$7";
-+$E="\$24"; @V=($A,$B,$C,$D,$E);
-+$t0="\$25";
-+$t1=$num; # $num is offloaded to stack
-+$t2="\$30"; # fp
-+$K="\$31"; # ra
-+
-+sub BODY_00_14 {
-+my ($i,$a,$b,$c,$d,$e)=@_;
-+my $j=$i+1;
-+$code.=<<___ if (!$big_endian);
-+ srl $t0,@X[$i],24 # byte swap($i)
-+ srl $t1,@X[$i],8
-+ andi $t2,@X[$i],0xFF00
-+ sll @X[$i],@X[$i],24
-+ andi $t1,0xFF00
-+ sll $t2,$t2,8
-+ or @X[$i],$t0
-+ or $t1,$t2
-+ or @X[$i],$t1
-+___
-+$code.=<<___;
-+ lwl @X[$j],$j*4+$MSB($inp)
-+ sll $t0,$a,5 # $i
-+ addu $e,$K
-+ lwr @X[$j],$j*4+$LSB($inp)
-+ srl $t1,$a,27
-+ addu $e,$t0
-+ xor $t0,$c,$d
-+ addu $e,$t1
-+ sll $t2,$b,30
-+ and $t0,$b
-+ srl $b,$b,2
-+ xor $t0,$d
-+ addu $e,@X[$i]
-+ or $b,$t2
-+ addu $e,$t0
-+___
-+}
-+
-+sub BODY_15_19 {
-+my ($i,$a,$b,$c,$d,$e)=@_;
-+my $j=$i+1;
-+
-+$code.=<<___ if (!$big_endian && $i==15);
-+ srl $t0,@X[$i],24 # byte swap($i)
-+ srl $t1,@X[$i],8
-+ andi $t2,@X[$i],0xFF00
-+ sll @X[$i],@X[$i],24
-+ andi $t1,0xFF00
-+ sll $t2,$t2,8
-+ or @X[$i],$t0
-+ or @X[$i],$t1
-+ or @X[$i],$t2
-+___
-+$code.=<<___;
-+ xor @X[$j%16],@X[($j+2)%16]
-+ sll $t0,$a,5 # $i
-+ addu $e,$K
-+ srl $t1,$a,27
-+ addu $e,$t0
-+ xor @X[$j%16],@X[($j+8)%16]
-+ xor $t0,$c,$d
-+ addu $e,$t1
-+ xor @X[$j%16],@X[($j+13)%16]
-+ sll $t2,$b,30
-+ and $t0,$b
-+ srl $t1,@X[$j%16],31
-+ addu @X[$j%16],@X[$j%16]
-+ srl $b,$b,2
-+ xor $t0,$d
-+ or @X[$j%16],$t1
-+ addu $e,@X[$i%16]
-+ or $b,$t2
-+ addu $e,$t0
-+___
-+}
-+
-+sub BODY_20_39 {
-+my ($i,$a,$b,$c,$d,$e)=@_;
-+my $j=$i+1;
-+$code.=<<___ if ($i<79);
-+ xor @X[$j%16],@X[($j+2)%16]
-+ sll $t0,$a,5 # $i
-+ addu $e,$K
-+ srl $t1,$a,27
-+ addu $e,$t0
-+ xor @X[$j%16],@X[($j+8)%16]
-+ xor $t0,$c,$d
-+ addu $e,$t1
-+ xor @X[$j%16],@X[($j+13)%16]
-+ sll $t2,$b,30
-+ xor $t0,$b
-+ srl $t1,@X[$j%16],31
-+ addu @X[$j%16],@X[$j%16]
-+ srl $b,$b,2
-+ addu $e,@X[$i%16]
-+ or @X[$j%16],$t1
-+ or $b,$t2
-+ addu $e,$t0
-+___
-+$code.=<<___ if ($i==79);
-+ lw @X[0],0($ctx)
-+ sll $t0,$a,5 # $i
-+ addu $e,$K
-+ lw @X[1],4($ctx)
-+ srl $t1,$a,27
-+ addu $e,$t0
-+ lw @X[2],8($ctx)
-+ xor $t0,$c,$d
-+ addu $e,$t1
-+ lw @X[3],12($ctx)
-+ sll $t2,$b,30
-+ xor $t0,$b
-+ lw @X[4],16($ctx)
-+ srl $b,$b,2
-+ addu $e,@X[$i%16]
-+ or $b,$t2
-+ addu $e,$t0
-+___
-+}
-+
-+sub BODY_40_59 {
-+my ($i,$a,$b,$c,$d,$e)=@_;
-+my $j=$i+1;
-+$code.=<<___ if ($i<79);
-+ xor @X[$j%16],@X[($j+2)%16]
-+ sll $t0,$a,5 # $i
-+ addu $e,$K
-+ srl $t1,$a,27
-+ addu $e,$t0
-+ xor @X[$j%16],@X[($j+8)%16]
-+ and $t0,$c,$d
-+ addu $e,$t1
-+ xor @X[$j%16],@X[($j+13)%16]
-+ sll $t2,$b,30
-+ addu $e,$t0
-+ srl $t1,@X[$j%16],31
-+ xor $t0,$c,$d
-+ addu @X[$j%16],@X[$j%16]
-+ and $t0,$b
-+ srl $b,$b,2
-+ or @X[$j%16],$t1
-+ addu $e,@X[$i%16]
-+ or $b,$t2
-+ addu $e,$t0
-+___
-+}
-+
-+$FRAMESIZE=16; # large enough to accomodate NUBI saved registers
-+$SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc0fff008 : 0xc0ff0000;
-+
-+$code=<<___;
-+#ifdef OPENSSL_FIPSCANISTER
-+# include <openssl/fipssyms.h>
-+#endif
-+
-+.text
-+
-+.set noat
-+.set noreorder
-+.align 5
-+.globl sha1_block_data_order
-+.ent sha1_block_data_order
-+sha1_block_data_order:
-+ .frame $sp,$FRAMESIZE*$SZREG,$ra
-+ .mask $SAVED_REGS_MASK,-$SZREG
-+ .set noreorder
-+ $PTR_SUB $sp,$FRAMESIZE*$SZREG
-+ $REG_S $ra,($FRAMESIZE-1)*$SZREG($sp)
-+ $REG_S $fp,($FRAMESIZE-2)*$SZREG($sp)
-+ $REG_S $s11,($FRAMESIZE-3)*$SZREG($sp)
-+ $REG_S $s10,($FRAMESIZE-4)*$SZREG($sp)
-+ $REG_S $s9,($FRAMESIZE-5)*$SZREG($sp)
-+ $REG_S $s8,($FRAMESIZE-6)*$SZREG($sp)
-+ $REG_S $s7,($FRAMESIZE-7)*$SZREG($sp)
-+ $REG_S $s6,($FRAMESIZE-8)*$SZREG($sp)
-+ $REG_S $s5,($FRAMESIZE-9)*$SZREG($sp)
-+ $REG_S $s4,($FRAMESIZE-10)*$SZREG($sp)
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
-+ $REG_S $s3,($FRAMESIZE-11)*$SZREG($sp)
-+ $REG_S $s2,($FRAMESIZE-12)*$SZREG($sp)
-+ $REG_S $s1,($FRAMESIZE-13)*$SZREG($sp)
-+ $REG_S $s0,($FRAMESIZE-14)*$SZREG($sp)
-+ $REG_S $gp,($FRAMESIZE-15)*$SZREG($sp)
-+___
-+$code.=<<___;
-+ $PTR_SLL $num,6
-+ $PTR_ADD $num,$inp
-+ $REG_S $num,0($sp)
-+ lw $A,0($ctx)
-+ lw $B,4($ctx)
-+ lw $C,8($ctx)
-+ lw $D,12($ctx)
-+ b .Loop
-+ lw $E,16($ctx)
-+.align 4
-+.Loop:
-+ .set reorder
-+ lwl @X[0],$MSB($inp)
-+ lui $K,0x5a82
-+ lwr @X[0],$LSB($inp)
-+ ori $K,0x7999 # K_00_19
-+___
-+for ($i=0;$i<15;$i++) { &BODY_00_14($i,@V); unshift(@V,pop(@V)); }
-+for (;$i<20;$i++) { &BODY_15_19($i,@V); unshift(@V,pop(@V)); }
-+$code.=<<___;
-+ lui $K,0x6ed9
-+ ori $K,0xeba1 # K_20_39
-+___
-+for (;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
-+$code.=<<___;
-+ lui $K,0x8f1b
-+ ori $K,0xbcdc # K_40_59
-+___
-+for (;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); }
-+$code.=<<___;
-+ lui $K,0xca62
-+ ori $K,0xc1d6 # K_60_79
-+___
-+for (;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
-+$code.=<<___;
-+ $PTR_ADD $inp,64
-+ $REG_L $num,0($sp)
-+
-+ addu $A,$X[0]
-+ addu $B,$X[1]
-+ sw $A,0($ctx)
-+ addu $C,$X[2]
-+ addu $D,$X[3]
-+ sw $B,4($ctx)
-+ addu $E,$X[4]
-+ sw $C,8($ctx)
-+ sw $D,12($ctx)
-+ sw $E,16($ctx)
-+ .set noreorder
-+ bne $inp,$num,.Loop
-+ nop
-+
-+ .set noreorder
-+ $REG_L $ra,($FRAMESIZE-1)*$SZREG($sp)
-+ $REG_L $fp,($FRAMESIZE-2)*$SZREG($sp)
-+ $REG_L $s11,($FRAMESIZE-3)*$SZREG($sp)
-+ $REG_L $s10,($FRAMESIZE-4)*$SZREG($sp)
-+ $REG_L $s9,($FRAMESIZE-5)*$SZREG($sp)
-+ $REG_L $s8,($FRAMESIZE-6)*$SZREG($sp)
-+ $REG_L $s7,($FRAMESIZE-7)*$SZREG($sp)
-+ $REG_L $s6,($FRAMESIZE-8)*$SZREG($sp)
-+ $REG_L $s5,($FRAMESIZE-9)*$SZREG($sp)
-+ $REG_L $s4,($FRAMESIZE-10)*$SZREG($sp)
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+ $REG_L $s3,($FRAMESIZE-11)*$SZREG($sp)
-+ $REG_L $s2,($FRAMESIZE-12)*$SZREG($sp)
-+ $REG_L $s1,($FRAMESIZE-13)*$SZREG($sp)
-+ $REG_L $s0,($FRAMESIZE-14)*$SZREG($sp)
-+ $REG_L $gp,($FRAMESIZE-15)*$SZREG($sp)
-+___
-+$code.=<<___;
-+ jr $ra
-+ $PTR_ADD $sp,$FRAMESIZE*$SZREG
-+.end sha1_block_data_order
-+.rdata
-+.asciiz "SHA1 for MIPS, CRYPTOGAMS by <appro\@openssl.org>"
-+___
-+print $code;
-+close STDOUT;
-diff --git a/crypto/sha/asm/sha512-mips.pl b/crypto/sha/asm/sha512-mips.pl
-new file mode 100644
-index 0000000..ba5b250
---- /dev/null
-+++ b/crypto/sha/asm/sha512-mips.pl
-@@ -0,0 +1,455 @@
-+#!/usr/bin/env perl
-+
-+# ====================================================================
-+# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
-+# project. The module is, however, dual licensed under OpenSSL and
-+# CRYPTOGAMS licenses depending on where you obtain it. For further
-+# details see http://www.openssl.org/~appro/cryptogams/.
-+# ====================================================================
-+
-+# SHA2 block procedures for MIPS.
-+
-+# October 2010.
-+#
-+# SHA256 performance improvement on MIPS R5000 CPU is ~27% over gcc-
-+# generated code in o32 build and ~55% in n32/64 build. SHA512 [which
-+# for now can only be compiled for MIPS64 ISA] improvement is modest
-+# ~17%, but it comes for free, because it's same instruction sequence.
-+# Improvement coefficients are for aligned input.
-+
-+######################################################################
-+# There is a number of MIPS ABI in use, O32 and N32/64 are most
-+# widely used. Then there is a new contender: NUBI. It appears that if
-+# one picks the latter, it's possible to arrange code in ABI neutral
-+# manner. Therefore let's stick to NUBI register layout:
-+#
-+($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25));
-+($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
-+($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23));
-+($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31));
-+#
-+# The return value is placed in $a0. Following coding rules facilitate
-+# interoperability:
-+#
-+# - never ever touch $tp, "thread pointer", former $gp [o32 can be
-+# excluded from the rule, because it's specified volatile];
-+# - copy return value to $t0, former $v0 [or to $a0 if you're adapting
-+# old code];
-+# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
-+#
-+# For reference here is register layout for N32/64 MIPS ABIs:
-+#
-+# ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
-+# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
-+# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
-+# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
-+# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
-+#
-+$flavour = shift; # supported flavours are o32,n32,64,nubi32,nubi64
-+
-+if ($flavour =~ /64|n32/i) {
-+ $PTR_ADD="dadd"; # incidentally works even on n32
-+ $PTR_SUB="dsub"; # incidentally works even on n32
-+ $REG_S="sd";
-+ $REG_L="ld";
-+ $PTR_SLL="dsll"; # incidentally works even on n32
-+ $SZREG=8;
-+} else {
-+ $PTR_ADD="add";
-+ $PTR_SUB="sub";
-+ $REG_S="sw";
-+ $REG_L="lw";
-+ $PTR_SLL="sll";
-+ $SZREG=4;
-+}
-+$pf = ($flavour =~ /nubi/i) ? $t0 : $t2;
-+#
-+# <appro@openssl.org>
-+#
-+######################################################################
-+
-+$big_endian=(`echo MIPSEL | $ENV{CC} -E -P -`=~/MIPSEL/)?1:0;
-+
-+for (@ARGV) { $output=$_ if (/^\w[\w\-]*\.\w+$/); }
-+open STDOUT,">$output";
-+
-+if (!defined($big_endian)) { $big_endian=(unpack('L',pack('N',1))==1); }
-+
-+if ($output =~ /512/) {
-+ $label="512";
-+ $SZ=8;
-+ $LD="ld"; # load from memory
-+ $ST="sd"; # store to memory
-+ $SLL="dsll"; # shift left logical
-+ $SRL="dsrl"; # shift right logical
-+ $ADDU="daddu";
-+ @Sigma0=(28,34,39);
-+ @Sigma1=(14,18,41);
-+ @sigma0=( 7, 1, 8); # right shift first
-+ @sigma1=( 6,19,61); # right shift first
-+ $lastK=0x817;
-+ $rounds=80;
-+} else {
-+ $label="256";
-+ $SZ=4;
-+ $LD="lw"; # load from memory
-+ $ST="sw"; # store to memory
-+ $SLL="sll"; # shift left logical
-+ $SRL="srl"; # shift right logical
-+ $ADDU="addu";
-+ @Sigma0=( 2,13,22);
-+ @Sigma1=( 6,11,25);
-+ @sigma0=( 3, 7,18); # right shift first
-+ @sigma1=(10,17,19); # right shift first
-+ $lastK=0x8f2;
-+ $rounds=64;
-+}
-+
-+$MSB = $big_endian ? 0 : ($SZ-1);
-+$LSB = ($SZ-1)&~$MSB;
-+
-+@V=($A,$B,$C,$D,$E,$F,$G,$H)=map("\$$_",(1,2,3,7,24,25,30,31));
-+@X=map("\$$_",(8..23));
-+
-+$ctx=$a0;
-+$inp=$a1;
-+$len=$a2; $Ktbl=$len;
-+
-+sub BODY_00_15 {
-+my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
-+my ($T1,$tmp0,$tmp1,$tmp2)=(@X[4],@X[5],@X[6],@X[7]);
-+
-+$code.=<<___ if ($i<15);
-+ ${LD}l @X[1],`($i+1)*$SZ+$MSB`($inp)
-+ ${LD}r @X[1],`($i+1)*$SZ+$LSB`($inp)
-+___
-+$code.=<<___ if (!$big_endian && $i<16 && $SZ==4);
-+ srl $tmp0,@X[0],24 # byte swap($i)
-+ srl $tmp1,@X[0],8
-+ andi $tmp2,@X[0],0xFF00
-+ sll @X[0],@X[0],24
-+ andi $tmp1,0xFF00
-+ sll $tmp2,$tmp2,8
-+ or @X[0],$tmp0
-+ or $tmp1,$tmp2
-+ or @X[0],$tmp1
-+___
-+$code.=<<___ if (!$big_endian && $i<16 && $SZ==8);
-+ ori $tmp0,$zero,0xFF
-+ dsll $tmp2,$tmp0,32
-+ or $tmp0,$tmp2 # 0x000000FF000000FF
-+ and $tmp1,@X[0],$tmp0 # byte swap($i)
-+ dsrl $tmp2,@X[0],24
-+ dsll $tmp1,24
-+ and $tmp2,$tmp0
-+ dsll $tmp0,8 # 0x0000FF000000FF00
-+ or $tmp1,$tmp2
-+ and $tmp2,@X[0],$tmp0
-+ dsrl @X[0],8
-+ dsll $tmp2,8
-+ and @X[0],$tmp0
-+ or $tmp1,$tmp2
-+ or @X[0],$tmp1
-+ dsrl $tmp1,@X[0],32
-+ dsll @X[0],32
-+ or @X[0],$tmp1
-+___
-+$code.=<<___;
-+ $ADDU $T1,$X[0],$h # $i
-+ $SRL $h,$e,@Sigma1[0]
-+ xor $tmp2,$f,$g
-+ $SLL $tmp1,$e,`$SZ*8-@Sigma1[2]`
-+ and $tmp2,$e
-+ $SRL $tmp0,$e,@Sigma1[1]
-+ xor $h,$tmp1
-+ $SLL $tmp1,$e,`$SZ*8-@Sigma1[1]`
-+ xor $h,$tmp0
-+ $SRL $tmp0,$e,@Sigma1[2]
-+ xor $h,$tmp1
-+ $SLL $tmp1,$e,`$SZ*8-@Sigma1[0]`
-+ xor $h,$tmp0
-+ xor $tmp2,$g # Ch(e,f,g)
-+ xor $tmp0,$tmp1,$h # Sigma1(e)
-+
-+ $SRL $h,$a,@Sigma0[0]
-+ $ADDU $T1,$tmp2
-+ $LD $tmp2,`$i*$SZ`($Ktbl) # K[$i]
-+ $SLL $tmp1,$a,`$SZ*8-@Sigma0[2]`
-+ $ADDU $T1,$tmp0
-+ $SRL $tmp0,$a,@Sigma0[1]
-+ xor $h,$tmp1
-+ $SLL $tmp1,$a,`$SZ*8-@Sigma0[1]`
-+ xor $h,$tmp0
-+ $SRL $tmp0,$a,@Sigma0[2]
-+ xor $h,$tmp1
-+ $SLL $tmp1,$a,`$SZ*8-@Sigma0[0]`
-+ xor $h,$tmp0
-+ $ST @X[0],`($i%16)*$SZ`($sp) # offload to ring buffer
-+ xor $h,$tmp1 # Sigma0(a)
-+
-+ or $tmp0,$a,$b
-+ and $tmp1,$a,$b
-+ and $tmp0,$c
-+ or $tmp1,$tmp0 # Maj(a,b,c)
-+ $ADDU $T1,$tmp2 # +=K[$i]
-+ $ADDU $h,$tmp1
-+
-+ $ADDU $d,$T1
-+ $ADDU $h,$T1
-+___
-+$code.=<<___ if ($i>=13);
-+ $LD @X[3],`(($i+3)%16)*$SZ`($sp) # prefetch from ring buffer
-+___
-+}
-+
-+sub BODY_16_XX {
-+my $i=@_[0];
-+my ($tmp0,$tmp1,$tmp2,$tmp3)=(@X[4],@X[5],@X[6],@X[7]);
-+
-+$code.=<<___;
-+ $SRL $tmp2,@X[1],@sigma0[0] # Xupdate($i)
-+ $ADDU @X[0],@X[9] # +=X[i+9]
-+ $SLL $tmp1,@X[1],`$SZ*8-@sigma0[2]`
-+ $SRL $tmp0,@X[1],@sigma0[1]
-+ xor $tmp2,$tmp1
-+ $SLL $tmp1,`@sigma0[2]-@sigma0[1]`
-+ xor $tmp2,$tmp0
-+ $SRL $tmp0,@X[1],@sigma0[2]
-+ xor $tmp2,$tmp1
-+
-+ $SRL $tmp3,@X[14],@sigma1[0]
-+ xor $tmp2,$tmp0 # sigma0(X[i+1])
-+ $SLL $tmp1,@X[14],`$SZ*8-@sigma1[2]`
-+ $ADDU @X[0],$tmp2
-+ $SRL $tmp0,@X[14],@sigma1[1]
-+ xor $tmp3,$tmp1
-+ $SLL $tmp1,`@sigma1[2]-@sigma1[1]`
-+ xor $tmp3,$tmp0
-+ $SRL $tmp0,@X[14],@sigma1[2]
-+ xor $tmp3,$tmp1
-+
-+ xor $tmp3,$tmp0 # sigma1(X[i+14])
-+ $ADDU @X[0],$tmp3
-+___
-+ &BODY_00_15(@_);
-+}
-+
-+$FRAMESIZE=16*$SZ+16*$SZREG;
-+$SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc0fff008 : 0xc0ff0000;
-+
-+$code.=<<___;
-+#ifdef OPENSSL_FIPSCANISTER
-+# include <openssl/fipssyms.h>
-+#endif
-+
-+.text
-+.set noat
-+#if !defined(__vxworks) || defined(__pic__)
-+.option pic2
-+#endif
-+
-+.align 5
-+.globl sha${label}_block_data_order
-+.ent sha${label}_block_data_order
-+sha${label}_block_data_order:
-+ .frame $sp,$FRAMESIZE,$ra
-+ .mask $SAVED_REGS_MASK,-$SZREG
-+ .set noreorder
-+___
-+$code.=<<___ if ($flavour =~ /o32/i); # o32 PIC-ification
-+ .cpload $pf
-+___
-+$code.=<<___;
-+ $PTR_SUB $sp,$FRAMESIZE
-+ $REG_S $ra,$FRAMESIZE-1*$SZREG($sp)
-+ $REG_S $fp,$FRAMESIZE-2*$SZREG($sp)
-+ $REG_S $s11,$FRAMESIZE-3*$SZREG($sp)
-+ $REG_S $s10,$FRAMESIZE-4*$SZREG($sp)
-+ $REG_S $s9,$FRAMESIZE-5*$SZREG($sp)
-+ $REG_S $s8,$FRAMESIZE-6*$SZREG($sp)
-+ $REG_S $s7,$FRAMESIZE-7*$SZREG($sp)
-+ $REG_S $s6,$FRAMESIZE-8*$SZREG($sp)
-+ $REG_S $s5,$FRAMESIZE-9*$SZREG($sp)
-+ $REG_S $s4,$FRAMESIZE-10*$SZREG($sp)
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
-+ $REG_S $s3,$FRAMESIZE-11*$SZREG($sp)
-+ $REG_S $s2,$FRAMESIZE-12*$SZREG($sp)
-+ $REG_S $s1,$FRAMESIZE-13*$SZREG($sp)
-+ $REG_S $s0,$FRAMESIZE-14*$SZREG($sp)
-+ $REG_S $gp,$FRAMESIZE-15*$SZREG($sp)
-+___
-+$code.=<<___;
-+ $PTR_SLL @X[15],$len,`log(16*$SZ)/log(2)`
-+___
-+$code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification
-+ .cplocal $Ktbl
-+ .cpsetup $pf,$zero,sha${label}_block_data_order
-+___
-+$code.=<<___;
-+ .set reorder
-+ la $Ktbl,K${label} # PIC-ified 'load address'
-+
-+ $LD $A,0*$SZ($ctx) # load context
-+ $LD $B,1*$SZ($ctx)
-+ $LD $C,2*$SZ($ctx)
-+ $LD $D,3*$SZ($ctx)
-+ $LD $E,4*$SZ($ctx)
-+ $LD $F,5*$SZ($ctx)
-+ $LD $G,6*$SZ($ctx)
-+ $LD $H,7*$SZ($ctx)
-+
-+ $PTR_ADD @X[15],$inp # pointer to the end of input
-+ $REG_S @X[15],16*$SZ($sp)
-+ b .Loop
-+
-+.align 5
-+.Loop:
-+ ${LD}l @X[0],$MSB($inp)
-+ ${LD}r @X[0],$LSB($inp)
-+___
-+for ($i=0;$i<16;$i++)
-+{ &BODY_00_15($i,@V); unshift(@V,pop(@V)); push(@X,shift(@X)); }
-+$code.=<<___;
-+ b .L16_xx
-+.align 4
-+.L16_xx:
-+___
-+for (;$i<32;$i++)
-+{ &BODY_16_XX($i,@V); unshift(@V,pop(@V)); push(@X,shift(@X)); }
-+$code.=<<___;
-+ and @X[6],0xfff
-+ li @X[7],$lastK
-+ .set noreorder
-+ bne @X[6],@X[7],.L16_xx
-+ $PTR_ADD $Ktbl,16*$SZ # Ktbl+=16
-+
-+ $REG_L @X[15],16*$SZ($sp) # restore pointer to the end of input
-+ $LD @X[0],0*$SZ($ctx)
-+ $LD @X[1],1*$SZ($ctx)
-+ $LD @X[2],2*$SZ($ctx)
-+ $PTR_ADD $inp,16*$SZ
-+ $LD @X[3],3*$SZ($ctx)
-+ $ADDU $A,@X[0]
-+ $LD @X[4],4*$SZ($ctx)
-+ $ADDU $B,@X[1]
-+ $LD @X[5],5*$SZ($ctx)
-+ $ADDU $C,@X[2]
-+ $LD @X[6],6*$SZ($ctx)
-+ $ADDU $D,@X[3]
-+ $LD @X[7],7*$SZ($ctx)
-+ $ADDU $E,@X[4]
-+ $ST $A,0*$SZ($ctx)
-+ $ADDU $F,@X[5]
-+ $ST $B,1*$SZ($ctx)
-+ $ADDU $G,@X[6]
-+ $ST $C,2*$SZ($ctx)
-+ $ADDU $H,@X[7]
-+ $ST $D,3*$SZ($ctx)
-+ $ST $E,4*$SZ($ctx)
-+ $ST $F,5*$SZ($ctx)
-+ $ST $G,6*$SZ($ctx)
-+ $ST $H,7*$SZ($ctx)
-+
-+ bnel $inp,@X[15],.Loop
-+ $PTR_SUB $Ktbl,`($rounds-16)*$SZ` # rewind $Ktbl
-+
-+ $REG_L $ra,$FRAMESIZE-1*$SZREG($sp)
-+ $REG_L $fp,$FRAMESIZE-2*$SZREG($sp)
-+ $REG_L $s11,$FRAMESIZE-3*$SZREG($sp)
-+ $REG_L $s10,$FRAMESIZE-4*$SZREG($sp)
-+ $REG_L $s9,$FRAMESIZE-5*$SZREG($sp)
-+ $REG_L $s8,$FRAMESIZE-6*$SZREG($sp)
-+ $REG_L $s7,$FRAMESIZE-7*$SZREG($sp)
-+ $REG_L $s6,$FRAMESIZE-8*$SZREG($sp)
-+ $REG_L $s5,$FRAMESIZE-9*$SZREG($sp)
-+ $REG_L $s4,$FRAMESIZE-10*$SZREG($sp)
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+ $REG_L $s3,$FRAMESIZE-11*$SZREG($sp)
-+ $REG_L $s2,$FRAMESIZE-12*$SZREG($sp)
-+ $REG_L $s1,$FRAMESIZE-13*$SZREG($sp)
-+ $REG_L $s0,$FRAMESIZE-14*$SZREG($sp)
-+ $REG_L $gp,$FRAMESIZE-15*$SZREG($sp)
-+___
-+$code.=<<___;
-+ jr $ra
-+ $PTR_ADD $sp,$FRAMESIZE
-+.end sha${label}_block_data_order
-+
-+.rdata
-+.align 5
-+K${label}:
-+___
-+if ($SZ==4) {
-+$code.=<<___;
-+ .word 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
-+ .word 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
-+ .word 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
-+ .word 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
-+ .word 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
-+ .word 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
-+ .word 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
-+ .word 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
-+ .word 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
-+ .word 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
-+ .word 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
-+ .word 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
-+ .word 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
-+ .word 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
-+ .word 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
-+ .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
-+___
-+} else {
-+$code.=<<___;
-+ .dword 0x428a2f98d728ae22, 0x7137449123ef65cd
-+ .dword 0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc
-+ .dword 0x3956c25bf348b538, 0x59f111f1b605d019
-+ .dword 0x923f82a4af194f9b, 0xab1c5ed5da6d8118
-+ .dword 0xd807aa98a3030242, 0x12835b0145706fbe
-+ .dword 0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2
-+ .dword 0x72be5d74f27b896f, 0x80deb1fe3b1696b1
-+ .dword 0x9bdc06a725c71235, 0xc19bf174cf692694
-+ .dword 0xe49b69c19ef14ad2, 0xefbe4786384f25e3
-+ .dword 0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65
-+ .dword 0x2de92c6f592b0275, 0x4a7484aa6ea6e483
-+ .dword 0x5cb0a9dcbd41fbd4, 0x76f988da831153b5
-+ .dword 0x983e5152ee66dfab, 0xa831c66d2db43210
-+ .dword 0xb00327c898fb213f, 0xbf597fc7beef0ee4
-+ .dword 0xc6e00bf33da88fc2, 0xd5a79147930aa725
-+ .dword 0x06ca6351e003826f, 0x142929670a0e6e70
-+ .dword 0x27b70a8546d22ffc, 0x2e1b21385c26c926
-+ .dword 0x4d2c6dfc5ac42aed, 0x53380d139d95b3df
-+ .dword 0x650a73548baf63de, 0x766a0abb3c77b2a8
-+ .dword 0x81c2c92e47edaee6, 0x92722c851482353b
-+ .dword 0xa2bfe8a14cf10364, 0xa81a664bbc423001
-+ .dword 0xc24b8b70d0f89791, 0xc76c51a30654be30
-+ .dword 0xd192e819d6ef5218, 0xd69906245565a910
-+ .dword 0xf40e35855771202a, 0x106aa07032bbd1b8
-+ .dword 0x19a4c116b8d2d0c8, 0x1e376c085141ab53
-+ .dword 0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8
-+ .dword 0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb
-+ .dword 0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3
-+ .dword 0x748f82ee5defb2fc, 0x78a5636f43172f60
-+ .dword 0x84c87814a1f0ab72, 0x8cc702081a6439ec
-+ .dword 0x90befffa23631e28, 0xa4506cebde82bde9
-+ .dword 0xbef9a3f7b2c67915, 0xc67178f2e372532b
-+ .dword 0xca273eceea26619c, 0xd186b8c721c0c207
-+ .dword 0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178
-+ .dword 0x06f067aa72176fba, 0x0a637dc5a2c898a6
-+ .dword 0x113f9804bef90dae, 0x1b710b35131c471b
-+ .dword 0x28db77f523047d84, 0x32caab7b40c72493
-+ .dword 0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c
-+ .dword 0x4cc5d4becb3e42b6, 0x597f299cfc657e2a
-+ .dword 0x5fcb6fab3ad6faec, 0x6c44198c4a475817
-+___
-+}
-+$code.=<<___;
-+.asciiz "SHA${label} for MIPS, CRYPTOGAMS by <appro\@openssl.org>"
-+.align 5
-+
-+___
-+
-+$code =~ s/\`([^\`]*)\`/eval $1/gem;
-+print $code;
-+close STDOUT;
diff --git a/patches/npn.patch b/patches/npn.patch
deleted file mode 100644
index 46b7a7d..0000000
--- a/patches/npn.patch
+++ /dev/null
@@ -1,1293 +0,0 @@
---- openssl-1.0.0b.orig/apps/apps.c 2010-11-11 14:42:19.000000000 +0000
-+++ openssl-1.0.0b/apps/apps.c 2010-11-29 19:56:04.902465346 +0000
-@@ -3012,3 +3012,46 @@ int raw_write_stdout(const void *buf,int
- int raw_write_stdout(const void *buf,int siz)
- { return write(fileno(stdout),buf,siz); }
- #endif
-+
-+#if !defined(OPENSSL_NO_TLSEXT) && !defined(OPENSSL_NO_NEXTPROTONEG)
-+/* next_protos_parse parses a comma separated list of strings into a string
-+ * in a format suitable for passing to SSL_CTX_set_next_protos_advertised.
-+ * outlen: (output) set to the length of the resulting buffer on success.
-+ * in: a NUL termianted string like "abc,def,ghi"
-+ *
-+ * returns: a malloced buffer or NULL on failure.
-+ */
-+unsigned char *next_protos_parse(unsigned short *outlen, const char *in)
-+ {
-+ size_t len;
-+ unsigned char *out;
-+ size_t i, start = 0;
-+
-+ len = strlen(in);
-+ if (len >= 65535)
-+ return NULL;
-+
-+ out = OPENSSL_malloc(strlen(in) + 1);
-+ if (!out)
-+ return NULL;
-+
-+ for (i = 0; i <= len; ++i)
-+ {
-+ if (i == len || in[i] == ',')
-+ {
-+ if (i - start > 255)
-+ {
-+ OPENSSL_free(out);
-+ return NULL;
-+ }
-+ out[start] = i - start;
-+ start = i + 1;
-+ }
-+ else
-+ out[i+1] = in[i];
-+ }
-+
-+ *outlen = len + 1;
-+ return out;
-+ }
-+#endif /* !OPENSSL_NO_TLSEXT && !OPENSSL_NO_NEXTPROTONEG */
---- openssl-1.0.0b.orig/apps/apps.h 2009-10-31 13:34:19.000000000 +0000
-+++ openssl-1.0.0b/apps/apps.h 2010-11-29 19:56:04.902465346 +0000
-@@ -358,3 +358,7 @@ int raw_write_stdout(const void *,int);
- #define TM_STOP 1
- double app_tminterval (int stop,int usertime);
- #endif
-+
-+#ifndef OPENSSL_NO_NEXTPROTONEG
-+unsigned char *next_protos_parse(unsigned short *outlen, const char *in);
-+#endif
---- openssl-1.0.0b.orig/apps/s_client.c 2010-11-29 19:56:04.832465351 +0000
-+++ openssl-1.0.0b/apps/s_client.c 2010-11-29 19:56:04.902465346 +0000
-@@ -342,6 +342,9 @@ static void sc_usage(void)
- BIO_printf(bio_err," -tlsextdebug - hex dump of all TLS extensions received\n");
- BIO_printf(bio_err," -status - request certificate status from server\n");
- BIO_printf(bio_err," -no_ticket - disable use of RFC4507bis session tickets\n");
-+# ifndef OPENSSL_NO_NEXTPROTONEG
-+ BIO_printf(bio_err," -nextprotoneg arg - enable NPN extension, considering named protocols supported (comma-separated list)\n");
-+# endif
- BIO_printf(bio_err," -cutthrough - enable 1-RTT full-handshake for strong ciphers\n");
- #endif
- BIO_printf(bio_err," -legacy_renegotiation - enable use of legacy renegotiation (dangerous)\n");
-@@ -367,6 +370,40 @@ static int MS_CALLBACK ssl_servername_cb
-
- return SSL_TLSEXT_ERR_OK;
- }
-+
-+# ifndef OPENSSL_NO_NEXTPROTONEG
-+/* This the context that we pass to next_proto_cb */
-+typedef struct tlsextnextprotoctx_st {
-+ unsigned char *data;
-+ unsigned short len;
-+ int status;
-+} tlsextnextprotoctx;
-+
-+static tlsextnextprotoctx next_proto;
-+
-+static int next_proto_cb(SSL *s, unsigned char **out, unsigned char *outlen, const unsigned char *in, unsigned int inlen, void *arg)
-+ {
-+ tlsextnextprotoctx *ctx = arg;
-+
-+ if (!c_quiet)
-+ {
-+ /* We can assume that |in| is syntactically valid. */
-+ unsigned i;
-+ BIO_printf(bio_c_out, "Protocols advertised by server: ");
-+ for (i = 0; i < inlen; )
-+ {
-+ if (i)
-+ BIO_write(bio_c_out, ", ", 2);
-+ BIO_write(bio_c_out, &in[i + 1], in[i]);
-+ i += in[i] + 1;
-+ }
-+ BIO_write(bio_c_out, "\n", 1);
-+ }
-+
-+ ctx->status = SSL_select_next_proto(out, outlen, in, inlen, ctx->data, ctx->len);
-+ return SSL_TLSEXT_ERR_OK;
-+ }
-+# endif /* ndef OPENSSL_NO_NEXTPROTONEG */
- #endif
-
- enum
-@@ -431,6 +468,9 @@ int MAIN(int argc, char **argv)
- char *servername = NULL;
- tlsextctx tlsextcbp =
- {NULL,0};
-+# ifndef OPENSSL_NO_NEXTPROTONEG
-+ const char *next_proto_neg_in = NULL;
-+# endif
- #endif
- char *sess_in = NULL;
- char *sess_out = NULL;
-@@ -658,6 +698,13 @@ int MAIN(int argc, char **argv)
- #ifndef OPENSSL_NO_TLSEXT
- else if (strcmp(*argv,"-no_ticket") == 0)
- { off|=SSL_OP_NO_TICKET; }
-+# ifndef OPENSSL_NO_NEXTPROTONEG
-+ else if (strcmp(*argv,"-nextprotoneg") == 0)
-+ {
-+ if (--argc < 1) goto bad;
-+ next_proto_neg_in = *(++argv);
-+ }
-+# endif
- #endif
- else if (strcmp(*argv,"-cutthrough") == 0)
- cutthrough=1;
-@@ -766,6 +813,21 @@ bad:
- OpenSSL_add_ssl_algorithms();
- SSL_load_error_strings();
-
-+#if !defined(OPENSSL_NO_TLSEXT) && !defined(OPENSSL_NO_NEXTPROTONEG)
-+ next_proto.status = -1;
-+ if (next_proto_neg_in)
-+ {
-+ next_proto.data = next_protos_parse(&next_proto.len, next_proto_neg_in);
-+ if (next_proto.data == NULL)
-+ {
-+ BIO_printf(bio_err, "Error parsing -nextprotoneg argument\n");
-+ goto end;
-+ }
-+ }
-+ else
-+ next_proto.data = NULL;
-+#endif
-+
- #ifndef OPENSSL_NO_ENGINE
- e = setup_engine(bio_err, engine_id, 1);
- if (ssl_client_engine_id)
-@@ -896,6 +958,11 @@ bad:
- SSL_CTX_set_mode(ctx, ssl_mode);
- }
-
-+#if !defined(OPENSSL_NO_TLSEXT) && !defined(OPENSSL_NO_NEXTPROTONEG)
-+ if (next_proto.data)
-+ SSL_CTX_set_next_proto_select_cb(ctx, next_proto_cb, &next_proto);
-+#endif
-+
- if (state) SSL_CTX_set_info_callback(ctx,apps_ssl_info_callback);
- if (cipher != NULL)
- if(!SSL_CTX_set_cipher_list(ctx,cipher)) {
-@@ -1755,6 +1822,18 @@ static void print_stuff(BIO *bio, SSL *s
- BIO_printf(bio,"Expansion: %s\n",
- expansion ? SSL_COMP_get_name(expansion) : "NONE");
- #endif
-+
-+#if !defined(OPENSSL_NO_TLSEXT) && !defined(OPENSSL_NO_NEXTPROTONEG)
-+ if (next_proto.status != -1) {
-+ const unsigned char *proto;
-+ unsigned int proto_len;
-+ SSL_get0_next_proto_negotiated(s, &proto, &proto_len);
-+ BIO_printf(bio, "Next protocol: (%d) ", next_proto.status);
-+ BIO_write(bio, proto, proto_len);
-+ BIO_write(bio, "\n", 1);
-+ }
-+#endif
-+
- SSL_SESSION_print(bio,SSL_get_session(s));
- BIO_printf(bio,"---\n");
- if (peer != NULL)
---- openssl-1.0.0b.orig/apps/s_server.c 2010-06-15 17:25:02.000000000 +0000
-+++ openssl-1.0.0b/apps/s_server.c 2010-11-29 19:56:04.902465346 +0000
-@@ -492,6 +492,9 @@ static void sv_usage(void)
- BIO_printf(bio_err," -tlsextdebug - hex dump of all TLS extensions received\n");
- BIO_printf(bio_err," -no_ticket - disable use of RFC4507bis session tickets\n");
- BIO_printf(bio_err," -legacy_renegotiation - enable use of legacy renegotiation (dangerous)\n");
-+# ifndef OPENSSL_NO_NEXTPROTONEG
-+ BIO_printf(bio_err," -nextprotoneg arg - set the advertised protocols for the NPN extension (comma-separated list)\n");
-+# endif
- #endif
- }
-
-@@ -826,6 +829,24 @@ BIO_printf(err, "cert_status: received %
- ret = SSL_TLSEXT_ERR_ALERT_FATAL;
- goto done;
- }
-+
-+# ifndef OPENSSL_NO_NEXTPROTONEG
-+/* This is the context that we pass to next_proto_cb */
-+typedef struct tlsextnextprotoctx_st {
-+ unsigned char *data;
-+ unsigned int len;
-+} tlsextnextprotoctx;
-+
-+static int next_proto_cb(SSL *s, const unsigned char **data, unsigned int *len, void *arg)
-+ {
-+ tlsextnextprotoctx *next_proto = arg;
-+
-+ *data = next_proto->data;
-+ *len = next_proto->len;
-+
-+ return SSL_TLSEXT_ERR_OK;
-+ }
-+# endif /* ndef OPENSSL_NO_NPN */
- #endif
-
- int MAIN(int, char **);
-@@ -867,6 +888,10 @@ int MAIN(int argc, char *argv[])
- #endif
- #ifndef OPENSSL_NO_TLSEXT
- tlsextctx tlsextcbp = {NULL, NULL, SSL_TLSEXT_ERR_ALERT_WARNING};
-+# ifndef OPENSSL_NO_NEXTPROTONEG
-+ const char *next_proto_neg_in = NULL;
-+ tlsextnextprotoctx next_proto;
-+# endif
- #endif
- #ifndef OPENSSL_NO_PSK
- /* by default do not send a PSK identity hint */
-@@ -1191,7 +1216,13 @@ int MAIN(int argc, char *argv[])
- if (--argc < 1) goto bad;
- s_key_file2= *(++argv);
- }
--
-+# ifndef OPENSSL_NO_NEXTPROTONEG
-+ else if (strcmp(*argv,"-nextprotoneg") == 0)
-+ {
-+ if (--argc < 1) goto bad;
-+ next_proto_neg_in = *(++argv);
-+ }
-+# endif
- #endif
- #if !defined(OPENSSL_NO_JPAKE) && !defined(OPENSSL_NO_PSK)
- else if (strcmp(*argv,"-jpake") == 0)
-@@ -1476,6 +1507,11 @@ bad:
- if (vpm)
- SSL_CTX_set1_param(ctx2, vpm);
- }
-+
-+# ifndef OPENSSL_NO_NEXTPROTONEG
-+ if (next_proto.data)
-+ SSL_CTX_set_next_protos_advertised_cb(ctx, next_proto_cb, &next_proto);
-+# endif
- #endif
-
- #ifndef OPENSSL_NO_DH
-@@ -1617,6 +1653,21 @@ bad:
- goto end;
- }
- }
-+# ifndef OPENSSL_NO_NEXTPROTONEG
-+ if (next_proto_neg_in)
-+ {
-+ unsigned short len;
-+ next_proto.data = next_protos_parse(&len,
-+ next_proto_neg_in);
-+ if (next_proto.data == NULL)
-+ goto end;
-+ next_proto.len = len;
-+ }
-+ else
-+ {
-+ next_proto.data = NULL;
-+ }
-+# endif
- #endif
- RSA_free(rsa);
- BIO_printf(bio_s_out,"\n");
-@@ -2159,6 +2210,10 @@ static int init_ssl_connection(SSL *con)
- X509 *peer;
- long verify_error;
- MS_STATIC char buf[BUFSIZ];
-+#if !defined(OPENSSL_NO_TLSEXT) && !defined(OPENSSL_NO_NEXTPROTONEG)
-+ const unsigned char *next_proto_neg;
-+ unsigned next_proto_neg_len;
-+#endif
-
- if ((i=SSL_accept(con)) <= 0)
- {
-@@ -2198,6 +2253,15 @@ static int init_ssl_connection(SSL *con)
- BIO_printf(bio_s_out,"Shared ciphers:%s\n",buf);
- str=SSL_CIPHER_get_name(SSL_get_current_cipher(con));
- BIO_printf(bio_s_out,"CIPHER is %s\n",(str != NULL)?str:"(NONE)");
-+#if !defined(OPENSSL_NO_TLSEXT) && !defined(OPENSSL_NO_NEXTPROTONEG)
-+ SSL_get0_next_proto_negotiated(con, &next_proto_neg, &next_proto_neg_len);
-+ if (next_proto_neg)
-+ {
-+ BIO_printf(bio_s_out,"NEXTPROTO is ");
-+ BIO_write(bio_s_out, next_proto_neg, next_proto_neg_len);
-+ BIO_printf(bio_s_out, "\n");
-+ }
-+#endif
- if (con->hit) BIO_printf(bio_s_out,"Reused session-id\n");
- if (SSL_ctrl(con,SSL_CTRL_GET_FLAGS,0,NULL) &
- TLS1_FLAGS_TLS_PADDING_BUG)
---- openssl-1.0.0b.orig/include/openssl/ssl.h 2010-11-29 19:56:04.846517045 +0000
-+++ openssl-1.0.0b/include/openssl/ssl.h 2010-11-29 19:56:04.965928855 +0000
-@@ -857,6 +857,25 @@ struct ssl_ctx_st
- /* draft-rescorla-tls-opaque-prf-input-00.txt information */
- int (*tlsext_opaque_prf_input_callback)(SSL *, void *peerinput, size_t len, void *arg);
- void *tlsext_opaque_prf_input_callback_arg;
-+
-+# ifndef OPENSSL_NO_NEXTPROTONEG
-+ /* Next protocol negotiation information */
-+ /* (for experimental NPN extension). */
-+
-+ /* For a server, this contains a callback function by which the set of
-+ * advertised protocols can be provided. */
-+ int (*next_protos_advertised_cb)(SSL *s, const unsigned char **buf,
-+ unsigned int *len, void *arg);
-+ void *next_protos_advertised_cb_arg;
-+ /* For a client, this contains a callback function that selects the
-+ * next protocol from the list provided by the server. */
-+ int (*next_proto_select_cb)(SSL *s, unsigned char **out,
-+ unsigned char *outlen,
-+ const unsigned char *in,
-+ unsigned int inlen,
-+ void *arg);
-+ void *next_proto_select_cb_arg;
-+# endif
- #endif
-
- #ifndef OPENSSL_NO_PSK
-@@ -928,6 +947,30 @@ int SSL_CTX_set_client_cert_engine(SSL_C
- #endif
- void SSL_CTX_set_cookie_generate_cb(SSL_CTX *ctx, int (*app_gen_cookie_cb)(SSL *ssl, unsigned char *cookie, unsigned int *cookie_len));
- void SSL_CTX_set_cookie_verify_cb(SSL_CTX *ctx, int (*app_verify_cookie_cb)(SSL *ssl, unsigned char *cookie, unsigned int cookie_len));
-+#ifndef OPENSSL_NO_NEXTPROTONEG
-+void SSL_CTX_set_next_protos_advertised_cb(SSL_CTX *s,
-+ int (*cb) (SSL *ssl,
-+ const unsigned char **out,
-+ unsigned int *outlen,
-+ void *arg), void *arg);
-+void SSL_CTX_set_next_proto_select_cb(SSL_CTX *s,
-+ int (*cb) (SSL *ssl, unsigned char **out,
-+ unsigned char *outlen,
-+ const unsigned char *in,
-+ unsigned int inlen, void *arg),
-+ void *arg);
-+
-+int SSL_select_next_proto(unsigned char **out, unsigned char *outlen,
-+ const unsigned char *in, unsigned int inlen,
-+ const unsigned char *client, unsigned int client_len);
-+void SSL_get0_next_proto_negotiated(const SSL *s, const unsigned char **data,
-+ unsigned *len);
-+
-+#define OPENSSL_NPN_UNSUPPORTED 0
-+#define OPENSSL_NPN_NEGOTIATED 1
-+#define OPENSSL_NPN_NO_OVERLAP 2
-+
-+#endif
-
- #ifndef OPENSSL_NO_PSK
- /* the maximum length of the buffer given to callbacks containing the
-@@ -1187,6 +1230,19 @@ struct ssl_st
- void *tls_session_secret_cb_arg;
-
- SSL_CTX * initial_ctx; /* initial ctx, used to store sessions */
-+
-+#ifndef OPENSSL_NO_NEXTPROTONEG
-+ /* Next protocol negotiation. For the client, this is the protocol that
-+ * we sent in NextProtocol and is set when handling ServerHello
-+ * extensions.
-+ *
-+ * For a server, this is the client's selected_protocol from
-+ * NextProtocol and is set when handling the NextProtocol message,
-+ * before the Finished message. */
-+ unsigned char *next_proto_negotiated;
-+ unsigned char next_proto_negotiated_len;
-+#endif
-+
- #define session_ctx initial_ctx
- #else
- #define session_ctx ctx
-@@ -1919,6 +1975,7 @@ void ERR_load_SSL_strings(void);
- #define SSL_F_SSL3_GET_KEY_EXCHANGE 141
- #define SSL_F_SSL3_GET_MESSAGE 142
- #define SSL_F_SSL3_GET_NEW_SESSION_TICKET 283
-+#define SSL_F_SSL3_GET_NEXT_PROTO 304
- #define SSL_F_SSL3_GET_RECORD 143
- #define SSL_F_SSL3_GET_SERVER_CERTIFICATE 144
- #define SSL_F_SSL3_GET_SERVER_DONE 145
-@@ -2117,6 +2174,8 @@ void ERR_load_SSL_strings(void);
- #define SSL_R_EXCESSIVE_MESSAGE_SIZE 152
- #define SSL_R_EXTRA_DATA_IN_MESSAGE 153
- #define SSL_R_GOT_A_FIN_BEFORE_A_CCS 154
-+#define SSL_R_GOT_NEXT_PROTO_BEFORE_A_CCS 346
-+#define SSL_R_GOT_NEXT_PROTO_WITHOUT_EXTENSION 347
- #define SSL_R_HTTPS_PROXY_REQUEST 155
- #define SSL_R_HTTP_REQUEST 156
- #define SSL_R_ILLEGAL_PADDING 283
---- openssl-1.0.0b.orig/include/openssl/ssl3.h 2010-11-29 19:56:04.832465351 +0000
-+++ openssl-1.0.0b/include/openssl/ssl3.h 2010-11-29 19:56:04.965928855 +0000
-@@ -465,6 +465,12 @@ typedef struct ssl3_state_st
- void *server_opaque_prf_input;
- size_t server_opaque_prf_input_len;
-
-+#ifndef OPENSSL_NO_NEXTPROTONEG
-+ /* Set if we saw the Next Protocol Negotiation extension from
-+ our peer. */
-+ int next_proto_neg_seen;
-+#endif
-+
- struct {
- /* actually only needs to be 16+20 */
- unsigned char cert_verify_md[EVP_MAX_MD_SIZE*2];
-@@ -557,6 +563,10 @@ typedef struct ssl3_state_st
- #define SSL3_ST_CW_CERT_VRFY_B (0x191|SSL_ST_CONNECT)
- #define SSL3_ST_CW_CHANGE_A (0x1A0|SSL_ST_CONNECT)
- #define SSL3_ST_CW_CHANGE_B (0x1A1|SSL_ST_CONNECT)
-+#ifndef OPENSSL_NO_NEXTPROTONEG
-+#define SSL3_ST_CW_NEXT_PROTO_A (0x200|SSL_ST_CONNECT)
-+#define SSL3_ST_CW_NEXT_PROTO_B (0x201|SSL_ST_CONNECT)
-+#endif
- #define SSL3_ST_CW_FINISHED_A (0x1B0|SSL_ST_CONNECT)
- #define SSL3_ST_CW_FINISHED_B (0x1B1|SSL_ST_CONNECT)
- /* read from server */
-@@ -602,6 +612,10 @@ typedef struct ssl3_state_st
- #define SSL3_ST_SR_CERT_VRFY_B (0x1A1|SSL_ST_ACCEPT)
- #define SSL3_ST_SR_CHANGE_A (0x1B0|SSL_ST_ACCEPT)
- #define SSL3_ST_SR_CHANGE_B (0x1B1|SSL_ST_ACCEPT)
-+#ifndef OPENSSL_NO_NEXTPROTONEG
-+#define SSL3_ST_SR_NEXT_PROTO_A (0x210|SSL_ST_ACCEPT)
-+#define SSL3_ST_SR_NEXT_PROTO_B (0x211|SSL_ST_ACCEPT)
-+#endif
- #define SSL3_ST_SR_FINISHED_A (0x1C0|SSL_ST_ACCEPT)
- #define SSL3_ST_SR_FINISHED_B (0x1C1|SSL_ST_ACCEPT)
- /* write to client */
-@@ -626,6 +640,9 @@ typedef struct ssl3_state_st
- #define SSL3_MT_CLIENT_KEY_EXCHANGE 16
- #define SSL3_MT_FINISHED 20
- #define SSL3_MT_CERTIFICATE_STATUS 22
-+#ifndef OPENSSL_NO_NEXTPROTONEG
-+#define SSL3_MT_NEXT_PROTO 67
-+#endif
- #define DTLS1_MT_HELLO_VERIFY_REQUEST 3
-
-
---- openssl-1.0.0b.orig/include/openssl/tls1.h 2009-11-11 14:51:29.000000000 +0000
-+++ openssl-1.0.0b/include/openssl/tls1.h 2010-11-29 19:56:04.965928855 +0000
-@@ -204,6 +204,11 @@ extern "C" {
- /* Temporary extension type */
- #define TLSEXT_TYPE_renegotiate 0xff01
-
-+#ifndef OPENSSL_NO_NEXTPROTONEG
-+/* This is not an IANA defined extension number */
-+#define TLSEXT_TYPE_next_proto_neg 13172
-+#endif
-+
- /* NameType value from RFC 3546 */
- #define TLSEXT_NAMETYPE_host_name 0
- /* status request value from RFC 3546 */
---- openssl-1.0.0b.orig/ssl/s3_both.c 2010-11-29 19:56:04.846517045 +0000
-+++ openssl-1.0.0b/ssl/s3_both.c 2010-11-29 19:56:04.965928855 +0000
-@@ -202,15 +202,40 @@ int ssl3_send_finished(SSL *s, int a, in
- return(ssl3_do_write(s,SSL3_RT_HANDSHAKE));
- }
-
-+#ifndef OPENSSL_NO_NEXTPROTONEG
-+/* ssl3_take_mac calculates the Finished MAC for the handshakes messages seen to far. */
-+static void ssl3_take_mac(SSL *s)
-+ {
-+ const char *sender;
-+ int slen;
-+
-+ if (s->state & SSL_ST_CONNECT)
-+ {
-+ sender=s->method->ssl3_enc->server_finished_label;
-+ slen=s->method->ssl3_enc->server_finished_label_len;
-+ }
-+ else
-+ {
-+ sender=s->method->ssl3_enc->client_finished_label;
-+ slen=s->method->ssl3_enc->client_finished_label_len;
-+ }
-+
-+ s->s3->tmp.peer_finish_md_len = s->method->ssl3_enc->final_finish_mac(s,
-+ sender,slen,s->s3->tmp.peer_finish_md);
-+ }
-+#endif
-+
- int ssl3_get_finished(SSL *s, int a, int b)
- {
- int al,i,ok;
- long n;
- unsigned char *p;
-
-+#ifdef OPENSSL_NO_NEXTPROTONEG
- /* the mac has already been generated when we received the
- * change cipher spec message and is in s->s3->tmp.peer_finish_md
- */
-+#endif
-
- n=s->method->ssl_get_message(s,
- a,
-@@ -521,6 +546,15 @@ long ssl3_get_message(SSL *s, int st1, i
- s->init_num += i;
- n -= i;
- }
-+
-+#ifndef OPENSSL_NO_NEXTPROTONEG
-+ /* If receiving Finished, record MAC of prior handshake messages for
-+ * Finished verification. */
-+ if (*s->init_buf->data == SSL3_MT_FINISHED)
-+ ssl3_take_mac(s);
-+#endif
-+
-+ /* Feed this message into MAC computation. */
- ssl3_finish_mac(s, (unsigned char *)s->init_buf->data, s->init_num + 4);
- if (s->msg_callback)
- s->msg_callback(0, s->version, SSL3_RT_HANDSHAKE, s->init_buf->data, (size_t)s->init_num + 4, s, s->msg_callback_arg);
---- openssl-1.0.0b.orig/ssl/s3_clnt.c 2010-11-29 19:56:04.846517045 +0000
-+++ openssl-1.0.0b/ssl/s3_clnt.c 2010-11-29 19:56:04.965928855 +0000
-@@ -435,7 +435,16 @@ int ssl3_connect(SSL *s)
- ret=ssl3_send_change_cipher_spec(s,
- SSL3_ST_CW_CHANGE_A,SSL3_ST_CW_CHANGE_B);
- if (ret <= 0) goto end;
-+
-+#if defined(OPENSSL_NO_TLSEXT) || defined(OPENSSL_NO_NEXTPROTONEG)
- s->state=SSL3_ST_CW_FINISHED_A;
-+#else
-+ if (s->next_proto_negotiated)
-+ s->state=SSL3_ST_CW_NEXT_PROTO_A;
-+ else
-+ s->state=SSL3_ST_CW_FINISHED_A;
-+#endif
-+
- s->init_num=0;
-
- s->session->cipher=s->s3->tmp.new_cipher;
-@@ -463,6 +472,15 @@ int ssl3_connect(SSL *s)
-
- break;
-
-+#if !defined(OPENSSL_NO_TLSEXT) && !defined(OPENSSL_NO_NEXTPROTONEG)
-+ case SSL3_ST_CW_NEXT_PROTO_A:
-+ case SSL3_ST_CW_NEXT_PROTO_B:
-+ ret=ssl3_send_next_proto(s);
-+ if (ret <= 0) goto end;
-+ s->state=SSL3_ST_CW_FINISHED_A;
-+ break;
-+#endif
-+
- case SSL3_ST_CW_FINISHED_A:
- case SSL3_ST_CW_FINISHED_B:
- ret=ssl3_send_finished(s,
-@@ -3060,6 +3078,32 @@ err:
- */
-
- #ifndef OPENSSL_NO_TLSEXT
-+# ifndef OPENSSL_NO_NEXTPROTONEG
-+int ssl3_send_next_proto(SSL *s)
-+ {
-+ unsigned int len, padding_len;
-+ unsigned char *d;
-+
-+ if (s->state == SSL3_ST_CW_NEXT_PROTO_A)
-+ {
-+ len = s->next_proto_negotiated_len;
-+ padding_len = 32 - ((len + 2) % 32);
-+ d = (unsigned char *)s->init_buf->data;
-+ d[4] = len;
-+ memcpy(d + 5, s->next_proto_negotiated, len);
-+ d[5 + len] = padding_len;
-+ memset(d + 6 + len, 0, padding_len);
-+ *(d++)=SSL3_MT_NEXT_PROTO;
-+ l2n3(2 + len + padding_len, d);
-+ s->state = SSL3_ST_CW_NEXT_PROTO_B;
-+ s->init_num = 4 + 2 + len + padding_len;
-+ s->init_off = 0;
-+ }
-+
-+ return ssl3_do_write(s, SSL3_RT_HANDSHAKE);
-+ }
-+# endif
-+
- int ssl3_check_finished(SSL *s)
- {
- int ok;
---- openssl-1.0.0b.orig/ssl/s3_lib.c 2010-11-29 19:56:04.832465351 +0000
-+++ openssl-1.0.0b/ssl/s3_lib.c 2010-11-29 19:56:04.965928855 +0000
-@@ -2230,6 +2230,15 @@ void ssl3_clear(SSL *s)
- s->s3->num_renegotiations=0;
- s->s3->in_read_app_data=0;
- s->version=SSL3_VERSION;
-+
-+#if !defined(OPENSSL_NO_TLSEXT) && !defined(OPENSSL_NO_NEXTPROTONEG)
-+ if (s->next_proto_negotiated)
-+ {
-+ OPENSSL_free(s->next_proto_negotiated);
-+ s->next_proto_negotiated = NULL;
-+ s->next_proto_negotiated_len = 0;
-+ }
-+#endif
- }
-
- long ssl3_ctrl(SSL *s, int cmd, long larg, void *parg)
---- openssl-1.0.0b.orig/ssl/s3_pkt.c 2010-11-29 19:56:04.832465351 +0000
-+++ openssl-1.0.0b/ssl/s3_pkt.c 2010-11-29 19:56:04.965928855 +0000
-@@ -1394,8 +1394,10 @@ err:
- int ssl3_do_change_cipher_spec(SSL *s)
- {
- int i;
-+#ifdef OPENSSL_NO_NEXTPROTONEG
- const char *sender;
- int slen;
-+#endif
-
- if (s->state & SSL_ST_ACCEPT)
- i=SSL3_CHANGE_CIPHER_SERVER_READ;
-@@ -1418,6 +1420,7 @@ int ssl3_do_change_cipher_spec(SSL *s)
- if (!s->method->ssl3_enc->change_cipher_state(s,i))
- return(0);
-
-+#ifdef OPENSSL_NO_NEXTPROTONEG
- /* we have to record the message digest at
- * this point so we can get it before we read
- * the finished message */
-@@ -1434,6 +1437,7 @@ int ssl3_do_change_cipher_spec(SSL *s)
-
- s->s3->tmp.peer_finish_md_len = s->method->ssl3_enc->final_finish_mac(s,
- sender,slen,s->s3->tmp.peer_finish_md);
-+#endif
-
- return(1);
- }
---- openssl-1.0.0b.orig/ssl/s3_srvr.c 2010-11-29 19:56:04.846517045 +0000
-+++ openssl-1.0.0b/ssl/s3_srvr.c 2010-11-29 19:56:04.965928855 +0000
-@@ -538,7 +538,14 @@ int ssl3_accept(SSL *s)
- * the client uses its key from the certificate
- * for key exchange.
- */
-+#if defined(OPENSSL_NO_TLSEXT) || defined(OPENSSL_NO_NEXTPROTONEG)
- s->state=SSL3_ST_SR_FINISHED_A;
-+#else
-+ if (s->s3->next_proto_neg_seen)
-+ s->state=SSL3_ST_SR_NEXT_PROTO_A;
-+ else
-+ s->state=SSL3_ST_SR_FINISHED_A;
-+#endif
- s->init_num = 0;
- }
- else
-@@ -581,10 +588,27 @@ int ssl3_accept(SSL *s)
- ret=ssl3_get_cert_verify(s);
- if (ret <= 0) goto end;
-
-+#if defined(OPENSSL_NO_TLSEXT) || defined(OPENSSL_NO_NEXTPROTONEG)
- s->state=SSL3_ST_SR_FINISHED_A;
-+#else
-+ if (s->s3->next_proto_neg_seen)
-+ s->state=SSL3_ST_SR_NEXT_PROTO_A;
-+ else
-+ s->state=SSL3_ST_SR_FINISHED_A;
-+#endif
- s->init_num=0;
- break;
-
-+#if !defined(OPENSSL_NO_TLSEXT) && !defined(OPENSSL_NO_NEXTPROTONEG)
-+ case SSL3_ST_SR_NEXT_PROTO_A:
-+ case SSL3_ST_SR_NEXT_PROTO_B:
-+ ret=ssl3_get_next_proto(s);
-+ if (ret <= 0) goto end;
-+ s->init_num = 0;
-+ s->state=SSL3_ST_SR_FINISHED_A;
-+ break;
-+#endif
-+
- case SSL3_ST_SR_FINISHED_A:
- case SSL3_ST_SR_FINISHED_B:
- ret=ssl3_get_finished(s,SSL3_ST_SR_FINISHED_A,
-@@ -655,7 +679,16 @@ int ssl3_accept(SSL *s)
- if (ret <= 0) goto end;
- s->state=SSL3_ST_SW_FLUSH;
- if (s->hit)
-+ {
-+#if defined(OPENSSL_NO_TLSEXT) || defined(OPENSSL_NO_NEXTPROTONEG)
- s->s3->tmp.next_state=SSL3_ST_SR_FINISHED_A;
-+#else
-+ if (s->s3->next_proto_neg_seen)
-+ s->s3->tmp.next_state=SSL3_ST_SR_NEXT_PROTO_A;
-+ else
-+ s->s3->tmp.next_state=SSL3_ST_SR_FINISHED_A;
-+#endif
-+ }
- else
- s->s3->tmp.next_state=SSL_ST_OK;
- s->init_num=0;
-@@ -3196,4 +3229,72 @@ int ssl3_send_cert_status(SSL *s)
- /* SSL3_ST_SW_CERT_STATUS_B */
- return(ssl3_do_write(s,SSL3_RT_HANDSHAKE));
- }
-+
-+# ifndef OPENSSL_NO_NPN
-+/* ssl3_get_next_proto reads a Next Protocol Negotiation handshake message. It
-+ * sets the next_proto member in s if found */
-+int ssl3_get_next_proto(SSL *s)
-+ {
-+ int ok;
-+ unsigned proto_len, padding_len;
-+ long n;
-+ const unsigned char *p;
-+
-+ /* Clients cannot send a NextProtocol message if we didn't see the
-+ * extension in their ClientHello */
-+ if (!s->s3->next_proto_neg_seen)
-+ {
-+ SSLerr(SSL_F_SSL3_GET_NEXT_PROTO,SSL_R_GOT_NEXT_PROTO_WITHOUT_EXTENSION);
-+ return -1;
-+ }
-+
-+ n=s->method->ssl_get_message(s,
-+ SSL3_ST_SR_NEXT_PROTO_A,
-+ SSL3_ST_SR_NEXT_PROTO_B,
-+ SSL3_MT_NEXT_PROTO,
-+ 514, /* See the payload format below */
-+ &ok);
-+
-+ if (!ok)
-+ return((int)n);
-+
-+ /* s->state doesn't reflect whether ChangeCipherSpec has been received
-+ * in this handshake, but s->s3->change_cipher_spec does (will be reset
-+ * by ssl3_get_finished). */
-+ if (!s->s3->change_cipher_spec)
-+ {
-+ SSLerr(SSL_F_SSL3_GET_NEXT_PROTO,SSL_R_GOT_NEXT_PROTO_BEFORE_A_CCS);
-+ return -1;
-+ }
-+
-+ if (n < 2)
-+ return 0; /* The body must be > 1 bytes long */
-+
-+ p=(unsigned char *)s->init_msg;
-+
-+ /* The payload looks like:
-+ * uint8 proto_len;
-+ * uint8 proto[proto_len];
-+ * uint8 padding_len;
-+ * uint8 padding[padding_len];
-+ */
-+ proto_len = p[0];
-+ if (proto_len + 2 > s->init_num)
-+ return 0;
-+ padding_len = p[proto_len + 1];
-+ if (proto_len + padding_len + 2 != s->init_num)
-+ return 0;
-+
-+ s->next_proto_negotiated = OPENSSL_malloc(proto_len);
-+ if (!s->next_proto_negotiated)
-+ {
-+ SSLerr(SSL_F_SSL3_GET_NEXT_PROTO,ERR_R_MALLOC_FAILURE);
-+ return 0;
-+ }
-+ memcpy(s->next_proto_negotiated, p + 1, proto_len);
-+ s->next_proto_negotiated_len = proto_len;
-+
-+ return 1;
-+ }
-+# endif
- #endif
---- openssl-1.0.0b.orig/ssl/ssl.h 2010-11-29 19:56:04.846517045 +0000
-+++ openssl-1.0.0b/ssl/ssl.h 2010-11-29 19:56:04.965928855 +0000
-@@ -857,6 +857,25 @@ struct ssl_ctx_st
- /* draft-rescorla-tls-opaque-prf-input-00.txt information */
- int (*tlsext_opaque_prf_input_callback)(SSL *, void *peerinput, size_t len, void *arg);
- void *tlsext_opaque_prf_input_callback_arg;
-+
-+# ifndef OPENSSL_NO_NEXTPROTONEG
-+ /* Next protocol negotiation information */
-+ /* (for experimental NPN extension). */
-+
-+ /* For a server, this contains a callback function by which the set of
-+ * advertised protocols can be provided. */
-+ int (*next_protos_advertised_cb)(SSL *s, const unsigned char **buf,
-+ unsigned int *len, void *arg);
-+ void *next_protos_advertised_cb_arg;
-+ /* For a client, this contains a callback function that selects the
-+ * next protocol from the list provided by the server. */
-+ int (*next_proto_select_cb)(SSL *s, unsigned char **out,
-+ unsigned char *outlen,
-+ const unsigned char *in,
-+ unsigned int inlen,
-+ void *arg);
-+ void *next_proto_select_cb_arg;
-+# endif
- #endif
-
- #ifndef OPENSSL_NO_PSK
-@@ -928,6 +947,30 @@ int SSL_CTX_set_client_cert_engine(SSL_C
- #endif
- void SSL_CTX_set_cookie_generate_cb(SSL_CTX *ctx, int (*app_gen_cookie_cb)(SSL *ssl, unsigned char *cookie, unsigned int *cookie_len));
- void SSL_CTX_set_cookie_verify_cb(SSL_CTX *ctx, int (*app_verify_cookie_cb)(SSL *ssl, unsigned char *cookie, unsigned int cookie_len));
-+#ifndef OPENSSL_NO_NEXTPROTONEG
-+void SSL_CTX_set_next_protos_advertised_cb(SSL_CTX *s,
-+ int (*cb) (SSL *ssl,
-+ const unsigned char **out,
-+ unsigned int *outlen,
-+ void *arg), void *arg);
-+void SSL_CTX_set_next_proto_select_cb(SSL_CTX *s,
-+ int (*cb) (SSL *ssl, unsigned char **out,
-+ unsigned char *outlen,
-+ const unsigned char *in,
-+ unsigned int inlen, void *arg),
-+ void *arg);
-+
-+int SSL_select_next_proto(unsigned char **out, unsigned char *outlen,
-+ const unsigned char *in, unsigned int inlen,
-+ const unsigned char *client, unsigned int client_len);
-+void SSL_get0_next_proto_negotiated(const SSL *s, const unsigned char **data,
-+ unsigned *len);
-+
-+#define OPENSSL_NPN_UNSUPPORTED 0
-+#define OPENSSL_NPN_NEGOTIATED 1
-+#define OPENSSL_NPN_NO_OVERLAP 2
-+
-+#endif
-
- #ifndef OPENSSL_NO_PSK
- /* the maximum length of the buffer given to callbacks containing the
-@@ -1187,6 +1230,19 @@ struct ssl_st
- void *tls_session_secret_cb_arg;
-
- SSL_CTX * initial_ctx; /* initial ctx, used to store sessions */
-+
-+#ifndef OPENSSL_NO_NEXTPROTONEG
-+ /* Next protocol negotiation. For the client, this is the protocol that
-+ * we sent in NextProtocol and is set when handling ServerHello
-+ * extensions.
-+ *
-+ * For a server, this is the client's selected_protocol from
-+ * NextProtocol and is set when handling the NextProtocol message,
-+ * before the Finished message. */
-+ unsigned char *next_proto_negotiated;
-+ unsigned char next_proto_negotiated_len;
-+#endif
-+
- #define session_ctx initial_ctx
- #else
- #define session_ctx ctx
-@@ -1919,6 +1975,7 @@ void ERR_load_SSL_strings(void);
- #define SSL_F_SSL3_GET_KEY_EXCHANGE 141
- #define SSL_F_SSL3_GET_MESSAGE 142
- #define SSL_F_SSL3_GET_NEW_SESSION_TICKET 283
-+#define SSL_F_SSL3_GET_NEXT_PROTO 304
- #define SSL_F_SSL3_GET_RECORD 143
- #define SSL_F_SSL3_GET_SERVER_CERTIFICATE 144
- #define SSL_F_SSL3_GET_SERVER_DONE 145
-@@ -2117,6 +2174,8 @@ void ERR_load_SSL_strings(void);
- #define SSL_R_EXCESSIVE_MESSAGE_SIZE 152
- #define SSL_R_EXTRA_DATA_IN_MESSAGE 153
- #define SSL_R_GOT_A_FIN_BEFORE_A_CCS 154
-+#define SSL_R_GOT_NEXT_PROTO_BEFORE_A_CCS 346
-+#define SSL_R_GOT_NEXT_PROTO_WITHOUT_EXTENSION 347
- #define SSL_R_HTTPS_PROXY_REQUEST 155
- #define SSL_R_HTTP_REQUEST 156
- #define SSL_R_ILLEGAL_PADDING 283
---- openssl-1.0.0b.orig/ssl/ssl3.h 2010-11-29 19:56:04.832465351 +0000
-+++ openssl-1.0.0b/ssl/ssl3.h 2010-11-29 19:56:04.965928855 +0000
-@@ -465,6 +465,12 @@ typedef struct ssl3_state_st
- void *server_opaque_prf_input;
- size_t server_opaque_prf_input_len;
-
-+#ifndef OPENSSL_NO_NEXTPROTONEG
-+ /* Set if we saw the Next Protocol Negotiation extension from
-+ our peer. */
-+ int next_proto_neg_seen;
-+#endif
-+
- struct {
- /* actually only needs to be 16+20 */
- unsigned char cert_verify_md[EVP_MAX_MD_SIZE*2];
-@@ -557,6 +563,10 @@ typedef struct ssl3_state_st
- #define SSL3_ST_CW_CERT_VRFY_B (0x191|SSL_ST_CONNECT)
- #define SSL3_ST_CW_CHANGE_A (0x1A0|SSL_ST_CONNECT)
- #define SSL3_ST_CW_CHANGE_B (0x1A1|SSL_ST_CONNECT)
-+#ifndef OPENSSL_NO_NEXTPROTONEG
-+#define SSL3_ST_CW_NEXT_PROTO_A (0x200|SSL_ST_CONNECT)
-+#define SSL3_ST_CW_NEXT_PROTO_B (0x201|SSL_ST_CONNECT)
-+#endif
- #define SSL3_ST_CW_FINISHED_A (0x1B0|SSL_ST_CONNECT)
- #define SSL3_ST_CW_FINISHED_B (0x1B1|SSL_ST_CONNECT)
- /* read from server */
-@@ -602,6 +612,10 @@ typedef struct ssl3_state_st
- #define SSL3_ST_SR_CERT_VRFY_B (0x1A1|SSL_ST_ACCEPT)
- #define SSL3_ST_SR_CHANGE_A (0x1B0|SSL_ST_ACCEPT)
- #define SSL3_ST_SR_CHANGE_B (0x1B1|SSL_ST_ACCEPT)
-+#ifndef OPENSSL_NO_NEXTPROTONEG
-+#define SSL3_ST_SR_NEXT_PROTO_A (0x210|SSL_ST_ACCEPT)
-+#define SSL3_ST_SR_NEXT_PROTO_B (0x211|SSL_ST_ACCEPT)
-+#endif
- #define SSL3_ST_SR_FINISHED_A (0x1C0|SSL_ST_ACCEPT)
- #define SSL3_ST_SR_FINISHED_B (0x1C1|SSL_ST_ACCEPT)
- /* write to client */
-@@ -626,6 +640,9 @@ typedef struct ssl3_state_st
- #define SSL3_MT_CLIENT_KEY_EXCHANGE 16
- #define SSL3_MT_FINISHED 20
- #define SSL3_MT_CERTIFICATE_STATUS 22
-+#ifndef OPENSSL_NO_NEXTPROTONEG
-+#define SSL3_MT_NEXT_PROTO 67
-+#endif
- #define DTLS1_MT_HELLO_VERIFY_REQUEST 3
-
-
---- openssl-1.0.0b.orig/ssl/ssl_err.c 2010-11-29 19:56:04.846517045 +0000
-+++ openssl-1.0.0b/ssl/ssl_err.c 2010-11-29 19:56:04.965928855 +0000
-@@ -155,6 +155,7 @@ static ERR_STRING_DATA SSL_str_functs[]=
- {ERR_FUNC(SSL_F_SSL3_GET_KEY_EXCHANGE), "SSL3_GET_KEY_EXCHANGE"},
- {ERR_FUNC(SSL_F_SSL3_GET_MESSAGE), "SSL3_GET_MESSAGE"},
- {ERR_FUNC(SSL_F_SSL3_GET_NEW_SESSION_TICKET), "SSL3_GET_NEW_SESSION_TICKET"},
-+{ERR_FUNC(SSL_F_SSL3_GET_NEXT_PROTO), "SSL3_GET_NEXT_PROTO"},
- {ERR_FUNC(SSL_F_SSL3_GET_RECORD), "SSL3_GET_RECORD"},
- {ERR_FUNC(SSL_F_SSL3_GET_SERVER_CERTIFICATE), "SSL3_GET_SERVER_CERTIFICATE"},
- {ERR_FUNC(SSL_F_SSL3_GET_SERVER_DONE), "SSL3_GET_SERVER_DONE"},
-@@ -355,6 +356,8 @@ static ERR_STRING_DATA SSL_str_reasons[]
- {ERR_REASON(SSL_R_EXCESSIVE_MESSAGE_SIZE),"excessive message size"},
- {ERR_REASON(SSL_R_EXTRA_DATA_IN_MESSAGE) ,"extra data in message"},
- {ERR_REASON(SSL_R_GOT_A_FIN_BEFORE_A_CCS),"got a fin before a ccs"},
-+{ERR_REASON(SSL_R_GOT_NEXT_PROTO_BEFORE_A_CCS),"got next proto before a ccs"},
-+{ERR_REASON(SSL_R_GOT_NEXT_PROTO_WITHOUT_EXTENSION),"got next proto without seeing extension"},
- {ERR_REASON(SSL_R_HTTPS_PROXY_REQUEST) ,"https proxy request"},
- {ERR_REASON(SSL_R_HTTP_REQUEST) ,"http request"},
- {ERR_REASON(SSL_R_ILLEGAL_PADDING) ,"illegal padding"},
---- openssl-1.0.0b.orig/ssl/ssl_lib.c 2010-11-29 19:56:04.846517045 +0000
-+++ openssl-1.0.0b/ssl/ssl_lib.c 2010-11-29 19:56:04.965928855 +0000
-@@ -354,6 +354,9 @@ SSL *SSL_new(SSL_CTX *ctx)
- s->tlsext_ocsp_resplen = -1;
- CRYPTO_add(&ctx->references,1,CRYPTO_LOCK_SSL_CTX);
- s->initial_ctx=ctx;
-+# ifndef OPENSSL_NO_NEXTPROTONEG
-+ s->next_proto_negotiated = NULL;
-+# endif
- #endif
-
- s->verify_result=X509_V_OK;
-@@ -587,6 +590,11 @@ void SSL_free(SSL *s)
- kssl_ctx_free(s->kssl_ctx);
- #endif /* OPENSSL_NO_KRB5 */
-
-+#if !defined(OPENSSL_NO_TLSEXT) && !defined(OPENSSL_NO_NEXTPROTONEG)
-+ if (s->next_proto_negotiated)
-+ OPENSSL_free(s->next_proto_negotiated);
-+#endif
-+
- OPENSSL_free(s);
- }
-
-@@ -1503,6 +1511,124 @@ int SSL_get_servername_type(const SSL *s
- return TLSEXT_NAMETYPE_host_name;
- return -1;
- }
-+
-+# ifndef OPENSSL_NO_NEXTPROTONEG
-+/* SSL_select_next_proto implements the standard protocol selection. It is
-+ * expected that this function is called from the callback set by
-+ * SSL_CTX_set_next_proto_select_cb.
-+ *
-+ * The protocol data is assumed to be a vector of 8-bit, length prefixed byte
-+ * strings. The length byte itself is not included in the length. A byte
-+ * string of length 0 is invalid. No byte string may be truncated.
-+ *
-+ * The current, but experimental algorithm for selecting the protocol is:
-+ *
-+ * 1) If the server doesn't support NPN then this is indicated to the
-+ * callback. In this case, the client application has to abort the connection
-+ * or have a default application level protocol.
-+ *
-+ * 2) If the server supports NPN, but advertises an empty list then the
-+ * client selects the first protcol in its list, but indicates via the
-+ * API that this fallback case was enacted.
-+ *
-+ * 3) Otherwise, the client finds the first protocol in the server's list
-+ * that it supports and selects this protocol. This is because it's
-+ * assumed that the server has better information about which protocol
-+ * a client should use.
-+ *
-+ * 4) If the client doesn't support any of the server's advertised
-+ * protocols, then this is treated the same as case 2.
-+ *
-+ * It returns either
-+ * OPENSSL_NPN_NEGOTIATED if a common protocol was found, or
-+ * OPENSSL_NPN_NO_OVERLAP if the fallback case was reached.
-+ */
-+int SSL_select_next_proto(unsigned char **out, unsigned char *outlen, const unsigned char *server, unsigned int server_len, const unsigned char *client, unsigned int client_len)
-+ {
-+ unsigned int i, j;
-+ const unsigned char *result;
-+ int status = OPENSSL_NPN_UNSUPPORTED;
-+
-+ /* For each protocol in server preference order, see if we support it. */
-+ for (i = 0; i < server_len; )
-+ {
-+ for (j = 0; j < client_len; )
-+ {
-+ if (server[i] == client[j] &&
-+ memcmp(&server[i+1], &client[j+1], server[i]) == 0)
-+ {
-+ /* We found a match */
-+ result = &server[i];
-+ status = OPENSSL_NPN_NEGOTIATED;
-+ goto found;
-+ }
-+ j += client[j];
-+ j++;
-+ }
-+ i += server[i];
-+ i++;
-+ }
-+
-+ /* There's no overlap between our protocols and the server's list. */
-+ result = client;
-+ status = OPENSSL_NPN_NO_OVERLAP;
-+
-+ found:
-+ *out = (unsigned char *) result + 1;
-+ *outlen = result[0];
-+ return status;
-+ }
-+
-+/* SSL_get0_next_proto_negotiated sets *data and *len to point to the client's
-+ * requested protocol for this connection and returns 0. If the client didn't
-+ * request any protocol, then *data is set to NULL.
-+ *
-+ * Note that the client can request any protocol it chooses. The value returned
-+ * from this function need not be a member of the list of supported protocols
-+ * provided by the callback.
-+ */
-+void SSL_get0_next_proto_negotiated(const SSL *s, const unsigned char **data, unsigned *len)
-+ {
-+ *data = s->next_proto_negotiated;
-+ if (!*data) {
-+ *len = 0;
-+ } else {
-+ *len = s->next_proto_negotiated_len;
-+ }
-+}
-+
-+/* SSL_CTX_set_next_protos_advertised_cb sets a callback that is called when a
-+ * TLS server needs a list of supported protocols for Next Protocol
-+ * Negotiation. The returned list must be in wire format. The list is returned
-+ * by setting |out| to point to it and |outlen| to its length. This memory will
-+ * not be modified, but one should assume that the SSL* keeps a reference to
-+ * it.
-+ *
-+ * The callback should return SSL_TLSEXT_ERR_OK if it wishes to advertise. Otherwise, no
-+ * such extension will be included in the ServerHello. */
-+void SSL_CTX_set_next_protos_advertised_cb(SSL_CTX *ctx, int (*cb) (SSL *ssl, const unsigned char **out, unsigned int *outlen, void *arg), void *arg)
-+ {
-+ ctx->next_protos_advertised_cb = cb;
-+ ctx->next_protos_advertised_cb_arg = arg;
-+ }
-+
-+/* SSL_CTX_set_next_proto_select_cb sets a callback that is called when a
-+ * client needs to select a protocol from the server's provided list. |out|
-+ * must be set to point to the selected protocol (which may be within |in|).
-+ * The length of the protocol name must be written into |outlen|. The server's
-+ * advertised protocols are provided in |in| and |inlen|. The callback can
-+ * assume that |in| is syntactically valid.
-+ *
-+ * The client must select a protocol. It is fatal to the connection if this
-+ * callback returns a value other than SSL_TLSEXT_ERR_OK.
-+ */
-+void SSL_CTX_set_next_proto_select_cb(SSL_CTX *ctx, int (*cb) (SSL *s, unsigned char **out, unsigned char *outlen, const unsigned char *in, unsigned int inlen, void *arg), void *arg)
-+ {
-+ ctx->next_proto_select_cb = cb;
-+ ctx->next_proto_select_cb_arg = arg;
-+ }
-+
-+# endif
- #endif
-
- static unsigned long ssl_session_hash(const SSL_SESSION *a)
-@@ -1667,6 +1793,10 @@ SSL_CTX *SSL_CTX_new(const SSL_METHOD *m
- ret->tlsext_status_cb = 0;
- ret->tlsext_status_arg = NULL;
-
-+# ifndef OPENSSL_NO_NEXTPROTONEG
-+ ret->next_protos_advertised_cb = 0;
-+ ret->next_proto_select_cb = 0;
-+# endif
- #endif
- #ifndef OPENSSL_NO_PSK
- ret->psk_identity_hint=NULL;
---- openssl-1.0.0b.orig/ssl/ssl_locl.h 2010-11-29 19:56:04.846517045 +0000
-+++ openssl-1.0.0b/ssl/ssl_locl.h 2010-11-29 19:56:04.965928855 +0000
-@@ -968,6 +968,9 @@ int ssl3_get_server_certificate(SSL *s);
- int ssl3_check_cert_and_algorithm(SSL *s);
- #ifndef OPENSSL_NO_TLSEXT
- int ssl3_check_finished(SSL *s);
-+# ifndef OPENSSL_NO_NEXTPROTONEG
-+int ssl3_send_next_proto(SSL *s);
-+# endif
- #endif
-
- int dtls1_client_hello(SSL *s);
-@@ -986,6 +989,9 @@ int ssl3_check_client_hello(SSL *s);
- int ssl3_get_client_certificate(SSL *s);
- int ssl3_get_client_key_exchange(SSL *s);
- int ssl3_get_cert_verify(SSL *s);
-+#ifndef OPENSSL_NO_NEXTPROTONEG
-+int ssl3_get_next_proto(SSL *s);
-+#endif
-
- int dtls1_send_hello_request(SSL *s);
- int dtls1_send_server_hello(SSL *s);
---- openssl-1.0.0b.orig/ssl/t1_lib.c 2010-11-16 13:26:24.000000000 +0000
-+++ openssl-1.0.0b/ssl/t1_lib.c 2010-11-29 19:56:04.965928855 +0000
-@@ -494,6 +494,18 @@ unsigned char *ssl_add_clienthello_tlsex
- i2d_X509_EXTENSIONS(s->tlsext_ocsp_exts, &ret);
- }
-
-+#ifndef OPENSSL_NO_NEXTPROTONEG
-+ if (s->ctx->next_proto_select_cb && !s->s3->tmp.finish_md_len)
-+ {
-+ /* The client advertises an emtpy extension to indicate its
-+ * support for Next Protocol Negotiation */
-+ if (limit - ret - 4 < 0)
-+ return NULL;
-+ s2n(TLSEXT_TYPE_next_proto_neg,ret);
-+ s2n(0,ret);
-+ }
-+#endif
-+
- if ((extdatalen = ret-p-2)== 0)
- return p;
-
-@@ -505,6 +517,9 @@ unsigned char *ssl_add_serverhello_tlsex
- {
- int extdatalen=0;
- unsigned char *ret = p;
-+#ifndef OPENSSL_NO_NEXTPROTONEG
-+ int next_proto_neg_seen;
-+#endif
-
- /* don't add extensions for SSLv3, unless doing secure renegotiation */
- if (s->version == SSL3_VERSION && !s->s3->send_connection_binding)
-@@ -618,6 +633,28 @@ unsigned char *ssl_add_serverhello_tlsex
-
- }
-
-+#ifndef OPENSSL_NO_NEXTPROTONEG
-+ next_proto_neg_seen = s->s3->next_proto_neg_seen;
-+ s->s3->next_proto_neg_seen = 0;
-+ if (next_proto_neg_seen && s->ctx->next_protos_advertised_cb)
-+ {
-+ const unsigned char *npa;
-+ unsigned int npalen;
-+ int r;
-+
-+ r = s->ctx->next_protos_advertised_cb(s, &npa, &npalen, s->ctx->next_protos_advertised_cb_arg);
-+ if (r == SSL_TLSEXT_ERR_OK)
-+ {
-+ if ((long)(limit - ret - 4 - npalen) < 0) return NULL;
-+ s2n(TLSEXT_TYPE_next_proto_neg,ret);
-+ s2n(npalen,ret);
-+ memcpy(ret, npa, npalen);
-+ ret += npalen;
-+ s->s3->next_proto_neg_seen = 1;
-+ }
-+ }
-+#endif
-+
- if ((extdatalen = ret-p-2)== 0)
- return p;
-
-@@ -982,6 +1019,28 @@ int ssl_parse_clienthello_tlsext(SSL *s,
- else
- s->tlsext_status_type = -1;
- }
-+#ifndef OPENSSL_NO_NEXTPROTONEG
-+ else if (type == TLSEXT_TYPE_next_proto_neg &&
-+ s->s3->tmp.finish_md_len == 0)
-+ {
-+ /* We shouldn't accept this extension on a
-+ * renegotiation.
-+ *
-+ * s->new_session will be set on renegotiation, but we
-+ * probably shouldn't rely that it couldn't be set on
-+ * the initial renegotation too in certain cases (when
-+ * there's some other reason to disallow resuming an
-+ * earlier session -- the current code won't be doing
-+ * anything like that, but this might change).
-+
-+ * A valid sign that there's been a previous handshake
-+ * in this connection is if s->s3->tmp.finish_md_len >
-+ * 0. (We are talking about a check that will happen
-+ * in the Hello protocol round, well before a new
-+ * Finished message could have been computed.) */
-+ s->s3->next_proto_neg_seen = 1;
-+ }
-+#endif
-
- /* session ticket processed earlier */
- data+=size;
-@@ -1005,6 +1064,26 @@ int ssl_parse_clienthello_tlsext(SSL *s,
- return 1;
- }
-
-+#ifndef OPENSSL_NO_NEXTPROTONEG
-+/* ssl_next_proto_validate validates a Next Protocol Negotiation block. No
-+ * elements of zero length are allowed and the set of elements must exactly fill
-+ * the length of the block. */
-+static int ssl_next_proto_validate(unsigned char *d, unsigned len)
-+ {
-+ unsigned int off = 0;
-+
-+ while (off < len)
-+ {
-+ if (d[off] == 0)
-+ return 0;
-+ off += d[off];
-+ off++;
-+ }
-+
-+ return off == len;
-+ }
-+#endif
-+
- int ssl_parse_serverhello_tlsext(SSL *s, unsigned char **p, unsigned char *d, int n, int *al)
- {
- unsigned short length;
-@@ -1139,6 +1218,39 @@ int ssl_parse_serverhello_tlsext(SSL *s,
- /* Set flag to expect CertificateStatus message */
- s->tlsext_status_expected = 1;
- }
-+#ifndef OPENSSL_NO_NEXTPROTONEG
-+ else if (type == TLSEXT_TYPE_next_proto_neg)
-+ {
-+ unsigned char *selected;
-+ unsigned char selected_len;
-+
-+ /* We must have requested it. */
-+ if ((s->ctx->next_proto_select_cb == NULL))
-+ {
-+ *al = TLS1_AD_UNSUPPORTED_EXTENSION;
-+ return 0;
-+ }
-+ /* The data must be valid */
-+ if (!ssl_next_proto_validate(data, size))
-+ {
-+ *al = TLS1_AD_DECODE_ERROR;
-+ return 0;
-+ }
-+ if (s->ctx->next_proto_select_cb(s, &selected, &selected_len, data, size, s->ctx->next_proto_select_cb_arg) != SSL_TLSEXT_ERR_OK)
-+ {
-+ *al = TLS1_AD_INTERNAL_ERROR;
-+ return 0;
-+ }
-+ s->next_proto_negotiated = OPENSSL_malloc(selected_len);
-+ if (!s->next_proto_negotiated)
-+ {
-+ *al = TLS1_AD_INTERNAL_ERROR;
-+ return 0;
-+ }
-+ memcpy(s->next_proto_negotiated, selected, selected_len);
-+ s->next_proto_negotiated_len = selected_len;
-+ }
-+#endif
- else if (type == TLSEXT_TYPE_renegotiate)
- {
- if(!ssl_parse_serverhello_renegotiate_ext(s, data, size, al))
---- openssl-1.0.0b.orig/ssl/tls1.h 2009-11-11 14:51:29.000000000 +0000
-+++ openssl-1.0.0b/ssl/tls1.h 2010-11-29 19:56:04.965928855 +0000
-@@ -204,6 +204,11 @@ extern "C" {
- /* Temporary extension type */
- #define TLSEXT_TYPE_renegotiate 0xff01
-
-+#ifndef OPENSSL_NO_NEXTPROTONEG
-+/* This is not an IANA defined extension number */
-+#define TLSEXT_TYPE_next_proto_neg 13172
-+#endif
-+
- /* NameType value from RFC 3546 */
- #define TLSEXT_NAMETYPE_host_name 0
- /* status request value from RFC 3546 */
diff --git a/patches/progs.patch b/patches/progs.patch
index 16fd9b0..f0879ae 100644
--- a/patches/progs.patch
+++ b/patches/progs.patch
@@ -20,8 +20,8 @@
+#if 0 /* ANDROID */
{FUNC_TYPE_GENERAL,"ts",ts_main},
+#endif
- #ifndef OPENSSL_NO_MD2
- {FUNC_TYPE_MD,"md2",dgst_main},
+ #ifndef OPENSSL_NO_SRP
+ {FUNC_TYPE_GENERAL,"srp",srp_main},
#endif
--- openssl-1.0.0.orig/apps/speed.c 2010-03-03 11:56:17.000000000 -0800
+++ openssl-1.0.0/apps/speed.c 2010-05-18 14:05:57.000000000 -0700
diff --git a/patches/ssl_Android.mk b/patches/ssl_Android.mk
index 40641a3..619aede 100644
--- a/patches/ssl_Android.mk
+++ b/patches/ssl_Android.mk
@@ -6,43 +6,49 @@
external/openssl/crypto
local_src_files:= \
- s2_meth.c \
- s2_srvr.c \
- s2_clnt.c \
- s2_lib.c \
- s2_enc.c \
- s2_pkt.c \
- s3_meth.c \
- s3_srvr.c \
- s3_clnt.c \
- s3_lib.c \
- s3_enc.c \
- s3_pkt.c \
- s3_both.c \
- s23_meth.c \
- s23_srvr.c \
+ bio_ssl.c \
+ d1_both.c \
+ d1_enc.c \
+ d1_lib.c \
+ d1_pkt.c \
+ d1_srtp.c \
+ kssl.c \
s23_clnt.c \
s23_lib.c \
+ s23_meth.c \
s23_pkt.c \
- t1_meth.c \
- t1_srvr.c \
- t1_clnt.c \
- t1_lib.c \
- t1_enc.c \
- t1_reneg.c \
- ssl_lib.c \
- ssl_err2.c \
- ssl_cert.c \
- ssl_sess.c \
- ssl_ciph.c \
- ssl_stat.c \
- ssl_rsa.c \
- ssl_asn1.c \
- ssl_txt.c \
+ s23_srvr.c \
+ s2_clnt.c \
+ s2_enc.c \
+ s2_lib.c \
+ s2_meth.c \
+ s2_pkt.c \
+ s2_srvr.c \
+ s3_both.c \
+ s3_clnt.c \
+ s3_enc.c \
+ s3_lib.c \
+ s3_meth.c \
+ s3_pkt.c \
+ s3_srvr.c \
ssl_algs.c \
- bio_ssl.c \
+ ssl_asn1.c \
+ ssl_cert.c \
+ ssl_ciph.c \
ssl_err.c \
- kssl.c
+ ssl_err2.c \
+ ssl_lib.c \
+ ssl_rsa.c \
+ ssl_sess.c \
+ ssl_stat.c \
+ ssl_txt.c \
+ t1_clnt.c \
+ t1_enc.c \
+ t1_lib.c \
+ t1_meth.c \
+ t1_reneg.c \
+ t1_srvr.c \
+ tls_srp.c
#######################################
# target static library