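openssl-1.0.1 upgrade

Update the Android patch set for the OpenSSL 1.0.1 sources:
- remove patches/mips_asm.patch and its entry in patches/README
- add srp.c to the apps makefile patch, and add further crypto sources
  (buf_str, cmac, ec *_oct, e_aes_cbc_hmac_sha1, e_rc4_hmac_md5,
  ccm128/gcm128/xts128, o_init, pqueue, rc4_utl, rsa_crpt, srp) plus the
  crypto/modes include path to the crypto makefile patch
- pass "-x assembler-with-cpp" to the assembler through LOCAL_ASFLAGS
- refresh the context of handshake_cutthrough.patch and jsse.patch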

Bug: 6168278

Change-Id: I648f9172828120df5d19a14425e9ceec92647921
diff --git a/patches/README b/patches/README
index f70135c..f7d886d 100644
--- a/patches/README
+++ b/patches/README
@@ -33,7 +33,3 @@
 sha1_armv4_large.patch
 
 This patch eliminates memory stores to addresses below SP.
-
-mips_asm.patch
-
-MIPS assembly routines (AES, BN, SHA1, SHA256)
diff --git a/patches/apps_Android.mk b/patches/apps_Android.mk
index c2dc2d7..9110490 100644
--- a/patches/apps_Android.mk
+++ b/patches/apps_Android.mk
@@ -48,6 +48,7 @@
 	smime.c \
 	speed.c \
 	spkac.c \
+	srp.c \
 	verify.c \
 	version.c \
 	x509.c
diff --git a/patches/crypto_Android.mk b/patches/crypto_Android.mk
index 8090c12..fb599ce 100644
--- a/patches/crypto_Android.mk
+++ b/patches/crypto_Android.mk
@@ -169,7 +169,11 @@
 	bn/bn_sqrt.c \
 	bn/bn_word.c \
 	buffer/buf_err.c \
+	buffer/buf_str.c \
 	buffer/buffer.c \
+	cmac/cm_ameth.c \
+	cmac/cm_pmeth.c \
+	cmac/cmac.c \
 	comp/c_rle.c \
 	comp/c_zlib.c \
 	comp/comp_err.c \
@@ -235,6 +239,7 @@
 	dso/dso_null.c \
 	dso/dso_openssl.c \
 	ec/ec2_mult.c \
+	ec/ec2_oct.c \
 	ec/ec2_smpl.c \
 	ec/ec_ameth.c \
 	ec/ec_asn1.c \
@@ -245,11 +250,13 @@
 	ec/ec_key.c \
 	ec/ec_lib.c \
 	ec/ec_mult.c \
+	ec/ec_oct.c \
 	ec/ec_pmeth.c \
 	ec/ec_print.c \
 	ec/eck_prn.c \
 	ec/ecp_mont.c \
 	ec/ecp_nist.c \
+	ec/ecp_oct.c \
 	ec/ecp_smpl.c \
 	ecdh/ech_err.c \
 	ecdh/ech_key.c \
@@ -295,6 +302,7 @@
 	evp/c_alld.c \
 	evp/digest.c \
 	evp/e_aes.c \
+	evp/e_aes_cbc_hmac_sha1.c \
 	evp/e_bf.c \
 	evp/e_des.c \
 	evp/e_des3.c \
@@ -302,6 +310,7 @@
 	evp/e_old.c \
 	evp/e_rc2.c \
 	evp/e_rc4.c \
+	evp/e_rc4_hmac_md5.c \
 	evp/e_rc5.c \
 	evp/e_xcbc_d.c \
 	evp/encode.c \
@@ -347,9 +356,13 @@
 	md5/md5_dgst.c \
 	md5/md5_one.c \
 	modes/cbc128.c \
+	modes/ccm128.c \
 	modes/cfb128.c \
 	modes/ctr128.c \
+	modes/gcm128.c \
 	modes/ofb128.c \
+	modes/xts128.c \
+	o_init.c \
 	objects/o_names.c \
 	objects/obj_dat.c \
 	objects/obj_err.c \
@@ -398,6 +411,7 @@
 	pkcs7/pk7_mime.c \
 	pkcs7/pk7_smime.c \
 	pkcs7/pkcs7err.c \
+	pqueue/pqueue.c \
 	rand/md_rand.c \
 	rand/rand_egd.c \
 	rand/rand_err.c \
@@ -411,11 +425,13 @@
 	rc2/rc2ofb64.c \
 	rc4/rc4_enc.c \
 	rc4/rc4_skey.c \
+	rc4/rc4_utl.c \
 	ripemd/rmd_dgst.c \
 	ripemd/rmd_one.c \
 	rsa/rsa_ameth.c \
 	rsa/rsa_asn1.c \
 	rsa/rsa_chk.c \
+	rsa/rsa_crpt.c \
 	rsa/rsa_eay.c \
 	rsa/rsa_err.c \
 	rsa/rsa_gen.c \
@@ -436,6 +452,8 @@
 	sha/sha256.c \
 	sha/sha512.c \
 	sha/sha_dgst.c \
+	srp/srp_lib.c \
+	srp/srp_vfy.c \
 	stack/stack.c \
 	ts/ts_err.c \
 	txt_db/txt_db.c \
@@ -507,12 +525,15 @@
 	external/openssl \
 	external/openssl/crypto/asn1 \
 	external/openssl/crypto/evp \
+	external/openssl/crypto/modes \
 	external/openssl/include \
 	external/openssl/include/openssl \
 	external/zlib
 
 local_c_flags := -DNO_WINDOWS_BRAINDEATH
 
+local_as_flags := -x assembler-with-cpp
+
 #######################################
 # target static library
 include $(CLEAR_VARS)
@@ -525,6 +546,7 @@
 
 LOCAL_SRC_FILES += $(local_src_files)
 LOCAL_CFLAGS += $(local_c_flags)
+LOCAL_ASFLAGS += $(local_as_flags)
 LOCAL_C_INCLUDES += $(local_c_includes)
 ifeq ($(TARGET_ARCH),arm)
 	LOCAL_SRC_FILES += $(arm_src_files)
@@ -561,6 +583,7 @@
 
 LOCAL_SRC_FILES += $(local_src_files)
 LOCAL_CFLAGS += $(local_c_flags)
+LOCAL_ASFLAGS += $(local_as_flags)
 LOCAL_C_INCLUDES += $(local_c_includes)
 ifeq ($(TARGET_ARCH),arm)
 	LOCAL_SRC_FILES += $(arm_src_files)
@@ -587,6 +610,7 @@
 include $(LOCAL_PATH)/../android-config.mk
 LOCAL_SRC_FILES += $(local_src_files)
 LOCAL_CFLAGS += $(local_c_flags) -DPURIFY
+LOCAL_ASFLAGS += $(local_as_flags)
 LOCAL_C_INCLUDES += $(local_c_includes)
 LOCAL_SRC_FILES += $(other_arch_src_files)
 LOCAL_STATIC_LIBRARIES += libz
@@ -602,6 +626,7 @@
 include $(LOCAL_PATH)/../android-config.mk
 LOCAL_SRC_FILES += $(local_src_files)
 LOCAL_CFLAGS += $(local_c_flags) -DPURIFY
+LOCAL_ASFLAGS += $(local_as_flags)
 LOCAL_C_INCLUDES += $(local_c_includes)
 LOCAL_SRC_FILES += $(other_arch_src_files)
 LOCAL_STATIC_LIBRARIES += libz
diff --git a/patches/handshake_cutthrough.patch b/patches/handshake_cutthrough.patch
index 4f29839..57c4c78 100644
--- a/patches/handshake_cutthrough.patch
+++ b/patches/handshake_cutthrough.patch
@@ -6,9 +6,9 @@
  	BIO_printf(bio_err," -status           - request certificate status from server\n");
  	BIO_printf(bio_err," -no_ticket        - disable use of RFC4507bis session tickets\n");
 +	BIO_printf(bio_err," -cutthrough       - enable 1-RTT full-handshake for strong ciphers\n");
- #endif
- 	}
- 
+ # if !defined(OPENSSL_NO_NEXTPROTONEG)
+ 	BIO_printf(bio_err," -nextprotoneg arg - enable NPN extension, considering named protocols supported (comma-separated list)\n");
+ # endif
 @@ -304,6 +305,7 @@ int MAIN(int argc, char **argv)
  	EVP_PKEY *key = NULL;
  	char *CApath=NULL,*CAfile=NULL,*cipher=NULL;
@@ -191,9 +191,9 @@
  /* extra state */
  #define SSL3_ST_CW_FLUSH		(0x100|SSL_ST_CONNECT)
 +#define SSL3_ST_CUTTHROUGH_COMPLETE	(0x101|SSL_ST_CONNECT)
- /* write to server */
- #define SSL3_ST_CW_CLNT_HELLO_A		(0x110|SSL_ST_CONNECT)
- #define SSL3_ST_CW_CLNT_HELLO_B		(0x111|SSL_ST_CONNECT)
+ #ifndef OPENSSL_NO_SCTP
+ #define DTLS1_SCTP_ST_CW_WRITE_SOCK			(0x310|SSL_ST_CONNECT)
+ #define DTLS1_SCTP_ST_CR_READ_SOCK			(0x320|SSL_ST_CONNECT)
 diff -uarp openssl-1.0.0.orig/ssl/ssl_lib.c openssl-1.0.0/ssl/ssl_lib.c
 --- openssl-1.0.0.orig/ssl/ssl_lib.c	2010-02-17 14:43:46.000000000 -0500
 +++ openssl-1.0.0/ssl/ssl_lib.c	2010-04-21 17:02:45.000000000 -0400
diff --git a/patches/jsse.patch b/patches/jsse.patch
index 249fb5b..80e5357 100644
--- a/patches/jsse.patch
+++ b/patches/jsse.patch
@@ -10,14 +10,6 @@
  	/* Default generate session ID callback. */
  	GEN_SESSION_CB generate_session_id;
  
-@@ -1546,6 +1549,7 @@ const SSL_CIPHER *SSL_get_current_cipher
- int	SSL_CIPHER_get_bits(const SSL_CIPHER *c,int *alg_bits);
- char *	SSL_CIPHER_get_version(const SSL_CIPHER *c);
- const char *	SSL_CIPHER_get_name(const SSL_CIPHER *c);
-+const char *	SSL_CIPHER_authentication_method(const SSL_CIPHER *c);
- 
- int	SSL_get_fd(const SSL *s);
- int	SSL_get_rfd(const SSL *s);
 @@ -1554,6 +1558,7 @@ const char  * SSL_get_cipher_list(const 
  char *	SSL_get_shared_ciphers(const SSL *s, char *buf, int len);
  int	SSL_get_read_ahead(const SSL * s);
@@ -48,9 +40,9 @@
  const unsigned char *SSL_SESSION_get_id(const SSL_SESSION *s,
  					unsigned int *len);
 +const char *	SSL_SESSION_get_version(const SSL_SESSION *s);
+ unsigned int SSL_SESSION_get_compress_id(const SSL_SESSION *s);
  #ifndef OPENSSL_NO_FP_API
  int	SSL_SESSION_print_fp(FILE *fp,const SSL_SESSION *ses);
- #endif
 @@ -1624,6 +1633,7 @@ int	SSL_SESSION_print(BIO *fp,const SSL_
  void	SSL_SESSION_free(SSL_SESSION *ses);
  int	i2d_SSL_SESSION(SSL_SESSION *in,unsigned char **pp);
@@ -296,13 +288,19 @@
  /* works well for SSLv2, not so good for SSLv3 */
  char *SSL_get_shared_ciphers(const SSL *s,char *buf,int len)
  	{
-@@ -2551,18 +2578,45 @@ SSL_METHOD *ssl_bad_method(int ver)
+@@ -2551,22 +2578,45 @@ SSL_METHOD *ssl_bad_method(int ver)
  	return(NULL);
  	}
  
 -const char *SSL_get_version(const SSL *s)
 +static const char *ssl_get_version(int version)
  	{
+-	if (s->version == TLS1_2_VERSION)
++	if (version == TLS1_2_VERSION)
+ 		return("TLSv1.2");
+-	else if (s->version == TLS1_1_VERSION)
++	else if (version == TLS1_1_VERSION)
+ 		return("TLSv1.1");
 -	if (s->version == TLS1_VERSION)
 +	if (version == TLS1_VERSION)
  		return("TLSv1");
@@ -334,12 +332,8 @@
 +		{
 +	case SSL2_VERSION:
 +		return SSL_TXT_RSA;
-+	case SSL3_VERSION:
-+	case TLS1_VERSION:
-+	case DTLS1_VERSION:
-+		return SSL_CIPHER_authentication_method(ssl->s3->tmp.new_cipher);
 +	default:
-+		return "UNKNOWN";
++		return SSL_CIPHER_authentication_method(ssl->s3->tmp.new_cipher);
 +		}
 +	}
 +
diff --git a/patches/mips_asm.patch b/patches/mips_asm.patch
deleted file mode 100644
index 68a80f1..0000000
--- a/patches/mips_asm.patch
+++ /dev/null
@@ -1,5461 +0,0 @@
-diff --git a/crypto/aes/asm/aes-mips.pl b/crypto/aes/asm/aes-mips.pl
-new file mode 100644
-index 0000000..2ce6def
---- /dev/null
-+++ b/crypto/aes/asm/aes-mips.pl
-@@ -0,0 +1,1611 @@
-+#!/usr/bin/env perl
-+
-+# ====================================================================
-+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
-+# project. The module is, however, dual licensed under OpenSSL and
-+# CRYPTOGAMS licenses depending on where you obtain it. For further
-+# details see http://www.openssl.org/~appro/cryptogams/.
-+# ====================================================================
-+
-+# AES for MIPS
-+
-+# October 2010
-+#
-+# Code uses 1K[+256B] S-box and on single-issue core [such as R5000]
-+# spends ~68 cycles per byte processed with 128-bit key. This is ~16%
-+# faster than gcc-generated code, which is not very impressive. But
-+# recall that compressed S-box requires extra processing, namely
-+# additional rotations. Rotations are implemented with lwl/lwr pairs,
-+# which is normally used for loading unaligned data. Another cool
-+# thing about this module is its endian neutrality, which means that
-+# it processes data without ever changing byte order...
-+
-+######################################################################
-+# There is a number of MIPS ABI in use, O32 and N32/64 are most
-+# widely used. Then there is a new contender: NUBI. It appears that if
-+# one picks the latter, it's possible to arrange code in ABI neutral
-+# manner. Therefore let's stick to NUBI register layout:
-+#
-+($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25));
-+($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
-+($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23));
-+($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31));
-+#
-+# The return value is placed in $a0. Following coding rules facilitate
-+# interoperability:
-+#
-+# - never ever touch $tp, "thread pointer", former $gp;
-+# - copy return value to $t0, former $v0 [or to $a0 if you're adapting
-+#   old code];
-+# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
-+#
-+# For reference here is register layout for N32/64 MIPS ABIs:
-+#
-+# ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
-+# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
-+# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
-+# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
-+# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
-+#
-+$flavour = shift; # supported flavours are o32,n32,64,nubi32,nubi64
-+
-+if ($flavour =~ /64|n32/i) {
-+	$PTR_ADD="dadd";	# incidentally works even on n32
-+	$PTR_SUB="dsub";	# incidentally works even on n32
-+	$REG_S="sd";
-+	$REG_L="ld";
-+	$PTR_SLL="dsll";	# incidentally works even on n32
-+	$SZREG=8;
-+} else {
-+	$PTR_ADD="add";
-+	$PTR_SUB="sub";
-+	$REG_S="sw";
-+	$REG_L="lw";
-+	$PTR_SLL="sll";
-+	$SZREG=4;
-+}
-+$pf = ($flavour =~ /nubi/i) ? $t0 : $t2;
-+#
-+# <appro@openssl.org>
-+#
-+######################################################################
-+
-+$big_endian=(`echo MIPSEL | $ENV{CC} -E -P -`=~/MIPSEL/)?1:0;
-+
-+for (@ARGV) {	$output=$_ if (/^\w[\w\-]*\.\w+$/);	}
-+open STDOUT,">$output";
-+
-+if (!defined($big_endian))
-+{    $big_endian=(unpack('L',pack('N',1))==1);   }
-+
-+while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
-+open STDOUT,">$output";
-+
-+my ($MSB,$LSB)=(0,3);	# automatically converted to little-endian
-+
-+$code.=<<___;
-+.text
-+#ifdef OPENSSL_FIPSCANISTER
-+# include <openssl/fipssyms.h>
-+#endif
-+
-+#if !defined(__vxworks) || defined(__pic__)
-+.option	pic2
-+#endif
-+.set	noat
-+___
-+
-+{{{
-+my $FRAMESIZE=16*$SZREG;
-+my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc0fff008 : 0xc0ff0000;
-+
-+my ($inp,$out,$key,$Tbl,$s0,$s1,$s2,$s3)=($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7);
-+my ($i0,$i1,$i2,$i3)=($at,$t0,$t1,$t2);
-+my ($t0,$t1,$t2,$t3,$t4,$t5,$t6,$t7,$t8,$t9,$t10,$t11) = map("\$$_",(12..23));
-+my ($key0,$cnt)=($gp,$fp);
-+
-+# instuction ordering is "stolen" from output from MIPSpro assembler
-+# invoked with -mips3 -O3 arguments...
-+$code.=<<___;
-+.align	5
-+.ent	_mips_AES_encrypt
-+_mips_AES_encrypt:
-+	.frame	$sp,0,$ra
-+	.set	reorder
-+	lw	$t0,0($key)
-+	lw	$t1,4($key)
-+	lw	$t2,8($key)
-+	lw	$t3,12($key)
-+	lw	$cnt,240($key)
-+	$PTR_ADD $key0,$key,16
-+
-+	xor	$s0,$t0
-+	xor	$s1,$t1
-+	xor	$s2,$t2
-+	xor	$s3,$t3
-+
-+	sub	$cnt,1
-+	_xtr	$i0,$s1,16-2
-+.Loop_enc:
-+	_xtr	$i1,$s2,16-2
-+	_xtr	$i2,$s3,16-2
-+	_xtr	$i3,$s0,16-2
-+	and	$i0,0x3fc
-+	and	$i1,0x3fc
-+	and	$i2,0x3fc
-+	and	$i3,0x3fc
-+	$PTR_ADD $i0,$Tbl
-+	$PTR_ADD $i1,$Tbl
-+	$PTR_ADD $i2,$Tbl
-+	$PTR_ADD $i3,$Tbl
-+	lwl	$t0,3($i0)		# Te1[s1>>16]
-+	lwl	$t1,3($i1)		# Te1[s2>>16]
-+	lwl	$t2,3($i2)		# Te1[s3>>16]
-+	lwl	$t3,3($i3)		# Te1[s0>>16]
-+	lwr	$t0,2($i0)		# Te1[s1>>16]
-+	lwr	$t1,2($i1)		# Te1[s2>>16]
-+	lwr	$t2,2($i2)		# Te1[s3>>16]
-+	lwr	$t3,2($i3)		# Te1[s0>>16]
-+
-+	_xtr	$i0,$s2,8-2
-+	_xtr	$i1,$s3,8-2
-+	_xtr	$i2,$s0,8-2
-+	_xtr	$i3,$s1,8-2
-+	and	$i0,0x3fc
-+	and	$i1,0x3fc
-+	and	$i2,0x3fc
-+	and	$i3,0x3fc
-+	$PTR_ADD $i0,$Tbl
-+	$PTR_ADD $i1,$Tbl
-+	$PTR_ADD $i2,$Tbl
-+	$PTR_ADD $i3,$Tbl
-+	lwl	$t4,2($i0)		# Te2[s2>>8]
-+	lwl	$t5,2($i1)		# Te2[s3>>8]
-+	lwl	$t6,2($i2)		# Te2[s0>>8]
-+	lwl	$t7,2($i3)		# Te2[s1>>8]
-+	lwr	$t4,1($i0)		# Te2[s2>>8]
-+	lwr	$t5,1($i1)		# Te2[s3>>8]
-+	lwr	$t6,1($i2)		# Te2[s0>>8]
-+	lwr	$t7,1($i3)		# Te2[s1>>8]
-+
-+	_xtr	$i0,$s3,0-2
-+	_xtr	$i1,$s0,0-2
-+	_xtr	$i2,$s1,0-2
-+	_xtr	$i3,$s2,0-2
-+	and	$i0,0x3fc
-+	and	$i1,0x3fc
-+	and	$i2,0x3fc
-+	and	$i3,0x3fc
-+	$PTR_ADD $i0,$Tbl
-+	$PTR_ADD $i1,$Tbl
-+	$PTR_ADD $i2,$Tbl
-+	$PTR_ADD $i3,$Tbl
-+	lwl	$t8,1($i0)		# Te3[s3]
-+	lwl	$t9,1($i1)		# Te3[s0]
-+	lwl	$t10,1($i2)		# Te3[s1]
-+	lwl	$t11,1($i3)		# Te3[s2]
-+	lwr	$t8,0($i0)		# Te3[s3]
-+	lwr	$t9,0($i1)		# Te3[s0]
-+	lwr	$t10,0($i2)		# Te3[s1]
-+	lwr	$t11,0($i3)		# Te3[s2]
-+
-+	_xtr	$i0,$s0,24-2
-+	_xtr	$i1,$s1,24-2
-+	_xtr	$i2,$s2,24-2
-+	_xtr	$i3,$s3,24-2
-+	and	$i0,0x3fc
-+	and	$i1,0x3fc
-+	and	$i2,0x3fc
-+	and	$i3,0x3fc
-+	$PTR_ADD $i0,$Tbl
-+	$PTR_ADD $i1,$Tbl
-+	$PTR_ADD $i2,$Tbl
-+	$PTR_ADD $i3,$Tbl
-+	xor	$t0,$t4
-+	xor	$t1,$t5
-+	xor	$t2,$t6
-+	xor	$t3,$t7
-+	lw	$t4,0($i0)		# Te0[s0>>24]
-+	lw	$t5,0($i1)		# Te0[s1>>24]
-+	lw	$t6,0($i2)		# Te0[s2>>24]
-+	lw	$t7,0($i3)		# Te0[s3>>24]
-+
-+	lw	$s0,0($key0)
-+	lw	$s1,4($key0)
-+	lw	$s2,8($key0)
-+	lw	$s3,12($key0)
-+
-+	xor	$t0,$t8
-+	xor	$t1,$t9
-+	xor	$t2,$t10
-+	xor	$t3,$t11
-+
-+	xor	$t0,$t4
-+	xor	$t1,$t5
-+	xor	$t2,$t6
-+	xor	$t3,$t7
-+
-+	sub	$cnt,1
-+	$PTR_ADD $key0,16
-+	xor	$s0,$t0
-+	xor	$s1,$t1
-+	xor	$s2,$t2
-+	xor	$s3,$t3
-+	.set	noreorder
-+	bnez	$cnt,.Loop_enc
-+	_xtr	$i0,$s1,16-2
-+
-+	.set	reorder
-+	_xtr	$i1,$s2,16-2
-+	_xtr	$i2,$s3,16-2
-+	_xtr	$i3,$s0,16-2
-+	and	$i0,0x3fc
-+	and	$i1,0x3fc
-+	and	$i2,0x3fc
-+	and	$i3,0x3fc
-+	$PTR_ADD $i0,$Tbl
-+	$PTR_ADD $i1,$Tbl
-+	$PTR_ADD $i2,$Tbl
-+	$PTR_ADD $i3,$Tbl
-+	lbu	$t0,2($i0)		# Te4[s1>>16]
-+	lbu	$t1,2($i1)		# Te4[s2>>16]
-+	lbu	$t2,2($i2)		# Te4[s3>>16]
-+	lbu	$t3,2($i3)		# Te4[s0>>16]
-+
-+	_xtr	$i0,$s2,8-2
-+	_xtr	$i1,$s3,8-2
-+	_xtr	$i2,$s0,8-2
-+	_xtr	$i3,$s1,8-2
-+	and	$i0,0x3fc
-+	and	$i1,0x3fc
-+	and	$i2,0x3fc
-+	and	$i3,0x3fc
-+	$PTR_ADD $i0,$Tbl
-+	$PTR_ADD $i1,$Tbl
-+	$PTR_ADD $i2,$Tbl
-+	$PTR_ADD $i3,$Tbl
-+	lbu	$t4,2($i0)		# Te4[s2>>8]
-+	lbu	$t5,2($i1)		# Te4[s3>>8]
-+	lbu	$t6,2($i2)		# Te4[s0>>8]
-+	lbu	$t7,2($i3)		# Te4[s1>>8]
-+
-+	_xtr	$i0,$s0,24-2
-+	_xtr	$i1,$s1,24-2
-+	_xtr	$i2,$s2,24-2
-+	_xtr	$i3,$s3,24-2
-+	and	$i0,0x3fc
-+	and	$i1,0x3fc
-+	and	$i2,0x3fc
-+	and	$i3,0x3fc
-+	$PTR_ADD $i0,$Tbl
-+	$PTR_ADD $i1,$Tbl
-+	$PTR_ADD $i2,$Tbl
-+	$PTR_ADD $i3,$Tbl
-+	lbu	$t8,2($i0)		# Te4[s0>>24]
-+	lbu	$t9,2($i1)		# Te4[s1>>24]
-+	lbu	$t10,2($i2)		# Te4[s2>>24]
-+	lbu	$t11,2($i3)		# Te4[s3>>24]
-+
-+	_xtr	$i0,$s3,0-2
-+	_xtr	$i1,$s0,0-2
-+	_xtr	$i2,$s1,0-2
-+	_xtr	$i3,$s2,0-2
-+	and	$i0,0x3fc
-+	and	$i1,0x3fc
-+	and	$i2,0x3fc
-+	and	$i3,0x3fc
-+
-+	_ins	$t0,16
-+	_ins	$t1,16
-+	_ins	$t2,16
-+	_ins	$t3,16
-+
-+	_ins	$t4,8
-+	_ins	$t5,8
-+	_ins	$t6,8
-+	_ins	$t7,8
-+
-+	xor	$t0,$t4
-+	xor	$t1,$t5
-+	xor	$t2,$t6
-+	xor	$t3,$t7
-+
-+	$PTR_ADD $i0,$Tbl
-+	$PTR_ADD $i1,$Tbl
-+	$PTR_ADD $i2,$Tbl
-+	$PTR_ADD $i3,$Tbl
-+	lbu	$t4,2($i0)		# Te4[s3]
-+	lbu	$t5,2($i1)		# Te4[s0]
-+	lbu	$t6,2($i2)		# Te4[s1]
-+	lbu	$t7,2($i3)		# Te4[s2]
-+
-+	_ins	$t8,24
-+	_ins	$t9,24
-+	_ins	$t10,24
-+	_ins	$t11,24
-+
-+	lw	$s0,0($key0)
-+	lw	$s1,4($key0)
-+	lw	$s2,8($key0)
-+	lw	$s3,12($key0)
-+
-+	xor	$t0,$t8
-+	xor	$t1,$t9
-+	xor	$t2,$t10
-+	xor	$t3,$t11
-+
-+	_ins	$t4,0
-+	_ins	$t5,0
-+	_ins	$t6,0
-+	_ins	$t7,0
-+
-+	xor	$t0,$t4
-+	xor	$t1,$t5
-+	xor	$t2,$t6
-+	xor	$t3,$t7
-+
-+	xor	$s0,$t0
-+	xor	$s1,$t1
-+	xor	$s2,$t2
-+	xor	$s3,$t3
-+
-+	jr	$ra
-+.end	_mips_AES_encrypt
-+
-+.align	5
-+.globl	AES_encrypt
-+.ent	AES_encrypt
-+AES_encrypt:
-+	.frame	$sp,$FRAMESIZE,$ra
-+	.mask	$SAVED_REGS_MASK,-$SZREG
-+	.set	noreorder
-+___
-+$code.=<<___ if ($flavour =~ /o32/i);	# o32 PIC-ification
-+	.cpload	$pf
-+___
-+$code.=<<___;
-+	$PTR_SUB $sp,$FRAMESIZE
-+	$REG_S	$ra,$FRAMESIZE-1*$SZREG($sp)
-+	$REG_S	$fp,$FRAMESIZE-2*$SZREG($sp)
-+	$REG_S	$s11,$FRAMESIZE-3*$SZREG($sp)
-+	$REG_S	$s10,$FRAMESIZE-4*$SZREG($sp)
-+	$REG_S	$s9,$FRAMESIZE-5*$SZREG($sp)
-+	$REG_S	$s8,$FRAMESIZE-6*$SZREG($sp)
-+	$REG_S	$s7,$FRAMESIZE-7*$SZREG($sp)
-+	$REG_S	$s6,$FRAMESIZE-8*$SZREG($sp)
-+	$REG_S	$s5,$FRAMESIZE-9*$SZREG($sp)
-+	$REG_S	$s4,$FRAMESIZE-10*$SZREG($sp)
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi prologue
-+	$REG_S	\$15,$FRAMESIZE-11*$SZREG($sp)
-+	$REG_S	\$14,$FRAMESIZE-12*$SZREG($sp)
-+	$REG_S	\$13,$FRAMESIZE-13*$SZREG($sp)
-+	$REG_S	\$12,$FRAMESIZE-14*$SZREG($sp)
-+	$REG_S	$gp,$FRAMESIZE-15*$SZREG($sp)
-+___
-+$code.=<<___ if ($flavour !~ /o32/i);	# non-o32 PIC-ification
-+	.cplocal	$Tbl
-+	.cpsetup	$pf,$zero,AES_encrypt
-+___
-+$code.=<<___;
-+	.set	reorder
-+	la	$Tbl,AES_Te		# PIC-ified 'load address'
-+
-+	lwl	$s0,0+$MSB($inp)
-+	lwl	$s1,4+$MSB($inp)
-+	lwl	$s2,8+$MSB($inp)
-+	lwl	$s3,12+$MSB($inp)
-+	lwr	$s0,0+$LSB($inp)
-+	lwr	$s1,4+$LSB($inp)
-+	lwr	$s2,8+$LSB($inp)
-+	lwr	$s3,12+$LSB($inp)
-+
-+	bal	_mips_AES_encrypt
-+
-+	swr	$s0,0+$LSB($out)
-+	swr	$s1,4+$LSB($out)
-+	swr	$s2,8+$LSB($out)
-+	swr	$s3,12+$LSB($out)
-+	swl	$s0,0+$MSB($out)
-+	swl	$s1,4+$MSB($out)
-+	swl	$s2,8+$MSB($out)
-+	swl	$s3,12+$MSB($out)
-+
-+	.set	noreorder
-+	$REG_L	$ra,$FRAMESIZE-1*$SZREG($sp)
-+	$REG_L	$fp,$FRAMESIZE-2*$SZREG($sp)
-+	$REG_L	$s11,$FRAMESIZE-3*$SZREG($sp)
-+	$REG_L	$s10,$FRAMESIZE-4*$SZREG($sp)
-+	$REG_L	$s9,$FRAMESIZE-5*$SZREG($sp)
-+	$REG_L	$s8,$FRAMESIZE-6*$SZREG($sp)
-+	$REG_L	$s7,$FRAMESIZE-7*$SZREG($sp)
-+	$REG_L	$s6,$FRAMESIZE-8*$SZREG($sp)
-+	$REG_L	$s5,$FRAMESIZE-9*$SZREG($sp)
-+	$REG_L	$s4,$FRAMESIZE-10*$SZREG($sp)
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+	$REG_L	\$15,$FRAMESIZE-11*$SZREG($sp)
-+	$REG_L	\$14,$FRAMESIZE-12*$SZREG($sp)
-+	$REG_L	\$13,$FRAMESIZE-13*$SZREG($sp)
-+	$REG_L	\$12,$FRAMESIZE-14*$SZREG($sp)
-+	$REG_L	$gp,$FRAMESIZE-15*$SZREG($sp)
-+___
-+$code.=<<___;
-+	jr	$ra
-+	$PTR_ADD $sp,$FRAMESIZE
-+.end	AES_encrypt
-+___
-+
-+$code.=<<___;
-+.align	5
-+.ent	_mips_AES_decrypt
-+_mips_AES_decrypt:
-+	.frame	$sp,0,$ra
-+	.set	reorder
-+	lw	$t0,0($key)
-+	lw	$t1,4($key)
-+	lw	$t2,8($key)
-+	lw	$t3,12($key)
-+	lw	$cnt,240($key)
-+	$PTR_ADD $key0,$key,16
-+
-+	xor	$s0,$t0
-+	xor	$s1,$t1
-+	xor	$s2,$t2
-+	xor	$s3,$t3
-+
-+	sub	$cnt,1
-+	_xtr	$i0,$s3,16-2
-+.Loop_dec:
-+	_xtr	$i1,$s0,16-2
-+	_xtr	$i2,$s1,16-2
-+	_xtr	$i3,$s2,16-2
-+	and	$i0,0x3fc
-+	and	$i1,0x3fc
-+	and	$i2,0x3fc
-+	and	$i3,0x3fc
-+	$PTR_ADD $i0,$Tbl
-+	$PTR_ADD $i1,$Tbl
-+	$PTR_ADD $i2,$Tbl
-+	$PTR_ADD $i3,$Tbl
-+	lwl	$t0,3($i0)		# Td1[s3>>16]
-+	lwl	$t1,3($i1)		# Td1[s0>>16]
-+	lwl	$t2,3($i2)		# Td1[s1>>16]
-+	lwl	$t3,3($i3)		# Td1[s2>>16]
-+	lwr	$t0,2($i0)		# Td1[s3>>16]
-+	lwr	$t1,2($i1)		# Td1[s0>>16]
-+	lwr	$t2,2($i2)		# Td1[s1>>16]
-+	lwr	$t3,2($i3)		# Td1[s2>>16]
-+
-+	_xtr	$i0,$s2,8-2
-+	_xtr	$i1,$s3,8-2
-+	_xtr	$i2,$s0,8-2
-+	_xtr	$i3,$s1,8-2
-+	and	$i0,0x3fc
-+	and	$i1,0x3fc
-+	and	$i2,0x3fc
-+	and	$i3,0x3fc
-+	$PTR_ADD $i0,$Tbl
-+	$PTR_ADD $i1,$Tbl
-+	$PTR_ADD $i2,$Tbl
-+	$PTR_ADD $i3,$Tbl
-+	lwl	$t4,2($i0)		# Td2[s2>>8]
-+	lwl	$t5,2($i1)		# Td2[s3>>8]
-+	lwl	$t6,2($i2)		# Td2[s0>>8]
-+	lwl	$t7,2($i3)		# Td2[s1>>8]
-+	lwr	$t4,1($i0)		# Td2[s2>>8]
-+	lwr	$t5,1($i1)		# Td2[s3>>8]
-+	lwr	$t6,1($i2)		# Td2[s0>>8]
-+	lwr	$t7,1($i3)		# Td2[s1>>8]
-+
-+	_xtr	$i0,$s1,0-2
-+	_xtr	$i1,$s2,0-2
-+	_xtr	$i2,$s3,0-2
-+	_xtr	$i3,$s0,0-2
-+	and	$i0,0x3fc
-+	and	$i1,0x3fc
-+	and	$i2,0x3fc
-+	and	$i3,0x3fc
-+	$PTR_ADD $i0,$Tbl
-+	$PTR_ADD $i1,$Tbl
-+	$PTR_ADD $i2,$Tbl
-+	$PTR_ADD $i3,$Tbl
-+	lwl	$t8,1($i0)		# Td3[s1]
-+	lwl	$t9,1($i1)		# Td3[s2]
-+	lwl	$t10,1($i2)		# Td3[s3]
-+	lwl	$t11,1($i3)		# Td3[s0]
-+	lwr	$t8,0($i0)		# Td3[s1]
-+	lwr	$t9,0($i1)		# Td3[s2]
-+	lwr	$t10,0($i2)		# Td3[s3]
-+	lwr	$t11,0($i3)		# Td3[s0]
-+
-+	_xtr	$i0,$s0,24-2
-+	_xtr	$i1,$s1,24-2
-+	_xtr	$i2,$s2,24-2
-+	_xtr	$i3,$s3,24-2
-+	and	$i0,0x3fc
-+	and	$i1,0x3fc
-+	and	$i2,0x3fc
-+	and	$i3,0x3fc
-+	$PTR_ADD $i0,$Tbl
-+	$PTR_ADD $i1,$Tbl
-+	$PTR_ADD $i2,$Tbl
-+	$PTR_ADD $i3,$Tbl
-+
-+	xor	$t0,$t4
-+	xor	$t1,$t5
-+	xor	$t2,$t6
-+	xor	$t3,$t7
-+
-+
-+	lw	$t4,0($i0)		# Td0[s0>>24]
-+	lw	$t5,0($i1)		# Td0[s1>>24]
-+	lw	$t6,0($i2)		# Td0[s2>>24]
-+	lw	$t7,0($i3)		# Td0[s3>>24]
-+
-+	lw	$s0,0($key0)
-+	lw	$s1,4($key0)
-+	lw	$s2,8($key0)
-+	lw	$s3,12($key0)
-+
-+	xor	$t0,$t8
-+	xor	$t1,$t9
-+	xor	$t2,$t10
-+	xor	$t3,$t11
-+
-+	xor	$t0,$t4
-+	xor	$t1,$t5
-+	xor	$t2,$t6
-+	xor	$t3,$t7
-+
-+	sub	$cnt,1
-+	$PTR_ADD $key0,16
-+	xor	$s0,$t0
-+	xor	$s1,$t1
-+	xor	$s2,$t2
-+	xor	$s3,$t3
-+	.set	noreorder
-+	bnez	$cnt,.Loop_dec
-+	_xtr	$i0,$s3,16-2
-+
-+	.set	reorder
-+	lw	$t4,1024($Tbl)		# prefetch Td4
-+	lw	$t5,1024+32($Tbl)
-+	lw	$t6,1024+64($Tbl)
-+	lw	$t7,1024+96($Tbl)
-+	lw	$t8,1024+128($Tbl)
-+	lw	$t9,1024+160($Tbl)
-+	lw	$t10,1024+192($Tbl)
-+	lw	$t11,1024+224($Tbl)
-+
-+	_xtr	$i0,$s3,16
-+	_xtr	$i1,$s0,16
-+	_xtr	$i2,$s1,16
-+	_xtr	$i3,$s2,16
-+	and	$i0,0xff
-+	and	$i1,0xff
-+	and	$i2,0xff
-+	and	$i3,0xff
-+	$PTR_ADD $i0,$Tbl
-+	$PTR_ADD $i1,$Tbl
-+	$PTR_ADD $i2,$Tbl
-+	$PTR_ADD $i3,$Tbl
-+	lbu	$t0,1024($i0)		# Td4[s3>>16]
-+	lbu	$t1,1024($i1)		# Td4[s0>>16]
-+	lbu	$t2,1024($i2)		# Td4[s1>>16]
-+	lbu	$t3,1024($i3)		# Td4[s2>>16]
-+
-+	_xtr	$i0,$s2,8
-+	_xtr	$i1,$s3,8
-+	_xtr	$i2,$s0,8
-+	_xtr	$i3,$s1,8
-+	and	$i0,0xff
-+	and	$i1,0xff
-+	and	$i2,0xff
-+	and	$i3,0xff
-+	$PTR_ADD $i0,$Tbl
-+	$PTR_ADD $i1,$Tbl
-+	$PTR_ADD $i2,$Tbl
-+	$PTR_ADD $i3,$Tbl
-+	lbu	$t4,1024($i0)		# Td4[s2>>8]
-+	lbu	$t5,1024($i1)		# Td4[s3>>8]
-+	lbu	$t6,1024($i2)		# Td4[s0>>8]
-+	lbu	$t7,1024($i3)		# Td4[s1>>8]
-+
-+	_xtr	$i0,$s0,24
-+	_xtr	$i1,$s1,24
-+	_xtr	$i2,$s2,24
-+	_xtr	$i3,$s3,24
-+	$PTR_ADD $i0,$Tbl
-+	$PTR_ADD $i1,$Tbl
-+	$PTR_ADD $i2,$Tbl
-+	$PTR_ADD $i3,$Tbl
-+	lbu	$t8,1024($i0)		# Td4[s0>>24]
-+	lbu	$t9,1024($i1)		# Td4[s1>>24]
-+	lbu	$t10,1024($i2)		# Td4[s2>>24]
-+	lbu	$t11,1024($i3)		# Td4[s3>>24]
-+
-+	_xtr	$i0,$s1,0
-+	_xtr	$i1,$s2,0
-+	_xtr	$i2,$s3,0
-+	_xtr	$i3,$s0,0
-+
-+	_ins	$t0,16
-+	_ins	$t1,16
-+	_ins	$t2,16
-+	_ins	$t3,16
-+
-+	_ins	$t4,8
-+	_ins	$t5,8
-+	_ins	$t6,8
-+	_ins	$t7,8
-+
-+	xor	$t0,$t4
-+	xor	$t1,$t5
-+	xor	$t2,$t6
-+	xor	$t3,$t7
-+
-+	$PTR_ADD $i0,$Tbl
-+	$PTR_ADD $i1,$Tbl
-+	$PTR_ADD $i2,$Tbl
-+	$PTR_ADD $i3,$Tbl
-+	lbu	$t4,1024($i0)		# Td4[s1]
-+	lbu	$t5,1024($i1)		# Td4[s2]
-+	lbu	$t6,1024($i2)		# Td4[s3]
-+	lbu	$t7,1024($i3)		# Td4[s0]
-+
-+	_ins	$t8,24
-+	_ins	$t9,24
-+	_ins	$t10,24
-+	_ins	$t11,24
-+
-+	lw	$s0,0($key0)
-+	lw	$s1,4($key0)
-+	lw	$s2,8($key0)
-+	lw	$s3,12($key0)
-+
-+	_ins	$t4,0
-+	_ins	$t5,0
-+	_ins	$t6,0
-+	_ins	$t7,0
-+
-+
-+	xor	$t0,$t8
-+	xor	$t1,$t9
-+	xor	$t2,$t10
-+	xor	$t3,$t11
-+
-+	xor	$t0,$t4
-+	xor	$t1,$t5
-+	xor	$t2,$t6
-+	xor	$t3,$t7
-+
-+	xor	$s0,$t0
-+	xor	$s1,$t1
-+	xor	$s2,$t2
-+	xor	$s3,$t3
-+
-+	jr	$ra
-+.end	_mips_AES_decrypt
-+
-+.align	5
-+.globl	AES_decrypt
-+.ent	AES_decrypt
-+AES_decrypt:
-+	.frame	$sp,$FRAMESIZE,$ra
-+	.mask	$SAVED_REGS_MASK,-$SZREG
-+	.set	noreorder
-+___
-+$code.=<<___ if ($flavour =~ /o32/i);	# o32 PIC-ification
-+	.cpload	$pf
-+___
-+$code.=<<___;
-+	$PTR_SUB $sp,$FRAMESIZE
-+	$REG_S	$ra,$FRAMESIZE-1*$SZREG($sp)
-+	$REG_S	$fp,$FRAMESIZE-2*$SZREG($sp)
-+	$REG_S	$s11,$FRAMESIZE-3*$SZREG($sp)
-+	$REG_S	$s10,$FRAMESIZE-4*$SZREG($sp)
-+	$REG_S	$s9,$FRAMESIZE-5*$SZREG($sp)
-+	$REG_S	$s8,$FRAMESIZE-6*$SZREG($sp)
-+	$REG_S	$s7,$FRAMESIZE-7*$SZREG($sp)
-+	$REG_S	$s6,$FRAMESIZE-8*$SZREG($sp)
-+	$REG_S	$s5,$FRAMESIZE-9*$SZREG($sp)
-+	$REG_S	$s4,$FRAMESIZE-10*$SZREG($sp)
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi prologue
-+	$REG_S	\$15,$FRAMESIZE-11*$SZREG($sp)
-+	$REG_S	\$14,$FRAMESIZE-12*$SZREG($sp)
-+	$REG_S	\$13,$FRAMESIZE-13*$SZREG($sp)
-+	$REG_S	\$12,$FRAMESIZE-14*$SZREG($sp)
-+	$REG_S	$gp,$FRAMESIZE-15*$SZREG($sp)
-+___
-+$code.=<<___ if ($flavour !~ /o32/i);	# non-o32 PIC-ification
-+	.cplocal	$Tbl
-+	.cpsetup	$pf,$zero,AES_decrypt
-+___
-+$code.=<<___;
-+	.set	reorder
-+	la	$Tbl,AES_Td		# PIC-ified 'load address'
-+
-+	lwl	$s0,0+$MSB($inp)
-+	lwl	$s1,4+$MSB($inp)
-+	lwl	$s2,8+$MSB($inp)
-+	lwl	$s3,12+$MSB($inp)
-+	lwr	$s0,0+$LSB($inp)
-+	lwr	$s1,4+$LSB($inp)
-+	lwr	$s2,8+$LSB($inp)
-+	lwr	$s3,12+$LSB($inp)
-+
-+	bal	_mips_AES_decrypt
-+
-+	swr	$s0,0+$LSB($out)
-+	swr	$s1,4+$LSB($out)
-+	swr	$s2,8+$LSB($out)
-+	swr	$s3,12+$LSB($out)
-+	swl	$s0,0+$MSB($out)
-+	swl	$s1,4+$MSB($out)
-+	swl	$s2,8+$MSB($out)
-+	swl	$s3,12+$MSB($out)
-+
-+	.set	noreorder
-+	$REG_L	$ra,$FRAMESIZE-1*$SZREG($sp)
-+	$REG_L	$fp,$FRAMESIZE-2*$SZREG($sp)
-+	$REG_L	$s11,$FRAMESIZE-3*$SZREG($sp)
-+	$REG_L	$s10,$FRAMESIZE-4*$SZREG($sp)
-+	$REG_L	$s9,$FRAMESIZE-5*$SZREG($sp)
-+	$REG_L	$s8,$FRAMESIZE-6*$SZREG($sp)
-+	$REG_L	$s7,$FRAMESIZE-7*$SZREG($sp)
-+	$REG_L	$s6,$FRAMESIZE-8*$SZREG($sp)
-+	$REG_L	$s5,$FRAMESIZE-9*$SZREG($sp)
-+	$REG_L	$s4,$FRAMESIZE-10*$SZREG($sp)
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+	$REG_L	\$15,$FRAMESIZE-11*$SZREG($sp)
-+	$REG_L	\$14,$FRAMESIZE-12*$SZREG($sp)
-+	$REG_L	\$13,$FRAMESIZE-13*$SZREG($sp)
-+	$REG_L	\$12,$FRAMESIZE-14*$SZREG($sp)
-+	$REG_L	$gp,$FRAMESIZE-15*$SZREG($sp)
-+___
-+$code.=<<___;
-+	jr	$ra
-+	$PTR_ADD $sp,$FRAMESIZE
-+.end	AES_decrypt
-+___
-+}}}
-+
-+{{{
-+my $FRAMESIZE=8*$SZREG;
-+my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc000f008 : 0xc0000000;
-+
-+my ($inp,$bits,$key,$Tbl)=($a0,$a1,$a2,$a3);
-+my ($rk0,$rk1,$rk2,$rk3,$rk4,$rk5,$rk6,$rk7)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3);
-+my ($i0,$i1,$i2,$i3)=($at,$t0,$t1,$t2);
-+my ($rcon,$cnt)=($gp,$fp);
-+
-+$code.=<<___;
-+.align	5
-+.ent	_mips_AES_set_encrypt_key
-+_mips_AES_set_encrypt_key:
-+	.frame	$sp,0,$ra
-+	.set	noreorder
-+	beqz	$inp,.Lekey_done
-+	li	$t0,-1
-+	beqz	$key,.Lekey_done
-+	$PTR_ADD $rcon,$Tbl,1024+256
-+
-+	.set	reorder
-+	lwl	$rk0,0+$MSB($inp)	# load 128 bits
-+	lwl	$rk1,4+$MSB($inp)
-+	lwl	$rk2,8+$MSB($inp)
-+	lwl	$rk3,12+$MSB($inp)
-+	li	$at,128
-+	lwr	$rk0,0+$LSB($inp)
-+	lwr	$rk1,4+$LSB($inp)
-+	lwr	$rk2,8+$LSB($inp)
-+	lwr	$rk3,12+$LSB($inp)
-+	.set	noreorder
-+	beq	$bits,$at,.L128bits
-+	li	$cnt,10
-+
-+	.set	reorder
-+	lwl	$rk4,16+$MSB($inp)	# load 192 bits
-+	lwl	$rk5,20+$MSB($inp)
-+	li	$at,192
-+	lwr	$rk4,16+$LSB($inp)
-+	lwr	$rk5,20+$LSB($inp)
-+	.set	noreorder
-+	beq	$bits,$at,.L192bits
-+	li	$cnt,8
-+
-+	.set	reorder
-+	lwl	$rk6,24+$MSB($inp)	# load 256 bits
-+	lwl	$rk7,28+$MSB($inp)
-+	li	$at,256
-+	lwr	$rk6,24+$LSB($inp)
-+	lwr	$rk7,28+$LSB($inp)
-+	.set	noreorder
-+	beq	$bits,$at,.L256bits
-+	li	$cnt,7
-+
-+	b	.Lekey_done
-+	li	$t0,-2
-+
-+.align	4
-+.L128bits:
-+	.set	reorder
-+	srl	$i0,$rk3,16
-+	srl	$i1,$rk3,8
-+	and	$i0,0xff
-+	and	$i1,0xff
-+	and	$i2,$rk3,0xff
-+	srl	$i3,$rk3,24
-+	$PTR_ADD $i0,$Tbl
-+	$PTR_ADD $i1,$Tbl
-+	$PTR_ADD $i2,$Tbl
-+	$PTR_ADD $i3,$Tbl
-+	lbu	$i0,1024($i0)
-+	lbu	$i1,1024($i1)
-+	lbu	$i2,1024($i2)
-+	lbu	$i3,1024($i3)
-+
-+	sw	$rk0,0($key)
-+	sw	$rk1,4($key)
-+	sw	$rk2,8($key)
-+	sw	$rk3,12($key)
-+	sub	$cnt,1
-+	$PTR_ADD $key,16
-+
-+	_bias	$i0,24
-+	_bias	$i1,16
-+	_bias	$i2,8
-+	_bias	$i3,0
-+
-+	xor	$rk0,$i0
-+	lw	$i0,0($rcon)
-+	xor	$rk0,$i1
-+	xor	$rk0,$i2
-+	xor	$rk0,$i3
-+	xor	$rk0,$i0
-+
-+	xor	$rk1,$rk0
-+	xor	$rk2,$rk1
-+	xor	$rk3,$rk2
-+
-+	.set	noreorder
-+	bnez	$cnt,.L128bits
-+	$PTR_ADD $rcon,4
-+
-+	sw	$rk0,0($key)
-+	sw	$rk1,4($key)
-+	sw	$rk2,8($key)
-+	li	$cnt,10
-+	sw	$rk3,12($key)
-+	li	$t0,0
-+	sw	$cnt,80($key)
-+	b	.Lekey_done
-+	$PTR_SUB $key,10*16
-+
-+.align	4
-+.L192bits:
-+	.set	reorder
-+	srl	$i0,$rk5,16
-+	srl	$i1,$rk5,8
-+	and	$i0,0xff
-+	and	$i1,0xff
-+	and	$i2,$rk5,0xff
-+	srl	$i3,$rk5,24
-+	$PTR_ADD $i0,$Tbl
-+	$PTR_ADD $i1,$Tbl
-+	$PTR_ADD $i2,$Tbl
-+	$PTR_ADD $i3,$Tbl
-+	lbu	$i0,1024($i0)
-+	lbu	$i1,1024($i1)
-+	lbu	$i2,1024($i2)
-+	lbu	$i3,1024($i3)
-+
-+	sw	$rk0,0($key)
-+	sw	$rk1,4($key)
-+	sw	$rk2,8($key)
-+	sw	$rk3,12($key)
-+	sw	$rk4,16($key)
-+	sw	$rk5,20($key)
-+	sub	$cnt,1
-+	$PTR_ADD $key,24
-+
-+	_bias	$i0,24
-+	_bias	$i1,16
-+	_bias	$i2,8
-+	_bias	$i3,0
-+
-+	xor	$rk0,$i0
-+	lw	$i0,0($rcon)
-+	xor	$rk0,$i1
-+	xor	$rk0,$i2
-+	xor	$rk0,$i3
-+	xor	$rk0,$i0
-+
-+	xor	$rk1,$rk0
-+	xor	$rk2,$rk1
-+	xor	$rk3,$rk2
-+	xor	$rk4,$rk3
-+	xor	$rk5,$rk4
-+
-+	.set	noreorder
-+	bnez	$cnt,.L192bits
-+	$PTR_ADD $rcon,4
-+
-+	sw	$rk0,0($key)
-+	sw	$rk1,4($key)
-+	sw	$rk2,8($key)
-+	li	$cnt,12
-+	sw	$rk3,12($key)
-+	li	$t0,0
-+	sw	$cnt,48($key)
-+	b	.Lekey_done
-+	$PTR_SUB $key,12*16
-+
-+.align	4
-+.L256bits:
-+	.set	reorder
-+	srl	$i0,$rk7,16
-+	srl	$i1,$rk7,8
-+	and	$i0,0xff
-+	and	$i1,0xff
-+	and	$i2,$rk7,0xff
-+	srl	$i3,$rk7,24
-+	$PTR_ADD $i0,$Tbl
-+	$PTR_ADD $i1,$Tbl
-+	$PTR_ADD $i2,$Tbl
-+	$PTR_ADD $i3,$Tbl
-+	lbu	$i0,1024($i0)
-+	lbu	$i1,1024($i1)
-+	lbu	$i2,1024($i2)
-+	lbu	$i3,1024($i3)
-+
-+	sw	$rk0,0($key)
-+	sw	$rk1,4($key)
-+	sw	$rk2,8($key)
-+	sw	$rk3,12($key)
-+	sw	$rk4,16($key)
-+	sw	$rk5,20($key)
-+	sw	$rk6,24($key)
-+	sw	$rk7,28($key)
-+	sub	$cnt,1
-+
-+	_bias	$i0,24
-+	_bias	$i1,16
-+	_bias	$i2,8
-+	_bias	$i3,0
-+
-+	xor	$rk0,$i0
-+	lw	$i0,0($rcon)
-+	xor	$rk0,$i1
-+	xor	$rk0,$i2
-+	xor	$rk0,$i3
-+	xor	$rk0,$i0
-+
-+	xor	$rk1,$rk0
-+	xor	$rk2,$rk1
-+	xor	$rk3,$rk2
-+	beqz	$cnt,.L256bits_done
-+
-+	srl	$i0,$rk3,24
-+	srl	$i1,$rk3,16
-+	srl	$i2,$rk3,8
-+	and	$i3,$rk3,0xff
-+	and	$i1,0xff
-+	and	$i2,0xff
-+	$PTR_ADD $i0,$Tbl
-+	$PTR_ADD $i1,$Tbl
-+	$PTR_ADD $i2,$Tbl
-+	$PTR_ADD $i3,$Tbl
-+	lbu	$i0,1024($i0)
-+	lbu	$i1,1024($i1)
-+	lbu	$i2,1024($i2)
-+	lbu	$i3,1024($i3)
-+	sll	$i0,24
-+	sll	$i1,16
-+	sll	$i2,8
-+
-+	xor	$rk4,$i0
-+	xor	$rk4,$i1
-+	xor	$rk4,$i2
-+	xor	$rk4,$i3
-+
-+	xor	$rk5,$rk4
-+	xor	$rk6,$rk5
-+	xor	$rk7,$rk6
-+
-+	$PTR_ADD $key,32
-+	.set	noreorder
-+	b	.L256bits
-+	$PTR_ADD $rcon,4
-+
-+.L256bits_done:
-+	sw	$rk0,32($key)
-+	sw	$rk1,36($key)
-+	sw	$rk2,40($key)
-+	li	$cnt,14
-+	sw	$rk3,44($key)
-+	li	$t0,0
-+	sw	$cnt,48($key)
-+	$PTR_SUB $key,12*16
-+
-+.Lekey_done:
-+	jr	$ra
-+	nop
-+.end	_mips_AES_set_encrypt_key
-+
-+.globl	AES_set_encrypt_key
-+.ent	AES_set_encrypt_key
-+AES_set_encrypt_key:
-+	.frame	$sp,$FRAMESIZE,$ra
-+	.mask	$SAVED_REGS_MASK,-$SZREG
-+	.set	noreorder
-+___
-+$code.=<<___ if ($flavour =~ /o32/i);	# o32 PIC-ification
-+	.cpload	$pf
-+___
-+$code.=<<___;
-+	$PTR_SUB $sp,$FRAMESIZE
-+	$REG_S	$ra,$FRAMESIZE-1*$SZREG($sp)
-+	$REG_S	$fp,$FRAMESIZE-2*$SZREG($sp)
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi prologue
-+	$REG_S	$s3,$FRAMESIZE-3*$SZREG($sp)
-+	$REG_S	$s2,$FRAMESIZE-4*$SZREG($sp)
-+	$REG_S	$s1,$FRAMESIZE-5*$SZREG($sp)
-+	$REG_S	$s0,$FRAMESIZE-6*$SZREG($sp)
-+	$REG_S	$gp,$FRAMESIZE-7*$SZREG($sp)
-+___
-+$code.=<<___ if ($flavour !~ /o32/i);	# non-o32 PIC-ification
-+	.cplocal	$Tbl
-+	.cpsetup	$pf,$zero,AES_set_encrypt_key
-+___
-+$code.=<<___;
-+	.set	reorder
-+	la	$Tbl,AES_Te		# PIC-ified 'load address'
-+
-+	bal	_mips_AES_set_encrypt_key
-+
-+	.set	noreorder
-+	move	$a0,$t0
-+	$REG_L	$ra,$FRAMESIZE-1*$SZREG($sp)
-+	$REG_L	$fp,$FRAMESIZE-2*$SZREG($sp)
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+	$REG_L	$s3,$FRAMESIZE-11*$SZREG($sp)
-+	$REG_L	$s2,$FRAMESIZE-12*$SZREG($sp)
-+	$REG_L	$s1,$FRAMESIZE-13*$SZREG($sp)
-+	$REG_L	$s0,$FRAMESIZE-14*$SZREG($sp)
-+	$REG_L	$gp,$FRAMESIZE-15*$SZREG($sp)
-+___
-+$code.=<<___;
-+	jr	$ra
-+	$PTR_ADD $sp,$FRAMESIZE
-+.end	AES_set_encrypt_key
-+___
-+
-+my ($head,$tail)=($inp,$bits);
-+my ($tp1,$tp2,$tp4,$tp8,$tp9,$tpb,$tpd,$tpe)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3);
-+my ($m,$x80808080,$x7f7f7f7f,$x1b1b1b1b)=($at,$t0,$t1,$t2);
-+$code.=<<___;
-+.align	5
-+.globl	AES_set_decrypt_key
-+.ent	AES_set_decrypt_key
-+AES_set_decrypt_key:
-+	.frame	$sp,$FRAMESIZE,$ra
-+	.mask	$SAVED_REGS_MASK,-$SZREG
-+	.set	noreorder
-+___
-+$code.=<<___ if ($flavour =~ /o32/i);	# o32 PIC-ification
-+	.cpload	$pf
-+___
-+$code.=<<___;
-+	$PTR_SUB $sp,$FRAMESIZE
-+	$REG_S	$ra,$FRAMESIZE-1*$SZREG($sp)
-+	$REG_S	$fp,$FRAMESIZE-2*$SZREG($sp)
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi prologue
-+	$REG_S	$s3,$FRAMESIZE-3*$SZREG($sp)
-+	$REG_S	$s2,$FRAMESIZE-4*$SZREG($sp)
-+	$REG_S	$s1,$FRAMESIZE-5*$SZREG($sp)
-+	$REG_S	$s0,$FRAMESIZE-6*$SZREG($sp)
-+	$REG_S	$gp,$FRAMESIZE-7*$SZREG($sp)
-+___
-+$code.=<<___ if ($flavour !~ /o32/i);	# non-o32 PIC-ification
-+	.cplocal	$Tbl
-+	.cpsetup	$pf,$zero,AES_set_decrypt_key
-+___
-+$code.=<<___;
-+	.set	reorder
-+	la	$Tbl,AES_Te		# PIC-ified 'load address'
-+
-+	bal	_mips_AES_set_encrypt_key
-+
-+	bltz	$t0,.Ldkey_done
-+
-+	sll	$at,$cnt,4
-+	$PTR_ADD $head,$key,0
-+	$PTR_ADD $tail,$key,$at
-+.align	4
-+.Lswap:
-+	lw	$rk0,0($head)
-+	lw	$rk1,4($head)
-+	lw	$rk2,8($head)
-+	lw	$rk3,12($head)
-+	lw	$rk4,0($tail)
-+	lw	$rk5,4($tail)
-+	lw	$rk6,8($tail)
-+	lw	$rk7,12($tail)
-+	sw	$rk0,0($tail)
-+	sw	$rk1,4($tail)
-+	sw	$rk2,8($tail)
-+	sw	$rk3,12($tail)
-+	$PTR_ADD $head,16
-+	$PTR_SUB $tail,16
-+	sw	$rk4,-16($head)
-+	sw	$rk5,-12($head)
-+	sw	$rk6,-8($head)
-+	sw	$rk7,-4($head)
-+	bne	$head,$tail,.Lswap
-+
-+	lw	$tp1,16($key)		# modulo-scheduled
-+	lui	$x80808080,0x8080
-+	sub	$cnt,1
-+	or	$x80808080,0x8080
-+	sll	$cnt,2
-+	$PTR_ADD $key,16
-+	lui	$x1b1b1b1b,0x1b1b
-+	nor	$x7f7f7f7f,$zero,$x80808080
-+	or	$x1b1b1b1b,0x1b1b
-+.align	4
-+.Lmix:
-+	and	$m,$tp1,$x80808080
-+	and	$tp2,$tp1,$x7f7f7f7f
-+	srl	$tp4,$m,7
-+	addu	$tp2,$tp2		# tp2<<1
-+	subu	$m,$tp4
-+	and	$m,$x1b1b1b1b
-+	xor	$tp2,$m
-+
-+	and	$m,$tp2,$x80808080
-+	and	$tp4,$tp2,$x7f7f7f7f
-+	srl	$tp8,$m,7
-+	addu	$tp4,$tp4		# tp4<<1
-+	subu	$m,$tp8
-+	and	$m,$x1b1b1b1b
-+	xor	$tp4,$m
-+
-+	and	$m,$tp4,$x80808080
-+	and	$tp8,$tp4,$x7f7f7f7f
-+	srl	$tp9,$m,7
-+	addu	$tp8,$tp8		# tp8<<1
-+	subu	$m,$tp9
-+	and	$m,$x1b1b1b1b
-+	xor	$tp8,$m
-+
-+	xor	$tp9,$tp8,$tp1
-+	xor	$tpe,$tp8,$tp4
-+	xor	$tpb,$tp9,$tp2
-+	xor	$tpd,$tp9,$tp4
-+
-+	_ror	$tp1,$tpd,16
-+	 xor	$tpe,$tp2
-+	_ror	$tp2,$tpd,-16
-+	xor	$tpe,$tp1
-+	_ror	$tp1,$tp9,8
-+	xor	$tpe,$tp2
-+	_ror	$tp2,$tp9,-24
-+	xor	$tpe,$tp1
-+	_ror	$tp1,$tpb,24
-+	xor	$tpe,$tp2
-+	_ror	$tp2,$tpb,-8
-+	xor	$tpe,$tp1
-+	lw	$tp1,4($key)		# modulo-scheduled
-+	xor	$tpe,$tp2
-+	sub	$cnt,1
-+	sw	$tpe,0($key)
-+	$PTR_ADD $key,4
-+	bnez	$cnt,.Lmix
-+
-+	li	$t0,0
-+.Ldkey_done:
-+	.set	noreorder
-+	move	$a0,$t0
-+	$REG_L	$ra,$FRAMESIZE-1*$SZREG($sp)
-+	$REG_L	$fp,$FRAMESIZE-2*$SZREG($sp)
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+	$REG_L	$s3,$FRAMESIZE-11*$SZREG($sp)
-+	$REG_L	$s2,$FRAMESIZE-12*$SZREG($sp)
-+	$REG_L	$s1,$FRAMESIZE-13*$SZREG($sp)
-+	$REG_L	$s0,$FRAMESIZE-14*$SZREG($sp)
-+	$REG_L	$gp,$FRAMESIZE-15*$SZREG($sp)
-+___
-+$code.=<<___;
-+	jr	$ra
-+	$PTR_ADD $sp,$FRAMESIZE
-+.end	AES_set_decrypt_key
-+___
-+}}}
-+
-+######################################################################
-+# Tables are kept in endian-neutral manner
-+$code.=<<___;
-+.rdata
-+.align	6
-+AES_Te:
-+.byte	0xc6,0x63,0x63,0xa5,	0xf8,0x7c,0x7c,0x84	# Te0
-+.byte	0xee,0x77,0x77,0x99,	0xf6,0x7b,0x7b,0x8d
-+.byte	0xff,0xf2,0xf2,0x0d,	0xd6,0x6b,0x6b,0xbd
-+.byte	0xde,0x6f,0x6f,0xb1,	0x91,0xc5,0xc5,0x54
-+.byte	0x60,0x30,0x30,0x50,	0x02,0x01,0x01,0x03
-+.byte	0xce,0x67,0x67,0xa9,	0x56,0x2b,0x2b,0x7d
-+.byte	0xe7,0xfe,0xfe,0x19,	0xb5,0xd7,0xd7,0x62
-+.byte	0x4d,0xab,0xab,0xe6,	0xec,0x76,0x76,0x9a
-+.byte	0x8f,0xca,0xca,0x45,	0x1f,0x82,0x82,0x9d
-+.byte	0x89,0xc9,0xc9,0x40,	0xfa,0x7d,0x7d,0x87
-+.byte	0xef,0xfa,0xfa,0x15,	0xb2,0x59,0x59,0xeb
-+.byte	0x8e,0x47,0x47,0xc9,	0xfb,0xf0,0xf0,0x0b
-+.byte	0x41,0xad,0xad,0xec,	0xb3,0xd4,0xd4,0x67
-+.byte	0x5f,0xa2,0xa2,0xfd,	0x45,0xaf,0xaf,0xea
-+.byte	0x23,0x9c,0x9c,0xbf,	0x53,0xa4,0xa4,0xf7
-+.byte	0xe4,0x72,0x72,0x96,	0x9b,0xc0,0xc0,0x5b
-+.byte	0x75,0xb7,0xb7,0xc2,	0xe1,0xfd,0xfd,0x1c
-+.byte	0x3d,0x93,0x93,0xae,	0x4c,0x26,0x26,0x6a
-+.byte	0x6c,0x36,0x36,0x5a,	0x7e,0x3f,0x3f,0x41
-+.byte	0xf5,0xf7,0xf7,0x02,	0x83,0xcc,0xcc,0x4f
-+.byte	0x68,0x34,0x34,0x5c,	0x51,0xa5,0xa5,0xf4
-+.byte	0xd1,0xe5,0xe5,0x34,	0xf9,0xf1,0xf1,0x08
-+.byte	0xe2,0x71,0x71,0x93,	0xab,0xd8,0xd8,0x73
-+.byte	0x62,0x31,0x31,0x53,	0x2a,0x15,0x15,0x3f
-+.byte	0x08,0x04,0x04,0x0c,	0x95,0xc7,0xc7,0x52
-+.byte	0x46,0x23,0x23,0x65,	0x9d,0xc3,0xc3,0x5e
-+.byte	0x30,0x18,0x18,0x28,	0x37,0x96,0x96,0xa1
-+.byte	0x0a,0x05,0x05,0x0f,	0x2f,0x9a,0x9a,0xb5
-+.byte	0x0e,0x07,0x07,0x09,	0x24,0x12,0x12,0x36
-+.byte	0x1b,0x80,0x80,0x9b,	0xdf,0xe2,0xe2,0x3d
-+.byte	0xcd,0xeb,0xeb,0x26,	0x4e,0x27,0x27,0x69
-+.byte	0x7f,0xb2,0xb2,0xcd,	0xea,0x75,0x75,0x9f
-+.byte	0x12,0x09,0x09,0x1b,	0x1d,0x83,0x83,0x9e
-+.byte	0x58,0x2c,0x2c,0x74,	0x34,0x1a,0x1a,0x2e
-+.byte	0x36,0x1b,0x1b,0x2d,	0xdc,0x6e,0x6e,0xb2
-+.byte	0xb4,0x5a,0x5a,0xee,	0x5b,0xa0,0xa0,0xfb
-+.byte	0xa4,0x52,0x52,0xf6,	0x76,0x3b,0x3b,0x4d
-+.byte	0xb7,0xd6,0xd6,0x61,	0x7d,0xb3,0xb3,0xce
-+.byte	0x52,0x29,0x29,0x7b,	0xdd,0xe3,0xe3,0x3e
-+.byte	0x5e,0x2f,0x2f,0x71,	0x13,0x84,0x84,0x97
-+.byte	0xa6,0x53,0x53,0xf5,	0xb9,0xd1,0xd1,0x68
-+.byte	0x00,0x00,0x00,0x00,	0xc1,0xed,0xed,0x2c
-+.byte	0x40,0x20,0x20,0x60,	0xe3,0xfc,0xfc,0x1f
-+.byte	0x79,0xb1,0xb1,0xc8,	0xb6,0x5b,0x5b,0xed
-+.byte	0xd4,0x6a,0x6a,0xbe,	0x8d,0xcb,0xcb,0x46
-+.byte	0x67,0xbe,0xbe,0xd9,	0x72,0x39,0x39,0x4b
-+.byte	0x94,0x4a,0x4a,0xde,	0x98,0x4c,0x4c,0xd4
-+.byte	0xb0,0x58,0x58,0xe8,	0x85,0xcf,0xcf,0x4a
-+.byte	0xbb,0xd0,0xd0,0x6b,	0xc5,0xef,0xef,0x2a
-+.byte	0x4f,0xaa,0xaa,0xe5,	0xed,0xfb,0xfb,0x16
-+.byte	0x86,0x43,0x43,0xc5,	0x9a,0x4d,0x4d,0xd7
-+.byte	0x66,0x33,0x33,0x55,	0x11,0x85,0x85,0x94
-+.byte	0x8a,0x45,0x45,0xcf,	0xe9,0xf9,0xf9,0x10
-+.byte	0x04,0x02,0x02,0x06,	0xfe,0x7f,0x7f,0x81
-+.byte	0xa0,0x50,0x50,0xf0,	0x78,0x3c,0x3c,0x44
-+.byte	0x25,0x9f,0x9f,0xba,	0x4b,0xa8,0xa8,0xe3
-+.byte	0xa2,0x51,0x51,0xf3,	0x5d,0xa3,0xa3,0xfe
-+.byte	0x80,0x40,0x40,0xc0,	0x05,0x8f,0x8f,0x8a
-+.byte	0x3f,0x92,0x92,0xad,	0x21,0x9d,0x9d,0xbc
-+.byte	0x70,0x38,0x38,0x48,	0xf1,0xf5,0xf5,0x04
-+.byte	0x63,0xbc,0xbc,0xdf,	0x77,0xb6,0xb6,0xc1
-+.byte	0xaf,0xda,0xda,0x75,	0x42,0x21,0x21,0x63
-+.byte	0x20,0x10,0x10,0x30,	0xe5,0xff,0xff,0x1a
-+.byte	0xfd,0xf3,0xf3,0x0e,	0xbf,0xd2,0xd2,0x6d
-+.byte	0x81,0xcd,0xcd,0x4c,	0x18,0x0c,0x0c,0x14
-+.byte	0x26,0x13,0x13,0x35,	0xc3,0xec,0xec,0x2f
-+.byte	0xbe,0x5f,0x5f,0xe1,	0x35,0x97,0x97,0xa2
-+.byte	0x88,0x44,0x44,0xcc,	0x2e,0x17,0x17,0x39
-+.byte	0x93,0xc4,0xc4,0x57,	0x55,0xa7,0xa7,0xf2
-+.byte	0xfc,0x7e,0x7e,0x82,	0x7a,0x3d,0x3d,0x47
-+.byte	0xc8,0x64,0x64,0xac,	0xba,0x5d,0x5d,0xe7
-+.byte	0x32,0x19,0x19,0x2b,	0xe6,0x73,0x73,0x95
-+.byte	0xc0,0x60,0x60,0xa0,	0x19,0x81,0x81,0x98
-+.byte	0x9e,0x4f,0x4f,0xd1,	0xa3,0xdc,0xdc,0x7f
-+.byte	0x44,0x22,0x22,0x66,	0x54,0x2a,0x2a,0x7e
-+.byte	0x3b,0x90,0x90,0xab,	0x0b,0x88,0x88,0x83
-+.byte	0x8c,0x46,0x46,0xca,	0xc7,0xee,0xee,0x29
-+.byte	0x6b,0xb8,0xb8,0xd3,	0x28,0x14,0x14,0x3c
-+.byte	0xa7,0xde,0xde,0x79,	0xbc,0x5e,0x5e,0xe2
-+.byte	0x16,0x0b,0x0b,0x1d,	0xad,0xdb,0xdb,0x76
-+.byte	0xdb,0xe0,0xe0,0x3b,	0x64,0x32,0x32,0x56
-+.byte	0x74,0x3a,0x3a,0x4e,	0x14,0x0a,0x0a,0x1e
-+.byte	0x92,0x49,0x49,0xdb,	0x0c,0x06,0x06,0x0a
-+.byte	0x48,0x24,0x24,0x6c,	0xb8,0x5c,0x5c,0xe4
-+.byte	0x9f,0xc2,0xc2,0x5d,	0xbd,0xd3,0xd3,0x6e
-+.byte	0x43,0xac,0xac,0xef,	0xc4,0x62,0x62,0xa6
-+.byte	0x39,0x91,0x91,0xa8,	0x31,0x95,0x95,0xa4
-+.byte	0xd3,0xe4,0xe4,0x37,	0xf2,0x79,0x79,0x8b
-+.byte	0xd5,0xe7,0xe7,0x32,	0x8b,0xc8,0xc8,0x43
-+.byte	0x6e,0x37,0x37,0x59,	0xda,0x6d,0x6d,0xb7
-+.byte	0x01,0x8d,0x8d,0x8c,	0xb1,0xd5,0xd5,0x64
-+.byte	0x9c,0x4e,0x4e,0xd2,	0x49,0xa9,0xa9,0xe0
-+.byte	0xd8,0x6c,0x6c,0xb4,	0xac,0x56,0x56,0xfa
-+.byte	0xf3,0xf4,0xf4,0x07,	0xcf,0xea,0xea,0x25
-+.byte	0xca,0x65,0x65,0xaf,	0xf4,0x7a,0x7a,0x8e
-+.byte	0x47,0xae,0xae,0xe9,	0x10,0x08,0x08,0x18
-+.byte	0x6f,0xba,0xba,0xd5,	0xf0,0x78,0x78,0x88
-+.byte	0x4a,0x25,0x25,0x6f,	0x5c,0x2e,0x2e,0x72
-+.byte	0x38,0x1c,0x1c,0x24,	0x57,0xa6,0xa6,0xf1
-+.byte	0x73,0xb4,0xb4,0xc7,	0x97,0xc6,0xc6,0x51
-+.byte	0xcb,0xe8,0xe8,0x23,	0xa1,0xdd,0xdd,0x7c
-+.byte	0xe8,0x74,0x74,0x9c,	0x3e,0x1f,0x1f,0x21
-+.byte	0x96,0x4b,0x4b,0xdd,	0x61,0xbd,0xbd,0xdc
-+.byte	0x0d,0x8b,0x8b,0x86,	0x0f,0x8a,0x8a,0x85
-+.byte	0xe0,0x70,0x70,0x90,	0x7c,0x3e,0x3e,0x42
-+.byte	0x71,0xb5,0xb5,0xc4,	0xcc,0x66,0x66,0xaa
-+.byte	0x90,0x48,0x48,0xd8,	0x06,0x03,0x03,0x05
-+.byte	0xf7,0xf6,0xf6,0x01,	0x1c,0x0e,0x0e,0x12
-+.byte	0xc2,0x61,0x61,0xa3,	0x6a,0x35,0x35,0x5f
-+.byte	0xae,0x57,0x57,0xf9,	0x69,0xb9,0xb9,0xd0
-+.byte	0x17,0x86,0x86,0x91,	0x99,0xc1,0xc1,0x58
-+.byte	0x3a,0x1d,0x1d,0x27,	0x27,0x9e,0x9e,0xb9
-+.byte	0xd9,0xe1,0xe1,0x38,	0xeb,0xf8,0xf8,0x13
-+.byte	0x2b,0x98,0x98,0xb3,	0x22,0x11,0x11,0x33
-+.byte	0xd2,0x69,0x69,0xbb,	0xa9,0xd9,0xd9,0x70
-+.byte	0x07,0x8e,0x8e,0x89,	0x33,0x94,0x94,0xa7
-+.byte	0x2d,0x9b,0x9b,0xb6,	0x3c,0x1e,0x1e,0x22
-+.byte	0x15,0x87,0x87,0x92,	0xc9,0xe9,0xe9,0x20
-+.byte	0x87,0xce,0xce,0x49,	0xaa,0x55,0x55,0xff
-+.byte	0x50,0x28,0x28,0x78,	0xa5,0xdf,0xdf,0x7a
-+.byte	0x03,0x8c,0x8c,0x8f,	0x59,0xa1,0xa1,0xf8
-+.byte	0x09,0x89,0x89,0x80,	0x1a,0x0d,0x0d,0x17
-+.byte	0x65,0xbf,0xbf,0xda,	0xd7,0xe6,0xe6,0x31
-+.byte	0x84,0x42,0x42,0xc6,	0xd0,0x68,0x68,0xb8
-+.byte	0x82,0x41,0x41,0xc3,	0x29,0x99,0x99,0xb0
-+.byte	0x5a,0x2d,0x2d,0x77,	0x1e,0x0f,0x0f,0x11
-+.byte	0x7b,0xb0,0xb0,0xcb,	0xa8,0x54,0x54,0xfc
-+.byte	0x6d,0xbb,0xbb,0xd6,	0x2c,0x16,0x16,0x3a
-+
-+.byte	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5	# Te4
-+.byte	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
-+.byte	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
-+.byte	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
-+.byte	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
-+.byte	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
-+.byte	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
-+.byte	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
-+.byte	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
-+.byte	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
-+.byte	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
-+.byte	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
-+.byte	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
-+.byte	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
-+.byte	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
-+.byte	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
-+.byte	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
-+.byte	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
-+.byte	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
-+.byte	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
-+.byte	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
-+.byte	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
-+.byte	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
-+.byte	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
-+.byte	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
-+.byte	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
-+.byte	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
-+.byte	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
-+.byte	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
-+.byte	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
-+.byte	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
-+.byte	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
-+
-+.byte	0x01,0x00,0x00,0x00,	0x02,0x00,0x00,0x00	# rcon
-+.byte	0x04,0x00,0x00,0x00,	0x08,0x00,0x00,0x00
-+.byte	0x10,0x00,0x00,0x00,	0x20,0x00,0x00,0x00
-+.byte	0x40,0x00,0x00,0x00,	0x80,0x00,0x00,0x00
-+.byte	0x1B,0x00,0x00,0x00,	0x36,0x00,0x00,0x00
-+
-+.align	6
-+AES_Td:
-+.byte	0x51,0xf4,0xa7,0x50,	0x7e,0x41,0x65,0x53	# Td0
-+.byte	0x1a,0x17,0xa4,0xc3,	0x3a,0x27,0x5e,0x96
-+.byte	0x3b,0xab,0x6b,0xcb,	0x1f,0x9d,0x45,0xf1
-+.byte	0xac,0xfa,0x58,0xab,	0x4b,0xe3,0x03,0x93
-+.byte	0x20,0x30,0xfa,0x55,	0xad,0x76,0x6d,0xf6
-+.byte	0x88,0xcc,0x76,0x91,	0xf5,0x02,0x4c,0x25
-+.byte	0x4f,0xe5,0xd7,0xfc,	0xc5,0x2a,0xcb,0xd7
-+.byte	0x26,0x35,0x44,0x80,	0xb5,0x62,0xa3,0x8f
-+.byte	0xde,0xb1,0x5a,0x49,	0x25,0xba,0x1b,0x67
-+.byte	0x45,0xea,0x0e,0x98,	0x5d,0xfe,0xc0,0xe1
-+.byte	0xc3,0x2f,0x75,0x02,	0x81,0x4c,0xf0,0x12
-+.byte	0x8d,0x46,0x97,0xa3,	0x6b,0xd3,0xf9,0xc6
-+.byte	0x03,0x8f,0x5f,0xe7,	0x15,0x92,0x9c,0x95
-+.byte	0xbf,0x6d,0x7a,0xeb,	0x95,0x52,0x59,0xda
-+.byte	0xd4,0xbe,0x83,0x2d,	0x58,0x74,0x21,0xd3
-+.byte	0x49,0xe0,0x69,0x29,	0x8e,0xc9,0xc8,0x44
-+.byte	0x75,0xc2,0x89,0x6a,	0xf4,0x8e,0x79,0x78
-+.byte	0x99,0x58,0x3e,0x6b,	0x27,0xb9,0x71,0xdd
-+.byte	0xbe,0xe1,0x4f,0xb6,	0xf0,0x88,0xad,0x17
-+.byte	0xc9,0x20,0xac,0x66,	0x7d,0xce,0x3a,0xb4
-+.byte	0x63,0xdf,0x4a,0x18,	0xe5,0x1a,0x31,0x82
-+.byte	0x97,0x51,0x33,0x60,	0x62,0x53,0x7f,0x45
-+.byte	0xb1,0x64,0x77,0xe0,	0xbb,0x6b,0xae,0x84
-+.byte	0xfe,0x81,0xa0,0x1c,	0xf9,0x08,0x2b,0x94
-+.byte	0x70,0x48,0x68,0x58,	0x8f,0x45,0xfd,0x19
-+.byte	0x94,0xde,0x6c,0x87,	0x52,0x7b,0xf8,0xb7
-+.byte	0xab,0x73,0xd3,0x23,	0x72,0x4b,0x02,0xe2
-+.byte	0xe3,0x1f,0x8f,0x57,	0x66,0x55,0xab,0x2a
-+.byte	0xb2,0xeb,0x28,0x07,	0x2f,0xb5,0xc2,0x03
-+.byte	0x86,0xc5,0x7b,0x9a,	0xd3,0x37,0x08,0xa5
-+.byte	0x30,0x28,0x87,0xf2,	0x23,0xbf,0xa5,0xb2
-+.byte	0x02,0x03,0x6a,0xba,	0xed,0x16,0x82,0x5c
-+.byte	0x8a,0xcf,0x1c,0x2b,	0xa7,0x79,0xb4,0x92
-+.byte	0xf3,0x07,0xf2,0xf0,	0x4e,0x69,0xe2,0xa1
-+.byte	0x65,0xda,0xf4,0xcd,	0x06,0x05,0xbe,0xd5
-+.byte	0xd1,0x34,0x62,0x1f,	0xc4,0xa6,0xfe,0x8a
-+.byte	0x34,0x2e,0x53,0x9d,	0xa2,0xf3,0x55,0xa0
-+.byte	0x05,0x8a,0xe1,0x32,	0xa4,0xf6,0xeb,0x75
-+.byte	0x0b,0x83,0xec,0x39,	0x40,0x60,0xef,0xaa
-+.byte	0x5e,0x71,0x9f,0x06,	0xbd,0x6e,0x10,0x51
-+.byte	0x3e,0x21,0x8a,0xf9,	0x96,0xdd,0x06,0x3d
-+.byte	0xdd,0x3e,0x05,0xae,	0x4d,0xe6,0xbd,0x46
-+.byte	0x91,0x54,0x8d,0xb5,	0x71,0xc4,0x5d,0x05
-+.byte	0x04,0x06,0xd4,0x6f,	0x60,0x50,0x15,0xff
-+.byte	0x19,0x98,0xfb,0x24,	0xd6,0xbd,0xe9,0x97
-+.byte	0x89,0x40,0x43,0xcc,	0x67,0xd9,0x9e,0x77
-+.byte	0xb0,0xe8,0x42,0xbd,	0x07,0x89,0x8b,0x88
-+.byte	0xe7,0x19,0x5b,0x38,	0x79,0xc8,0xee,0xdb
-+.byte	0xa1,0x7c,0x0a,0x47,	0x7c,0x42,0x0f,0xe9
-+.byte	0xf8,0x84,0x1e,0xc9,	0x00,0x00,0x00,0x00
-+.byte	0x09,0x80,0x86,0x83,	0x32,0x2b,0xed,0x48
-+.byte	0x1e,0x11,0x70,0xac,	0x6c,0x5a,0x72,0x4e
-+.byte	0xfd,0x0e,0xff,0xfb,	0x0f,0x85,0x38,0x56
-+.byte	0x3d,0xae,0xd5,0x1e,	0x36,0x2d,0x39,0x27
-+.byte	0x0a,0x0f,0xd9,0x64,	0x68,0x5c,0xa6,0x21
-+.byte	0x9b,0x5b,0x54,0xd1,	0x24,0x36,0x2e,0x3a
-+.byte	0x0c,0x0a,0x67,0xb1,	0x93,0x57,0xe7,0x0f
-+.byte	0xb4,0xee,0x96,0xd2,	0x1b,0x9b,0x91,0x9e
-+.byte	0x80,0xc0,0xc5,0x4f,	0x61,0xdc,0x20,0xa2
-+.byte	0x5a,0x77,0x4b,0x69,	0x1c,0x12,0x1a,0x16
-+.byte	0xe2,0x93,0xba,0x0a,	0xc0,0xa0,0x2a,0xe5
-+.byte	0x3c,0x22,0xe0,0x43,	0x12,0x1b,0x17,0x1d
-+.byte	0x0e,0x09,0x0d,0x0b,	0xf2,0x8b,0xc7,0xad
-+.byte	0x2d,0xb6,0xa8,0xb9,	0x14,0x1e,0xa9,0xc8
-+.byte	0x57,0xf1,0x19,0x85,	0xaf,0x75,0x07,0x4c
-+.byte	0xee,0x99,0xdd,0xbb,	0xa3,0x7f,0x60,0xfd
-+.byte	0xf7,0x01,0x26,0x9f,	0x5c,0x72,0xf5,0xbc
-+.byte	0x44,0x66,0x3b,0xc5,	0x5b,0xfb,0x7e,0x34
-+.byte	0x8b,0x43,0x29,0x76,	0xcb,0x23,0xc6,0xdc
-+.byte	0xb6,0xed,0xfc,0x68,	0xb8,0xe4,0xf1,0x63
-+.byte	0xd7,0x31,0xdc,0xca,	0x42,0x63,0x85,0x10
-+.byte	0x13,0x97,0x22,0x40,	0x84,0xc6,0x11,0x20
-+.byte	0x85,0x4a,0x24,0x7d,	0xd2,0xbb,0x3d,0xf8
-+.byte	0xae,0xf9,0x32,0x11,	0xc7,0x29,0xa1,0x6d
-+.byte	0x1d,0x9e,0x2f,0x4b,	0xdc,0xb2,0x30,0xf3
-+.byte	0x0d,0x86,0x52,0xec,	0x77,0xc1,0xe3,0xd0
-+.byte	0x2b,0xb3,0x16,0x6c,	0xa9,0x70,0xb9,0x99
-+.byte	0x11,0x94,0x48,0xfa,	0x47,0xe9,0x64,0x22
-+.byte	0xa8,0xfc,0x8c,0xc4,	0xa0,0xf0,0x3f,0x1a
-+.byte	0x56,0x7d,0x2c,0xd8,	0x22,0x33,0x90,0xef
-+.byte	0x87,0x49,0x4e,0xc7,	0xd9,0x38,0xd1,0xc1
-+.byte	0x8c,0xca,0xa2,0xfe,	0x98,0xd4,0x0b,0x36
-+.byte	0xa6,0xf5,0x81,0xcf,	0xa5,0x7a,0xde,0x28
-+.byte	0xda,0xb7,0x8e,0x26,	0x3f,0xad,0xbf,0xa4
-+.byte	0x2c,0x3a,0x9d,0xe4,	0x50,0x78,0x92,0x0d
-+.byte	0x6a,0x5f,0xcc,0x9b,	0x54,0x7e,0x46,0x62
-+.byte	0xf6,0x8d,0x13,0xc2,	0x90,0xd8,0xb8,0xe8
-+.byte	0x2e,0x39,0xf7,0x5e,	0x82,0xc3,0xaf,0xf5
-+.byte	0x9f,0x5d,0x80,0xbe,	0x69,0xd0,0x93,0x7c
-+.byte	0x6f,0xd5,0x2d,0xa9,	0xcf,0x25,0x12,0xb3
-+.byte	0xc8,0xac,0x99,0x3b,	0x10,0x18,0x7d,0xa7
-+.byte	0xe8,0x9c,0x63,0x6e,	0xdb,0x3b,0xbb,0x7b
-+.byte	0xcd,0x26,0x78,0x09,	0x6e,0x59,0x18,0xf4
-+.byte	0xec,0x9a,0xb7,0x01,	0x83,0x4f,0x9a,0xa8
-+.byte	0xe6,0x95,0x6e,0x65,	0xaa,0xff,0xe6,0x7e
-+.byte	0x21,0xbc,0xcf,0x08,	0xef,0x15,0xe8,0xe6
-+.byte	0xba,0xe7,0x9b,0xd9,	0x4a,0x6f,0x36,0xce
-+.byte	0xea,0x9f,0x09,0xd4,	0x29,0xb0,0x7c,0xd6
-+.byte	0x31,0xa4,0xb2,0xaf,	0x2a,0x3f,0x23,0x31
-+.byte	0xc6,0xa5,0x94,0x30,	0x35,0xa2,0x66,0xc0
-+.byte	0x74,0x4e,0xbc,0x37,	0xfc,0x82,0xca,0xa6
-+.byte	0xe0,0x90,0xd0,0xb0,	0x33,0xa7,0xd8,0x15
-+.byte	0xf1,0x04,0x98,0x4a,	0x41,0xec,0xda,0xf7
-+.byte	0x7f,0xcd,0x50,0x0e,	0x17,0x91,0xf6,0x2f
-+.byte	0x76,0x4d,0xd6,0x8d,	0x43,0xef,0xb0,0x4d
-+.byte	0xcc,0xaa,0x4d,0x54,	0xe4,0x96,0x04,0xdf
-+.byte	0x9e,0xd1,0xb5,0xe3,	0x4c,0x6a,0x88,0x1b
-+.byte	0xc1,0x2c,0x1f,0xb8,	0x46,0x65,0x51,0x7f
-+.byte	0x9d,0x5e,0xea,0x04,	0x01,0x8c,0x35,0x5d
-+.byte	0xfa,0x87,0x74,0x73,	0xfb,0x0b,0x41,0x2e
-+.byte	0xb3,0x67,0x1d,0x5a,	0x92,0xdb,0xd2,0x52
-+.byte	0xe9,0x10,0x56,0x33,	0x6d,0xd6,0x47,0x13
-+.byte	0x9a,0xd7,0x61,0x8c,	0x37,0xa1,0x0c,0x7a
-+.byte	0x59,0xf8,0x14,0x8e,	0xeb,0x13,0x3c,0x89
-+.byte	0xce,0xa9,0x27,0xee,	0xb7,0x61,0xc9,0x35
-+.byte	0xe1,0x1c,0xe5,0xed,	0x7a,0x47,0xb1,0x3c
-+.byte	0x9c,0xd2,0xdf,0x59,	0x55,0xf2,0x73,0x3f
-+.byte	0x18,0x14,0xce,0x79,	0x73,0xc7,0x37,0xbf
-+.byte	0x53,0xf7,0xcd,0xea,	0x5f,0xfd,0xaa,0x5b
-+.byte	0xdf,0x3d,0x6f,0x14,	0x78,0x44,0xdb,0x86
-+.byte	0xca,0xaf,0xf3,0x81,	0xb9,0x68,0xc4,0x3e
-+.byte	0x38,0x24,0x34,0x2c,	0xc2,0xa3,0x40,0x5f
-+.byte	0x16,0x1d,0xc3,0x72,	0xbc,0xe2,0x25,0x0c
-+.byte	0x28,0x3c,0x49,0x8b,	0xff,0x0d,0x95,0x41
-+.byte	0x39,0xa8,0x01,0x71,	0x08,0x0c,0xb3,0xde
-+.byte	0xd8,0xb4,0xe4,0x9c,	0x64,0x56,0xc1,0x90
-+.byte	0x7b,0xcb,0x84,0x61,	0xd5,0x32,0xb6,0x70
-+.byte	0x48,0x6c,0x5c,0x74,	0xd0,0xb8,0x57,0x42
-+
-+.byte	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38	# Td4
-+.byte	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
-+.byte	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
-+.byte	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
-+.byte	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
-+.byte	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
-+.byte	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
-+.byte	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
-+.byte	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
-+.byte	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
-+.byte	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
-+.byte	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
-+.byte	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
-+.byte	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
-+.byte	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
-+.byte	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
-+.byte	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
-+.byte	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
-+.byte	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
-+.byte	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
-+.byte	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
-+.byte	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
-+.byte	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
-+.byte	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
-+.byte	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
-+.byte	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
-+.byte	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
-+.byte	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
-+.byte	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
-+.byte	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
-+.byte	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
-+.byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
-+___
-+
-+foreach (split("\n",$code)) {
-+	s/\`([^\`]*)\`/eval $1/ge;
-+
-+	# made-up _instructions, _xtr, _ins, _ror and _bias, cope
-+	# with byte order dependencies...
-+	if (/^\s+_/) {
-+	    s/(_[a-z]+\s+)(\$[0-9]+),([^,]+)(#.*)*$/$1$2,$2,$3/;
-+
-+	    s/_xtr\s+(\$[0-9]+),(\$[0-9]+),([0-9]+(\-2)*)/
-+		sprintf("srl\t$1,$2,%d",$big_endian ?	eval($3)
-+					:		eval("24-$3"))/e or
-+	    s/_ins\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
-+		sprintf("sll\t$1,$2,%d",$big_endian ?	eval($3)
-+					:		eval("24-$3"))/e or
-+	    s/_ror\s+(\$[0-9]+),(\$[0-9]+),(\-?[0-9]+)/
-+		sprintf("srl\t$1,$2,%d",$big_endian ?	eval($3)
-+					:		eval("$3*-1"))/e or
-+	    s/_bias\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
-+		sprintf("sll\t$1,$2,%d",$big_endian ?	eval($3)
-+					:		eval("($3-16)&31"))/e;
-+
-+	    s/srl\s+(\$[0-9]+),(\$[0-9]+),\-([0-9]+)/
-+		sprintf("sll\t$1,$2,$3")/e				or
-+	    s/srl\s+(\$[0-9]+),(\$[0-9]+),0/
-+		sprintf("and\t$1,$2,0xff")/e				or
-+	    s/(sll\s+\$[0-9]+,\$[0-9]+,0)/#$1/;
-+	}
-+
-+	# convert lwl/lwr and swr/swl to little-endian order
-+	if (!$big_endian && /^\s+[sl]w[lr]\s+/) {
-+	    s/([sl]wl.*)([0-9]+)\((\$[0-9]+)\)/
-+		sprintf("$1%d($3)",eval("$2-$2%4+($2%4-1)&3"))/e	or
-+	    s/([sl]wr.*)([0-9]+)\((\$[0-9]+)\)/
-+		sprintf("$1%d($3)",eval("$2-$2%4+($2%4+1)&3"))/e;
-+	}
-+
-+	print $_,"\n";
-+}
-+
-+close STDOUT;
-diff --git a/crypto/bn/asm/mips-mont.pl b/crypto/bn/asm/mips-mont.pl
-new file mode 100644
-index 0000000..b944a12
---- /dev/null
-+++ b/crypto/bn/asm/mips-mont.pl
-@@ -0,0 +1,426 @@
-+#!/usr/bin/env perl
-+#
-+# ====================================================================
-+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
-+# project. The module is, however, dual licensed under OpenSSL and
-+# CRYPTOGAMS licenses depending on where you obtain it. For further
-+# details see http://www.openssl.org/~appro/cryptogams/.
-+# ====================================================================
-+
-+# This module doesn't present direct interest for OpenSSL, because it
-+# doesn't provide better performance for longer keys, at least not on
-+# in-order-execution cores. While 512-bit RSA sign operations can be
-+# 65% faster in 64-bit mode, 1024-bit ones are only 15% faster, and
-+# 4096-bit ones are up to 15% slower. In 32-bit mode it varies from
-+# 16% improvement for 512-bit RSA sign to -33% for 4096-bit RSA
-+# verify:-( All comparisons are against bn_mul_mont-free assembler.
-+# The module might be of interest to embedded system developers, as
-+# the code is smaller than 1KB, yet offers >3x improvement on MIPS64
-+# and 75-30% [less for longer keys] on MIPS32 over compiler-generated
-+# code.
-+
-+######################################################################
-+# There is a number of MIPS ABI in use, O32 and N32/64 are most
-+# widely used. Then there is a new contender: NUBI. It appears that if
-+# one picks the latter, it's possible to arrange code in ABI neutral
-+# manner. Therefore let's stick to NUBI register layout:
-+#
-+($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25));
-+($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
-+($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23));
-+($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31));
-+#
-+# The return value is placed in $a0. Following coding rules facilitate
-+# interoperability:
-+#
-+# - never ever touch $tp, "thread pointer", former $gp;
-+# - copy return value to $t0, former $v0 [or to $a0 if you're adapting
-+#   old code];
-+# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
-+#
-+# For reference here is register layout for N32/64 MIPS ABIs:
-+#
-+# ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
-+# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
-+# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
-+# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
-+# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
-+#
-+$flavour = shift; # supported flavours are o32,n32,64,nubi32,nubi64
-+
-+if ($flavour =~ /64|n32/i) {
-+	$PTR_ADD="dadd";	# incidentally works even on n32
-+	$PTR_SUB="dsub";	# incidentally works even on n32
-+	$REG_S="sd";
-+	$REG_L="ld";
-+	$SZREG=8;
-+} else {
-+	$PTR_ADD="add";
-+	$PTR_SUB="sub";
-+	$REG_S="sw";
-+	$REG_L="lw";
-+	$SZREG=4;
-+}
-+$SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0x00fff000 : 0x00ff0000;
-+#
-+# <appro@openssl.org>
-+#
-+######################################################################
-+
-+while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
-+open STDOUT,">$output";
-+
-+if ($flavour =~ /64|n32/i) {
-+	$LD="ld";
-+	$ST="sd";
-+	$MULTU="dmultu";
-+	$ADDU="daddu";
-+	$SUBU="dsubu";
-+	$BNSZ=8;
-+} else {
-+	$LD="lw";
-+	$ST="sw";
-+	$MULTU="multu";
-+	$ADDU="addu";
-+	$SUBU="subu";
-+	$BNSZ=4;
-+}
-+
-+# int bn_mul_mont(
-+$rp=$a0;	# BN_ULONG *rp,
-+$ap=$a1;	# const BN_ULONG *ap,
-+$bp=$a2;	# const BN_ULONG *bp,
-+$np=$a3;	# const BN_ULONG *np,
-+$n0=$a4;	# const BN_ULONG *n0,
-+$num=$a5;	# int num);
-+
-+$lo0=$a6;
-+$hi0=$a7;
-+$lo1=$t1;
-+$hi1=$t2;
-+$aj=$s0;
-+$bi=$s1;
-+$nj=$s2;
-+$tp=$s3;
-+$alo=$s4;
-+$ahi=$s5;
-+$nlo=$s6;
-+$nhi=$s7;
-+$tj=$s8;
-+$i=$s9;
-+$j=$s10;
-+$m1=$s11;
-+
-+$FRAMESIZE=14;
-+
-+$code=<<___;
-+.text
-+
-+.set	noat
-+.set	noreorder
-+
-+.align	5
-+.globl	bn_mul_mont
-+.ent	bn_mul_mont
-+bn_mul_mont:
-+___
-+$code.=<<___ if ($flavour =~ /o32/i);
-+	lw	$n0,16($sp)
-+	lw	$num,20($sp)
-+___
-+$code.=<<___;
-+	slt	$at,$num,4
-+	bnez	$at,1f
-+	li	$t0,0
-+	slt	$at,$num,17	# on in-order CPU
-+	bnezl	$at,bn_mul_mont_internal
-+	nop
-+1:	jr	$ra
-+	li	$a0,0
-+.end	bn_mul_mont
-+
-+.align	5
-+.ent	bn_mul_mont_internal
-+bn_mul_mont_internal:
-+	.frame	$fp,$FRAMESIZE*$SZREG,$ra
-+	.mask	0x40000000|$SAVED_REGS_MASK,-$SZREG
-+	$PTR_SUB $sp,$FRAMESIZE*$SZREG
-+	$REG_S	$fp,($FRAMESIZE-1)*$SZREG($sp)
-+	$REG_S	$s11,($FRAMESIZE-2)*$SZREG($sp)
-+	$REG_S	$s10,($FRAMESIZE-3)*$SZREG($sp)
-+	$REG_S	$s9,($FRAMESIZE-4)*$SZREG($sp)
-+	$REG_S	$s8,($FRAMESIZE-5)*$SZREG($sp)
-+	$REG_S	$s7,($FRAMESIZE-6)*$SZREG($sp)
-+	$REG_S	$s6,($FRAMESIZE-7)*$SZREG($sp)
-+	$REG_S	$s5,($FRAMESIZE-8)*$SZREG($sp)
-+	$REG_S	$s4,($FRAMESIZE-9)*$SZREG($sp)
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+	$REG_S	$s3,($FRAMESIZE-10)*$SZREG($sp)
-+	$REG_S	$s2,($FRAMESIZE-11)*$SZREG($sp)
-+	$REG_S	$s1,($FRAMESIZE-12)*$SZREG($sp)
-+	$REG_S	$s0,($FRAMESIZE-13)*$SZREG($sp)
-+___
-+$code.=<<___;
-+	move	$fp,$sp
-+
-+	.set	reorder
-+	$LD	$n0,0($n0)
-+	$LD	$bi,0($bp)	# bp[0]
-+	$LD	$aj,0($ap)	# ap[0]
-+	$LD	$nj,0($np)	# np[0]
-+
-+	$PTR_SUB $sp,2*$BNSZ	# place for two extra words
-+	sll	$num,`log($BNSZ)/log(2)`
-+	li	$at,-4096
-+	$PTR_SUB $sp,$num
-+	and	$sp,$at
-+
-+	$MULTU	$aj,$bi
-+	$LD	$alo,$BNSZ($ap)
-+	$LD	$nlo,$BNSZ($np)
-+	mflo	$lo0
-+	mfhi	$hi0
-+	$MULTU	$lo0,$n0
-+	mflo	$m1
-+
-+	$MULTU	$alo,$bi
-+	mflo	$alo
-+	mfhi	$ahi
-+
-+	$MULTU	$nj,$m1
-+	mflo	$lo1
-+	mfhi	$hi1
-+	$MULTU	$nlo,$m1
-+	$ADDU	$lo1,$lo0
-+	sltu	$at,$lo1,$lo0
-+	$ADDU	$hi1,$at
-+	mflo	$nlo
-+	mfhi	$nhi
-+
-+	move	$tp,$sp
-+	li	$j,2*$BNSZ
-+.align	4
-+.L1st:
-+	.set	noreorder
-+	$PTR_ADD $aj,$ap,$j
-+	$PTR_ADD $nj,$np,$j
-+	$LD	$aj,($aj)
-+	$LD	$nj,($nj)
-+
-+	$MULTU	$aj,$bi
-+	$ADDU	$lo0,$alo,$hi0
-+	$ADDU	$lo1,$nlo,$hi1
-+	sltu	$at,$lo0,$hi0
-+	sltu	$t0,$lo1,$hi1
-+	$ADDU	$hi0,$ahi,$at
-+	$ADDU	$hi1,$nhi,$t0
-+	mflo	$alo
-+	mfhi	$ahi
-+
-+	$ADDU	$lo1,$lo0
-+	sltu	$at,$lo1,$lo0
-+	$MULTU	$nj,$m1
-+	$ADDU	$hi1,$at
-+	addu	$j,$BNSZ
-+	$ST	$lo1,($tp)
-+	sltu	$t0,$j,$num
-+	mflo	$nlo
-+	mfhi	$nhi
-+
-+	bnez	$t0,.L1st
-+	$PTR_ADD $tp,$BNSZ
-+	.set	reorder
-+
-+	$ADDU	$lo0,$alo,$hi0
-+	sltu	$at,$lo0,$hi0
-+	$ADDU	$hi0,$ahi,$at
-+
-+	$ADDU	$lo1,$nlo,$hi1
-+	sltu	$t0,$lo1,$hi1
-+	$ADDU	$hi1,$nhi,$t0
-+	$ADDU	$lo1,$lo0
-+	sltu	$at,$lo1,$lo0
-+	$ADDU	$hi1,$at
-+
-+	$ST	$lo1,($tp)
-+
-+	$ADDU	$hi1,$hi0
-+	sltu	$at,$hi1,$hi0
-+	$ST	$hi1,$BNSZ($tp)
-+	$ST	$at,2*$BNSZ($tp)
-+
-+	li	$i,$BNSZ
-+.align	4
-+.Louter:
-+	$PTR_ADD $bi,$bp,$i
-+	$LD	$bi,($bi)
-+	$LD	$aj,($ap)
-+	$LD	$alo,$BNSZ($ap)
-+	$LD	$tj,($sp)
-+
-+	$MULTU	$aj,$bi
-+	$LD	$nj,($np)
-+	$LD	$nlo,$BNSZ($np)
-+	mflo	$lo0
-+	mfhi	$hi0
-+	$ADDU	$lo0,$tj
-+	$MULTU	$lo0,$n0
-+	sltu	$at,$lo0,$tj
-+	$ADDU	$hi0,$at
-+	mflo	$m1
-+
-+	$MULTU	$alo,$bi
-+	mflo	$alo
-+	mfhi	$ahi
-+
-+	$MULTU	$nj,$m1
-+	mflo	$lo1
-+	mfhi	$hi1
-+
-+	$MULTU	$nlo,$m1
-+	$ADDU	$lo1,$lo0
-+	sltu	$at,$lo1,$lo0
-+	$ADDU	$hi1,$at
-+	mflo	$nlo
-+	mfhi	$nhi
-+
-+	move	$tp,$sp
-+	li	$j,2*$BNSZ
-+	$LD	$tj,$BNSZ($tp)
-+.align	4
-+.Linner:
-+	.set	noreorder
-+	$PTR_ADD $aj,$ap,$j
-+	$PTR_ADD $nj,$np,$j
-+	$LD	$aj,($aj)
-+	$LD	$nj,($nj)
-+
-+	$MULTU	$aj,$bi
-+	$ADDU	$lo0,$alo,$hi0
-+	$ADDU	$lo1,$nlo,$hi1
-+	sltu	$at,$lo0,$hi0
-+	sltu	$t0,$lo1,$hi1
-+	$ADDU	$hi0,$ahi,$at
-+	$ADDU	$hi1,$nhi,$t0
-+	mflo	$alo
-+	mfhi	$ahi
-+
-+	$ADDU	$lo0,$tj
-+	addu	$j,$BNSZ
-+	$MULTU	$nj,$m1
-+	sltu	$at,$lo0,$tj
-+	$ADDU	$lo1,$lo0
-+	$ADDU	$hi0,$at
-+	sltu	$t0,$lo1,$lo0
-+	$LD	$tj,2*$BNSZ($tp)
-+	$ADDU	$hi1,$t0
-+	sltu	$at,$j,$num
-+	mflo	$nlo
-+	mfhi	$nhi
-+	$ST	$lo1,($tp)
-+	bnez	$at,.Linner
-+	$PTR_ADD $tp,$BNSZ
-+	.set	reorder
-+
-+	$ADDU	$lo0,$alo,$hi0
-+	sltu	$at,$lo0,$hi0
-+	$ADDU	$hi0,$ahi,$at
-+	$ADDU	$lo0,$tj
-+	sltu	$t0,$lo0,$tj
-+	$ADDU	$hi0,$t0
-+
-+	$LD	$tj,2*$BNSZ($tp)
-+	$ADDU	$lo1,$nlo,$hi1
-+	sltu	$at,$lo1,$hi1
-+	$ADDU	$hi1,$nhi,$at
-+	$ADDU	$lo1,$lo0
-+	sltu	$t0,$lo1,$lo0
-+	$ADDU	$hi1,$t0
-+	$ST	$lo1,($tp)
-+
-+	$ADDU	$lo1,$hi1,$hi0
-+	sltu	$hi1,$lo1,$hi0
-+	$ADDU	$lo1,$tj
-+	sltu	$at,$lo1,$tj
-+	$ADDU	$hi1,$at
-+	$ST	$lo1,$BNSZ($tp)
-+	$ST	$hi1,2*$BNSZ($tp)
-+
-+	addu	$i,$BNSZ
-+	sltu	$t0,$i,$num
-+	bnez	$t0,.Louter
-+
-+	.set	noreorder
-+	$PTR_ADD $tj,$sp,$num	# &tp[num]
-+	move	$tp,$sp
-+	move	$ap,$sp
-+	li	$hi0,0		# clear borrow bit
-+
-+.align	4
-+.Lsub:	$LD	$lo0,($tp)
-+	$LD	$lo1,($np)
-+	$PTR_ADD $tp,$BNSZ
-+	$PTR_ADD $np,$BNSZ
-+	$SUBU	$lo1,$lo0,$lo1	# tp[i]-np[i]
-+	sgtu	$at,$lo1,$lo0
-+	$SUBU	$lo0,$lo1,$hi0
-+	sgtu	$hi0,$lo0,$lo1
-+	$ST	$lo0,($rp)
-+	or	$hi0,$at
-+	sltu	$at,$tp,$tj
-+	bnez	$at,.Lsub
-+	$PTR_ADD $rp,$BNSZ
-+
-+	$SUBU	$hi0,$hi1,$hi0	# handle upmost overflow bit
-+	move	$tp,$sp
-+	$PTR_SUB $rp,$num	# restore rp
-+	not	$hi1,$hi0
-+
-+	and	$ap,$hi0,$sp
-+	and	$bp,$hi1,$rp
-+	or	$ap,$ap,$bp	# ap=borrow?tp:rp
-+
-+.align	4
-+.Lcopy:	$LD	$aj,($ap)
-+	$PTR_ADD $ap,$BNSZ
-+	$ST	$zero,($tp)
-+	$PTR_ADD $tp,$BNSZ
-+	sltu	$at,$tp,$tj
-+	$ST	$aj,($rp)
-+	bnez	$at,.Lcopy
-+	$PTR_ADD $rp,$BNSZ
-+
-+	li	$a0,1
-+	li	$t0,1
-+
-+	.set	noreorder
-+	move	$sp,$fp
-+	$REG_L	$fp,($FRAMESIZE-1)*$SZREG($sp)
-+	$REG_L	$s11,($FRAMESIZE-2)*$SZREG($sp)
-+	$REG_L	$s10,($FRAMESIZE-3)*$SZREG($sp)
-+	$REG_L	$s9,($FRAMESIZE-4)*$SZREG($sp)
-+	$REG_L	$s8,($FRAMESIZE-5)*$SZREG($sp)
-+	$REG_L	$s7,($FRAMESIZE-6)*$SZREG($sp)
-+	$REG_L	$s6,($FRAMESIZE-7)*$SZREG($sp)
-+	$REG_L	$s5,($FRAMESIZE-8)*$SZREG($sp)
-+	$REG_L	$s4,($FRAMESIZE-9)*$SZREG($sp)
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+	$REG_L	$s3,($FRAMESIZE-10)*$SZREG($sp)
-+	$REG_L	$s2,($FRAMESIZE-11)*$SZREG($sp)
-+	$REG_L	$s1,($FRAMESIZE-12)*$SZREG($sp)
-+	$REG_L	$s0,($FRAMESIZE-13)*$SZREG($sp)
-+___
-+$code.=<<___;
-+	jr	$ra
-+	$PTR_ADD $sp,$FRAMESIZE*$SZREG
-+.end	bn_mul_mont_internal
-+.rdata
-+.asciiz	"Montgomery Multiplication for MIPS, CRYPTOGAMS by <appro\@openssl.org>"
-+___
-+
-+$code =~ s/\`([^\`]*)\`/eval $1/gem;
-+
-+print $code;
-+close STDOUT;
-diff --git a/crypto/bn/asm/mips.pl b/crypto/bn/asm/mips.pl
-new file mode 100644
-index 0000000..f04b3b9
---- /dev/null
-+++ b/crypto/bn/asm/mips.pl
-@@ -0,0 +1,2585 @@
-+#!/usr/bin/env perl
-+#
-+# ====================================================================
-+# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
-+# project.
-+#
-+# Rights for redistribution and usage in source and binary forms are
-+# granted according to the OpenSSL license. Warranty of any kind is
-+# disclaimed.
-+# ====================================================================
-+
-+
-+# July 1999
-+#
-+# This is drop-in MIPS III/IV ISA replacement for crypto/bn/bn_asm.c.
-+#
-+# The module is designed to work with either of the "new" MIPS ABI(5),
-+# namely N32 or N64, offered by IRIX 6.x. It's not ment to work under
-+# IRIX 5.x not only because it doesn't support new ABIs but also
-+# because 5.x kernels put R4x00 CPU into 32-bit mode and all those
-+# 64-bit instructions (daddu, dmultu, etc.) found below gonna only
-+# cause illegal instruction exception:-(
-+#
-+# In addition the code depends on preprocessor flags set up by MIPSpro
-+# compiler driver (either as or cc) and therefore (probably?) can't be
-+# compiled by the GNU assembler. GNU C driver manages fine though...
-+# I mean as long as -mmips-as is specified or is the default option,
-+# because then it simply invokes /usr/bin/as which in turn takes
-+# perfect care of the preprocessor definitions. Another neat feature
-+# offered by the MIPSpro assembler is an optimization pass. This gave
-+# me the opportunity to have the code looking more regular as all those
-+# architecture dependent instruction rescheduling details were left to
-+# the assembler. Cool, huh?
-+#
-+# Performance improvement is astonishing! 'apps/openssl speed rsa dsa'
-+# goes way over 3 times faster!
-+#
-+#					<appro@fy.chalmers.se>
-+
-+# October 2010
-+#
-+# Adapt the module even for 32-bit ABIs and other OSes. The former was
-+# achieved by mechanical replacement of 64-bit arithmetic instructions
-+# such as dmultu, daddu, etc. with their 32-bit counterparts and
-+# adjusting offsets denoting multiples of BN_ULONG. Above mentioned
-+# >3x performance improvement naturally does not apply to 32-bit code
-+# [because there is no instruction 32-bit compiler can't use], one
-+# has to content with 40-85% improvement depending on benchmark and
-+# key length, more for longer keys.
-+
-+$flavour = shift;
-+while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
-+open STDOUT,">$output";
-+
-+if ($flavour =~ /64|n32/i) {
-+	$LD="ld";
-+	$ST="sd";
-+	$MULTU="dmultu";
-+	$DIVU="ddivu";
-+	$ADDU="daddu";
-+	$SUBU="dsubu";
-+	$SRL="dsrl";
-+	$SLL="dsll";
-+	$BNSZ=8;
-+	$PTR_ADD="daddu";
-+	$PTR_SUB="dsubu";
-+	$SZREG=8;
-+	$REG_S="sd";
-+	$REG_L="ld";
-+} else {
-+	$LD="lw";
-+	$ST="sw";
-+	$MULTU="multu";
-+	$DIVU="divu";
-+	$ADDU="addu";
-+	$SUBU="subu";
-+	$SRL="srl";
-+	$SLL="sll";
-+	$BNSZ=4;
-+	$PTR_ADD="addu";
-+	$PTR_SUB="subu";
-+	$SZREG=4;
-+	$REG_S="sw";
-+	$REG_L="lw";
-+	$code=".set	mips2\n";
-+}
-+
-+# Below is N32/64 register layout used in the original module.
-+#
-+($zero,$at,$v0,$v1)=map("\$$_",(0..3));
-+($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
-+($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
-+($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
-+($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
-+($ta0,$ta1,$ta2,$ta3)=($a4,$a5,$a6,$a7);
-+#
-+# No special adaptation is required for O32. NUBI on the other hand
-+# is treated by saving/restoring ($v1,$t0..$t3).
-+
-+$gp=$v1 if ($flavour =~ /nubi/i);
-+
-+$minus4=$v1;
-+
-+$code.=<<___;
-+.rdata
-+.asciiz	"mips3.s, Version 1.2"
-+.asciiz	"MIPS II/III/IV ISA artwork by Andy Polyakov <appro\@fy.chalmers.se>"
-+
-+.text
-+.set	noat
-+
-+.align	5
-+.globl	bn_mul_add_words
-+.ent	bn_mul_add_words
-+bn_mul_add_words:
-+	.set	noreorder
-+	bgtz	$a2,bn_mul_add_words_internal
-+	move	$v0,$zero
-+	jr	$ra
-+	move	$a0,$v0
-+.end	bn_mul_add_words
-+
-+.align	5
-+.ent	bn_mul_add_words_internal
-+bn_mul_add_words_internal:
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+	.frame	$sp,6*$SZREG,$ra
-+	.mask	0x8000f008,-$SZREG
-+	.set	noreorder
-+	$PTR_SUB $sp,6*$SZREG
-+	$REG_S	$ra,5*$SZREG($sp)
-+	$REG_S	$t3,4*$SZREG($sp)
-+	$REG_S	$t2,3*$SZREG($sp)
-+	$REG_S	$t1,2*$SZREG($sp)
-+	$REG_S	$t0,1*$SZREG($sp)
-+	$REG_S	$gp,0*$SZREG($sp)
-+___
-+$code.=<<___;
-+	.set	reorder
-+	li	$minus4,-4
-+	and	$ta0,$a2,$minus4
-+	$LD	$t0,0($a1)
-+	beqz	$ta0,.L_bn_mul_add_words_tail
-+
-+.L_bn_mul_add_words_loop:
-+	$MULTU	$t0,$a3
-+	$LD	$t1,0($a0)
-+	$LD	$t2,$BNSZ($a1)
-+	$LD	$t3,$BNSZ($a0)
-+	$LD	$ta0,2*$BNSZ($a1)
-+	$LD	$ta1,2*$BNSZ($a0)
-+	$ADDU	$t1,$v0
-+	sltu	$v0,$t1,$v0	# All manuals say it "compares 32-bit
-+				# values", but it seems to work fine
-+				# even on 64-bit registers.
-+	mflo	$at
-+	mfhi	$t0
-+	$ADDU	$t1,$at
-+	$ADDU	$v0,$t0
-+	 $MULTU	$t2,$a3
-+	sltu	$at,$t1,$at
-+	$ST	$t1,0($a0)
-+	$ADDU	$v0,$at
-+
-+	$LD	$ta2,3*$BNSZ($a1)
-+	$LD	$ta3,3*$BNSZ($a0)
-+	$ADDU	$t3,$v0
-+	sltu	$v0,$t3,$v0
-+	mflo	$at
-+	mfhi	$t2
-+	$ADDU	$t3,$at
-+	$ADDU	$v0,$t2
-+	 $MULTU	$ta0,$a3
-+	sltu	$at,$t3,$at
-+	$ST	$t3,$BNSZ($a0)
-+	$ADDU	$v0,$at
-+
-+	subu	$a2,4
-+	$PTR_ADD $a0,4*$BNSZ
-+	$PTR_ADD $a1,4*$BNSZ
-+	$ADDU	$ta1,$v0
-+	sltu	$v0,$ta1,$v0
-+	mflo	$at
-+	mfhi	$ta0
-+	$ADDU	$ta1,$at
-+	$ADDU	$v0,$ta0
-+	 $MULTU	$ta2,$a3
-+	sltu	$at,$ta1,$at
-+	$ST	$ta1,-2*$BNSZ($a0)
-+	$ADDU	$v0,$at
-+
-+
-+	and	$ta0,$a2,$minus4
-+	$ADDU	$ta3,$v0
-+	sltu	$v0,$ta3,$v0
-+	mflo	$at
-+	mfhi	$ta2
-+	$ADDU	$ta3,$at
-+	$ADDU	$v0,$ta2
-+	sltu	$at,$ta3,$at
-+	$ST	$ta3,-$BNSZ($a0)
-+	$ADDU	$v0,$at
-+	.set	noreorder
-+	bgtzl	$ta0,.L_bn_mul_add_words_loop
-+	$LD	$t0,0($a1)
-+
-+	beqz	$a2,.L_bn_mul_add_words_return
-+	nop
-+
-+.L_bn_mul_add_words_tail:
-+	.set	reorder
-+	$LD	$t0,0($a1)
-+	$MULTU	$t0,$a3
-+	$LD	$t1,0($a0)
-+	subu	$a2,1
-+	$ADDU	$t1,$v0
-+	sltu	$v0,$t1,$v0
-+	mflo	$at
-+	mfhi	$t0
-+	$ADDU	$t1,$at
-+	$ADDU	$v0,$t0
-+	sltu	$at,$t1,$at
-+	$ST	$t1,0($a0)
-+	$ADDU	$v0,$at
-+	beqz	$a2,.L_bn_mul_add_words_return
-+
-+	$LD	$t0,$BNSZ($a1)
-+	$MULTU	$t0,$a3
-+	$LD	$t1,$BNSZ($a0)
-+	subu	$a2,1
-+	$ADDU	$t1,$v0
-+	sltu	$v0,$t1,$v0
-+	mflo	$at
-+	mfhi	$t0
-+	$ADDU	$t1,$at
-+	$ADDU	$v0,$t0
-+	sltu	$at,$t1,$at
-+	$ST	$t1,$BNSZ($a0)
-+	$ADDU	$v0,$at
-+	beqz	$a2,.L_bn_mul_add_words_return
-+
-+	$LD	$t0,2*$BNSZ($a1)
-+	$MULTU	$t0,$a3
-+	$LD	$t1,2*$BNSZ($a0)
-+	$ADDU	$t1,$v0
-+	sltu	$v0,$t1,$v0
-+	mflo	$at
-+	mfhi	$t0
-+	$ADDU	$t1,$at
-+	$ADDU	$v0,$t0
-+	sltu	$at,$t1,$at
-+	$ST	$t1,2*$BNSZ($a0)
-+	$ADDU	$v0,$at
-+
-+.L_bn_mul_add_words_return:
-+	.set	noreorder
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+	$REG_L	$t3,4*$SZREG($sp)
-+	$REG_L	$t2,3*$SZREG($sp)
-+	$REG_L	$t1,2*$SZREG($sp)
-+	$REG_L	$t0,1*$SZREG($sp)
-+	$REG_L	$gp,0*$SZREG($sp)
-+	$PTR_ADD $sp,6*$SZREG
-+___
-+$code.=<<___;
-+	jr	$ra
-+	move	$a0,$v0
-+.end	bn_mul_add_words_internal
-+
-+.align	5
-+.globl	bn_mul_words
-+.ent	bn_mul_words
-+bn_mul_words:
-+	.set	noreorder
-+	bgtz	$a2,bn_mul_words_internal
-+	move	$v0,$zero
-+	jr	$ra
-+	move	$a0,$v0
-+.end	bn_mul_words
-+
-+.align	5
-+.ent	bn_mul_words_internal
-+bn_mul_words_internal:
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+	.frame	$sp,6*$SZREG,$ra
-+	.mask	0x8000f008,-$SZREG
-+	.set	noreorder
-+	$PTR_SUB $sp,6*$SZREG
-+	$REG_S	$ra,5*$SZREG($sp)
-+	$REG_S	$t3,4*$SZREG($sp)
-+	$REG_S	$t2,3*$SZREG($sp)
-+	$REG_S	$t1,2*$SZREG($sp)
-+	$REG_S	$t0,1*$SZREG($sp)
-+	$REG_S	$gp,0*$SZREG($sp)
-+___
-+$code.=<<___;
-+	.set	reorder
-+	li	$minus4,-4
-+	and	$ta0,$a2,$minus4
-+	$LD	$t0,0($a1)
-+	beqz	$ta0,.L_bn_mul_words_tail
-+
-+.L_bn_mul_words_loop:
-+	$MULTU	$t0,$a3
-+	$LD	$t2,$BNSZ($a1)
-+	$LD	$ta0,2*$BNSZ($a1)
-+	$LD	$ta2,3*$BNSZ($a1)
-+	mflo	$at
-+	mfhi	$t0
-+	$ADDU	$v0,$at
-+	sltu	$t1,$v0,$at
-+	 $MULTU	$t2,$a3
-+	$ST	$v0,0($a0)
-+	$ADDU	$v0,$t1,$t0
-+
-+	subu	$a2,4
-+	$PTR_ADD $a0,4*$BNSZ
-+	$PTR_ADD $a1,4*$BNSZ
-+	mflo	$at
-+	mfhi	$t2
-+	$ADDU	$v0,$at
-+	sltu	$t3,$v0,$at
-+	 $MULTU	$ta0,$a3
-+	$ST	$v0,-3*$BNSZ($a0)
-+	$ADDU	$v0,$t3,$t2
-+
-+	mflo	$at
-+	mfhi	$ta0
-+	$ADDU	$v0,$at
-+	sltu	$ta1,$v0,$at
-+	 $MULTU	$ta2,$a3
-+	$ST	$v0,-2*$BNSZ($a0)
-+	$ADDU	$v0,$ta1,$ta0
-+
-+	and	$ta0,$a2,$minus4
-+	mflo	$at
-+	mfhi	$ta2
-+	$ADDU	$v0,$at
-+	sltu	$ta3,$v0,$at
-+	$ST	$v0,-$BNSZ($a0)
-+	$ADDU	$v0,$ta3,$ta2
-+	.set	noreorder
-+	bgtzl	$ta0,.L_bn_mul_words_loop
-+	$LD	$t0,0($a1)
-+
-+	beqz	$a2,.L_bn_mul_words_return
-+	nop
-+
-+.L_bn_mul_words_tail:
-+	.set	reorder
-+	$LD	$t0,0($a1)
-+	$MULTU	$t0,$a3
-+	subu	$a2,1
-+	mflo	$at
-+	mfhi	$t0
-+	$ADDU	$v0,$at
-+	sltu	$t1,$v0,$at
-+	$ST	$v0,0($a0)
-+	$ADDU	$v0,$t1,$t0
-+	beqz	$a2,.L_bn_mul_words_return
-+
-+	$LD	$t0,$BNSZ($a1)
-+	$MULTU	$t0,$a3
-+	subu	$a2,1
-+	mflo	$at
-+	mfhi	$t0
-+	$ADDU	$v0,$at
-+	sltu	$t1,$v0,$at
-+	$ST	$v0,$BNSZ($a0)
-+	$ADDU	$v0,$t1,$t0
-+	beqz	$a2,.L_bn_mul_words_return
-+
-+	$LD	$t0,2*$BNSZ($a1)
-+	$MULTU	$t0,$a3
-+	mflo	$at
-+	mfhi	$t0
-+	$ADDU	$v0,$at
-+	sltu	$t1,$v0,$at
-+	$ST	$v0,2*$BNSZ($a0)
-+	$ADDU	$v0,$t1,$t0
-+
-+.L_bn_mul_words_return:
-+	.set	noreorder
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+	$REG_L	$t3,4*$SZREG($sp)
-+	$REG_L	$t2,3*$SZREG($sp)
-+	$REG_L	$t1,2*$SZREG($sp)
-+	$REG_L	$t0,1*$SZREG($sp)
-+	$REG_L	$gp,0*$SZREG($sp)
-+	$PTR_ADD $sp,6*$SZREG
-+___
-+$code.=<<___;
-+	jr	$ra
-+	move	$a0,$v0
-+.end	bn_mul_words_internal
-+
-+.align	5
-+.globl	bn_sqr_words
-+.ent	bn_sqr_words
-+bn_sqr_words:
-+	.set	noreorder
-+	bgtz	$a2,bn_sqr_words_internal
-+	move	$v0,$zero
-+	jr	$ra
-+	move	$a0,$v0
-+.end	bn_sqr_words
-+
-+.align	5
-+.ent	bn_sqr_words_internal
-+bn_sqr_words_internal:
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+	.frame	$sp,6*$SZREG,$ra
-+	.mask	0x8000f008,-$SZREG
-+	.set	noreorder
-+	$PTR_SUB $sp,6*$SZREG
-+	$REG_S	$ra,5*$SZREG($sp)
-+	$REG_S	$t3,4*$SZREG($sp)
-+	$REG_S	$t2,3*$SZREG($sp)
-+	$REG_S	$t1,2*$SZREG($sp)
-+	$REG_S	$t0,1*$SZREG($sp)
-+	$REG_S	$gp,0*$SZREG($sp)
-+___
-+$code.=<<___;
-+	.set	reorder
-+	li	$minus4,-4
-+	and	$ta0,$a2,$minus4
-+	$LD	$t0,0($a1)
-+	beqz	$ta0,.L_bn_sqr_words_tail
-+
-+.L_bn_sqr_words_loop:
-+	$MULTU	$t0,$t0
-+	$LD	$t2,$BNSZ($a1)
-+	$LD	$ta0,2*$BNSZ($a1)
-+	$LD	$ta2,3*$BNSZ($a1)
-+	mflo	$t1
-+	mfhi	$t0
-+	$ST	$t1,0($a0)
-+	$ST	$t0,$BNSZ($a0)
-+
-+	$MULTU	$t2,$t2
-+	subu	$a2,4
-+	$PTR_ADD $a0,8*$BNSZ
-+	$PTR_ADD $a1,4*$BNSZ
-+	mflo	$t3
-+	mfhi	$t2
-+	$ST	$t3,-6*$BNSZ($a0)
-+	$ST	$t2,-5*$BNSZ($a0)
-+
-+	$MULTU	$ta0,$ta0
-+	mflo	$ta1
-+	mfhi	$ta0
-+	$ST	$ta1,-4*$BNSZ($a0)
-+	$ST	$ta0,-3*$BNSZ($a0)
-+
-+
-+	$MULTU	$ta2,$ta2
-+	and	$ta0,$a2,$minus4
-+	mflo	$ta3
-+	mfhi	$ta2
-+	$ST	$ta3,-2*$BNSZ($a0)
-+	$ST	$ta2,-$BNSZ($a0)
-+
-+	.set	noreorder
-+	bgtzl	$ta0,.L_bn_sqr_words_loop
-+	$LD	$t0,0($a1)
-+
-+	beqz	$a2,.L_bn_sqr_words_return
-+	nop
-+
-+.L_bn_sqr_words_tail:
-+	.set	reorder
-+	$LD	$t0,0($a1)
-+	$MULTU	$t0,$t0
-+	subu	$a2,1
-+	mflo	$t1
-+	mfhi	$t0
-+	$ST	$t1,0($a0)
-+	$ST	$t0,$BNSZ($a0)
-+	beqz	$a2,.L_bn_sqr_words_return
-+
-+	$LD	$t0,$BNSZ($a1)
-+	$MULTU	$t0,$t0
-+	subu	$a2,1
-+	mflo	$t1
-+	mfhi	$t0
-+	$ST	$t1,2*$BNSZ($a0)
-+	$ST	$t0,3*$BNSZ($a0)
-+	beqz	$a2,.L_bn_sqr_words_return
-+
-+	$LD	$t0,2*$BNSZ($a1)
-+	$MULTU	$t0,$t0
-+	mflo	$t1
-+	mfhi	$t0
-+	$ST	$t1,4*$BNSZ($a0)
-+	$ST	$t0,5*$BNSZ($a0)
-+
-+.L_bn_sqr_words_return:
-+	.set	noreorder
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+	$REG_L	$t3,4*$SZREG($sp)
-+	$REG_L	$t2,3*$SZREG($sp)
-+	$REG_L	$t1,2*$SZREG($sp)
-+	$REG_L	$t0,1*$SZREG($sp)
-+	$REG_L	$gp,0*$SZREG($sp)
-+	$PTR_ADD $sp,6*$SZREG
-+___
-+$code.=<<___;
-+	jr	$ra
-+	move	$a0,$v0
-+
-+.end	bn_sqr_words_internal
-+
-+.align	5
-+.globl	bn_add_words
-+.ent	bn_add_words
-+bn_add_words:
-+	.set	noreorder
-+	bgtz	$a3,bn_add_words_internal
-+	move	$v0,$zero
-+	jr	$ra
-+	move	$a0,$v0
-+.end	bn_add_words
-+
-+.align	5
-+.ent	bn_add_words_internal
-+bn_add_words_internal:
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+	.frame	$sp,6*$SZREG,$ra
-+	.mask	0x8000f008,-$SZREG
-+	.set	noreorder
-+	$PTR_SUB $sp,6*$SZREG
-+	$REG_S	$ra,5*$SZREG($sp)
-+	$REG_S	$t3,4*$SZREG($sp)
-+	$REG_S	$t2,3*$SZREG($sp)
-+	$REG_S	$t1,2*$SZREG($sp)
-+	$REG_S	$t0,1*$SZREG($sp)
-+	$REG_S	$gp,0*$SZREG($sp)
-+___
-+$code.=<<___;
-+	.set	reorder
-+	li	$minus4,-4
-+	and	$at,$a3,$minus4
-+	$LD	$t0,0($a1)
-+	beqz	$at,.L_bn_add_words_tail
-+
-+.L_bn_add_words_loop:
-+	$LD	$ta0,0($a2)
-+	subu	$a3,4
-+	$LD	$t1,$BNSZ($a1)
-+	and	$at,$a3,$minus4
-+	$LD	$t2,2*$BNSZ($a1)
-+	$PTR_ADD $a2,4*$BNSZ
-+	$LD	$t3,3*$BNSZ($a1)
-+	$PTR_ADD $a0,4*$BNSZ
-+	$LD	$ta1,-3*$BNSZ($a2)
-+	$PTR_ADD $a1,4*$BNSZ
-+	$LD	$ta2,-2*$BNSZ($a2)
-+	$LD	$ta3,-$BNSZ($a2)
-+	$ADDU	$ta0,$t0
-+	sltu	$t8,$ta0,$t0
-+	$ADDU	$t0,$ta0,$v0
-+	sltu	$v0,$t0,$ta0
-+	$ST	$t0,-4*$BNSZ($a0)
-+	$ADDU	$v0,$t8
-+
-+	$ADDU	$ta1,$t1
-+	sltu	$t9,$ta1,$t1
-+	$ADDU	$t1,$ta1,$v0
-+	sltu	$v0,$t1,$ta1
-+	$ST	$t1,-3*$BNSZ($a0)
-+	$ADDU	$v0,$t9
-+
-+	$ADDU	$ta2,$t2
-+	sltu	$t8,$ta2,$t2
-+	$ADDU	$t2,$ta2,$v0
-+	sltu	$v0,$t2,$ta2
-+	$ST	$t2,-2*$BNSZ($a0)
-+	$ADDU	$v0,$t8
-+
-+	$ADDU	$ta3,$t3
-+	sltu	$t9,$ta3,$t3
-+	$ADDU	$t3,$ta3,$v0
-+	sltu	$v0,$t3,$ta3
-+	$ST	$t3,-$BNSZ($a0)
-+	$ADDU	$v0,$t9
-+
-+	.set	noreorder
-+	bgtzl	$at,.L_bn_add_words_loop
-+	$LD	$t0,0($a1)
-+
-+	beqz	$a3,.L_bn_add_words_return
-+	nop
-+
-+.L_bn_add_words_tail:
-+	.set	reorder
-+	$LD	$t0,0($a1)
-+	$LD	$ta0,0($a2)
-+	$ADDU	$ta0,$t0
-+	subu	$a3,1
-+	sltu	$t8,$ta0,$t0
-+	$ADDU	$t0,$ta0,$v0
-+	sltu	$v0,$t0,$ta0
-+	$ST	$t0,0($a0)
-+	$ADDU	$v0,$t8
-+	beqz	$a3,.L_bn_add_words_return
-+
-+	$LD	$t1,$BNSZ($a1)
-+	$LD	$ta1,$BNSZ($a2)
-+	$ADDU	$ta1,$t1
-+	subu	$a3,1
-+	sltu	$t9,$ta1,$t1
-+	$ADDU	$t1,$ta1,$v0
-+	sltu	$v0,$t1,$ta1
-+	$ST	$t1,$BNSZ($a0)
-+	$ADDU	$v0,$t9
-+	beqz	$a3,.L_bn_add_words_return
-+
-+	$LD	$t2,2*$BNSZ($a1)
-+	$LD	$ta2,2*$BNSZ($a2)
-+	$ADDU	$ta2,$t2
-+	sltu	$t8,$ta2,$t2
-+	$ADDU	$t2,$ta2,$v0
-+	sltu	$v0,$t2,$ta2
-+	$ST	$t2,2*$BNSZ($a0)
-+	$ADDU	$v0,$t8
-+
-+.L_bn_add_words_return:
-+	.set	noreorder
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+	$REG_L	$t3,4*$SZREG($sp)
-+	$REG_L	$t2,3*$SZREG($sp)
-+	$REG_L	$t1,2*$SZREG($sp)
-+	$REG_L	$t0,1*$SZREG($sp)
-+	$REG_L	$gp,0*$SZREG($sp)
-+	$PTR_ADD $sp,6*$SZREG
-+___
-+$code.=<<___;
-+	jr	$ra
-+	move	$a0,$v0
-+
-+.end	bn_add_words_internal
-+
-+.align	5
-+.globl	bn_sub_words
-+.ent	bn_sub_words
-+bn_sub_words:
-+	.set	noreorder
-+	bgtz	$a3,bn_sub_words_internal
-+	move	$v0,$zero
-+	jr	$ra
-+	move	$a0,$zero
-+.end	bn_sub_words
-+
-+.align	5
-+.ent	bn_sub_words_internal
-+bn_sub_words_internal:
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+	.frame	$sp,6*$SZREG,$ra
-+	.mask	0x8000f008,-$SZREG
-+	.set	noreorder
-+	$PTR_SUB $sp,6*$SZREG
-+	$REG_S	$ra,5*$SZREG($sp)
-+	$REG_S	$t3,4*$SZREG($sp)
-+	$REG_S	$t2,3*$SZREG($sp)
-+	$REG_S	$t1,2*$SZREG($sp)
-+	$REG_S	$t0,1*$SZREG($sp)
-+	$REG_S	$gp,0*$SZREG($sp)
-+___
-+$code.=<<___;
-+	.set	reorder
-+	li	$minus4,-4
-+	and	$at,$a3,$minus4
-+	$LD	$t0,0($a1)
-+	beqz	$at,.L_bn_sub_words_tail
-+
-+.L_bn_sub_words_loop:
-+	$LD	$ta0,0($a2)
-+	subu	$a3,4
-+	$LD	$t1,$BNSZ($a1)
-+	and	$at,$a3,$minus4
-+	$LD	$t2,2*$BNSZ($a1)
-+	$PTR_ADD $a2,4*$BNSZ
-+	$LD	$t3,3*$BNSZ($a1)
-+	$PTR_ADD $a0,4*$BNSZ
-+	$LD	$ta1,-3*$BNSZ($a2)
-+	$PTR_ADD $a1,4*$BNSZ
-+	$LD	$ta2,-2*$BNSZ($a2)
-+	$LD	$ta3,-$BNSZ($a2)
-+	sltu	$t8,$t0,$ta0
-+	$SUBU	$ta0,$t0,$ta0
-+	$SUBU	$t0,$ta0,$v0
-+	sgtu	$v0,$t0,$ta0
-+	$ST	$t0,-4*$BNSZ($a0)
-+	$ADDU	$v0,$t8
-+
-+	sltu	$t9,$t1,$ta1
-+	$SUBU	$ta1,$t1,$ta1
-+	$SUBU	$t1,$ta1,$v0
-+	sgtu	$v0,$t1,$ta1
-+	$ST	$t1,-3*$BNSZ($a0)
-+	$ADDU	$v0,$t9
-+
-+
-+	sltu	$t8,$t2,$ta2
-+	$SUBU	$ta2,$t2,$ta2
-+	$SUBU	$t2,$ta2,$v0
-+	sgtu	$v0,$t2,$ta2
-+	$ST	$t2,-2*$BNSZ($a0)
-+	$ADDU	$v0,$t8
-+
-+	sltu	$t9,$t3,$ta3
-+	$SUBU	$ta3,$t3,$ta3
-+	$SUBU	$t3,$ta3,$v0
-+	sgtu	$v0,$t3,$ta3
-+	$ST	$t3,-$BNSZ($a0)
-+	$ADDU	$v0,$t9
-+
-+	.set	noreorder
-+	bgtzl	$at,.L_bn_sub_words_loop
-+	$LD	$t0,0($a1)
-+
-+	beqz	$a3,.L_bn_sub_words_return
-+	nop
-+
-+.L_bn_sub_words_tail:
-+	.set	reorder
-+	$LD	$t0,0($a1)
-+	$LD	$ta0,0($a2)
-+	subu	$a3,1
-+	sltu	$t8,$t0,$ta0
-+	$SUBU	$ta0,$t0,$ta0
-+	$SUBU	$t0,$ta0,$v0
-+	sgtu	$v0,$t0,$ta0
-+	$ST	$t0,0($a0)
-+	$ADDU	$v0,$t8
-+	beqz	$a3,.L_bn_sub_words_return
-+
-+	$LD	$t1,$BNSZ($a1)
-+	subu	$a3,1
-+	$LD	$ta1,$BNSZ($a2)
-+	sltu	$t9,$t1,$ta1
-+	$SUBU	$ta1,$t1,$ta1
-+	$SUBU	$t1,$ta1,$v0
-+	sgtu	$v0,$t1,$ta1
-+	$ST	$t1,$BNSZ($a0)
-+	$ADDU	$v0,$t9
-+	beqz	$a3,.L_bn_sub_words_return
-+
-+	$LD	$t2,2*$BNSZ($a1)
-+	$LD	$ta2,2*$BNSZ($a2)
-+	sltu	$t8,$t2,$ta2
-+	$SUBU	$ta2,$t2,$ta2
-+	$SUBU	$t2,$ta2,$v0
-+	sgtu	$v0,$t2,$ta2
-+	$ST	$t2,2*$BNSZ($a0)
-+	$ADDU	$v0,$t8
-+
-+.L_bn_sub_words_return:
-+	.set	noreorder
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+	$REG_L	$t3,4*$SZREG($sp)
-+	$REG_L	$t2,3*$SZREG($sp)
-+	$REG_L	$t1,2*$SZREG($sp)
-+	$REG_L	$t0,1*$SZREG($sp)
-+	$REG_L	$gp,0*$SZREG($sp)
-+	$PTR_ADD $sp,6*$SZREG
-+___
-+$code.=<<___;
-+	jr	$ra
-+	move	$a0,$v0
-+.end	bn_sub_words_internal
-+
-+.align 5
-+.globl	bn_div_3_words
-+.ent	bn_div_3_words
-+bn_div_3_words:
-+	.set	noreorder
-+	move	$a3,$a0		# we know that bn_div_words does not
-+				# touch $a3, $ta2, $ta3 and preserves $a2
-+				# so that we can save two arguments
-+				# and return address in registers
-+				# instead of stack:-)
-+
-+	$LD	$a0,($a3)
-+	move	$ta2,$a1
-+	bne	$a0,$a2,bn_div_3_words_internal
-+	$LD	$a1,-$BNSZ($a3)
-+	li	$v0,-1
-+	jr	$ra
-+	move	$a0,$v0
-+.end	bn_div_3_words
-+
-+.align	5
-+.ent	bn_div_3_words_internal
-+bn_div_3_words_internal:
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+	.frame	$sp,6*$SZREG,$ra
-+	.mask	0x8000f008,-$SZREG
-+	.set	noreorder
-+	$PTR_SUB $sp,6*$SZREG
-+	$REG_S	$ra,5*$SZREG($sp)
-+	$REG_S	$t3,4*$SZREG($sp)
-+	$REG_S	$t2,3*$SZREG($sp)
-+	$REG_S	$t1,2*$SZREG($sp)
-+	$REG_S	$t0,1*$SZREG($sp)
-+	$REG_S	$gp,0*$SZREG($sp)
-+___
-+$code.=<<___;
-+	.set	reorder
-+	move	$ta3,$ra
-+	bal	bn_div_words
-+	move	$ra,$ta3
-+	$MULTU	$ta2,$v0
-+	$LD	$t2,-2*$BNSZ($a3)
-+	move	$ta0,$zero
-+	mfhi	$t1
-+	mflo	$t0
-+	sltu	$t8,$t1,$a1
-+.L_bn_div_3_words_inner_loop:
-+	bnez	$t8,.L_bn_div_3_words_inner_loop_done
-+	sgeu	$at,$t2,$t0
-+	seq	$t9,$t1,$a1
-+	and	$at,$t9
-+	sltu	$t3,$t0,$ta2
-+	$ADDU	$a1,$a2
-+	$SUBU	$t1,$t3
-+	$SUBU	$t0,$ta2
-+	sltu	$t8,$t1,$a1
-+	sltu	$ta0,$a1,$a2
-+	or	$t8,$ta0
-+	.set	noreorder
-+	beqzl	$at,.L_bn_div_3_words_inner_loop
-+	$SUBU	$v0,1
-+	.set	reorder
-+.L_bn_div_3_words_inner_loop_done:
-+	.set	noreorder
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+	$REG_L	$t3,4*$SZREG($sp)
-+	$REG_L	$t2,3*$SZREG($sp)
-+	$REG_L	$t1,2*$SZREG($sp)
-+	$REG_L	$t0,1*$SZREG($sp)
-+	$REG_L	$gp,0*$SZREG($sp)
-+	$PTR_ADD $sp,6*$SZREG
-+___
-+$code.=<<___;
-+	jr	$ra
-+	move	$a0,$v0
-+.end	bn_div_3_words_internal
-+
-+.align	5
-+.globl	bn_div_words
-+.ent	bn_div_words
-+bn_div_words:
-+	.set	noreorder
-+	bnez	$a2,bn_div_words_internal
-+	li	$v0,-1		# I would rather signal div-by-zero
-+				# which can be done with 'break 7'
-+	jr	$ra
-+	move	$a0,$v0
-+.end	bn_div_words
-+
-+.align	5
-+.ent	bn_div_words_internal
-+bn_div_words_internal:
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+	.frame	$sp,6*$SZREG,$ra
-+	.mask	0x8000f008,-$SZREG
-+	.set	noreorder
-+	$PTR_SUB $sp,6*$SZREG
-+	$REG_S	$ra,5*$SZREG($sp)
-+	$REG_S	$t3,4*$SZREG($sp)
-+	$REG_S	$t2,3*$SZREG($sp)
-+	$REG_S	$t1,2*$SZREG($sp)
-+	$REG_S	$t0,1*$SZREG($sp)
-+	$REG_S	$gp,0*$SZREG($sp)
-+___
-+$code.=<<___;
-+	move	$v1,$zero
-+	bltz	$a2,.L_bn_div_words_body
-+	move	$t9,$v1
-+	$SLL	$a2,1
-+	bgtz	$a2,.-4
-+	addu	$t9,1
-+
-+	.set	reorder
-+	negu	$t1,$t9
-+	li	$t2,-1
-+	$SLL	$t2,$t1
-+	and	$t2,$a0
-+	$SRL	$at,$a1,$t1
-+	.set	noreorder
-+	bnezl	$t2,.+8
-+	break	6		# signal overflow
-+	.set	reorder
-+	$SLL	$a0,$t9
-+	$SLL	$a1,$t9
-+	or	$a0,$at
-+___
-+$QT=$ta0;
-+$HH=$ta1;
-+$DH=$v1;
-+$code.=<<___;
-+.L_bn_div_words_body:
-+	$SRL	$DH,$a2,4*$BNSZ	# bits
-+	sgeu	$at,$a0,$a2
-+	.set	noreorder
-+	bnezl	$at,.+8
-+	$SUBU	$a0,$a2
-+	.set	reorder
-+
-+	li	$QT,-1
-+	$SRL	$HH,$a0,4*$BNSZ	# bits
-+	$SRL	$QT,4*$BNSZ	# q=0xffffffff
-+	beq	$DH,$HH,.L_bn_div_words_skip_div1
-+	$DIVU	$zero,$a0,$DH
-+	mflo	$QT
-+.L_bn_div_words_skip_div1:
-+	$MULTU	$a2,$QT
-+	$SLL	$t3,$a0,4*$BNSZ	# bits
-+	$SRL	$at,$a1,4*$BNSZ	# bits
-+	or	$t3,$at
-+	mflo	$t0
-+	mfhi	$t1
-+.L_bn_div_words_inner_loop1:
-+	sltu	$t2,$t3,$t0
-+	seq	$t8,$HH,$t1
-+	sltu	$at,$HH,$t1
-+	and	$t2,$t8
-+	sltu	$v0,$t0,$a2
-+	or	$at,$t2
-+	.set	noreorder
-+	beqz	$at,.L_bn_div_words_inner_loop1_done
-+	$SUBU	$t1,$v0
-+	$SUBU	$t0,$a2
-+	b	.L_bn_div_words_inner_loop1
-+	$SUBU	$QT,1
-+	.set	reorder
-+.L_bn_div_words_inner_loop1_done:
-+
-+	$SLL	$a1,4*$BNSZ	# bits
-+	$SUBU	$a0,$t3,$t0
-+	$SLL	$v0,$QT,4*$BNSZ	# bits
-+
-+	li	$QT,-1
-+	$SRL	$HH,$a0,4*$BNSZ	# bits
-+	$SRL	$QT,4*$BNSZ	# q=0xffffffff
-+	beq	$DH,$HH,.L_bn_div_words_skip_div2
-+	$DIVU	$zero,$a0,$DH
-+	mflo	$QT
-+.L_bn_div_words_skip_div2:
-+	$MULTU	$a2,$QT
-+	$SLL	$t3,$a0,4*$BNSZ	# bits
-+	$SRL	$at,$a1,4*$BNSZ	# bits
-+	or	$t3,$at
-+	mflo	$t0
-+	mfhi	$t1
-+.L_bn_div_words_inner_loop2:
-+	sltu	$t2,$t3,$t0
-+	seq	$t8,$HH,$t1
-+	sltu	$at,$HH,$t1
-+	and	$t2,$t8
-+	sltu	$v1,$t0,$a2
-+	or	$at,$t2
-+	.set	noreorder
-+	beqz	$at,.L_bn_div_words_inner_loop2_done
-+	$SUBU	$t1,$v1
-+	$SUBU	$t0,$a2
-+	b	.L_bn_div_words_inner_loop2
-+	$SUBU	$QT,1
-+	.set	reorder
-+.L_bn_div_words_inner_loop2_done:
-+
-+	$SUBU	$a0,$t3,$t0
-+	or	$v0,$QT
-+	$SRL	$v1,$a0,$t9	# $v1 contains remainder if anybody wants it
-+	$SRL	$a2,$t9		# restore $a2
-+
-+	.set	noreorder
-+	move	$a1,$v1
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+	$REG_L	$t3,4*$SZREG($sp)
-+	$REG_L	$t2,3*$SZREG($sp)
-+	$REG_L	$t1,2*$SZREG($sp)
-+	$REG_L	$t0,1*$SZREG($sp)
-+	$REG_L	$gp,0*$SZREG($sp)
-+	$PTR_ADD $sp,6*$SZREG
-+___
-+$code.=<<___;
-+	jr	$ra
-+	move	$a0,$v0
-+.end	bn_div_words_internal
-+___
-+undef $HH; undef $QT; undef $DH;
-+
-+($a_0,$a_1,$a_2,$a_3)=($t0,$t1,$t2,$t3);
-+($b_0,$b_1,$b_2,$b_3)=($ta0,$ta1,$ta2,$ta3);
-+
-+($a_4,$a_5,$a_6,$a_7)=($s0,$s2,$s4,$a1); # once we load a[7], no use for $a1
-+($b_4,$b_5,$b_6,$b_7)=($s1,$s3,$s5,$a2); # once we load b[7], no use for $a2
-+
-+($t_1,$t_2,$c_1,$c_2,$c_3)=($t8,$t9,$v0,$v1,$a3);
-+
-+$code.=<<___;
-+
-+.align	5
-+.globl	bn_mul_comba8
-+.ent	bn_mul_comba8
-+bn_mul_comba8:
-+	.set	noreorder
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+	.frame	$sp,12*$SZREG,$ra
-+	.mask	0x803ff008,-$SZREG
-+	$PTR_SUB $sp,12*$SZREG
-+	$REG_S	$ra,11*$SZREG($sp)
-+	$REG_S	$s5,10*$SZREG($sp)
-+	$REG_S	$s4,9*$SZREG($sp)
-+	$REG_S	$s3,8*$SZREG($sp)
-+	$REG_S	$s2,7*$SZREG($sp)
-+	$REG_S	$s1,6*$SZREG($sp)
-+	$REG_S	$s0,5*$SZREG($sp)
-+	$REG_S	$t3,4*$SZREG($sp)
-+	$REG_S	$t2,3*$SZREG($sp)
-+	$REG_S	$t1,2*$SZREG($sp)
-+	$REG_S	$t0,1*$SZREG($sp)
-+	$REG_S	$gp,0*$SZREG($sp)
-+___
-+$code.=<<___ if ($flavour !~ /nubi/i);
-+	.frame	$sp,6*$SZREG,$ra
-+	.mask	0x003f0000,-$SZREG
-+	$PTR_SUB $sp,6*$SZREG
-+	$REG_S	$s5,5*$SZREG($sp)
-+	$REG_S	$s4,4*$SZREG($sp)
-+	$REG_S	$s3,3*$SZREG($sp)
-+	$REG_S	$s2,2*$SZREG($sp)
-+	$REG_S	$s1,1*$SZREG($sp)
-+	$REG_S	$s0,0*$SZREG($sp)
-+___
-+$code.=<<___;
-+
-+	.set	reorder
-+	$LD	$a_0,0($a1)	# If compiled with -mips3 option on
-+				# R5000 box assembler barks on this
-+				# 1ine with "should not have mult/div
-+				# as last instruction in bb (R10K
-+				# bug)" warning. If anybody out there
-+				# has a clue about how to circumvent
-+				# this do send me a note.
-+				#		<appro\@fy.chalmers.se>
-+
-+	$LD	$b_0,0($a2)
-+	$LD	$a_1,$BNSZ($a1)
-+	$LD	$a_2,2*$BNSZ($a1)
-+	$MULTU	$a_0,$b_0		# mul_add_c(a[0],b[0],c1,c2,c3);
-+	$LD	$a_3,3*$BNSZ($a1)
-+	$LD	$b_1,$BNSZ($a2)
-+	$LD	$b_2,2*$BNSZ($a2)
-+	$LD	$b_3,3*$BNSZ($a2)
-+	mflo	$c_1
-+	mfhi	$c_2
-+
-+	$LD	$a_4,4*$BNSZ($a1)
-+	$LD	$a_5,5*$BNSZ($a1)
-+	$MULTU	$a_0,$b_1		# mul_add_c(a[0],b[1],c2,c3,c1);
-+	$LD	$a_6,6*$BNSZ($a1)
-+	$LD	$a_7,7*$BNSZ($a1)
-+	$LD	$b_4,4*$BNSZ($a2)
-+	$LD	$b_5,5*$BNSZ($a2)
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_2,$t_1
-+	sltu	$at,$c_2,$t_1
-+	$MULTU	$a_1,$b_0		# mul_add_c(a[1],b[0],c2,c3,c1);
-+	$ADDU	$c_3,$t_2,$at
-+	$LD	$b_6,6*$BNSZ($a2)
-+	$LD	$b_7,7*$BNSZ($a2)
-+	$ST	$c_1,0($a0)	# r[0]=c1;
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_2,$t_1
-+	sltu	$at,$c_2,$t_1
-+	 $MULTU	$a_2,$b_0		# mul_add_c(a[2],b[0],c3,c1,c2);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_3,$t_2
-+	sltu	$c_1,$c_3,$t_2
-+	$ST	$c_2,$BNSZ($a0)	# r[1]=c2;
-+
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_3,$t_1
-+	sltu	$at,$c_3,$t_1
-+	$MULTU	$a_1,$b_1		# mul_add_c(a[1],b[1],c3,c1,c2);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_1,$t_2
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_3,$t_1
-+	sltu	$at,$c_3,$t_1
-+	$MULTU	$a_0,$b_2		# mul_add_c(a[0],b[2],c3,c1,c2);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_1,$t_2
-+	sltu	$c_2,$c_1,$t_2
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_3,$t_1
-+	sltu	$at,$c_3,$t_1
-+	 $MULTU	$a_0,$b_3		# mul_add_c(a[0],b[3],c1,c2,c3);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_1,$t_2
-+	sltu	$at,$c_1,$t_2
-+	$ADDU	$c_2,$at
-+	$ST	$c_3,2*$BNSZ($a0)	# r[2]=c3;
-+
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_1,$t_1
-+	sltu	$at,$c_1,$t_1
-+	$MULTU	$a_1,$b_2		# mul_add_c(a[1],b[2],c1,c2,c3);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_2,$t_2
-+	sltu	$c_3,$c_2,$t_2
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_1,$t_1
-+	sltu	$at,$c_1,$t_1
-+	$MULTU	$a_2,$b_1		# mul_add_c(a[2],b[1],c1,c2,c3);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_2,$t_2
-+	sltu	$at,$c_2,$t_2
-+	$ADDU	$c_3,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_1,$t_1
-+	sltu	$at,$c_1,$t_1
-+	$MULTU	$a_3,$b_0		# mul_add_c(a[3],b[0],c1,c2,c3);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_2,$t_2
-+	sltu	$at,$c_2,$t_2
-+	$ADDU	$c_3,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_1,$t_1
-+	sltu	$at,$c_1,$t_1
-+	 $MULTU	$a_4,$b_0		# mul_add_c(a[4],b[0],c2,c3,c1);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_2,$t_2
-+	sltu	$at,$c_2,$t_2
-+	$ADDU	$c_3,$at
-+	$ST	$c_1,3*$BNSZ($a0)	# r[3]=c1;
-+
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_2,$t_1
-+	sltu	$at,$c_2,$t_1
-+	$MULTU	$a_3,$b_1		# mul_add_c(a[3],b[1],c2,c3,c1);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_3,$t_2
-+	sltu	$c_1,$c_3,$t_2
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_2,$t_1
-+	sltu	$at,$c_2,$t_1
-+	$MULTU	$a_2,$b_2		# mul_add_c(a[2],b[2],c2,c3,c1);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_3,$t_2
-+	sltu	$at,$c_3,$t_2
-+	$ADDU	$c_1,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_2,$t_1
-+	sltu	$at,$c_2,$t_1
-+	$MULTU	$a_1,$b_3		# mul_add_c(a[1],b[3],c2,c3,c1);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_3,$t_2
-+	sltu	$at,$c_3,$t_2
-+	$ADDU	$c_1,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_2,$t_1
-+	sltu	$at,$c_2,$t_1
-+	$MULTU	$a_0,$b_4		# mul_add_c(a[0],b[4],c2,c3,c1);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_3,$t_2
-+	sltu	$at,$c_3,$t_2
-+	$ADDU	$c_1,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_2,$t_1
-+	sltu	$at,$c_2,$t_1
-+	 $MULTU	$a_0,$b_5		# mul_add_c(a[0],b[5],c3,c1,c2);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_3,$t_2
-+	sltu	$at,$c_3,$t_2
-+	$ADDU	$c_1,$at
-+	$ST	$c_2,4*$BNSZ($a0)	# r[4]=c2;
-+
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_3,$t_1
-+	sltu	$at,$c_3,$t_1
-+	$MULTU	$a_1,$b_4		# mul_add_c(a[1],b[4],c3,c1,c2);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_1,$t_2
-+	sltu	$c_2,$c_1,$t_2
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_3,$t_1
-+	sltu	$at,$c_3,$t_1
-+	$MULTU	$a_2,$b_3		# mul_add_c(a[2],b[3],c3,c1,c2);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_1,$t_2
-+	sltu	$at,$c_1,$t_2
-+	$ADDU	$c_2,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_3,$t_1
-+	sltu	$at,$c_3,$t_1
-+	$MULTU	$a_3,$b_2		# mul_add_c(a[3],b[2],c3,c1,c2);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_1,$t_2
-+	sltu	$at,$c_1,$t_2
-+	$ADDU	$c_2,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_3,$t_1
-+	sltu	$at,$c_3,$t_1
-+	$MULTU	$a_4,$b_1		# mul_add_c(a[4],b[1],c3,c1,c2);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_1,$t_2
-+	sltu	$at,$c_1,$t_2
-+	$ADDU	$c_2,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_3,$t_1
-+	sltu	$at,$c_3,$t_1
-+	$MULTU	$a_5,$b_0		# mul_add_c(a[5],b[0],c3,c1,c2);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_1,$t_2
-+	sltu	$at,$c_1,$t_2
-+	$ADDU	$c_2,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_3,$t_1
-+	sltu	$at,$c_3,$t_1
-+	 $MULTU	$a_6,$b_0		# mul_add_c(a[6],b[0],c1,c2,c3);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_1,$t_2
-+	sltu	$at,$c_1,$t_2
-+	$ADDU	$c_2,$at
-+	$ST	$c_3,5*$BNSZ($a0)	# r[5]=c3;
-+
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_1,$t_1
-+	sltu	$at,$c_1,$t_1
-+	$MULTU	$a_5,$b_1		# mul_add_c(a[5],b[1],c1,c2,c3);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_2,$t_2
-+	sltu	$c_3,$c_2,$t_2
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_1,$t_1
-+	sltu	$at,$c_1,$t_1
-+	$MULTU	$a_4,$b_2		# mul_add_c(a[4],b[2],c1,c2,c3);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_2,$t_2
-+	sltu	$at,$c_2,$t_2
-+	$ADDU	$c_3,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_1,$t_1
-+	sltu	$at,$c_1,$t_1
-+	$MULTU	$a_3,$b_3		# mul_add_c(a[3],b[3],c1,c2,c3);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_2,$t_2
-+	sltu	$at,$c_2,$t_2
-+	$ADDU	$c_3,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_1,$t_1
-+	sltu	$at,$c_1,$t_1
-+	$MULTU	$a_2,$b_4		# mul_add_c(a[2],b[4],c1,c2,c3);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_2,$t_2
-+	sltu	$at,$c_2,$t_2
-+	$ADDU	$c_3,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_1,$t_1
-+	sltu	$at,$c_1,$t_1
-+	$MULTU	$a_1,$b_5		# mul_add_c(a[1],b[5],c1,c2,c3);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_2,$t_2
-+	sltu	$at,$c_2,$t_2
-+	$ADDU	$c_3,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_1,$t_1
-+	sltu	$at,$c_1,$t_1
-+	$MULTU	$a_0,$b_6		# mul_add_c(a[0],b[6],c1,c2,c3);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_2,$t_2
-+	sltu	$at,$c_2,$t_2
-+	$ADDU	$c_3,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_1,$t_1
-+	sltu	$at,$c_1,$t_1
-+	 $MULTU	$a_0,$b_7		# mul_add_c(a[0],b[7],c2,c3,c1);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_2,$t_2
-+	sltu	$at,$c_2,$t_2
-+	$ADDU	$c_3,$at
-+	$ST	$c_1,6*$BNSZ($a0)	# r[6]=c1;
-+
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_2,$t_1
-+	sltu	$at,$c_2,$t_1
-+	$MULTU	$a_1,$b_6		# mul_add_c(a[1],b[6],c2,c3,c1);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_3,$t_2
-+	sltu	$c_1,$c_3,$t_2
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_2,$t_1
-+	sltu	$at,$c_2,$t_1
-+	$MULTU	$a_2,$b_5		# mul_add_c(a[2],b[5],c2,c3,c1);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_3,$t_2
-+	sltu	$at,$c_3,$t_2
-+	$ADDU	$c_1,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_2,$t_1
-+	sltu	$at,$c_2,$t_1
-+	$MULTU	$a_3,$b_4		# mul_add_c(a[3],b[4],c2,c3,c1);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_3,$t_2
-+	sltu	$at,$c_3,$t_2
-+	$ADDU	$c_1,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_2,$t_1
-+	sltu	$at,$c_2,$t_1
-+	$MULTU	$a_4,$b_3		# mul_add_c(a[4],b[3],c2,c3,c1);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_3,$t_2
-+	sltu	$at,$c_3,$t_2
-+	$ADDU	$c_1,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_2,$t_1
-+	sltu	$at,$c_2,$t_1
-+	$MULTU	$a_5,$b_2		# mul_add_c(a[5],b[2],c2,c3,c1);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_3,$t_2
-+	sltu	$at,$c_3,$t_2
-+	$ADDU	$c_1,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_2,$t_1
-+	sltu	$at,$c_2,$t_1
-+	$MULTU	$a_6,$b_1		# mul_add_c(a[6],b[1],c2,c3,c1);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_3,$t_2
-+	sltu	$at,$c_3,$t_2
-+	$ADDU	$c_1,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_2,$t_1
-+	sltu	$at,$c_2,$t_1
-+	$MULTU	$a_7,$b_0		# mul_add_c(a[7],b[0],c2,c3,c1);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_3,$t_2
-+	sltu	$at,$c_3,$t_2
-+	$ADDU	$c_1,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_2,$t_1
-+	sltu	$at,$c_2,$t_1
-+	 $MULTU	$a_7,$b_1		# mul_add_c(a[7],b[1],c3,c1,c2);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_3,$t_2
-+	sltu	$at,$c_3,$t_2
-+	$ADDU	$c_1,$at
-+	$ST	$c_2,7*$BNSZ($a0)	# r[7]=c2;
-+
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_3,$t_1
-+	sltu	$at,$c_3,$t_1
-+	$MULTU	$a_6,$b_2		# mul_add_c(a[6],b[2],c3,c1,c2);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_1,$t_2
-+	sltu	$c_2,$c_1,$t_2
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_3,$t_1
-+	sltu	$at,$c_3,$t_1
-+	$MULTU	$a_5,$b_3		# mul_add_c(a[5],b[3],c3,c1,c2);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_1,$t_2
-+	sltu	$at,$c_1,$t_2
-+	$ADDU	$c_2,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_3,$t_1
-+	sltu	$at,$c_3,$t_1
-+	$MULTU	$a_4,$b_4		# mul_add_c(a[4],b[4],c3,c1,c2);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_1,$t_2
-+	sltu	$at,$c_1,$t_2
-+	$ADDU	$c_2,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_3,$t_1
-+	sltu	$at,$c_3,$t_1
-+	$MULTU	$a_3,$b_5		# mul_add_c(a[3],b[5],c3,c1,c2);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_1,$t_2
-+	sltu	$at,$c_1,$t_2
-+	$ADDU	$c_2,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_3,$t_1
-+	sltu	$at,$c_3,$t_1
-+	$MULTU	$a_2,$b_6		# mul_add_c(a[2],b[6],c3,c1,c2);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_1,$t_2
-+	sltu	$at,$c_1,$t_2
-+	$ADDU	$c_2,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_3,$t_1
-+	sltu	$at,$c_3,$t_1
-+	$MULTU	$a_1,$b_7		# mul_add_c(a[1],b[7],c3,c1,c2);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_1,$t_2
-+	sltu	$at,$c_1,$t_2
-+	$ADDU	$c_2,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_3,$t_1
-+	sltu	$at,$c_3,$t_1
-+	 $MULTU	$a_2,$b_7		# mul_add_c(a[2],b[7],c1,c2,c3);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_1,$t_2
-+	sltu	$at,$c_1,$t_2
-+	$ADDU	$c_2,$at
-+	$ST	$c_3,8*$BNSZ($a0)	# r[8]=c3;
-+
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_1,$t_1
-+	sltu	$at,$c_1,$t_1
-+	$MULTU	$a_3,$b_6		# mul_add_c(a[3],b[6],c1,c2,c3);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_2,$t_2
-+	sltu	$c_3,$c_2,$t_2
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_1,$t_1
-+	sltu	$at,$c_1,$t_1
-+	$MULTU	$a_4,$b_5		# mul_add_c(a[4],b[5],c1,c2,c3);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_2,$t_2
-+	sltu	$at,$c_2,$t_2
-+	$ADDU	$c_3,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_1,$t_1
-+	sltu	$at,$c_1,$t_1
-+	$MULTU	$a_5,$b_4		# mul_add_c(a[5],b[4],c1,c2,c3);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_2,$t_2
-+	sltu	$at,$c_2,$t_2
-+	$ADDU	$c_3,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_1,$t_1
-+	sltu	$at,$c_1,$t_1
-+	$MULTU	$a_6,$b_3		# mul_add_c(a[6],b[3],c1,c2,c3);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_2,$t_2
-+	sltu	$at,$c_2,$t_2
-+	$ADDU	$c_3,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_1,$t_1
-+	sltu	$at,$c_1,$t_1
-+	$MULTU	$a_7,$b_2		# mul_add_c(a[7],b[2],c1,c2,c3);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_2,$t_2
-+	sltu	$at,$c_2,$t_2
-+	$ADDU	$c_3,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_1,$t_1
-+	sltu	$at,$c_1,$t_1
-+	 $MULTU	$a_7,$b_3		# mul_add_c(a[7],b[3],c2,c3,c1);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_2,$t_2
-+	sltu	$at,$c_2,$t_2
-+	$ADDU	$c_3,$at
-+	$ST	$c_1,9*$BNSZ($a0)	# r[9]=c1;
-+
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_2,$t_1
-+	sltu	$at,$c_2,$t_1
-+	$MULTU	$a_6,$b_4		# mul_add_c(a[6],b[4],c2,c3,c1);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_3,$t_2
-+	sltu	$c_1,$c_3,$t_2
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_2,$t_1
-+	sltu	$at,$c_2,$t_1
-+	$MULTU	$a_5,$b_5		# mul_add_c(a[5],b[5],c2,c3,c1);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_3,$t_2
-+	sltu	$at,$c_3,$t_2
-+	$ADDU	$c_1,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_2,$t_1
-+	sltu	$at,$c_2,$t_1
-+	$MULTU	$a_4,$b_6		# mul_add_c(a[4],b[6],c2,c3,c1);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_3,$t_2
-+	sltu	$at,$c_3,$t_2
-+	$ADDU	$c_1,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_2,$t_1
-+	sltu	$at,$c_2,$t_1
-+	$MULTU	$a_3,$b_7		# mul_add_c(a[3],b[7],c2,c3,c1);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_3,$t_2
-+	sltu	$at,$c_3,$t_2
-+	$ADDU	$c_1,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_2,$t_1
-+	sltu	$at,$c_2,$t_1
-+	$MULTU	$a_4,$b_7		# mul_add_c(a[4],b[7],c3,c1,c2);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_3,$t_2
-+	sltu	$at,$c_3,$t_2
-+	$ADDU	$c_1,$at
-+	$ST	$c_2,10*$BNSZ($a0)	# r[10]=c2;
-+
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_3,$t_1
-+	sltu	$at,$c_3,$t_1
-+	$MULTU	$a_5,$b_6		# mul_add_c(a[5],b[6],c3,c1,c2);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_1,$t_2
-+	sltu	$c_2,$c_1,$t_2
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_3,$t_1
-+	sltu	$at,$c_3,$t_1
-+	$MULTU	$a_6,$b_5		# mul_add_c(a[6],b[5],c3,c1,c2);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_1,$t_2
-+	sltu	$at,$c_1,$t_2
-+	$ADDU	$c_2,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_3,$t_1
-+	sltu	$at,$c_3,$t_1
-+	$MULTU	$a_7,$b_4		# mul_add_c(a[7],b[4],c3,c1,c2);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_1,$t_2
-+	sltu	$at,$c_1,$t_2
-+	$ADDU	$c_2,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_3,$t_1
-+	sltu	$at,$c_3,$t_1
-+	 $MULTU	$a_7,$b_5		# mul_add_c(a[7],b[5],c1,c2,c3);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_1,$t_2
-+	sltu	$at,$c_1,$t_2
-+	$ADDU	$c_2,$at
-+	$ST	$c_3,11*$BNSZ($a0)	# r[11]=c3;
-+
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_1,$t_1
-+	sltu	$at,$c_1,$t_1
-+	$MULTU	$a_6,$b_6		# mul_add_c(a[6],b[6],c1,c2,c3);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_2,$t_2
-+	sltu	$c_3,$c_2,$t_2
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_1,$t_1
-+	sltu	$at,$c_1,$t_1
-+	$MULTU	$a_5,$b_7		# mul_add_c(a[5],b[7],c1,c2,c3);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_2,$t_2
-+	sltu	$at,$c_2,$t_2
-+	$ADDU	$c_3,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_1,$t_1
-+	sltu	$at,$c_1,$t_1
-+	 $MULTU	$a_6,$b_7		# mul_add_c(a[6],b[7],c2,c3,c1);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_2,$t_2
-+	sltu	$at,$c_2,$t_2
-+	$ADDU	$c_3,$at
-+	$ST	$c_1,12*$BNSZ($a0)	# r[12]=c1;
-+
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_2,$t_1
-+	sltu	$at,$c_2,$t_1
-+	$MULTU	$a_7,$b_6		# mul_add_c(a[7],b[6],c2,c3,c1);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_3,$t_2
-+	sltu	$c_1,$c_3,$t_2
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_2,$t_1
-+	sltu	$at,$c_2,$t_1
-+	$MULTU	$a_7,$b_7		# mul_add_c(a[7],b[7],c3,c1,c2);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_3,$t_2
-+	sltu	$at,$c_3,$t_2
-+	$ADDU	$c_1,$at
-+	$ST	$c_2,13*$BNSZ($a0)	# r[13]=c2;
-+
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_3,$t_1
-+	sltu	$at,$c_3,$t_1
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_1,$t_2
-+	$ST	$c_3,14*$BNSZ($a0)	# r[14]=c3;
-+	$ST	$c_1,15*$BNSZ($a0)	# r[15]=c1;
-+
-+	.set	noreorder
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+	$REG_L	$s5,10*$SZREG($sp)
-+	$REG_L	$s4,9*$SZREG($sp)
-+	$REG_L	$s3,8*$SZREG($sp)
-+	$REG_L	$s2,7*$SZREG($sp)
-+	$REG_L	$s1,6*$SZREG($sp)
-+	$REG_L	$s0,5*$SZREG($sp)
-+	$REG_L	$t3,4*$SZREG($sp)
-+	$REG_L	$t2,3*$SZREG($sp)
-+	$REG_L	$t1,2*$SZREG($sp)
-+	$REG_L	$t0,1*$SZREG($sp)
-+	$REG_L	$gp,0*$SZREG($sp)
-+	jr	$ra
-+	$PTR_ADD $sp,12*$SZREG
-+___
-+$code.=<<___ if ($flavour !~ /nubi/i);
-+	$REG_L	$s5,5*$SZREG($sp)
-+	$REG_L	$s4,4*$SZREG($sp)
-+	$REG_L	$s3,3*$SZREG($sp)
-+	$REG_L	$s2,2*$SZREG($sp)
-+	$REG_L	$s1,1*$SZREG($sp)
-+	$REG_L	$s0,0*$SZREG($sp)
-+	jr	$ra
-+	$PTR_ADD $sp,6*$SZREG
-+___
-+$code.=<<___;
-+.end	bn_mul_comba8
-+
-+.align	5
-+.globl	bn_mul_comba4
-+.ent	bn_mul_comba4
-+bn_mul_comba4:
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+	.frame	$sp,6*$SZREG,$ra
-+	.mask	0x8000f008,-$SZREG
-+	.set	noreorder
-+	$PTR_SUB $sp,6*$SZREG
-+	$REG_S	$ra,5*$SZREG($sp)
-+	$REG_S	$t3,4*$SZREG($sp)
-+	$REG_S	$t2,3*$SZREG($sp)
-+	$REG_S	$t1,2*$SZREG($sp)
-+	$REG_S	$t0,1*$SZREG($sp)
-+	$REG_S	$gp,0*$SZREG($sp)
-+___
-+$code.=<<___;
-+	.set	reorder
-+	$LD	$a_0,0($a1)
-+	$LD	$b_0,0($a2)
-+	$LD	$a_1,$BNSZ($a1)
-+	$LD	$a_2,2*$BNSZ($a1)
-+	$MULTU	$a_0,$b_0		# mul_add_c(a[0],b[0],c1,c2,c3);
-+	$LD	$a_3,3*$BNSZ($a1)
-+	$LD	$b_1,$BNSZ($a2)
-+	$LD	$b_2,2*$BNSZ($a2)
-+	$LD	$b_3,3*$BNSZ($a2)
-+	mflo	$c_1
-+	mfhi	$c_2
-+	$ST	$c_1,0($a0)
-+
-+	$MULTU	$a_0,$b_1		# mul_add_c(a[0],b[1],c2,c3,c1);
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_2,$t_1
-+	sltu	$at,$c_2,$t_1
-+	$MULTU	$a_1,$b_0		# mul_add_c(a[1],b[0],c2,c3,c1);
-+	$ADDU	$c_3,$t_2,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_2,$t_1
-+	sltu	$at,$c_2,$t_1
-+	 $MULTU	$a_2,$b_0		# mul_add_c(a[2],b[0],c3,c1,c2);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_3,$t_2
-+	sltu	$c_1,$c_3,$t_2
-+	$ST	$c_2,$BNSZ($a0)
-+
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_3,$t_1
-+	sltu	$at,$c_3,$t_1
-+	$MULTU	$a_1,$b_1		# mul_add_c(a[1],b[1],c3,c1,c2);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_1,$t_2
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_3,$t_1
-+	sltu	$at,$c_3,$t_1
-+	$MULTU	$a_0,$b_2		# mul_add_c(a[0],b[2],c3,c1,c2);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_1,$t_2
-+	sltu	$c_2,$c_1,$t_2
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_3,$t_1
-+	sltu	$at,$c_3,$t_1
-+	 $MULTU	$a_0,$b_3		# mul_add_c(a[0],b[3],c1,c2,c3);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_1,$t_2
-+	sltu	$at,$c_1,$t_2
-+	$ADDU	$c_2,$at
-+	$ST	$c_3,2*$BNSZ($a0)
-+
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_1,$t_1
-+	sltu	$at,$c_1,$t_1
-+	$MULTU	$a_1,$b_2		# mul_add_c(a[1],b[2],c1,c2,c3);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_2,$t_2
-+	sltu	$c_3,$c_2,$t_2
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_1,$t_1
-+	sltu	$at,$c_1,$t_1
-+	$MULTU	$a_2,$b_1		# mul_add_c(a[2],b[1],c1,c2,c3);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_2,$t_2
-+	sltu	$at,$c_2,$t_2
-+	$ADDU	$c_3,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_1,$t_1
-+	sltu	$at,$c_1,$t_1
-+	$MULTU	$a_3,$b_0		# mul_add_c(a[3],b[0],c1,c2,c3);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_2,$t_2
-+	sltu	$at,$c_2,$t_2
-+	$ADDU	$c_3,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_1,$t_1
-+	sltu	$at,$c_1,$t_1
-+	 $MULTU	$a_3,$b_1		# mul_add_c(a[3],b[1],c2,c3,c1);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_2,$t_2
-+	sltu	$at,$c_2,$t_2
-+	$ADDU	$c_3,$at
-+	$ST	$c_1,3*$BNSZ($a0)
-+
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_2,$t_1
-+	sltu	$at,$c_2,$t_1
-+	$MULTU	$a_2,$b_2		# mul_add_c(a[2],b[2],c2,c3,c1);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_3,$t_2
-+	sltu	$c_1,$c_3,$t_2
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_2,$t_1
-+	sltu	$at,$c_2,$t_1
-+	$MULTU	$a_1,$b_3		# mul_add_c(a[1],b[3],c2,c3,c1);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_3,$t_2
-+	sltu	$at,$c_3,$t_2
-+	$ADDU	$c_1,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_2,$t_1
-+	sltu	$at,$c_2,$t_1
-+	 $MULTU	$a_2,$b_3		# mul_add_c(a[2],b[3],c3,c1,c2);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_3,$t_2
-+	sltu	$at,$c_3,$t_2
-+	$ADDU	$c_1,$at
-+	$ST	$c_2,4*$BNSZ($a0)
-+
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_3,$t_1
-+	sltu	$at,$c_3,$t_1
-+	$MULTU	$a_3,$b_2		# mul_add_c(a[3],b[2],c3,c1,c2);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_1,$t_2
-+	sltu	$c_2,$c_1,$t_2
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_3,$t_1
-+	sltu	$at,$c_3,$t_1
-+	 $MULTU	$a_3,$b_3		# mul_add_c(a[3],b[3],c1,c2,c3);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_1,$t_2
-+	sltu	$at,$c_1,$t_2
-+	$ADDU	$c_2,$at
-+	$ST	$c_3,5*$BNSZ($a0)
-+
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_1,$t_1
-+	sltu	$at,$c_1,$t_1
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_2,$t_2
-+	$ST	$c_1,6*$BNSZ($a0)
-+	$ST	$c_2,7*$BNSZ($a0)
-+
-+	.set	noreorder
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+	$REG_L	$t3,4*$SZREG($sp)
-+	$REG_L	$t2,3*$SZREG($sp)
-+	$REG_L	$t1,2*$SZREG($sp)
-+	$REG_L	$t0,1*$SZREG($sp)
-+	$REG_L	$gp,0*$SZREG($sp)
-+	$PTR_ADD $sp,6*$SZREG
-+___
-+$code.=<<___;
-+	jr	$ra
-+	nop
-+.end	bn_mul_comba4
-+___
-+
-+($a_4,$a_5,$a_6,$a_7)=($b_0,$b_1,$b_2,$b_3);
-+
-+$code.=<<___;
-+
-+.align	5
-+.globl	bn_sqr_comba8
-+.ent	bn_sqr_comba8
-+bn_sqr_comba8:
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+	.frame	$sp,6*$SZREG,$ra
-+	.mask	0x8000f008,-$SZREG
-+	.set	noreorder
-+	$PTR_SUB $sp,6*$SZREG
-+	$REG_S	$ra,5*$SZREG($sp)
-+	$REG_S	$t3,4*$SZREG($sp)
-+	$REG_S	$t2,3*$SZREG($sp)
-+	$REG_S	$t1,2*$SZREG($sp)
-+	$REG_S	$t0,1*$SZREG($sp)
-+	$REG_S	$gp,0*$SZREG($sp)
-+___
-+$code.=<<___;
-+	.set	reorder
-+	$LD	$a_0,0($a1)
-+	$LD	$a_1,$BNSZ($a1)
-+	$LD	$a_2,2*$BNSZ($a1)
-+	$LD	$a_3,3*$BNSZ($a1)
-+
-+	$MULTU	$a_0,$a_0		# mul_add_c(a[0],b[0],c1,c2,c3);
-+	$LD	$a_4,4*$BNSZ($a1)
-+	$LD	$a_5,5*$BNSZ($a1)
-+	$LD	$a_6,6*$BNSZ($a1)
-+	$LD	$a_7,7*$BNSZ($a1)
-+	mflo	$c_1
-+	mfhi	$c_2
-+	$ST	$c_1,0($a0)
-+
-+	$MULTU	$a_0,$a_1		# mul_add_c2(a[0],b[1],c2,c3,c1);
-+	mflo	$t_1
-+	mfhi	$t_2
-+	slt	$c_1,$t_2,$zero
-+	$SLL	$t_2,1
-+	 $MULTU	$a_2,$a_0		# mul_add_c2(a[2],b[0],c3,c1,c2);
-+	slt	$a2,$t_1,$zero
-+	$ADDU	$t_2,$a2
-+	$SLL	$t_1,1
-+	$ADDU	$c_2,$t_1
-+	sltu	$at,$c_2,$t_1
-+	$ADDU	$c_3,$t_2,$at
-+	$ST	$c_2,$BNSZ($a0)
-+
-+	mflo	$t_1
-+	mfhi	$t_2
-+	slt	$c_2,$t_2,$zero
-+	$SLL	$t_2,1
-+	$MULTU	$a_1,$a_1		# mul_add_c(a[1],b[1],c3,c1,c2);
-+	slt	$a2,$t_1,$zero
-+	$ADDU	$t_2,$a2
-+	$SLL	$t_1,1
-+	$ADDU	$c_3,$t_1
-+	sltu	$at,$c_3,$t_1
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_1,$t_2
-+	sltu	$at,$c_1,$t_2
-+	$ADDU	$c_2,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_3,$t_1
-+	sltu	$at,$c_3,$t_1
-+	 $MULTU	$a_0,$a_3		# mul_add_c2(a[0],b[3],c1,c2,c3);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_1,$t_2
-+	sltu	$at,$c_1,$t_2
-+	$ADDU	$c_2,$at
-+	$ST	$c_3,2*$BNSZ($a0)
-+
-+	mflo	$t_1
-+	mfhi	$t_2
-+	slt	$c_3,$t_2,$zero
-+	$SLL	$t_2,1
-+	$MULTU	$a_1,$a_2		# mul_add_c2(a[1],b[2],c1,c2,c3);
-+	slt	$a2,$t_1,$zero
-+	$ADDU	$t_2,$a2
-+	$SLL	$t_1,1
-+	$ADDU	$c_1,$t_1
-+	sltu	$at,$c_1,$t_1
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_2,$t_2
-+	sltu	$at,$c_2,$t_2
-+	$ADDU	$c_3,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	slt	$at,$t_2,$zero
-+	$ADDU	$c_3,$at
-+	 $MULTU	$a_4,$a_0		# mul_add_c2(a[4],b[0],c2,c3,c1);
-+	$SLL	$t_2,1
-+	slt	$a2,$t_1,$zero
-+	$ADDU	$t_2,$a2
-+	$SLL	$t_1,1
-+	$ADDU	$c_1,$t_1
-+	sltu	$at,$c_1,$t_1
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_2,$t_2
-+	sltu	$at,$c_2,$t_2
-+	$ADDU	$c_3,$at
-+	$ST	$c_1,3*$BNSZ($a0)
-+
-+	mflo	$t_1
-+	mfhi	$t_2
-+	slt	$c_1,$t_2,$zero
-+	$SLL	$t_2,1
-+	$MULTU	$a_3,$a_1		# mul_add_c2(a[3],b[1],c2,c3,c1);
-+	slt	$a2,$t_1,$zero
-+	$ADDU	$t_2,$a2
-+	$SLL	$t_1,1
-+	$ADDU	$c_2,$t_1
-+	sltu	$at,$c_2,$t_1
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_3,$t_2
-+	sltu	$at,$c_3,$t_2
-+	$ADDU	$c_1,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	slt	$at,$t_2,$zero
-+	$ADDU	$c_1,$at
-+	$MULTU	$a_2,$a_2		# mul_add_c(a[2],b[2],c2,c3,c1);
-+	$SLL	$t_2,1
-+	slt	$a2,$t_1,$zero
-+	$ADDU	$t_2,$a2
-+	$SLL	$t_1,1
-+	$ADDU	$c_2,$t_1
-+	sltu	$at,$c_2,$t_1
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_3,$t_2
-+	sltu	$at,$c_3,$t_2
-+	$ADDU	$c_1,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_2,$t_1
-+	sltu	$at,$c_2,$t_1
-+	 $MULTU	$a_0,$a_5		# mul_add_c2(a[0],b[5],c3,c1,c2);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_3,$t_2
-+	sltu	$at,$c_3,$t_2
-+	$ADDU	$c_1,$at
-+	$ST	$c_2,4*$BNSZ($a0)
-+
-+	mflo	$t_1
-+	mfhi	$t_2
-+	slt	$c_2,$t_2,$zero
-+	$SLL	$t_2,1
-+	$MULTU	$a_1,$a_4		# mul_add_c2(a[1],b[4],c3,c1,c2);
-+	slt	$a2,$t_1,$zero
-+	$ADDU	$t_2,$a2
-+	$SLL	$t_1,1
-+	$ADDU	$c_3,$t_1
-+	sltu	$at,$c_3,$t_1
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_1,$t_2
-+	sltu	$at,$c_1,$t_2
-+	$ADDU	$c_2,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	slt	$at,$t_2,$zero
-+	$ADDU	$c_2,$at
-+	$MULTU	$a_2,$a_3		# mul_add_c2(a[2],b[3],c3,c1,c2);
-+	$SLL	$t_2,1
-+	slt	$a2,$t_1,$zero
-+	$ADDU	$t_2,$a2
-+	$SLL	$t_1,1
-+	$ADDU	$c_3,$t_1
-+	sltu	$at,$c_3,$t_1
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_1,$t_2
-+	sltu	$at,$c_1,$t_2
-+	$ADDU	$c_2,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	slt	$at,$t_2,$zero
-+	 $MULTU	$a_6,$a_0		# mul_add_c2(a[6],b[0],c1,c2,c3);
-+	$ADDU	$c_2,$at
-+	$SLL	$t_2,1
-+	slt	$a2,$t_1,$zero
-+	$ADDU	$t_2,$a2
-+	$SLL	$t_1,1
-+	$ADDU	$c_3,$t_1
-+	sltu	$at,$c_3,$t_1
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_1,$t_2
-+	sltu	$at,$c_1,$t_2
-+	$ADDU	$c_2,$at
-+	$ST	$c_3,5*$BNSZ($a0)
-+
-+	mflo	$t_1
-+	mfhi	$t_2
-+	slt	$c_3,$t_2,$zero
-+	$SLL	$t_2,1
-+	$MULTU	$a_5,$a_1		# mul_add_c2(a[5],b[1],c1,c2,c3);
-+	slt	$a2,$t_1,$zero
-+	$ADDU	$t_2,$a2
-+	$SLL	$t_1,1
-+	$ADDU	$c_1,$t_1
-+	sltu	$at,$c_1,$t_1
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_2,$t_2
-+	sltu	$at,$c_2,$t_2
-+	$ADDU	$c_3,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	slt	$at,$t_2,$zero
-+	$ADDU	$c_3,$at
-+	$MULTU	$a_4,$a_2		# mul_add_c2(a[4],b[2],c1,c2,c3);
-+	$SLL	$t_2,1
-+	slt	$a2,$t_1,$zero
-+	$ADDU	$t_2,$a2
-+	$SLL	$t_1,1
-+	$ADDU	$c_1,$t_1
-+	sltu	$at,$c_1,$t_1
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_2,$t_2
-+	sltu	$at,$c_2,$t_2
-+	$ADDU	$c_3,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	slt	$at,$t_2,$zero
-+	$ADDU	$c_3,$at
-+	$MULTU	$a_3,$a_3		# mul_add_c(a[3],b[3],c1,c2,c3);
-+	$SLL	$t_2,1
-+	slt	$a2,$t_1,$zero
-+	$ADDU	$t_2,$a2
-+	$SLL	$t_1,1
-+	$ADDU	$c_1,$t_1
-+	sltu	$at,$c_1,$t_1
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_2,$t_2
-+	sltu	$at,$c_2,$t_2
-+	$ADDU	$c_3,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_1,$t_1
-+	sltu	$at,$c_1,$t_1
-+	 $MULTU	$a_0,$a_7		# mul_add_c2(a[0],b[7],c2,c3,c1);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_2,$t_2
-+	sltu	$at,$c_2,$t_2
-+	$ADDU	$c_3,$at
-+	$ST	$c_1,6*$BNSZ($a0)
-+
-+	mflo	$t_1
-+	mfhi	$t_2
-+	slt	$c_1,$t_2,$zero
-+	$SLL	$t_2,1
-+	$MULTU	$a_1,$a_6		# mul_add_c2(a[1],b[6],c2,c3,c1);
-+	slt	$a2,$t_1,$zero
-+	$ADDU	$t_2,$a2
-+	$SLL	$t_1,1
-+	$ADDU	$c_2,$t_1
-+	sltu	$at,$c_2,$t_1
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_3,$t_2
-+	sltu	$at,$c_3,$t_2
-+	$ADDU	$c_1,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	slt	$at,$t_2,$zero
-+	$ADDU	$c_1,$at
-+	$MULTU	$a_2,$a_5		# mul_add_c2(a[2],b[5],c2,c3,c1);
-+	$SLL	$t_2,1
-+	slt	$a2,$t_1,$zero
-+	$ADDU	$t_2,$a2
-+	$SLL	$t_1,1
-+	$ADDU	$c_2,$t_1
-+	sltu	$at,$c_2,$t_1
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_3,$t_2
-+	sltu	$at,$c_3,$t_2
-+	$ADDU	$c_1,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	slt	$at,$t_2,$zero
-+	$ADDU	$c_1,$at
-+	$MULTU	$a_3,$a_4		# mul_add_c2(a[3],b[4],c2,c3,c1);
-+	$SLL	$t_2,1
-+	slt	$a2,$t_1,$zero
-+	$ADDU	$t_2,$a2
-+	$SLL	$t_1,1
-+	$ADDU	$c_2,$t_1
-+	sltu	$at,$c_2,$t_1
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_3,$t_2
-+	sltu	$at,$c_3,$t_2
-+	$ADDU	$c_1,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	slt	$at,$t_2,$zero
-+	$ADDU	$c_1,$at
-+	 $MULTU	$a_7,$a_1		# mul_add_c2(a[7],b[1],c3,c1,c2);
-+	$SLL	$t_2,1
-+	slt	$a2,$t_1,$zero
-+	$ADDU	$t_2,$a2
-+	$SLL	$t_1,1
-+	$ADDU	$c_2,$t_1
-+	sltu	$at,$c_2,$t_1
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_3,$t_2
-+	sltu	$at,$c_3,$t_2
-+	$ADDU	$c_1,$at
-+	$ST	$c_2,7*$BNSZ($a0)
-+
-+	mflo	$t_1
-+	mfhi	$t_2
-+	slt	$c_2,$t_2,$zero
-+	$SLL	$t_2,1
-+	$MULTU	$a_6,$a_2		# mul_add_c2(a[6],b[2],c3,c1,c2);
-+	slt	$a2,$t_1,$zero
-+	$ADDU	$t_2,$a2
-+	$SLL	$t_1,1
-+	$ADDU	$c_3,$t_1
-+	sltu	$at,$c_3,$t_1
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_1,$t_2
-+	sltu	$at,$c_1,$t_2
-+	$ADDU	$c_2,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	slt	$at,$t_2,$zero
-+	$ADDU	$c_2,$at
-+	$MULTU	$a_5,$a_3		# mul_add_c2(a[5],b[3],c3,c1,c2);
-+	$SLL	$t_2,1
-+	slt	$a2,$t_1,$zero
-+	$ADDU	$t_2,$a2
-+	$SLL	$t_1,1
-+	$ADDU	$c_3,$t_1
-+	sltu	$at,$c_3,$t_1
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_1,$t_2
-+	sltu	$at,$c_1,$t_2
-+	$ADDU	$c_2,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	slt	$at,$t_2,$zero
-+	$ADDU	$c_2,$at
-+	$MULTU	$a_4,$a_4		# mul_add_c(a[4],b[4],c3,c1,c2);
-+	$SLL	$t_2,1
-+	slt	$a2,$t_1,$zero
-+	$ADDU	$t_2,$a2
-+	$SLL	$t_1,1
-+	$ADDU	$c_3,$t_1
-+	sltu	$at,$c_3,$t_1
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_1,$t_2
-+	sltu	$at,$c_1,$t_2
-+	$ADDU	$c_2,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_3,$t_1
-+	sltu	$at,$c_3,$t_1
-+	 $MULTU	$a_2,$a_7		# mul_add_c2(a[2],b[7],c1,c2,c3);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_1,$t_2
-+	sltu	$at,$c_1,$t_2
-+	$ADDU	$c_2,$at
-+	$ST	$c_3,8*$BNSZ($a0)
-+
-+	mflo	$t_1
-+	mfhi	$t_2
-+	slt	$c_3,$t_2,$zero
-+	$SLL	$t_2,1
-+	$MULTU	$a_3,$a_6		# mul_add_c2(a[3],b[6],c1,c2,c3);
-+	slt	$a2,$t_1,$zero
-+	$ADDU	$t_2,$a2
-+	$SLL	$t_1,1
-+	$ADDU	$c_1,$t_1
-+	sltu	$at,$c_1,$t_1
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_2,$t_2
-+	sltu	$at,$c_2,$t_2
-+	$ADDU	$c_3,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	slt	$at,$t_2,$zero
-+	$ADDU	$c_3,$at
-+	$MULTU	$a_4,$a_5		# mul_add_c2(a[4],b[5],c1,c2,c3);
-+	$SLL	$t_2,1
-+	slt	$a2,$t_1,$zero
-+	$ADDU	$t_2,$a2
-+	$SLL	$t_1,1
-+	$ADDU	$c_1,$t_1
-+	sltu	$at,$c_1,$t_1
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_2,$t_2
-+	sltu	$at,$c_2,$t_2
-+	$ADDU	$c_3,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	slt	$at,$t_2,$zero
-+	$ADDU	$c_3,$at
-+	 $MULTU	$a_7,$a_3		# mul_add_c2(a[7],b[3],c2,c3,c1);
-+	$SLL	$t_2,1
-+	slt	$a2,$t_1,$zero
-+	$ADDU	$t_2,$a2
-+	$SLL	$t_1,1
-+	$ADDU	$c_1,$t_1
-+	sltu	$at,$c_1,$t_1
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_2,$t_2
-+	sltu	$at,$c_2,$t_2
-+	$ADDU	$c_3,$at
-+	$ST	$c_1,9*$BNSZ($a0)
-+
-+	mflo	$t_1
-+	mfhi	$t_2
-+	slt	$c_1,$t_2,$zero
-+	$SLL	$t_2,1
-+	$MULTU	$a_6,$a_4		# mul_add_c2(a[6],b[4],c2,c3,c1);
-+	slt	$a2,$t_1,$zero
-+	$ADDU	$t_2,$a2
-+	$SLL	$t_1,1
-+	$ADDU	$c_2,$t_1
-+	sltu	$at,$c_2,$t_1
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_3,$t_2
-+	sltu	$at,$c_3,$t_2
-+	$ADDU	$c_1,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	slt	$at,$t_2,$zero
-+	$ADDU	$c_1,$at
-+	$MULTU	$a_5,$a_5		# mul_add_c(a[5],b[5],c2,c3,c1);
-+	$SLL	$t_2,1
-+	slt	$a2,$t_1,$zero
-+	$ADDU	$t_2,$a2
-+	$SLL	$t_1,1
-+	$ADDU	$c_2,$t_1
-+	sltu	$at,$c_2,$t_1
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_3,$t_2
-+	sltu	$at,$c_3,$t_2
-+	$ADDU	$c_1,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_2,$t_1
-+	sltu	$at,$c_2,$t_1
-+	 $MULTU	$a_4,$a_7		# mul_add_c2(a[4],b[7],c3,c1,c2);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_3,$t_2
-+	sltu	$at,$c_3,$t_2
-+	$ADDU	$c_1,$at
-+	$ST	$c_2,10*$BNSZ($a0)
-+
-+	mflo	$t_1
-+	mfhi	$t_2
-+	slt	$c_2,$t_2,$zero
-+	$SLL	$t_2,1
-+	$MULTU	$a_5,$a_6		# mul_add_c2(a[5],b[6],c3,c1,c2);
-+	slt	$a2,$t_1,$zero
-+	$ADDU	$t_2,$a2
-+	$SLL	$t_1,1
-+	$ADDU	$c_3,$t_1
-+	sltu	$at,$c_3,$t_1
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_1,$t_2
-+	sltu	$at,$c_1,$t_2
-+	$ADDU	$c_2,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	slt	$at,$t_2,$zero
-+	$ADDU	$c_2,$at
-+	 $MULTU	$a_7,$a_5		# mul_add_c2(a[7],b[5],c1,c2,c3);
-+	$SLL	$t_2,1
-+	slt	$a2,$t_1,$zero
-+	$ADDU	$t_2,$a2
-+	$SLL	$t_1,1
-+	$ADDU	$c_3,$t_1
-+	sltu	$at,$c_3,$t_1
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_1,$t_2
-+	sltu	$at,$c_1,$t_2
-+	$ADDU	$c_2,$at
-+	$ST	$c_3,11*$BNSZ($a0)
-+
-+	mflo	$t_1
-+	mfhi	$t_2
-+	slt	$c_3,$t_2,$zero
-+	$SLL	$t_2,1
-+	$MULTU	$a_6,$a_6		# mul_add_c(a[6],b[6],c1,c2,c3);
-+	slt	$a2,$t_1,$zero
-+	$ADDU	$t_2,$a2
-+	$SLL	$t_1,1
-+	$ADDU	$c_1,$t_1
-+	sltu	$at,$c_1,$t_1
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_2,$t_2
-+	sltu	$at,$c_2,$t_2
-+	$ADDU	$c_3,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_1,$t_1
-+	sltu	$at,$c_1,$t_1
-+	 $MULTU	$a_6,$a_7		# mul_add_c2(a[6],b[7],c2,c3,c1);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_2,$t_2
-+	sltu	$at,$c_2,$t_2
-+	$ADDU	$c_3,$at
-+	$ST	$c_1,12*$BNSZ($a0)
-+
-+	mflo	$t_1
-+	mfhi	$t_2
-+	slt	$c_1,$t_2,$zero
-+	$SLL	$t_2,1
-+	 $MULTU	$a_7,$a_7		# mul_add_c(a[7],b[7],c3,c1,c2);
-+	slt	$a2,$t_1,$zero
-+	$ADDU	$t_2,$a2
-+	$SLL	$t_1,1
-+	$ADDU	$c_2,$t_1
-+	sltu	$at,$c_2,$t_1
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_3,$t_2
-+	sltu	$at,$c_3,$t_2
-+	$ADDU	$c_1,$at
-+	$ST	$c_2,13*$BNSZ($a0)
-+
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_3,$t_1
-+	sltu	$at,$c_3,$t_1
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_1,$t_2
-+	$ST	$c_3,14*$BNSZ($a0)
-+	$ST	$c_1,15*$BNSZ($a0)
-+
-+	.set	noreorder
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+	$REG_L	$t3,4*$SZREG($sp)
-+	$REG_L	$t2,3*$SZREG($sp)
-+	$REG_L	$t1,2*$SZREG($sp)
-+	$REG_L	$t0,1*$SZREG($sp)
-+	$REG_L	$gp,0*$SZREG($sp)
-+	$PTR_ADD $sp,6*$SZREG
-+___
-+$code.=<<___;
-+	jr	$ra
-+	nop
-+.end	bn_sqr_comba8
-+
-+.align	5
-+.globl	bn_sqr_comba4
-+.ent	bn_sqr_comba4
-+bn_sqr_comba4:
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+	.frame	$sp,6*$SZREG,$ra
-+	.mask	0x8000f008,-$SZREG
-+	.set	noreorder
-+	$PTR_SUB $sp,6*$SZREG
-+	$REG_S	$ra,5*$SZREG($sp)
-+	$REG_S	$t3,4*$SZREG($sp)
-+	$REG_S	$t2,3*$SZREG($sp)
-+	$REG_S	$t1,2*$SZREG($sp)
-+	$REG_S	$t0,1*$SZREG($sp)
-+	$REG_S	$gp,0*$SZREG($sp)
-+___
-+$code.=<<___;
-+	.set	reorder
-+	$LD	$a_0,0($a1)
-+	$LD	$a_1,$BNSZ($a1)
-+	$MULTU	$a_0,$a_0		# mul_add_c(a[0],b[0],c1,c2,c3);
-+	$LD	$a_2,2*$BNSZ($a1)
-+	$LD	$a_3,3*$BNSZ($a1)
-+	mflo	$c_1
-+	mfhi	$c_2
-+	$ST	$c_1,0($a0)
-+
-+	$MULTU	$a_0,$a_1		# mul_add_c2(a[0],b[1],c2,c3,c1);
-+	mflo	$t_1
-+	mfhi	$t_2
-+	slt	$c_1,$t_2,$zero
-+	$SLL	$t_2,1
-+	 $MULTU	$a_2,$a_0		# mul_add_c2(a[2],b[0],c3,c1,c2);
-+	slt	$a2,$t_1,$zero
-+	$ADDU	$t_2,$a2
-+	$SLL	$t_1,1
-+	$ADDU	$c_2,$t_1
-+	sltu	$at,$c_2,$t_1
-+	$ADDU	$c_3,$t_2,$at
-+	$ST	$c_2,$BNSZ($a0)
-+
-+	mflo	$t_1
-+	mfhi	$t_2
-+	slt	$c_2,$t_2,$zero
-+	$SLL	$t_2,1
-+	$MULTU	$a_1,$a_1		# mul_add_c(a[1],b[1],c3,c1,c2);
-+	slt	$a2,$t_1,$zero
-+	$ADDU	$t_2,$a2
-+	$SLL	$t_1,1
-+	$ADDU	$c_3,$t_1
-+	sltu	$at,$c_3,$t_1
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_1,$t_2
-+	sltu	$at,$c_1,$t_2
-+	$ADDU	$c_2,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_3,$t_1
-+	sltu	$at,$c_3,$t_1
-+	 $MULTU	$a_0,$a_3		# mul_add_c2(a[0],b[3],c1,c2,c3);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_1,$t_2
-+	sltu	$at,$c_1,$t_2
-+	$ADDU	$c_2,$at
-+	$ST	$c_3,2*$BNSZ($a0)
-+
-+	mflo	$t_1
-+	mfhi	$t_2
-+	slt	$c_3,$t_2,$zero
-+	$SLL	$t_2,1
-+	$MULTU	$a_1,$a_2		# mul_add_c(a2[1],b[2],c1,c2,c3);
-+	slt	$a2,$t_1,$zero
-+	$ADDU	$t_2,$a2
-+	$SLL	$t_1,1
-+	$ADDU	$c_1,$t_1
-+	sltu	$at,$c_1,$t_1
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_2,$t_2
-+	sltu	$at,$c_2,$t_2
-+	$ADDU	$c_3,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	slt	$at,$t_2,$zero
-+	$ADDU	$c_3,$at
-+	 $MULTU	$a_3,$a_1		# mul_add_c2(a[3],b[1],c2,c3,c1);
-+	$SLL	$t_2,1
-+	slt	$a2,$t_1,$zero
-+	$ADDU	$t_2,$a2
-+	$SLL	$t_1,1
-+	$ADDU	$c_1,$t_1
-+	sltu	$at,$c_1,$t_1
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_2,$t_2
-+	sltu	$at,$c_2,$t_2
-+	$ADDU	$c_3,$at
-+	$ST	$c_1,3*$BNSZ($a0)
-+
-+	mflo	$t_1
-+	mfhi	$t_2
-+	slt	$c_1,$t_2,$zero
-+	$SLL	$t_2,1
-+	$MULTU	$a_2,$a_2		# mul_add_c(a[2],b[2],c2,c3,c1);
-+	slt	$a2,$t_1,$zero
-+	$ADDU	$t_2,$a2
-+	$SLL	$t_1,1
-+	$ADDU	$c_2,$t_1
-+	sltu	$at,$c_2,$t_1
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_3,$t_2
-+	sltu	$at,$c_3,$t_2
-+	$ADDU	$c_1,$at
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_2,$t_1
-+	sltu	$at,$c_2,$t_1
-+	 $MULTU	$a_2,$a_3		# mul_add_c2(a[2],b[3],c3,c1,c2);
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_3,$t_2
-+	sltu	$at,$c_3,$t_2
-+	$ADDU	$c_1,$at
-+	$ST	$c_2,4*$BNSZ($a0)
-+
-+	mflo	$t_1
-+	mfhi	$t_2
-+	slt	$c_2,$t_2,$zero
-+	$SLL	$t_2,1
-+	 $MULTU	$a_3,$a_3		# mul_add_c(a[3],b[3],c1,c2,c3);
-+	slt	$a2,$t_1,$zero
-+	$ADDU	$t_2,$a2
-+	$SLL	$t_1,1
-+	$ADDU	$c_3,$t_1
-+	sltu	$at,$c_3,$t_1
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_1,$t_2
-+	sltu	$at,$c_1,$t_2
-+	$ADDU	$c_2,$at
-+	$ST	$c_3,5*$BNSZ($a0)
-+
-+	mflo	$t_1
-+	mfhi	$t_2
-+	$ADDU	$c_1,$t_1
-+	sltu	$at,$c_1,$t_1
-+	$ADDU	$t_2,$at
-+	$ADDU	$c_2,$t_2
-+	$ST	$c_1,6*$BNSZ($a0)
-+	$ST	$c_2,7*$BNSZ($a0)
-+
-+	.set	noreorder
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+	$REG_L	$t3,4*$SZREG($sp)
-+	$REG_L	$t2,3*$SZREG($sp)
-+	$REG_L	$t1,2*$SZREG($sp)
-+	$REG_L	$t0,1*$SZREG($sp)
-+	$REG_L	$gp,0*$SZREG($sp)
-+	$PTR_ADD $sp,6*$SZREG
-+___
-+$code.=<<___;
-+	jr	$ra
-+	nop
-+.end	bn_sqr_comba4
-+___
-+print $code;
-+close STDOUT;
-diff --git a/crypto/sha/asm/sha1-mips.pl b/crypto/sha/asm/sha1-mips.pl
-new file mode 100644
-index 0000000..f1a702f
---- /dev/null
-+++ b/crypto/sha/asm/sha1-mips.pl
-@@ -0,0 +1,354 @@
-+#!/usr/bin/env perl
-+
-+# ====================================================================
-+# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
-+# project. The module is, however, dual licensed under OpenSSL and
-+# CRYPTOGAMS licenses depending on where you obtain it. For further
-+# details see http://www.openssl.org/~appro/cryptogams/.
-+# ====================================================================
-+
-+# SHA1 block procedure for MIPS.
-+
-+# Performance improvement is 30% on unaligned input. The "secret" is
-+# to deploy lwl/lwr pair to load unaligned input. One could have
-+# vectorized Xupdate on MIPSIII/IV, but the goal was to code MIPS32-
-+# compatible subroutine. There is room for minor optimization on
-+# little-endian platforms...
-+
-+######################################################################
-+# There is a number of MIPS ABI in use, O32 and N32/64 are most
-+# widely used. Then there is a new contender: NUBI. It appears that if
-+# one picks the latter, it's possible to arrange code in ABI neutral
-+# manner. Therefore let's stick to NUBI register layout:
-+#
-+($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25));
-+($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
-+($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23));
-+($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31));
-+#
-+# The return value is placed in $a0. Following coding rules facilitate
-+# interoperability:
-+#
-+# - never ever touch $tp, "thread pointer", former $gp;
-+# - copy return value to $t0, former $v0 [or to $a0 if you're adapting
-+#   old code];
-+# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
-+#
-+# For reference here is register layout for N32/64 MIPS ABIs:
-+#
-+# ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
-+# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
-+# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
-+# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
-+# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
-+#
-+$flavour = shift; # supported flavours are o32,n32,64,nubi32,nubi64
-+
-+if ($flavour =~ /64|n32/i) {
-+	$PTR_ADD="dadd";	# incidentally works even on n32
-+	$PTR_SUB="dsub";	# incidentally works even on n32
-+	$REG_S="sd";
-+	$REG_L="ld";
-+	$PTR_SLL="dsll";	# incidentally works even on n32
-+	$SZREG=8;
-+} else {
-+	$PTR_ADD="add";
-+	$PTR_SUB="sub";
-+	$REG_S="sw";
-+	$REG_L="lw";
-+	$PTR_SLL="sll";
-+	$SZREG=4;
-+}
-+#
-+# <appro@openssl.org>
-+#
-+######################################################################
-+
-+$big_endian=(`echo MIPSEL | $ENV{CC} -E -P -`=~/MIPSEL/)?1:0;
-+
-+for (@ARGV) {	$output=$_ if (/^\w[\w\-]*\.\w+$/);   }
-+open STDOUT,">$output";
-+
-+if (!defined($big_endian))
-+            {   $big_endian=(unpack('L',pack('N',1))==1);   }
-+
-+# offsets of the Most and Least Significant Bytes
-+$MSB=$big_endian?0:3;
-+$LSB=3&~$MSB;
-+
-+@X=map("\$$_",(8..23));	# a4-a7,s0-s11
-+
-+$ctx=$a0;
-+$inp=$a1;
-+$num=$a2;
-+$A="\$1";
-+$B="\$2";
-+$C="\$3";
-+$D="\$7";
-+$E="\$24";	@V=($A,$B,$C,$D,$E);
-+$t0="\$25";
-+$t1=$num;	# $num is offloaded to stack
-+$t2="\$30";	# fp
-+$K="\$31";	# ra
-+
-+sub BODY_00_14 {
-+my ($i,$a,$b,$c,$d,$e)=@_;
-+my $j=$i+1;
-+$code.=<<___	if (!$big_endian);
-+	srl	$t0,@X[$i],24	# byte swap($i)
-+	srl	$t1,@X[$i],8
-+	andi	$t2,@X[$i],0xFF00
-+	sll	@X[$i],@X[$i],24
-+	andi	$t1,0xFF00
-+	sll	$t2,$t2,8
-+	or	@X[$i],$t0
-+	or	$t1,$t2
-+	or	@X[$i],$t1
-+___
-+$code.=<<___;
-+	 lwl	@X[$j],$j*4+$MSB($inp)
-+	sll	$t0,$a,5	# $i
-+	addu	$e,$K
-+	 lwr	@X[$j],$j*4+$LSB($inp)
-+	srl	$t1,$a,27
-+	addu	$e,$t0
-+	xor	$t0,$c,$d
-+	addu	$e,$t1
-+	sll	$t2,$b,30
-+	and	$t0,$b
-+	srl	$b,$b,2
-+	xor	$t0,$d
-+	addu	$e,@X[$i]
-+	or	$b,$t2
-+	addu	$e,$t0
-+___
-+}
-+
-+sub BODY_15_19 {
-+my ($i,$a,$b,$c,$d,$e)=@_;
-+my $j=$i+1;
-+
-+$code.=<<___	if (!$big_endian && $i==15);
-+	srl	$t0,@X[$i],24	# byte swap($i)
-+	srl	$t1,@X[$i],8
-+	andi	$t2,@X[$i],0xFF00
-+	sll	@X[$i],@X[$i],24
-+	andi	$t1,0xFF00
-+	sll	$t2,$t2,8
-+	or	@X[$i],$t0
-+	or	@X[$i],$t1
-+	or	@X[$i],$t2
-+___
-+$code.=<<___;
-+	 xor	@X[$j%16],@X[($j+2)%16]
-+	sll	$t0,$a,5	# $i
-+	addu	$e,$K
-+	srl	$t1,$a,27
-+	addu	$e,$t0
-+	 xor	@X[$j%16],@X[($j+8)%16]
-+	xor	$t0,$c,$d
-+	addu	$e,$t1
-+	 xor	@X[$j%16],@X[($j+13)%16]
-+	sll	$t2,$b,30
-+	and	$t0,$b
-+	 srl	$t1,@X[$j%16],31
-+	 addu	@X[$j%16],@X[$j%16]
-+	srl	$b,$b,2
-+	xor	$t0,$d
-+	 or	@X[$j%16],$t1
-+	addu	$e,@X[$i%16]
-+	or	$b,$t2
-+	addu	$e,$t0
-+___
-+}
-+
-+sub BODY_20_39 {
-+my ($i,$a,$b,$c,$d,$e)=@_;
-+my $j=$i+1;
-+$code.=<<___ if ($i<79);
-+	 xor	@X[$j%16],@X[($j+2)%16]
-+	sll	$t0,$a,5	# $i
-+	addu	$e,$K
-+	srl	$t1,$a,27
-+	addu	$e,$t0
-+	 xor	@X[$j%16],@X[($j+8)%16]
-+	xor	$t0,$c,$d
-+	addu	$e,$t1
-+	 xor	@X[$j%16],@X[($j+13)%16]
-+	sll	$t2,$b,30
-+	xor	$t0,$b
-+	 srl	$t1,@X[$j%16],31
-+	 addu	@X[$j%16],@X[$j%16]
-+	srl	$b,$b,2
-+	addu	$e,@X[$i%16]
-+	 or	@X[$j%16],$t1
-+	or	$b,$t2
-+	addu	$e,$t0
-+___
-+$code.=<<___ if ($i==79);
-+	 lw	@X[0],0($ctx)
-+	sll	$t0,$a,5	# $i
-+	addu	$e,$K
-+	 lw	@X[1],4($ctx)
-+	srl	$t1,$a,27
-+	addu	$e,$t0
-+	 lw	@X[2],8($ctx)
-+	xor	$t0,$c,$d
-+	addu	$e,$t1
-+	 lw	@X[3],12($ctx)
-+	sll	$t2,$b,30
-+	xor	$t0,$b
-+	 lw	@X[4],16($ctx)
-+	srl	$b,$b,2
-+	addu	$e,@X[$i%16]
-+	or	$b,$t2
-+	addu	$e,$t0
-+___
-+}
-+
-+sub BODY_40_59 {
-+my ($i,$a,$b,$c,$d,$e)=@_;
-+my $j=$i+1;
-+$code.=<<___ if ($i<79);
-+	 xor	@X[$j%16],@X[($j+2)%16]
-+	sll	$t0,$a,5	# $i
-+	addu	$e,$K
-+	srl	$t1,$a,27
-+	addu	$e,$t0
-+	 xor	@X[$j%16],@X[($j+8)%16]
-+	and	$t0,$c,$d
-+	addu	$e,$t1
-+	 xor	@X[$j%16],@X[($j+13)%16]
-+	sll	$t2,$b,30
-+	addu	$e,$t0
-+	 srl	$t1,@X[$j%16],31
-+	xor	$t0,$c,$d
-+	 addu	@X[$j%16],@X[$j%16]
-+	and	$t0,$b
-+	srl	$b,$b,2
-+	 or	@X[$j%16],$t1
-+	addu	$e,@X[$i%16]
-+	or	$b,$t2
-+	addu	$e,$t0
-+___
-+}
-+
-+$FRAMESIZE=16;	# large enough to accomodate NUBI saved registers
-+$SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc0fff008 : 0xc0ff0000;
-+
-+$code=<<___;
-+#ifdef OPENSSL_FIPSCANISTER
-+# include <openssl/fipssyms.h>
-+#endif
-+
-+.text
-+
-+.set	noat
-+.set	noreorder
-+.align	5
-+.globl	sha1_block_data_order
-+.ent	sha1_block_data_order
-+sha1_block_data_order:
-+	.frame	$sp,$FRAMESIZE*$SZREG,$ra
-+	.mask	$SAVED_REGS_MASK,-$SZREG
-+	.set	noreorder
-+	$PTR_SUB $sp,$FRAMESIZE*$SZREG
-+	$REG_S	$ra,($FRAMESIZE-1)*$SZREG($sp)
-+	$REG_S	$fp,($FRAMESIZE-2)*$SZREG($sp)
-+	$REG_S	$s11,($FRAMESIZE-3)*$SZREG($sp)
-+	$REG_S	$s10,($FRAMESIZE-4)*$SZREG($sp)
-+	$REG_S	$s9,($FRAMESIZE-5)*$SZREG($sp)
-+	$REG_S	$s8,($FRAMESIZE-6)*$SZREG($sp)
-+	$REG_S	$s7,($FRAMESIZE-7)*$SZREG($sp)
-+	$REG_S	$s6,($FRAMESIZE-8)*$SZREG($sp)
-+	$REG_S	$s5,($FRAMESIZE-9)*$SZREG($sp)
-+	$REG_S	$s4,($FRAMESIZE-10)*$SZREG($sp)
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi prologue
-+	$REG_S	$s3,($FRAMESIZE-11)*$SZREG($sp)
-+	$REG_S	$s2,($FRAMESIZE-12)*$SZREG($sp)
-+	$REG_S	$s1,($FRAMESIZE-13)*$SZREG($sp)
-+	$REG_S	$s0,($FRAMESIZE-14)*$SZREG($sp)
-+	$REG_S	$gp,($FRAMESIZE-15)*$SZREG($sp)
-+___
-+$code.=<<___;
-+	$PTR_SLL $num,6
-+	$PTR_ADD $num,$inp
-+	$REG_S	$num,0($sp)
-+	lw	$A,0($ctx)
-+	lw	$B,4($ctx)
-+	lw	$C,8($ctx)
-+	lw	$D,12($ctx)
-+	b	.Loop
-+	lw	$E,16($ctx)
-+.align	4
-+.Loop:
-+	.set	reorder
-+	lwl	@X[0],$MSB($inp)
-+	lui	$K,0x5a82
-+	lwr	@X[0],$LSB($inp)
-+	ori	$K,0x7999	# K_00_19
-+___
-+for ($i=0;$i<15;$i++)	{ &BODY_00_14($i,@V); unshift(@V,pop(@V)); }
-+for (;$i<20;$i++)	{ &BODY_15_19($i,@V); unshift(@V,pop(@V)); }
-+$code.=<<___;
-+	lui	$K,0x6ed9
-+	ori	$K,0xeba1	# K_20_39
-+___
-+for (;$i<40;$i++)	{ &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
-+$code.=<<___;
-+	lui	$K,0x8f1b
-+	ori	$K,0xbcdc	# K_40_59
-+___
-+for (;$i<60;$i++)	{ &BODY_40_59($i,@V); unshift(@V,pop(@V)); }
-+$code.=<<___;
-+	lui	$K,0xca62
-+	ori	$K,0xc1d6	# K_60_79
-+___
-+for (;$i<80;$i++)	{ &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
-+$code.=<<___;
-+	$PTR_ADD $inp,64
-+	$REG_L	$num,0($sp)
-+
-+	addu	$A,$X[0]
-+	addu	$B,$X[1]
-+	sw	$A,0($ctx)
-+	addu	$C,$X[2]
-+	addu	$D,$X[3]
-+	sw	$B,4($ctx)
-+	addu	$E,$X[4]
-+	sw	$C,8($ctx)
-+	sw	$D,12($ctx)
-+	sw	$E,16($ctx)
-+	.set	noreorder
-+	bne	$inp,$num,.Loop
-+	nop
-+
-+	.set	noreorder
-+	$REG_L	$ra,($FRAMESIZE-1)*$SZREG($sp)
-+	$REG_L	$fp,($FRAMESIZE-2)*$SZREG($sp)
-+	$REG_L	$s11,($FRAMESIZE-3)*$SZREG($sp)
-+	$REG_L	$s10,($FRAMESIZE-4)*$SZREG($sp)
-+	$REG_L	$s9,($FRAMESIZE-5)*$SZREG($sp)
-+	$REG_L	$s8,($FRAMESIZE-6)*$SZREG($sp)
-+	$REG_L	$s7,($FRAMESIZE-7)*$SZREG($sp)
-+	$REG_L	$s6,($FRAMESIZE-8)*$SZREG($sp)
-+	$REG_L	$s5,($FRAMESIZE-9)*$SZREG($sp)
-+	$REG_L	$s4,($FRAMESIZE-10)*$SZREG($sp)
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+	$REG_L	$s3,($FRAMESIZE-11)*$SZREG($sp)
-+	$REG_L	$s2,($FRAMESIZE-12)*$SZREG($sp)
-+	$REG_L	$s1,($FRAMESIZE-13)*$SZREG($sp)
-+	$REG_L	$s0,($FRAMESIZE-14)*$SZREG($sp)
-+	$REG_L	$gp,($FRAMESIZE-15)*$SZREG($sp)
-+___
-+$code.=<<___;
-+	jr	$ra
-+	$PTR_ADD $sp,$FRAMESIZE*$SZREG
-+.end	sha1_block_data_order
-+.rdata
-+.asciiz	"SHA1 for MIPS, CRYPTOGAMS by <appro\@openssl.org>"
-+___
-+print $code;
-+close STDOUT;
-diff --git a/crypto/sha/asm/sha512-mips.pl b/crypto/sha/asm/sha512-mips.pl
-new file mode 100644
-index 0000000..ba5b250
---- /dev/null
-+++ b/crypto/sha/asm/sha512-mips.pl
-@@ -0,0 +1,455 @@
-+#!/usr/bin/env perl
-+
-+# ====================================================================
-+# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
-+# project. The module is, however, dual licensed under OpenSSL and
-+# CRYPTOGAMS licenses depending on where you obtain it. For further
-+# details see http://www.openssl.org/~appro/cryptogams/.
-+# ====================================================================
-+
-+# SHA2 block procedures for MIPS.
-+
-+# October 2010.
-+#
-+# SHA256 performance improvement on MIPS R5000 CPU is ~27% over gcc-
-+# generated code in o32 build and ~55% in n32/64 build. SHA512 [which
-+# for now can only be compiled for MIPS64 ISA] improvement is modest
-+# ~17%, but it comes for free, because it's same instruction sequence.
-+# Improvement coefficients are for aligned input.
-+
-+######################################################################
-+# There is a number of MIPS ABI in use, O32 and N32/64 are most
-+# widely used. Then there is a new contender: NUBI. It appears that if
-+# one picks the latter, it's possible to arrange code in ABI neutral
-+# manner. Therefore let's stick to NUBI register layout:
-+#
-+($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25));
-+($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
-+($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23));
-+($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31));
-+#
-+# The return value is placed in $a0. Following coding rules facilitate
-+# interoperability:
-+#
-+# - never ever touch $tp, "thread pointer", former $gp [o32 can be
-+#   excluded from the rule, because it's specified volatile];
-+# - copy return value to $t0, former $v0 [or to $a0 if you're adapting
-+#   old code];
-+# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
-+#
-+# For reference here is register layout for N32/64 MIPS ABIs:
-+#
-+# ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
-+# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
-+# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
-+# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
-+# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
-+#
-+$flavour = shift; # supported flavours are o32,n32,64,nubi32,nubi64
-+
-+if ($flavour =~ /64|n32/i) {
-+	$PTR_ADD="dadd";	# incidentally works even on n32
-+	$PTR_SUB="dsub";	# incidentally works even on n32
-+	$REG_S="sd";
-+	$REG_L="ld";
-+	$PTR_SLL="dsll";	# incidentally works even on n32
-+	$SZREG=8;
-+} else {
-+	$PTR_ADD="add";
-+	$PTR_SUB="sub";
-+	$REG_S="sw";
-+	$REG_L="lw";
-+	$PTR_SLL="sll";
-+	$SZREG=4;
-+}
-+$pf = ($flavour =~ /nubi/i) ? $t0 : $t2;
-+#
-+# <appro@openssl.org>
-+#
-+######################################################################
-+
-+$big_endian=(`echo MIPSEL | $ENV{CC} -E -P -`=~/MIPSEL/)?1:0;
-+
-+for (@ARGV) {	$output=$_ if (/^\w[\w\-]*\.\w+$/);	}
-+open STDOUT,">$output";
-+
-+if (!defined($big_endian)) { $big_endian=(unpack('L',pack('N',1))==1); }
-+
-+if ($output =~ /512/) {
-+	$label="512";
-+	$SZ=8;
-+	$LD="ld";		# load from memory
-+	$ST="sd";		# store to memory
-+	$SLL="dsll";		# shift left logical
-+	$SRL="dsrl";		# shift right logical
-+	$ADDU="daddu";
-+	@Sigma0=(28,34,39);
-+	@Sigma1=(14,18,41);
-+	@sigma0=( 7, 1, 8);	# right shift first
-+	@sigma1=( 6,19,61);	# right shift first
-+	$lastK=0x817;
-+	$rounds=80;
-+} else {
-+	$label="256";
-+	$SZ=4;
-+	$LD="lw";		# load from memory
-+	$ST="sw";		# store to memory
-+	$SLL="sll";		# shift left logical
-+	$SRL="srl";		# shift right logical
-+	$ADDU="addu";
-+	@Sigma0=( 2,13,22);
-+	@Sigma1=( 6,11,25);
-+	@sigma0=( 3, 7,18);	# right shift first
-+	@sigma1=(10,17,19);	# right shift first
-+	$lastK=0x8f2;
-+	$rounds=64;
-+}
-+
-+$MSB = $big_endian ? 0 : ($SZ-1);
-+$LSB = ($SZ-1)&~$MSB;
-+
-+@V=($A,$B,$C,$D,$E,$F,$G,$H)=map("\$$_",(1,2,3,7,24,25,30,31));
-+@X=map("\$$_",(8..23));
-+
-+$ctx=$a0;
-+$inp=$a1;
-+$len=$a2;	$Ktbl=$len;
-+
-+sub BODY_00_15 {
-+my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
-+my ($T1,$tmp0,$tmp1,$tmp2)=(@X[4],@X[5],@X[6],@X[7]);
-+
-+$code.=<<___ if ($i<15);
-+	${LD}l	@X[1],`($i+1)*$SZ+$MSB`($inp)
-+	${LD}r	@X[1],`($i+1)*$SZ+$LSB`($inp)
-+___
-+$code.=<<___	if (!$big_endian && $i<16 && $SZ==4);
-+	srl	$tmp0,@X[0],24		# byte swap($i)
-+	srl	$tmp1,@X[0],8
-+	andi	$tmp2,@X[0],0xFF00
-+	sll	@X[0],@X[0],24
-+	andi	$tmp1,0xFF00
-+	sll	$tmp2,$tmp2,8
-+	or	@X[0],$tmp0
-+	or	$tmp1,$tmp2
-+	or	@X[0],$tmp1
-+___
-+$code.=<<___	if (!$big_endian && $i<16 && $SZ==8);
-+	ori	$tmp0,$zero,0xFF
-+	dsll	$tmp2,$tmp0,32
-+	or	$tmp0,$tmp2		# 0x000000FF000000FF
-+	and	$tmp1,@X[0],$tmp0	# byte swap($i)
-+	dsrl	$tmp2,@X[0],24
-+	dsll	$tmp1,24
-+	and	$tmp2,$tmp0
-+	dsll	$tmp0,8			# 0x0000FF000000FF00
-+	or	$tmp1,$tmp2
-+	and	$tmp2,@X[0],$tmp0
-+	dsrl	@X[0],8
-+	dsll	$tmp2,8
-+	and	@X[0],$tmp0
-+	or	$tmp1,$tmp2
-+	or	@X[0],$tmp1
-+	dsrl	$tmp1,@X[0],32
-+	dsll	@X[0],32
-+	or	@X[0],$tmp1
-+___
-+$code.=<<___;
-+	$ADDU	$T1,$X[0],$h			# $i
-+	$SRL	$h,$e,@Sigma1[0]
-+	xor	$tmp2,$f,$g
-+	$SLL	$tmp1,$e,`$SZ*8-@Sigma1[2]`
-+	and	$tmp2,$e
-+	$SRL	$tmp0,$e,@Sigma1[1]
-+	xor	$h,$tmp1
-+	$SLL	$tmp1,$e,`$SZ*8-@Sigma1[1]`
-+	xor	$h,$tmp0
-+	$SRL	$tmp0,$e,@Sigma1[2]
-+	xor	$h,$tmp1
-+	$SLL	$tmp1,$e,`$SZ*8-@Sigma1[0]`
-+	xor	$h,$tmp0
-+	xor	$tmp2,$g			# Ch(e,f,g)
-+	xor	$tmp0,$tmp1,$h			# Sigma1(e)
-+
-+	$SRL	$h,$a,@Sigma0[0]
-+	$ADDU	$T1,$tmp2
-+	$LD	$tmp2,`$i*$SZ`($Ktbl)		# K[$i]
-+	$SLL	$tmp1,$a,`$SZ*8-@Sigma0[2]`
-+	$ADDU	$T1,$tmp0
-+	$SRL	$tmp0,$a,@Sigma0[1]
-+	xor	$h,$tmp1
-+	$SLL	$tmp1,$a,`$SZ*8-@Sigma0[1]`
-+	xor	$h,$tmp0
-+	$SRL	$tmp0,$a,@Sigma0[2]
-+	xor	$h,$tmp1
-+	$SLL	$tmp1,$a,`$SZ*8-@Sigma0[0]`
-+	xor	$h,$tmp0
-+	$ST	@X[0],`($i%16)*$SZ`($sp)	# offload to ring buffer
-+	xor	$h,$tmp1			# Sigma0(a)
-+
-+	or	$tmp0,$a,$b
-+	and	$tmp1,$a,$b
-+	and	$tmp0,$c
-+	or	$tmp1,$tmp0			# Maj(a,b,c)
-+	$ADDU	$T1,$tmp2			# +=K[$i]
-+	$ADDU	$h,$tmp1
-+
-+	$ADDU	$d,$T1
-+	$ADDU	$h,$T1
-+___
-+$code.=<<___ if ($i>=13);
-+	$LD	@X[3],`(($i+3)%16)*$SZ`($sp)	# prefetch from ring buffer
-+___
-+}
-+
-+sub BODY_16_XX {
-+my $i=@_[0];
-+my ($tmp0,$tmp1,$tmp2,$tmp3)=(@X[4],@X[5],@X[6],@X[7]);
-+
-+$code.=<<___;
-+	$SRL	$tmp2,@X[1],@sigma0[0]		# Xupdate($i)
-+	$ADDU	@X[0],@X[9]			# +=X[i+9]
-+	$SLL	$tmp1,@X[1],`$SZ*8-@sigma0[2]`
-+	$SRL	$tmp0,@X[1],@sigma0[1]
-+	xor	$tmp2,$tmp1
-+	$SLL	$tmp1,`@sigma0[2]-@sigma0[1]`
-+	xor	$tmp2,$tmp0
-+	$SRL	$tmp0,@X[1],@sigma0[2]
-+	xor	$tmp2,$tmp1
-+
-+	$SRL	$tmp3,@X[14],@sigma1[0]
-+	xor	$tmp2,$tmp0			# sigma0(X[i+1])
-+	$SLL	$tmp1,@X[14],`$SZ*8-@sigma1[2]`
-+	$ADDU	@X[0],$tmp2
-+	$SRL	$tmp0,@X[14],@sigma1[1]
-+	xor	$tmp3,$tmp1
-+	$SLL	$tmp1,`@sigma1[2]-@sigma1[1]`
-+	xor	$tmp3,$tmp0
-+	$SRL	$tmp0,@X[14],@sigma1[2]
-+	xor	$tmp3,$tmp1
-+
-+	xor	$tmp3,$tmp0			# sigma1(X[i+14])
-+	$ADDU	@X[0],$tmp3
-+___
-+	&BODY_00_15(@_);
-+}
-+
-+$FRAMESIZE=16*$SZ+16*$SZREG;
-+$SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc0fff008 : 0xc0ff0000;
-+
-+$code.=<<___;
-+#ifdef OPENSSL_FIPSCANISTER
-+# include <openssl/fipssyms.h>
-+#endif
-+
-+.text
-+.set	noat
-+#if !defined(__vxworks) || defined(__pic__)
-+.option	pic2
-+#endif
-+
-+.align	5
-+.globl	sha${label}_block_data_order
-+.ent	sha${label}_block_data_order
-+sha${label}_block_data_order:
-+	.frame	$sp,$FRAMESIZE,$ra
-+	.mask	$SAVED_REGS_MASK,-$SZREG
-+	.set	noreorder
-+___
-+$code.=<<___ if ($flavour =~ /o32/i);	# o32 PIC-ification
-+	.cpload	$pf
-+___
-+$code.=<<___;
-+	$PTR_SUB $sp,$FRAMESIZE
-+	$REG_S	$ra,$FRAMESIZE-1*$SZREG($sp)
-+	$REG_S	$fp,$FRAMESIZE-2*$SZREG($sp)
-+	$REG_S	$s11,$FRAMESIZE-3*$SZREG($sp)
-+	$REG_S	$s10,$FRAMESIZE-4*$SZREG($sp)
-+	$REG_S	$s9,$FRAMESIZE-5*$SZREG($sp)
-+	$REG_S	$s8,$FRAMESIZE-6*$SZREG($sp)
-+	$REG_S	$s7,$FRAMESIZE-7*$SZREG($sp)
-+	$REG_S	$s6,$FRAMESIZE-8*$SZREG($sp)
-+	$REG_S	$s5,$FRAMESIZE-9*$SZREG($sp)
-+	$REG_S	$s4,$FRAMESIZE-10*$SZREG($sp)
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi prologue
-+	$REG_S	$s3,$FRAMESIZE-11*$SZREG($sp)
-+	$REG_S	$s2,$FRAMESIZE-12*$SZREG($sp)
-+	$REG_S	$s1,$FRAMESIZE-13*$SZREG($sp)
-+	$REG_S	$s0,$FRAMESIZE-14*$SZREG($sp)
-+	$REG_S	$gp,$FRAMESIZE-15*$SZREG($sp)
-+___
-+$code.=<<___;
-+	$PTR_SLL @X[15],$len,`log(16*$SZ)/log(2)`
-+___
-+$code.=<<___ if ($flavour !~ /o32/i);	# non-o32 PIC-ification
-+	.cplocal	$Ktbl
-+	.cpsetup	$pf,$zero,sha${label}_block_data_order
-+___
-+$code.=<<___;
-+	.set	reorder
-+	la	$Ktbl,K${label}		# PIC-ified 'load address'
-+
-+	$LD	$A,0*$SZ($ctx)		# load context
-+	$LD	$B,1*$SZ($ctx)
-+	$LD	$C,2*$SZ($ctx)
-+	$LD	$D,3*$SZ($ctx)
-+	$LD	$E,4*$SZ($ctx)
-+	$LD	$F,5*$SZ($ctx)
-+	$LD	$G,6*$SZ($ctx)
-+	$LD	$H,7*$SZ($ctx)
-+
-+	$PTR_ADD @X[15],$inp		# pointer to the end of input
-+	$REG_S	@X[15],16*$SZ($sp)
-+	b	.Loop
-+
-+.align	5
-+.Loop:
-+	${LD}l	@X[0],$MSB($inp)
-+	${LD}r	@X[0],$LSB($inp)
-+___
-+for ($i=0;$i<16;$i++)
-+{ &BODY_00_15($i,@V); unshift(@V,pop(@V)); push(@X,shift(@X)); }
-+$code.=<<___;
-+	b	.L16_xx
-+.align	4
-+.L16_xx:
-+___
-+for (;$i<32;$i++)
-+{ &BODY_16_XX($i,@V); unshift(@V,pop(@V)); push(@X,shift(@X)); }
-+$code.=<<___;
-+	and	@X[6],0xfff
-+	li	@X[7],$lastK
-+	.set	noreorder
-+	bne	@X[6],@X[7],.L16_xx
-+	$PTR_ADD $Ktbl,16*$SZ		# Ktbl+=16
-+
-+	$REG_L	@X[15],16*$SZ($sp)	# restore pointer to the end of input
-+	$LD	@X[0],0*$SZ($ctx)
-+	$LD	@X[1],1*$SZ($ctx)
-+	$LD	@X[2],2*$SZ($ctx)
-+	$PTR_ADD $inp,16*$SZ
-+	$LD	@X[3],3*$SZ($ctx)
-+	$ADDU	$A,@X[0]
-+	$LD	@X[4],4*$SZ($ctx)
-+	$ADDU	$B,@X[1]
-+	$LD	@X[5],5*$SZ($ctx)
-+	$ADDU	$C,@X[2]
-+	$LD	@X[6],6*$SZ($ctx)
-+	$ADDU	$D,@X[3]
-+	$LD	@X[7],7*$SZ($ctx)
-+	$ADDU	$E,@X[4]
-+	$ST	$A,0*$SZ($ctx)
-+	$ADDU	$F,@X[5]
-+	$ST	$B,1*$SZ($ctx)
-+	$ADDU	$G,@X[6]
-+	$ST	$C,2*$SZ($ctx)
-+	$ADDU	$H,@X[7]
-+	$ST	$D,3*$SZ($ctx)
-+	$ST	$E,4*$SZ($ctx)
-+	$ST	$F,5*$SZ($ctx)
-+	$ST	$G,6*$SZ($ctx)
-+	$ST	$H,7*$SZ($ctx)
-+
-+	bnel	$inp,@X[15],.Loop
-+	$PTR_SUB $Ktbl,`($rounds-16)*$SZ`	# rewind $Ktbl
-+
-+	$REG_L	$ra,$FRAMESIZE-1*$SZREG($sp)
-+	$REG_L	$fp,$FRAMESIZE-2*$SZREG($sp)
-+	$REG_L	$s11,$FRAMESIZE-3*$SZREG($sp)
-+	$REG_L	$s10,$FRAMESIZE-4*$SZREG($sp)
-+	$REG_L	$s9,$FRAMESIZE-5*$SZREG($sp)
-+	$REG_L	$s8,$FRAMESIZE-6*$SZREG($sp)
-+	$REG_L	$s7,$FRAMESIZE-7*$SZREG($sp)
-+	$REG_L	$s6,$FRAMESIZE-8*$SZREG($sp)
-+	$REG_L	$s5,$FRAMESIZE-9*$SZREG($sp)
-+	$REG_L	$s4,$FRAMESIZE-10*$SZREG($sp)
-+___
-+$code.=<<___ if ($flavour =~ /nubi/i);
-+	$REG_L	$s3,$FRAMESIZE-11*$SZREG($sp)
-+	$REG_L	$s2,$FRAMESIZE-12*$SZREG($sp)
-+	$REG_L	$s1,$FRAMESIZE-13*$SZREG($sp)
-+	$REG_L	$s0,$FRAMESIZE-14*$SZREG($sp)
-+	$REG_L	$gp,$FRAMESIZE-15*$SZREG($sp)
-+___
-+$code.=<<___;
-+	jr	$ra
-+	$PTR_ADD $sp,$FRAMESIZE
-+.end	sha${label}_block_data_order
-+
-+.rdata
-+.align	5
-+K${label}:
-+___
-+if ($SZ==4) {
-+$code.=<<___;
-+	.word	0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
-+	.word	0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
-+	.word	0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
-+	.word	0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
-+	.word	0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
-+	.word	0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
-+	.word	0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
-+	.word	0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
-+	.word	0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
-+	.word	0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
-+	.word	0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
-+	.word	0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
-+	.word	0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
-+	.word	0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
-+	.word	0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
-+	.word	0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
-+___
-+} else {
-+$code.=<<___;
-+	.dword	0x428a2f98d728ae22, 0x7137449123ef65cd
-+	.dword	0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc
-+	.dword	0x3956c25bf348b538, 0x59f111f1b605d019
-+	.dword	0x923f82a4af194f9b, 0xab1c5ed5da6d8118
-+	.dword	0xd807aa98a3030242, 0x12835b0145706fbe
-+	.dword	0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2
-+	.dword	0x72be5d74f27b896f, 0x80deb1fe3b1696b1
-+	.dword	0x9bdc06a725c71235, 0xc19bf174cf692694
-+	.dword	0xe49b69c19ef14ad2, 0xefbe4786384f25e3
-+	.dword	0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65
-+	.dword	0x2de92c6f592b0275, 0x4a7484aa6ea6e483
-+	.dword	0x5cb0a9dcbd41fbd4, 0x76f988da831153b5
-+	.dword	0x983e5152ee66dfab, 0xa831c66d2db43210
-+	.dword	0xb00327c898fb213f, 0xbf597fc7beef0ee4
-+	.dword	0xc6e00bf33da88fc2, 0xd5a79147930aa725
-+	.dword	0x06ca6351e003826f, 0x142929670a0e6e70
-+	.dword	0x27b70a8546d22ffc, 0x2e1b21385c26c926
-+	.dword	0x4d2c6dfc5ac42aed, 0x53380d139d95b3df
-+	.dword	0x650a73548baf63de, 0x766a0abb3c77b2a8
-+	.dword	0x81c2c92e47edaee6, 0x92722c851482353b
-+	.dword	0xa2bfe8a14cf10364, 0xa81a664bbc423001
-+	.dword	0xc24b8b70d0f89791, 0xc76c51a30654be30
-+	.dword	0xd192e819d6ef5218, 0xd69906245565a910
-+	.dword	0xf40e35855771202a, 0x106aa07032bbd1b8
-+	.dword	0x19a4c116b8d2d0c8, 0x1e376c085141ab53
-+	.dword	0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8
-+	.dword	0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb
-+	.dword	0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3
-+	.dword	0x748f82ee5defb2fc, 0x78a5636f43172f60
-+	.dword	0x84c87814a1f0ab72, 0x8cc702081a6439ec
-+	.dword	0x90befffa23631e28, 0xa4506cebde82bde9
-+	.dword	0xbef9a3f7b2c67915, 0xc67178f2e372532b
-+	.dword	0xca273eceea26619c, 0xd186b8c721c0c207
-+	.dword	0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178
-+	.dword	0x06f067aa72176fba, 0x0a637dc5a2c898a6
-+	.dword	0x113f9804bef90dae, 0x1b710b35131c471b
-+	.dword	0x28db77f523047d84, 0x32caab7b40c72493
-+	.dword	0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c
-+	.dword	0x4cc5d4becb3e42b6, 0x597f299cfc657e2a
-+	.dword	0x5fcb6fab3ad6faec, 0x6c44198c4a475817
-+___
-+}
-+$code.=<<___;
-+.asciiz	"SHA${label} for MIPS, CRYPTOGAMS by <appro\@openssl.org>"
-+.align	5
-+
-+___
-+
-+$code =~ s/\`([^\`]*)\`/eval $1/gem;
-+print $code;
-+close STDOUT;
diff --git a/patches/npn.patch b/patches/npn.patch
deleted file mode 100644
index 46b7a7d..0000000
--- a/patches/npn.patch
+++ /dev/null
@@ -1,1293 +0,0 @@
---- openssl-1.0.0b.orig/apps/apps.c	2010-11-11 14:42:19.000000000 +0000
-+++ openssl-1.0.0b/apps/apps.c	2010-11-29 19:56:04.902465346 +0000
-@@ -3012,3 +3012,46 @@ int raw_write_stdout(const void *buf,int
- int raw_write_stdout(const void *buf,int siz)
- 	{	return write(fileno(stdout),buf,siz);	}
- #endif
-+
-+#if !defined(OPENSSL_NO_TLSEXT) && !defined(OPENSSL_NO_NEXTPROTONEG)
-+/* next_protos_parse parses a comma separated list of strings into a string
-+ * in a format suitable for passing to SSL_CTX_set_next_protos_advertised.
-+ *   outlen: (output) set to the length of the resulting buffer on success.
-+ *   in: a NUL termianted string like "abc,def,ghi"
-+ *
-+ *   returns: a malloced buffer or NULL on failure.
-+ */
-+unsigned char *next_protos_parse(unsigned short *outlen, const char *in)
-+	{
-+	size_t len;
-+	unsigned char *out;
-+	size_t i, start = 0;
-+
-+	len = strlen(in);
-+	if (len >= 65535)
-+		return NULL;
-+
-+	out = OPENSSL_malloc(strlen(in) + 1);
-+	if (!out)
-+		return NULL;
-+
-+	for (i = 0; i <= len; ++i)
-+		{
-+		if (i == len || in[i] == ',')
-+			{
-+			if (i - start > 255)
-+				{
-+				OPENSSL_free(out);
-+				return NULL;
-+				}
-+			out[start] = i - start;
-+			start = i + 1;
-+			}
-+		else
-+			out[i+1] = in[i];
-+		}
-+
-+	*outlen = len + 1;
-+	return out;
-+	}
-+#endif  /* !OPENSSL_NO_TLSEXT && !OPENSSL_NO_NEXTPROTONEG */
---- openssl-1.0.0b.orig/apps/apps.h	2009-10-31 13:34:19.000000000 +0000
-+++ openssl-1.0.0b/apps/apps.h	2010-11-29 19:56:04.902465346 +0000
-@@ -358,3 +358,7 @@ int raw_write_stdout(const void *,int);
- #define TM_STOP		1
- double app_tminterval (int stop,int usertime);
- #endif
-+
-+#ifndef OPENSSL_NO_NEXTPROTONEG
-+unsigned char *next_protos_parse(unsigned short *outlen, const char *in);
-+#endif
---- openssl-1.0.0b.orig/apps/s_client.c	2010-11-29 19:56:04.832465351 +0000
-+++ openssl-1.0.0b/apps/s_client.c	2010-11-29 19:56:04.902465346 +0000
-@@ -342,6 +342,9 @@ static void sc_usage(void)
- 	BIO_printf(bio_err," -tlsextdebug      - hex dump of all TLS extensions received\n");
- 	BIO_printf(bio_err," -status           - request certificate status from server\n");
- 	BIO_printf(bio_err," -no_ticket        - disable use of RFC4507bis session tickets\n");
-+# ifndef OPENSSL_NO_NEXTPROTONEG
-+	BIO_printf(bio_err," -nextprotoneg arg - enable NPN extension, considering named protocols supported (comma-separated list)\n");
-+# endif
- 	BIO_printf(bio_err," -cutthrough       - enable 1-RTT full-handshake for strong ciphers\n");
- #endif
- 	BIO_printf(bio_err," -legacy_renegotiation - enable use of legacy renegotiation (dangerous)\n");
-@@ -367,6 +370,40 @@ static int MS_CALLBACK ssl_servername_cb
- 	
- 	return SSL_TLSEXT_ERR_OK;
- 	}
-+
-+# ifndef OPENSSL_NO_NEXTPROTONEG
-+/* This the context that we pass to next_proto_cb */
-+typedef struct tlsextnextprotoctx_st {
-+	unsigned char *data;
-+	unsigned short len;
-+	int status;
-+} tlsextnextprotoctx;
-+
-+static tlsextnextprotoctx next_proto;
-+
-+static int next_proto_cb(SSL *s, unsigned char **out, unsigned char *outlen, const unsigned char *in, unsigned int inlen, void *arg)
-+	{
-+	tlsextnextprotoctx *ctx = arg;
-+
-+	if (!c_quiet)
-+		{
-+		/* We can assume that |in| is syntactically valid. */
-+		unsigned i;
-+		BIO_printf(bio_c_out, "Protocols advertised by server: ");
-+		for (i = 0; i < inlen; )
-+			{
-+			if (i)
-+				BIO_write(bio_c_out, ", ", 2);
-+			BIO_write(bio_c_out, &in[i + 1], in[i]);
-+			i += in[i] + 1;
-+			}
-+		BIO_write(bio_c_out, "\n", 1);
-+		}
-+
-+	ctx->status = SSL_select_next_proto(out, outlen, in, inlen, ctx->data, ctx->len);
-+	return SSL_TLSEXT_ERR_OK;
-+	}
-+# endif  /* ndef OPENSSL_NO_NEXTPROTONEG */
- #endif
- 
- enum
-@@ -431,6 +468,9 @@ int MAIN(int argc, char **argv)
- 	char *servername = NULL; 
-         tlsextctx tlsextcbp = 
-         {NULL,0};
-+# ifndef OPENSSL_NO_NEXTPROTONEG
-+	const char *next_proto_neg_in = NULL;
-+# endif
- #endif
- 	char *sess_in = NULL;
- 	char *sess_out = NULL;
-@@ -658,6 +698,13 @@ int MAIN(int argc, char **argv)
- #ifndef OPENSSL_NO_TLSEXT
- 		else if	(strcmp(*argv,"-no_ticket") == 0)
- 			{ off|=SSL_OP_NO_TICKET; }
-+# ifndef OPENSSL_NO_NEXTPROTONEG
-+		else if (strcmp(*argv,"-nextprotoneg") == 0)
-+			{
-+			if (--argc < 1) goto bad;
-+			next_proto_neg_in = *(++argv);
-+			}
-+# endif
- #endif
- 		else if (strcmp(*argv,"-cutthrough") == 0)
- 			cutthrough=1;
-@@ -766,6 +813,21 @@ bad:
- 	OpenSSL_add_ssl_algorithms();
- 	SSL_load_error_strings();
- 
-+#if !defined(OPENSSL_NO_TLSEXT) && !defined(OPENSSL_NO_NEXTPROTONEG)
-+	next_proto.status = -1;
-+	if (next_proto_neg_in)
-+		{
-+		next_proto.data = next_protos_parse(&next_proto.len, next_proto_neg_in);
-+		if (next_proto.data == NULL)
-+			{
-+			BIO_printf(bio_err, "Error parsing -nextprotoneg argument\n");
-+			goto end;
-+			}
-+		}
-+	else
-+		next_proto.data = NULL;
-+#endif
-+
- #ifndef OPENSSL_NO_ENGINE
-         e = setup_engine(bio_err, engine_id, 1);
- 	if (ssl_client_engine_id)
-@@ -896,6 +958,11 @@ bad:
- 		SSL_CTX_set_mode(ctx, ssl_mode);
- 		}
- 
-+#if !defined(OPENSSL_NO_TLSEXT) && !defined(OPENSSL_NO_NEXTPROTONEG)
-+	if (next_proto.data)
-+		SSL_CTX_set_next_proto_select_cb(ctx, next_proto_cb, &next_proto);
-+#endif
-+
- 	if (state) SSL_CTX_set_info_callback(ctx,apps_ssl_info_callback);
- 	if (cipher != NULL)
- 		if(!SSL_CTX_set_cipher_list(ctx,cipher)) {
-@@ -1755,6 +1822,18 @@ static void print_stuff(BIO *bio, SSL *s
- 	BIO_printf(bio,"Expansion: %s\n",
- 		expansion ? SSL_COMP_get_name(expansion) : "NONE");
- #endif
-+
-+#if !defined(OPENSSL_NO_TLSEXT) && !defined(OPENSSL_NO_NEXTPROTONEG)
-+	if (next_proto.status != -1) {
-+		const unsigned char *proto;
-+		unsigned int proto_len;
-+		SSL_get0_next_proto_negotiated(s, &proto, &proto_len);
-+		BIO_printf(bio, "Next protocol: (%d) ", next_proto.status);
-+		BIO_write(bio, proto, proto_len);
-+		BIO_write(bio, "\n", 1);
-+	}
-+#endif
-+
- 	SSL_SESSION_print(bio,SSL_get_session(s));
- 	BIO_printf(bio,"---\n");
- 	if (peer != NULL)
---- openssl-1.0.0b.orig/apps/s_server.c	2010-06-15 17:25:02.000000000 +0000
-+++ openssl-1.0.0b/apps/s_server.c	2010-11-29 19:56:04.902465346 +0000
-@@ -492,6 +492,9 @@ static void sv_usage(void)
- 	BIO_printf(bio_err," -tlsextdebug  - hex dump of all TLS extensions received\n");
- 	BIO_printf(bio_err," -no_ticket    - disable use of RFC4507bis session tickets\n");
- 	BIO_printf(bio_err," -legacy_renegotiation - enable use of legacy renegotiation (dangerous)\n");
-+# ifndef OPENSSL_NO_NEXTPROTONEG
-+	BIO_printf(bio_err," -nextprotoneg arg - set the advertised protocols for the NPN extension (comma-separated list)\n");
-+# endif
- #endif
- 	}
- 
-@@ -826,6 +829,24 @@ BIO_printf(err, "cert_status: received %
- 	ret = SSL_TLSEXT_ERR_ALERT_FATAL;
- 	goto done;
- 	}
-+
-+# ifndef OPENSSL_NO_NEXTPROTONEG
-+/* This is the context that we pass to next_proto_cb */
-+typedef struct tlsextnextprotoctx_st {
-+	unsigned char *data;
-+	unsigned int len;
-+} tlsextnextprotoctx;
-+
-+static int next_proto_cb(SSL *s, const unsigned char **data, unsigned int *len, void *arg)
-+	{
-+	tlsextnextprotoctx *next_proto = arg;
-+
-+	*data = next_proto->data;
-+	*len = next_proto->len;
-+
-+	return SSL_TLSEXT_ERR_OK;
-+	}
-+# endif  /* ndef OPENSSL_NO_NPN */
- #endif
- 
- int MAIN(int, char **);
-@@ -867,6 +888,10 @@ int MAIN(int argc, char *argv[])
- #endif
- #ifndef OPENSSL_NO_TLSEXT
-         tlsextctx tlsextcbp = {NULL, NULL, SSL_TLSEXT_ERR_ALERT_WARNING};
-+# ifndef OPENSSL_NO_NEXTPROTONEG
-+	const char *next_proto_neg_in = NULL;
-+	tlsextnextprotoctx next_proto;
-+# endif
- #endif
- #ifndef OPENSSL_NO_PSK
- 	/* by default do not send a PSK identity hint */
-@@ -1191,7 +1216,13 @@ int MAIN(int argc, char *argv[])
- 			if (--argc < 1) goto bad;
- 			s_key_file2= *(++argv);
- 			}
--			
-+# ifndef OPENSSL_NO_NEXTPROTONEG
-+		else if	(strcmp(*argv,"-nextprotoneg") == 0)
-+			{
-+			if (--argc < 1) goto bad;
-+			next_proto_neg_in = *(++argv);
-+			}
-+# endif
- #endif
- #if !defined(OPENSSL_NO_JPAKE) && !defined(OPENSSL_NO_PSK)
- 		else if (strcmp(*argv,"-jpake") == 0)
-@@ -1476,6 +1507,11 @@ bad:
- 		if (vpm)
- 			SSL_CTX_set1_param(ctx2, vpm);
- 		}
-+
-+# ifndef OPENSSL_NO_NEXTPROTONEG
-+	if (next_proto.data)
-+		SSL_CTX_set_next_protos_advertised_cb(ctx, next_proto_cb, &next_proto);
-+# endif
- #endif 
- 
- #ifndef OPENSSL_NO_DH
-@@ -1617,6 +1653,21 @@ bad:
- 					goto end;
- 					}
- 				}
-+# ifndef OPENSSL_NO_NEXTPROTONEG
-+		if (next_proto_neg_in)
-+			{
-+			unsigned short len;
-+			next_proto.data = next_protos_parse(&len,
-+				next_proto_neg_in);
-+			if (next_proto.data == NULL)
-+				goto end;
-+			next_proto.len = len;
-+			}
-+		else
-+			{
-+			next_proto.data = NULL;
-+			}
-+# endif
- #endif
- 		RSA_free(rsa);
- 		BIO_printf(bio_s_out,"\n");
-@@ -2159,6 +2210,10 @@ static int init_ssl_connection(SSL *con)
- 	X509 *peer;
- 	long verify_error;
- 	MS_STATIC char buf[BUFSIZ];
-+#if !defined(OPENSSL_NO_TLSEXT) && !defined(OPENSSL_NO_NEXTPROTONEG)
-+	const unsigned char *next_proto_neg;
-+	unsigned next_proto_neg_len;
-+#endif
- 
- 	if ((i=SSL_accept(con)) <= 0)
- 		{
-@@ -2198,6 +2253,15 @@ static int init_ssl_connection(SSL *con)
- 		BIO_printf(bio_s_out,"Shared ciphers:%s\n",buf);
- 	str=SSL_CIPHER_get_name(SSL_get_current_cipher(con));
- 	BIO_printf(bio_s_out,"CIPHER is %s\n",(str != NULL)?str:"(NONE)");
-+#if !defined(OPENSSL_NO_TLSEXT) && !defined(OPENSSL_NO_NEXTPROTONEG)
-+	SSL_get0_next_proto_negotiated(con, &next_proto_neg, &next_proto_neg_len);
-+	if (next_proto_neg)
-+		{
-+		BIO_printf(bio_s_out,"NEXTPROTO is ");
-+		BIO_write(bio_s_out, next_proto_neg, next_proto_neg_len);
-+		BIO_printf(bio_s_out, "\n");
-+		}
-+#endif
- 	if (con->hit) BIO_printf(bio_s_out,"Reused session-id\n");
- 	if (SSL_ctrl(con,SSL_CTRL_GET_FLAGS,0,NULL) &
- 		TLS1_FLAGS_TLS_PADDING_BUG)
---- openssl-1.0.0b.orig/include/openssl/ssl.h	2010-11-29 19:56:04.846517045 +0000
-+++ openssl-1.0.0b/include/openssl/ssl.h	2010-11-29 19:56:04.965928855 +0000
-@@ -857,6 +857,25 @@ struct ssl_ctx_st
- 	/* draft-rescorla-tls-opaque-prf-input-00.txt information */
- 	int (*tlsext_opaque_prf_input_callback)(SSL *, void *peerinput, size_t len, void *arg);
- 	void *tlsext_opaque_prf_input_callback_arg;
-+
-+# ifndef OPENSSL_NO_NEXTPROTONEG
-+	/* Next protocol negotiation information */
-+	/* (for experimental NPN extension). */
-+
-+	/* For a server, this contains a callback function by which the set of
-+	 * advertised protocols can be provided. */
-+	int (*next_protos_advertised_cb)(SSL *s, const unsigned char **buf,
-+			                 unsigned int *len, void *arg);
-+	void *next_protos_advertised_cb_arg;
-+	/* For a client, this contains a callback function that selects the
-+	 * next protocol from the list provided by the server. */
-+	int (*next_proto_select_cb)(SSL *s, unsigned char **out,
-+				    unsigned char *outlen,
-+				    const unsigned char *in,
-+				    unsigned int inlen,
-+				    void *arg);
-+	void *next_proto_select_cb_arg;
-+# endif
- #endif
- 
- #ifndef OPENSSL_NO_PSK
-@@ -928,6 +947,30 @@ int SSL_CTX_set_client_cert_engine(SSL_C
- #endif
- void SSL_CTX_set_cookie_generate_cb(SSL_CTX *ctx, int (*app_gen_cookie_cb)(SSL *ssl, unsigned char *cookie, unsigned int *cookie_len));
- void SSL_CTX_set_cookie_verify_cb(SSL_CTX *ctx, int (*app_verify_cookie_cb)(SSL *ssl, unsigned char *cookie, unsigned int cookie_len));
-+#ifndef OPENSSL_NO_NEXTPROTONEG
-+void SSL_CTX_set_next_protos_advertised_cb(SSL_CTX *s,
-+					   int (*cb) (SSL *ssl,
-+						      const unsigned char **out,
-+						      unsigned int *outlen,
-+						      void *arg), void *arg);
-+void SSL_CTX_set_next_proto_select_cb(SSL_CTX *s,
-+				      int (*cb) (SSL *ssl, unsigned char **out,
-+						 unsigned char *outlen,
-+						 const unsigned char *in,
-+						 unsigned int inlen, void *arg),
-+				      void *arg);
-+
-+int SSL_select_next_proto(unsigned char **out, unsigned char *outlen,
-+			  const unsigned char *in, unsigned int inlen,
-+			  const unsigned char *client, unsigned int client_len);
-+void SSL_get0_next_proto_negotiated(const SSL *s, const unsigned char **data,
-+				    unsigned *len);
-+
-+#define OPENSSL_NPN_UNSUPPORTED	0
-+#define OPENSSL_NPN_NEGOTIATED	1
-+#define OPENSSL_NPN_NO_OVERLAP	2
-+
-+#endif
- 
- #ifndef OPENSSL_NO_PSK
- /* the maximum length of the buffer given to callbacks containing the
-@@ -1187,6 +1230,19 @@ struct ssl_st
- 	void *tls_session_secret_cb_arg;
- 
- 	SSL_CTX * initial_ctx; /* initial ctx, used to store sessions */
-+
-+#ifndef OPENSSL_NO_NEXTPROTONEG
-+	/* Next protocol negotiation. For the client, this is the protocol that
-+	 * we sent in NextProtocol and is set when handling ServerHello
-+	 * extensions.
-+	 *
-+	 * For a server, this is the client's selected_protocol from
-+	 * NextProtocol and is set when handling the NextProtocol message,
-+	 * before the Finished message. */
-+	unsigned char *next_proto_negotiated;
-+	unsigned char next_proto_negotiated_len;
-+#endif
-+
- #define session_ctx initial_ctx
- #else
- #define session_ctx ctx
-@@ -1919,6 +1975,7 @@ void ERR_load_SSL_strings(void);
- #define SSL_F_SSL3_GET_KEY_EXCHANGE			 141
- #define SSL_F_SSL3_GET_MESSAGE				 142
- #define SSL_F_SSL3_GET_NEW_SESSION_TICKET		 283
-+#define SSL_F_SSL3_GET_NEXT_PROTO			 304
- #define SSL_F_SSL3_GET_RECORD				 143
- #define SSL_F_SSL3_GET_SERVER_CERTIFICATE		 144
- #define SSL_F_SSL3_GET_SERVER_DONE			 145
-@@ -2117,6 +2174,8 @@ void ERR_load_SSL_strings(void);
- #define SSL_R_EXCESSIVE_MESSAGE_SIZE			 152
- #define SSL_R_EXTRA_DATA_IN_MESSAGE			 153
- #define SSL_R_GOT_A_FIN_BEFORE_A_CCS			 154
-+#define SSL_R_GOT_NEXT_PROTO_BEFORE_A_CCS		 346
-+#define SSL_R_GOT_NEXT_PROTO_WITHOUT_EXTENSION		 347
- #define SSL_R_HTTPS_PROXY_REQUEST			 155
- #define SSL_R_HTTP_REQUEST				 156
- #define SSL_R_ILLEGAL_PADDING				 283
---- openssl-1.0.0b.orig/include/openssl/ssl3.h	2010-11-29 19:56:04.832465351 +0000
-+++ openssl-1.0.0b/include/openssl/ssl3.h	2010-11-29 19:56:04.965928855 +0000
-@@ -465,6 +465,12 @@ typedef struct ssl3_state_st
- 	void *server_opaque_prf_input;
- 	size_t server_opaque_prf_input_len;
- 
-+#ifndef OPENSSL_NO_NEXTPROTONEG
-+	/* Set if we saw the Next Protocol Negotiation extension from
-+	   our peer. */
-+	int next_proto_neg_seen;
-+#endif
-+
- 	struct	{
- 		/* actually only needs to be 16+20 */
- 		unsigned char cert_verify_md[EVP_MAX_MD_SIZE*2];
-@@ -557,6 +563,10 @@ typedef struct ssl3_state_st
- #define SSL3_ST_CW_CERT_VRFY_B		(0x191|SSL_ST_CONNECT)
- #define SSL3_ST_CW_CHANGE_A		(0x1A0|SSL_ST_CONNECT)
- #define SSL3_ST_CW_CHANGE_B		(0x1A1|SSL_ST_CONNECT)
-+#ifndef OPENSSL_NO_NEXTPROTONEG
-+#define SSL3_ST_CW_NEXT_PROTO_A		(0x200|SSL_ST_CONNECT)
-+#define SSL3_ST_CW_NEXT_PROTO_B		(0x201|SSL_ST_CONNECT)
-+#endif
- #define SSL3_ST_CW_FINISHED_A		(0x1B0|SSL_ST_CONNECT)
- #define SSL3_ST_CW_FINISHED_B		(0x1B1|SSL_ST_CONNECT)
- /* read from server */
-@@ -602,6 +612,10 @@ typedef struct ssl3_state_st
- #define SSL3_ST_SR_CERT_VRFY_B		(0x1A1|SSL_ST_ACCEPT)
- #define SSL3_ST_SR_CHANGE_A		(0x1B0|SSL_ST_ACCEPT)
- #define SSL3_ST_SR_CHANGE_B		(0x1B1|SSL_ST_ACCEPT)
-+#ifndef OPENSSL_NO_NEXTPROTONEG
-+#define SSL3_ST_SR_NEXT_PROTO_A		(0x210|SSL_ST_ACCEPT)
-+#define SSL3_ST_SR_NEXT_PROTO_B		(0x211|SSL_ST_ACCEPT)
-+#endif
- #define SSL3_ST_SR_FINISHED_A		(0x1C0|SSL_ST_ACCEPT)
- #define SSL3_ST_SR_FINISHED_B		(0x1C1|SSL_ST_ACCEPT)
- /* write to client */
-@@ -626,6 +640,9 @@ typedef struct ssl3_state_st
- #define SSL3_MT_CLIENT_KEY_EXCHANGE		16
- #define SSL3_MT_FINISHED			20
- #define SSL3_MT_CERTIFICATE_STATUS		22
-+#ifndef OPENSSL_NO_NEXTPROTONEG
-+#define SSL3_MT_NEXT_PROTO			67
-+#endif
- #define DTLS1_MT_HELLO_VERIFY_REQUEST    3
- 
- 
---- openssl-1.0.0b.orig/include/openssl/tls1.h	2009-11-11 14:51:29.000000000 +0000
-+++ openssl-1.0.0b/include/openssl/tls1.h	2010-11-29 19:56:04.965928855 +0000
-@@ -204,6 +204,11 @@ extern "C" {
- /* Temporary extension type */
- #define TLSEXT_TYPE_renegotiate                 0xff01
- 
-+#ifndef OPENSSL_NO_NEXTPROTONEG
-+/* This is not an IANA defined extension number */
-+#define TLSEXT_TYPE_next_proto_neg		13172
-+#endif
-+
- /* NameType value from RFC 3546 */
- #define TLSEXT_NAMETYPE_host_name 0
- /* status request value from RFC 3546 */
---- openssl-1.0.0b.orig/ssl/s3_both.c	2010-11-29 19:56:04.846517045 +0000
-+++ openssl-1.0.0b/ssl/s3_both.c	2010-11-29 19:56:04.965928855 +0000
-@@ -202,15 +202,40 @@ int ssl3_send_finished(SSL *s, int a, in
- 	return(ssl3_do_write(s,SSL3_RT_HANDSHAKE));
- 	}
- 
-+#ifndef OPENSSL_NO_NEXTPROTONEG
-+/* ssl3_take_mac calculates the Finished MAC for the handshakes messages seen to far. */
-+static void ssl3_take_mac(SSL *s)
-+	{
-+	const char *sender;
-+	int slen;
-+
-+	if (s->state & SSL_ST_CONNECT)
-+		{
-+		sender=s->method->ssl3_enc->server_finished_label;
-+		slen=s->method->ssl3_enc->server_finished_label_len;
-+		}
-+	else
-+		{
-+		sender=s->method->ssl3_enc->client_finished_label;
-+		slen=s->method->ssl3_enc->client_finished_label_len;
-+		}
-+
-+	s->s3->tmp.peer_finish_md_len = s->method->ssl3_enc->final_finish_mac(s,
-+		sender,slen,s->s3->tmp.peer_finish_md);
-+	}
-+#endif
-+
- int ssl3_get_finished(SSL *s, int a, int b)
- 	{
- 	int al,i,ok;
- 	long n;
- 	unsigned char *p;
- 
-+#ifdef OPENSSL_NO_NEXTPROTONEG
- 	/* the mac has already been generated when we received the
- 	 * change cipher spec message and is in s->s3->tmp.peer_finish_md
- 	 */ 
-+#endif
- 
- 	n=s->method->ssl_get_message(s,
- 		a,
-@@ -521,6 +546,15 @@ long ssl3_get_message(SSL *s, int st1, i
- 		s->init_num += i;
- 		n -= i;
- 		}
-+
-+#ifndef OPENSSL_NO_NEXTPROTONEG
-+	/* If receiving Finished, record MAC of prior handshake messages for
-+	 * Finished verification. */
-+	if (*s->init_buf->data == SSL3_MT_FINISHED)
-+		ssl3_take_mac(s);
-+#endif
-+
-+	/* Feed this message into MAC computation. */
- 	ssl3_finish_mac(s, (unsigned char *)s->init_buf->data, s->init_num + 4);
- 	if (s->msg_callback)
- 		s->msg_callback(0, s->version, SSL3_RT_HANDSHAKE, s->init_buf->data, (size_t)s->init_num + 4, s, s->msg_callback_arg);
---- openssl-1.0.0b.orig/ssl/s3_clnt.c	2010-11-29 19:56:04.846517045 +0000
-+++ openssl-1.0.0b/ssl/s3_clnt.c	2010-11-29 19:56:04.965928855 +0000
-@@ -435,7 +435,16 @@ int ssl3_connect(SSL *s)
- 			ret=ssl3_send_change_cipher_spec(s,
- 				SSL3_ST_CW_CHANGE_A,SSL3_ST_CW_CHANGE_B);
- 			if (ret <= 0) goto end;
-+
-+#if defined(OPENSSL_NO_TLSEXT) || defined(OPENSSL_NO_NEXTPROTONEG)
- 			s->state=SSL3_ST_CW_FINISHED_A;
-+#else
-+			if (s->next_proto_negotiated)
-+				s->state=SSL3_ST_CW_NEXT_PROTO_A;
-+			else
-+				s->state=SSL3_ST_CW_FINISHED_A;
-+#endif
-+
- 			s->init_num=0;
- 
- 			s->session->cipher=s->s3->tmp.new_cipher;
-@@ -463,6 +472,15 @@ int ssl3_connect(SSL *s)
- 
- 			break;
- 
-+#if !defined(OPENSSL_NO_TLSEXT) && !defined(OPENSSL_NO_NEXTPROTONEG)
-+		case SSL3_ST_CW_NEXT_PROTO_A:
-+		case SSL3_ST_CW_NEXT_PROTO_B:
-+			ret=ssl3_send_next_proto(s);
-+			if (ret <= 0) goto end;
-+			s->state=SSL3_ST_CW_FINISHED_A;
-+			break;
-+#endif
-+
- 		case SSL3_ST_CW_FINISHED_A:
- 		case SSL3_ST_CW_FINISHED_B:
- 			ret=ssl3_send_finished(s,
-@@ -3060,6 +3078,32 @@ err:
-  */
- 
- #ifndef OPENSSL_NO_TLSEXT
-+# ifndef OPENSSL_NO_NEXTPROTONEG
-+int ssl3_send_next_proto(SSL *s)
-+	{
-+	unsigned int len, padding_len;
-+	unsigned char *d;
-+
-+	if (s->state == SSL3_ST_CW_NEXT_PROTO_A)
-+		{
-+		len = s->next_proto_negotiated_len;
-+		padding_len = 32 - ((len + 2) % 32);
-+		d = (unsigned char *)s->init_buf->data;
-+		d[4] = len;
-+		memcpy(d + 5, s->next_proto_negotiated, len);
-+		d[5 + len] = padding_len;
-+		memset(d + 6 + len, 0, padding_len);
-+		*(d++)=SSL3_MT_NEXT_PROTO;
-+		l2n3(2 + len + padding_len, d);
-+		s->state = SSL3_ST_CW_NEXT_PROTO_B;
-+		s->init_num = 4 + 2 + len + padding_len;
-+		s->init_off = 0;
-+		}
-+
-+	return ssl3_do_write(s, SSL3_RT_HANDSHAKE);
-+	}
-+# endif
-+
- int ssl3_check_finished(SSL *s)
- 	{
- 	int ok;
---- openssl-1.0.0b.orig/ssl/s3_lib.c	2010-11-29 19:56:04.832465351 +0000
-+++ openssl-1.0.0b/ssl/s3_lib.c	2010-11-29 19:56:04.965928855 +0000
-@@ -2230,6 +2230,15 @@ void ssl3_clear(SSL *s)
- 	s->s3->num_renegotiations=0;
- 	s->s3->in_read_app_data=0;
- 	s->version=SSL3_VERSION;
-+
-+#if !defined(OPENSSL_NO_TLSEXT) && !defined(OPENSSL_NO_NEXTPROTONEG)
-+	if (s->next_proto_negotiated)
-+		{
-+		OPENSSL_free(s->next_proto_negotiated);
-+		s->next_proto_negotiated = NULL;
-+		s->next_proto_negotiated_len = 0;
-+		}
-+#endif
- 	}
- 
- long ssl3_ctrl(SSL *s, int cmd, long larg, void *parg)
---- openssl-1.0.0b.orig/ssl/s3_pkt.c	2010-11-29 19:56:04.832465351 +0000
-+++ openssl-1.0.0b/ssl/s3_pkt.c	2010-11-29 19:56:04.965928855 +0000
-@@ -1394,8 +1394,10 @@ err:
- int ssl3_do_change_cipher_spec(SSL *s)
- 	{
- 	int i;
-+#ifdef OPENSSL_NO_NEXTPROTONEG
- 	const char *sender;
- 	int slen;
-+#endif
- 
- 	if (s->state & SSL_ST_ACCEPT)
- 		i=SSL3_CHANGE_CIPHER_SERVER_READ;
-@@ -1418,6 +1420,7 @@ int ssl3_do_change_cipher_spec(SSL *s)
- 	if (!s->method->ssl3_enc->change_cipher_state(s,i))
- 		return(0);
- 
-+#ifdef OPENSSL_NO_NEXTPROTONEG
- 	/* we have to record the message digest at
- 	 * this point so we can get it before we read
- 	 * the finished message */
-@@ -1434,6 +1437,7 @@ int ssl3_do_change_cipher_spec(SSL *s)
- 
- 	s->s3->tmp.peer_finish_md_len = s->method->ssl3_enc->final_finish_mac(s,
- 		sender,slen,s->s3->tmp.peer_finish_md);
-+#endif
- 
- 	return(1);
- 	}
---- openssl-1.0.0b.orig/ssl/s3_srvr.c	2010-11-29 19:56:04.846517045 +0000
-+++ openssl-1.0.0b/ssl/s3_srvr.c	2010-11-29 19:56:04.965928855 +0000
-@@ -538,7 +538,14 @@ int ssl3_accept(SSL *s)
- 				 * the client uses its key from the certificate
- 				 * for key exchange.
- 				 */
-+#if defined(OPENSSL_NO_TLSEXT) || defined(OPENSSL_NO_NEXTPROTONEG)
- 				s->state=SSL3_ST_SR_FINISHED_A;
-+#else
-+				if (s->s3->next_proto_neg_seen)
-+					s->state=SSL3_ST_SR_NEXT_PROTO_A;
-+				else
-+					s->state=SSL3_ST_SR_FINISHED_A;
-+#endif
- 				s->init_num = 0;
- 				}
- 			else
-@@ -581,10 +588,27 @@ int ssl3_accept(SSL *s)
- 			ret=ssl3_get_cert_verify(s);
- 			if (ret <= 0) goto end;
- 
-+#if defined(OPENSSL_NO_TLSEXT) || defined(OPENSSL_NO_NEXTPROTONEG)
- 			s->state=SSL3_ST_SR_FINISHED_A;
-+#else
-+			if (s->s3->next_proto_neg_seen)
-+				s->state=SSL3_ST_SR_NEXT_PROTO_A;
-+			else
-+				s->state=SSL3_ST_SR_FINISHED_A;
-+#endif
- 			s->init_num=0;
- 			break;
- 
-+#if !defined(OPENSSL_NO_TLSEXT) && !defined(OPENSSL_NO_NEXTPROTONEG)
-+		case SSL3_ST_SR_NEXT_PROTO_A:
-+		case SSL3_ST_SR_NEXT_PROTO_B:
-+			ret=ssl3_get_next_proto(s);
-+			if (ret <= 0) goto end;
-+			s->init_num = 0;
-+			s->state=SSL3_ST_SR_FINISHED_A;
-+			break;
-+#endif
-+
- 		case SSL3_ST_SR_FINISHED_A:
- 		case SSL3_ST_SR_FINISHED_B:
- 			ret=ssl3_get_finished(s,SSL3_ST_SR_FINISHED_A,
-@@ -655,7 +679,16 @@ int ssl3_accept(SSL *s)
- 			if (ret <= 0) goto end;
- 			s->state=SSL3_ST_SW_FLUSH;
- 			if (s->hit)
-+				{
-+#if defined(OPENSSL_NO_TLSEXT) || defined(OPENSSL_NO_NEXTPROTONEG)
- 				s->s3->tmp.next_state=SSL3_ST_SR_FINISHED_A;
-+#else
-+				if (s->s3->next_proto_neg_seen)
-+					s->s3->tmp.next_state=SSL3_ST_SR_NEXT_PROTO_A;
-+				else
-+					s->s3->tmp.next_state=SSL3_ST_SR_FINISHED_A;
-+#endif
-+				}
- 			else
- 				s->s3->tmp.next_state=SSL_ST_OK;
- 			s->init_num=0;
-@@ -3196,4 +3229,72 @@ int ssl3_send_cert_status(SSL *s)
- 	/* SSL3_ST_SW_CERT_STATUS_B */
- 	return(ssl3_do_write(s,SSL3_RT_HANDSHAKE));
- 	}
-+
-+# ifndef OPENSSL_NO_NPN
-+/* ssl3_get_next_proto reads a Next Protocol Negotiation handshake message. It
-+ * sets the next_proto member in s if found */
-+int ssl3_get_next_proto(SSL *s)
-+	{
-+	int ok;
-+	unsigned proto_len, padding_len;
-+	long n;
-+	const unsigned char *p;
-+
-+	/* Clients cannot send a NextProtocol message if we didn't see the
-+	 * extension in their ClientHello */
-+	if (!s->s3->next_proto_neg_seen)
-+		{
-+		SSLerr(SSL_F_SSL3_GET_NEXT_PROTO,SSL_R_GOT_NEXT_PROTO_WITHOUT_EXTENSION);
-+		return -1;
-+		}
-+
-+	n=s->method->ssl_get_message(s,
-+		SSL3_ST_SR_NEXT_PROTO_A,
-+		SSL3_ST_SR_NEXT_PROTO_B,
-+		SSL3_MT_NEXT_PROTO,
-+		514,  /* See the payload format below */
-+		&ok);
-+
-+	if (!ok)
-+		return((int)n);
-+
-+	/* s->state doesn't reflect whether ChangeCipherSpec has been received
-+	 * in this handshake, but s->s3->change_cipher_spec does (will be reset
-+	 * by ssl3_get_finished). */
-+	if (!s->s3->change_cipher_spec)
-+		{
-+		SSLerr(SSL_F_SSL3_GET_NEXT_PROTO,SSL_R_GOT_NEXT_PROTO_BEFORE_A_CCS);
-+		return -1;
-+		}
-+
-+	if (n < 2)
-+		return 0;  /* The body must be > 1 bytes long */
-+
-+	p=(unsigned char *)s->init_msg;
-+
-+	/* The payload looks like:
-+	 *   uint8 proto_len;
-+	 *   uint8 proto[proto_len];
-+	 *   uint8 padding_len;
-+	 *   uint8 padding[padding_len];
-+	 */
-+	proto_len = p[0];
-+	if (proto_len + 2 > s->init_num)
-+		return 0;
-+	padding_len = p[proto_len + 1];
-+	if (proto_len + padding_len + 2 != s->init_num)
-+		return 0;
-+
-+	s->next_proto_negotiated = OPENSSL_malloc(proto_len);
-+	if (!s->next_proto_negotiated)
-+		{
-+		SSLerr(SSL_F_SSL3_GET_NEXT_PROTO,ERR_R_MALLOC_FAILURE);
-+		return 0;
-+		}
-+	memcpy(s->next_proto_negotiated, p + 1, proto_len);
-+	s->next_proto_negotiated_len = proto_len;
-+
-+	return 1;
-+	}
-+# endif
- #endif
---- openssl-1.0.0b.orig/ssl/ssl.h	2010-11-29 19:56:04.846517045 +0000
-+++ openssl-1.0.0b/ssl/ssl.h	2010-11-29 19:56:04.965928855 +0000
-@@ -857,6 +857,25 @@ struct ssl_ctx_st
- 	/* draft-rescorla-tls-opaque-prf-input-00.txt information */
- 	int (*tlsext_opaque_prf_input_callback)(SSL *, void *peerinput, size_t len, void *arg);
- 	void *tlsext_opaque_prf_input_callback_arg;
-+
-+# ifndef OPENSSL_NO_NEXTPROTONEG
-+	/* Next protocol negotiation information */
-+	/* (for experimental NPN extension). */
-+
-+	/* For a server, this contains a callback function by which the set of
-+	 * advertised protocols can be provided. */
-+	int (*next_protos_advertised_cb)(SSL *s, const unsigned char **buf,
-+			                 unsigned int *len, void *arg);
-+	void *next_protos_advertised_cb_arg;
-+	/* For a client, this contains a callback function that selects the
-+	 * next protocol from the list provided by the server. */
-+	int (*next_proto_select_cb)(SSL *s, unsigned char **out,
-+				    unsigned char *outlen,
-+				    const unsigned char *in,
-+				    unsigned int inlen,
-+				    void *arg);
-+	void *next_proto_select_cb_arg;
-+# endif
- #endif
- 
- #ifndef OPENSSL_NO_PSK
-@@ -928,6 +947,30 @@ int SSL_CTX_set_client_cert_engine(SSL_C
- #endif
- void SSL_CTX_set_cookie_generate_cb(SSL_CTX *ctx, int (*app_gen_cookie_cb)(SSL *ssl, unsigned char *cookie, unsigned int *cookie_len));
- void SSL_CTX_set_cookie_verify_cb(SSL_CTX *ctx, int (*app_verify_cookie_cb)(SSL *ssl, unsigned char *cookie, unsigned int cookie_len));
-+#ifndef OPENSSL_NO_NEXTPROTONEG
-+void SSL_CTX_set_next_protos_advertised_cb(SSL_CTX *s,
-+					   int (*cb) (SSL *ssl,
-+						      const unsigned char **out,
-+						      unsigned int *outlen,
-+						      void *arg), void *arg);
-+void SSL_CTX_set_next_proto_select_cb(SSL_CTX *s,
-+				      int (*cb) (SSL *ssl, unsigned char **out,
-+						 unsigned char *outlen,
-+						 const unsigned char *in,
-+						 unsigned int inlen, void *arg),
-+				      void *arg);
-+
-+int SSL_select_next_proto(unsigned char **out, unsigned char *outlen,
-+			  const unsigned char *in, unsigned int inlen,
-+			  const unsigned char *client, unsigned int client_len);
-+void SSL_get0_next_proto_negotiated(const SSL *s, const unsigned char **data,
-+				    unsigned *len);
-+
-+#define OPENSSL_NPN_UNSUPPORTED	0
-+#define OPENSSL_NPN_NEGOTIATED	1
-+#define OPENSSL_NPN_NO_OVERLAP	2
-+
-+#endif
- 
- #ifndef OPENSSL_NO_PSK
- /* the maximum length of the buffer given to callbacks containing the
-@@ -1187,6 +1230,19 @@ struct ssl_st
- 	void *tls_session_secret_cb_arg;
- 
- 	SSL_CTX * initial_ctx; /* initial ctx, used to store sessions */
-+
-+#ifndef OPENSSL_NO_NEXTPROTONEG
-+	/* Next protocol negotiation. For the client, this is the protocol that
-+	 * we sent in NextProtocol and is set when handling ServerHello
-+	 * extensions.
-+	 *
-+	 * For a server, this is the client's selected_protocol from
-+	 * NextProtocol and is set when handling the NextProtocol message,
-+	 * before the Finished message. */
-+	unsigned char *next_proto_negotiated;
-+	unsigned char next_proto_negotiated_len;
-+#endif
-+
- #define session_ctx initial_ctx
- #else
- #define session_ctx ctx
-@@ -1919,6 +1975,7 @@ void ERR_load_SSL_strings(void);
- #define SSL_F_SSL3_GET_KEY_EXCHANGE			 141
- #define SSL_F_SSL3_GET_MESSAGE				 142
- #define SSL_F_SSL3_GET_NEW_SESSION_TICKET		 283
-+#define SSL_F_SSL3_GET_NEXT_PROTO			 304
- #define SSL_F_SSL3_GET_RECORD				 143
- #define SSL_F_SSL3_GET_SERVER_CERTIFICATE		 144
- #define SSL_F_SSL3_GET_SERVER_DONE			 145
-@@ -2117,6 +2174,8 @@ void ERR_load_SSL_strings(void);
- #define SSL_R_EXCESSIVE_MESSAGE_SIZE			 152
- #define SSL_R_EXTRA_DATA_IN_MESSAGE			 153
- #define SSL_R_GOT_A_FIN_BEFORE_A_CCS			 154
-+#define SSL_R_GOT_NEXT_PROTO_BEFORE_A_CCS		 346
-+#define SSL_R_GOT_NEXT_PROTO_WITHOUT_EXTENSION		 347
- #define SSL_R_HTTPS_PROXY_REQUEST			 155
- #define SSL_R_HTTP_REQUEST				 156
- #define SSL_R_ILLEGAL_PADDING				 283
---- openssl-1.0.0b.orig/ssl/ssl3.h	2010-11-29 19:56:04.832465351 +0000
-+++ openssl-1.0.0b/ssl/ssl3.h	2010-11-29 19:56:04.965928855 +0000
-@@ -465,6 +465,12 @@ typedef struct ssl3_state_st
- 	void *server_opaque_prf_input;
- 	size_t server_opaque_prf_input_len;
- 
-+#ifndef OPENSSL_NO_NEXTPROTONEG
-+	/* Set if we saw the Next Protocol Negotiation extension from
-+	   our peer. */
-+	int next_proto_neg_seen;
-+#endif
-+
- 	struct	{
- 		/* actually only needs to be 16+20 */
- 		unsigned char cert_verify_md[EVP_MAX_MD_SIZE*2];
-@@ -557,6 +563,10 @@ typedef struct ssl3_state_st
- #define SSL3_ST_CW_CERT_VRFY_B		(0x191|SSL_ST_CONNECT)
- #define SSL3_ST_CW_CHANGE_A		(0x1A0|SSL_ST_CONNECT)
- #define SSL3_ST_CW_CHANGE_B		(0x1A1|SSL_ST_CONNECT)
-+#ifndef OPENSSL_NO_NEXTPROTONEG
-+#define SSL3_ST_CW_NEXT_PROTO_A		(0x200|SSL_ST_CONNECT)
-+#define SSL3_ST_CW_NEXT_PROTO_B		(0x201|SSL_ST_CONNECT)
-+#endif
- #define SSL3_ST_CW_FINISHED_A		(0x1B0|SSL_ST_CONNECT)
- #define SSL3_ST_CW_FINISHED_B		(0x1B1|SSL_ST_CONNECT)
- /* read from server */
-@@ -602,6 +612,10 @@ typedef struct ssl3_state_st
- #define SSL3_ST_SR_CERT_VRFY_B		(0x1A1|SSL_ST_ACCEPT)
- #define SSL3_ST_SR_CHANGE_A		(0x1B0|SSL_ST_ACCEPT)
- #define SSL3_ST_SR_CHANGE_B		(0x1B1|SSL_ST_ACCEPT)
-+#ifndef OPENSSL_NO_NEXTPROTONEG
-+#define SSL3_ST_SR_NEXT_PROTO_A		(0x210|SSL_ST_ACCEPT)
-+#define SSL3_ST_SR_NEXT_PROTO_B		(0x211|SSL_ST_ACCEPT)
-+#endif
- #define SSL3_ST_SR_FINISHED_A		(0x1C0|SSL_ST_ACCEPT)
- #define SSL3_ST_SR_FINISHED_B		(0x1C1|SSL_ST_ACCEPT)
- /* write to client */
-@@ -626,6 +640,9 @@ typedef struct ssl3_state_st
- #define SSL3_MT_CLIENT_KEY_EXCHANGE		16
- #define SSL3_MT_FINISHED			20
- #define SSL3_MT_CERTIFICATE_STATUS		22
-+#ifndef OPENSSL_NO_NEXTPROTONEG
-+#define SSL3_MT_NEXT_PROTO			67
-+#endif
- #define DTLS1_MT_HELLO_VERIFY_REQUEST    3
- 
- 
---- openssl-1.0.0b.orig/ssl/ssl_err.c	2010-11-29 19:56:04.846517045 +0000
-+++ openssl-1.0.0b/ssl/ssl_err.c	2010-11-29 19:56:04.965928855 +0000
-@@ -155,6 +155,7 @@ static ERR_STRING_DATA SSL_str_functs[]=
- {ERR_FUNC(SSL_F_SSL3_GET_KEY_EXCHANGE),	"SSL3_GET_KEY_EXCHANGE"},
- {ERR_FUNC(SSL_F_SSL3_GET_MESSAGE),	"SSL3_GET_MESSAGE"},
- {ERR_FUNC(SSL_F_SSL3_GET_NEW_SESSION_TICKET),	"SSL3_GET_NEW_SESSION_TICKET"},
-+{ERR_FUNC(SSL_F_SSL3_GET_NEXT_PROTO),	"SSL3_GET_NEXT_PROTO"},
- {ERR_FUNC(SSL_F_SSL3_GET_RECORD),	"SSL3_GET_RECORD"},
- {ERR_FUNC(SSL_F_SSL3_GET_SERVER_CERTIFICATE),	"SSL3_GET_SERVER_CERTIFICATE"},
- {ERR_FUNC(SSL_F_SSL3_GET_SERVER_DONE),	"SSL3_GET_SERVER_DONE"},
-@@ -355,6 +356,8 @@ static ERR_STRING_DATA SSL_str_reasons[]
- {ERR_REASON(SSL_R_EXCESSIVE_MESSAGE_SIZE),"excessive message size"},
- {ERR_REASON(SSL_R_EXTRA_DATA_IN_MESSAGE) ,"extra data in message"},
- {ERR_REASON(SSL_R_GOT_A_FIN_BEFORE_A_CCS),"got a fin before a ccs"},
-+{ERR_REASON(SSL_R_GOT_NEXT_PROTO_BEFORE_A_CCS),"got next proto before a ccs"},
-+{ERR_REASON(SSL_R_GOT_NEXT_PROTO_WITHOUT_EXTENSION),"got next proto without seeing extension"},
- {ERR_REASON(SSL_R_HTTPS_PROXY_REQUEST)   ,"https proxy request"},
- {ERR_REASON(SSL_R_HTTP_REQUEST)          ,"http request"},
- {ERR_REASON(SSL_R_ILLEGAL_PADDING)       ,"illegal padding"},
---- openssl-1.0.0b.orig/ssl/ssl_lib.c	2010-11-29 19:56:04.846517045 +0000
-+++ openssl-1.0.0b/ssl/ssl_lib.c	2010-11-29 19:56:04.965928855 +0000
-@@ -354,6 +354,9 @@ SSL *SSL_new(SSL_CTX *ctx)
- 	s->tlsext_ocsp_resplen = -1;
- 	CRYPTO_add(&ctx->references,1,CRYPTO_LOCK_SSL_CTX);
- 	s->initial_ctx=ctx;
-+# ifndef OPENSSL_NO_NEXTPROTONEG
-+	s->next_proto_negotiated = NULL;
-+# endif
- #endif
- 
- 	s->verify_result=X509_V_OK;
-@@ -587,6 +590,11 @@ void SSL_free(SSL *s)
- 		kssl_ctx_free(s->kssl_ctx);
- #endif	/* OPENSSL_NO_KRB5 */
- 
-+#if !defined(OPENSSL_NO_TLSEXT) && !defined(OPENSSL_NO_NEXTPROTONEG)
-+	if (s->next_proto_negotiated)
-+		OPENSSL_free(s->next_proto_negotiated);
-+#endif
-+
- 	OPENSSL_free(s);
- 	}
- 
-@@ -1503,6 +1511,124 @@ int SSL_get_servername_type(const SSL *s
- 		return TLSEXT_NAMETYPE_host_name;
- 	return -1;
- 	}
-+
-+# ifndef OPENSSL_NO_NEXTPROTONEG
-+/* SSL_select_next_proto implements the standard protocol selection. It is
-+ * expected that this function is called from the callback set by
-+ * SSL_CTX_set_next_proto_select_cb.
-+ *
-+ * The protocol data is assumed to be a vector of 8-bit, length prefixed byte
-+ * strings. The length byte itself is not included in the length. A byte
-+ * string of length 0 is invalid. No byte string may be truncated.
-+ *
-+ * The current, but experimental algorithm for selecting the protocol is:
-+ *
-+ * 1) If the server doesn't support NPN then this is indicated to the
-+ * callback. In this case, the client application has to abort the connection
-+ * or have a default application level protocol.
-+ *
-+ * 2) If the server supports NPN, but advertises an empty list then the
-+ * client selects the first protcol in its list, but indicates via the
-+ * API that this fallback case was enacted.
-+ *
-+ * 3) Otherwise, the client finds the first protocol in the server's list
-+ * that it supports and selects this protocol. This is because it's
-+ * assumed that the server has better information about which protocol
-+ * a client should use.
-+ *
-+ * 4) If the client doesn't support any of the server's advertised
-+ * protocols, then this is treated the same as case 2.
-+ *
-+ * It returns either
-+ * OPENSSL_NPN_NEGOTIATED if a common protocol was found, or
-+ * OPENSSL_NPN_NO_OVERLAP if the fallback case was reached.
-+ */
-+int SSL_select_next_proto(unsigned char **out, unsigned char *outlen, const unsigned char *server, unsigned int server_len, const unsigned char *client, unsigned int client_len)
-+	{
-+	unsigned int i, j;
-+	const unsigned char *result;
-+	int status = OPENSSL_NPN_UNSUPPORTED;
-+
-+	/* For each protocol in server preference order, see if we support it. */
-+	for (i = 0; i < server_len; )
-+		{
-+		for (j = 0; j < client_len; )
-+			{
-+			if (server[i] == client[j] &&
-+			    memcmp(&server[i+1], &client[j+1], server[i]) == 0)
-+				{
-+				/* We found a match */
-+				result = &server[i];
-+				status = OPENSSL_NPN_NEGOTIATED;
-+				goto found;
-+				}
-+			j += client[j];
-+			j++;
-+			}
-+		i += server[i];
-+		i++;
-+		}
-+
-+	/* There's no overlap between our protocols and the server's list. */
-+	result = client;
-+	status = OPENSSL_NPN_NO_OVERLAP;
-+
-+	found:
-+	*out = (unsigned char *) result + 1;
-+	*outlen = result[0];
-+	return status;
-+	}
-+
-+/* SSL_get0_next_proto_negotiated sets *data and *len to point to the client's
-+ * requested protocol for this connection and returns 0. If the client didn't
-+ * request any protocol, then *data is set to NULL.
-+ *
-+ * Note that the client can request any protocol it chooses. The value returned
-+ * from this function need not be a member of the list of supported protocols
-+ * provided by the callback.
-+ */
-+void SSL_get0_next_proto_negotiated(const SSL *s, const unsigned char **data, unsigned *len)
-+	{
-+	*data = s->next_proto_negotiated;
-+	if (!*data) {
-+		*len = 0;
-+	} else {
-+		*len = s->next_proto_negotiated_len;
-+	}
-+}
-+
-+/* SSL_CTX_set_next_protos_advertised_cb sets a callback that is called when a
-+ * TLS server needs a list of supported protocols for Next Protocol
-+ * Negotiation. The returned list must be in wire format.  The list is returned
-+ * by setting |out| to point to it and |outlen| to its length. This memory will
-+ * not be modified, but one should assume that the SSL* keeps a reference to
-+ * it.
-+ *
-+ * The callback should return SSL_TLSEXT_ERR_OK if it wishes to advertise. Otherwise, no
-+ * such extension will be included in the ServerHello. */
-+void SSL_CTX_set_next_protos_advertised_cb(SSL_CTX *ctx, int (*cb) (SSL *ssl, const unsigned char **out, unsigned int *outlen, void *arg), void *arg)
-+	{
-+	ctx->next_protos_advertised_cb = cb;
-+	ctx->next_protos_advertised_cb_arg = arg;
-+	}
-+
-+/* SSL_CTX_set_next_proto_select_cb sets a callback that is called when a
-+ * client needs to select a protocol from the server's provided list. |out|
-+ * must be set to point to the selected protocol (which may be within |in|).
-+ * The length of the protocol name must be written into |outlen|. The server's
-+ * advertised protocols are provided in |in| and |inlen|. The callback can
-+ * assume that |in| is syntactically valid.
-+ *
-+ * The client must select a protocol. It is fatal to the connection if this
-+ * callback returns a value other than SSL_TLSEXT_ERR_OK.
-+ */
-+void SSL_CTX_set_next_proto_select_cb(SSL_CTX *ctx, int (*cb) (SSL *s, unsigned char **out, unsigned char *outlen, const unsigned char *in, unsigned int inlen, void *arg), void *arg)
-+	{
-+	ctx->next_proto_select_cb = cb;
-+	ctx->next_proto_select_cb_arg = arg;
-+	}
-+
-+# endif
- #endif
- 
- static unsigned long ssl_session_hash(const SSL_SESSION *a)
-@@ -1667,6 +1793,10 @@ SSL_CTX *SSL_CTX_new(const SSL_METHOD *m
- 	ret->tlsext_status_cb = 0;
- 	ret->tlsext_status_arg = NULL;
- 
-+# ifndef OPENSSL_NO_NEXTPROTONEG
-+	ret->next_protos_advertised_cb = 0;
-+	ret->next_proto_select_cb = 0;
-+# endif
- #endif
- #ifndef OPENSSL_NO_PSK
- 	ret->psk_identity_hint=NULL;
---- openssl-1.0.0b.orig/ssl/ssl_locl.h	2010-11-29 19:56:04.846517045 +0000
-+++ openssl-1.0.0b/ssl/ssl_locl.h	2010-11-29 19:56:04.965928855 +0000
-@@ -968,6 +968,9 @@ int ssl3_get_server_certificate(SSL *s);
- int ssl3_check_cert_and_algorithm(SSL *s);
- #ifndef OPENSSL_NO_TLSEXT
- int ssl3_check_finished(SSL *s);
-+# ifndef OPENSSL_NO_NEXTPROTONEG
-+int ssl3_send_next_proto(SSL *s);
-+# endif
- #endif
- 
- int dtls1_client_hello(SSL *s);
-@@ -986,6 +989,9 @@ int ssl3_check_client_hello(SSL *s);
- int ssl3_get_client_certificate(SSL *s);
- int ssl3_get_client_key_exchange(SSL *s);
- int ssl3_get_cert_verify(SSL *s);
-+#ifndef OPENSSL_NO_NEXTPROTONEG
-+int ssl3_get_next_proto(SSL *s);
-+#endif
- 
- int dtls1_send_hello_request(SSL *s);
- int dtls1_send_server_hello(SSL *s);
---- openssl-1.0.0b.orig/ssl/t1_lib.c	2010-11-16 13:26:24.000000000 +0000
-+++ openssl-1.0.0b/ssl/t1_lib.c	2010-11-29 19:56:04.965928855 +0000
-@@ -494,6 +494,18 @@ unsigned char *ssl_add_clienthello_tlsex
- 			i2d_X509_EXTENSIONS(s->tlsext_ocsp_exts, &ret);
- 		}
- 
-+#ifndef OPENSSL_NO_NEXTPROTONEG
-+	if (s->ctx->next_proto_select_cb && !s->s3->tmp.finish_md_len)
-+		{
-+		/* The client advertises an emtpy extension to indicate its
-+		 * support for Next Protocol Negotiation */
-+		if (limit - ret - 4 < 0)
-+			return NULL;
-+		s2n(TLSEXT_TYPE_next_proto_neg,ret);
-+		s2n(0,ret);
-+		}
-+#endif
-+
- 	if ((extdatalen = ret-p-2)== 0) 
- 		return p;
- 
-@@ -505,6 +517,9 @@ unsigned char *ssl_add_serverhello_tlsex
- 	{
- 	int extdatalen=0;
- 	unsigned char *ret = p;
-+#ifndef OPENSSL_NO_NEXTPROTONEG
-+	int next_proto_neg_seen;
-+#endif
- 
- 	/* don't add extensions for SSLv3, unless doing secure renegotiation */
- 	if (s->version == SSL3_VERSION && !s->s3->send_connection_binding)
-@@ -618,6 +633,28 @@ unsigned char *ssl_add_serverhello_tlsex
- 
- 		}
- 
-+#ifndef OPENSSL_NO_NEXTPROTONEG
-+	next_proto_neg_seen = s->s3->next_proto_neg_seen;
-+	s->s3->next_proto_neg_seen = 0;
-+	if (next_proto_neg_seen && s->ctx->next_protos_advertised_cb)
-+		{
-+		const unsigned char *npa;
-+		unsigned int npalen;
-+		int r;
-+
-+		r = s->ctx->next_protos_advertised_cb(s, &npa, &npalen, s->ctx->next_protos_advertised_cb_arg);
-+		if (r == SSL_TLSEXT_ERR_OK)
-+			{
-+			if ((long)(limit - ret - 4 - npalen) < 0) return NULL;
-+			s2n(TLSEXT_TYPE_next_proto_neg,ret);
-+			s2n(npalen,ret);
-+			memcpy(ret, npa, npalen);
-+			ret += npalen;
-+			s->s3->next_proto_neg_seen = 1;
-+			}
-+		}
-+#endif
-+
- 	if ((extdatalen = ret-p-2)== 0) 
- 		return p;
- 
-@@ -982,6 +1019,28 @@ int ssl_parse_clienthello_tlsext(SSL *s,
- 				else
- 					s->tlsext_status_type = -1;
- 			}
-+#ifndef OPENSSL_NO_NEXTPROTONEG
-+		else if (type == TLSEXT_TYPE_next_proto_neg &&
-+                         s->s3->tmp.finish_md_len == 0)
-+			{
-+			/* We shouldn't accept this extension on a
-+			 * renegotiation.
-+			 *
-+			 * s->new_session will be set on renegotiation, but we
-+			 * probably shouldn't rely that it couldn't be set on
-+			 * the initial renegotation too in certain cases (when
-+			 * there's some other reason to disallow resuming an
-+			 * earlier session -- the current code won't be doing
-+			 * anything like that, but this might change).
-+
-+			 * A valid sign that there's been a previous handshake
-+			 * in this connection is if s->s3->tmp.finish_md_len >
-+			 * 0.  (We are talking about a check that will happen
-+			 * in the Hello protocol round, well before a new
-+			 * Finished message could have been computed.) */
-+			s->s3->next_proto_neg_seen = 1;
-+			}
-+#endif
- 
- 		/* session ticket processed earlier */
- 		data+=size;
-@@ -1005,6 +1064,26 @@ int ssl_parse_clienthello_tlsext(SSL *s,
- 	return 1;
- 	}
- 
-+#ifndef OPENSSL_NO_NEXTPROTONEG
-+/* ssl_next_proto_validate validates a Next Protocol Negotiation block. No
-+ * elements of zero length are allowed and the set of elements must exactly fill
-+ * the length of the block. */
-+static int ssl_next_proto_validate(unsigned char *d, unsigned len)
-+	{
-+	unsigned int off = 0;
-+
-+	while (off < len)
-+		{
-+		if (d[off] == 0)
-+			return 0;
-+		off += d[off];
-+		off++;
-+		}
-+
-+	return off == len;
-+	}
-+#endif
-+
- int ssl_parse_serverhello_tlsext(SSL *s, unsigned char **p, unsigned char *d, int n, int *al)
- 	{
- 	unsigned short length;
-@@ -1139,6 +1218,39 @@ int ssl_parse_serverhello_tlsext(SSL *s,
- 			/* Set flag to expect CertificateStatus message */
- 			s->tlsext_status_expected = 1;
- 			}
-+#ifndef OPENSSL_NO_NEXTPROTONEG
-+		else if (type == TLSEXT_TYPE_next_proto_neg)
-+			{
-+			unsigned char *selected;
-+			unsigned char selected_len;
-+
-+			/* We must have requested it. */
-+			if ((s->ctx->next_proto_select_cb == NULL))
-+				{
-+				*al = TLS1_AD_UNSUPPORTED_EXTENSION;
-+				return 0;
-+				}
-+			/* The data must be valid */
-+			if (!ssl_next_proto_validate(data, size))
-+				{
-+				*al = TLS1_AD_DECODE_ERROR;
-+				return 0;
-+				}
-+			if (s->ctx->next_proto_select_cb(s, &selected, &selected_len, data, size, s->ctx->next_proto_select_cb_arg) != SSL_TLSEXT_ERR_OK)
-+				{
-+				*al = TLS1_AD_INTERNAL_ERROR;
-+				return 0;
-+				}
-+			s->next_proto_negotiated = OPENSSL_malloc(selected_len);
-+			if (!s->next_proto_negotiated)
-+				{
-+				*al = TLS1_AD_INTERNAL_ERROR;
-+				return 0;
-+				}
-+			memcpy(s->next_proto_negotiated, selected, selected_len);
-+			s->next_proto_negotiated_len = selected_len;
-+			}
-+#endif
- 		else if (type == TLSEXT_TYPE_renegotiate)
- 			{
- 			if(!ssl_parse_serverhello_renegotiate_ext(s, data, size, al))
---- openssl-1.0.0b.orig/ssl/tls1.h	2009-11-11 14:51:29.000000000 +0000
-+++ openssl-1.0.0b/ssl/tls1.h	2010-11-29 19:56:04.965928855 +0000
-@@ -204,6 +204,11 @@ extern "C" {
- /* Temporary extension type */
- #define TLSEXT_TYPE_renegotiate                 0xff01
- 
-+#ifndef OPENSSL_NO_NEXTPROTONEG
-+/* This is not an IANA defined extension number */
-+#define TLSEXT_TYPE_next_proto_neg		13172
-+#endif
-+
- /* NameType value from RFC 3546 */
- #define TLSEXT_NAMETYPE_host_name 0
- /* status request value from RFC 3546 */
diff --git a/patches/progs.patch b/patches/progs.patch
index 16fd9b0..f0879ae 100644
--- a/patches/progs.patch
+++ b/patches/progs.patch
@@ -20,8 +20,8 @@
 +#if 0 /* ANDROID */
  	{FUNC_TYPE_GENERAL,"ts",ts_main},
 +#endif
- #ifndef OPENSSL_NO_MD2
- 	{FUNC_TYPE_MD,"md2",dgst_main},
+ #ifndef OPENSSL_NO_SRP
+ 	{FUNC_TYPE_GENERAL,"srp",srp_main},
  #endif
 --- openssl-1.0.0.orig/apps/speed.c	2010-03-03 11:56:17.000000000 -0800
 +++ openssl-1.0.0/apps/speed.c	2010-05-18 14:05:57.000000000 -0700
diff --git a/patches/ssl_Android.mk b/patches/ssl_Android.mk
index 40641a3..619aede 100644
--- a/patches/ssl_Android.mk
+++ b/patches/ssl_Android.mk
@@ -6,43 +6,49 @@
 	external/openssl/crypto
 
 local_src_files:= \
-	s2_meth.c \
-	s2_srvr.c \
-	s2_clnt.c \
-	s2_lib.c \
-	s2_enc.c \
-	s2_pkt.c \
-	s3_meth.c \
-	s3_srvr.c \
-	s3_clnt.c \
-	s3_lib.c \
-	s3_enc.c \
-	s3_pkt.c \
-	s3_both.c \
-	s23_meth.c \
-	s23_srvr.c \
+	bio_ssl.c \
+	d1_both.c \
+	d1_enc.c \
+	d1_lib.c \
+	d1_pkt.c \
+	d1_srtp.c \
+	kssl.c \
 	s23_clnt.c \
 	s23_lib.c \
+	s23_meth.c \
 	s23_pkt.c \
-	t1_meth.c \
-	t1_srvr.c \
-	t1_clnt.c \
-	t1_lib.c \
-	t1_enc.c \
-	t1_reneg.c \
-	ssl_lib.c \
-	ssl_err2.c \
-	ssl_cert.c \
-	ssl_sess.c \
-	ssl_ciph.c \
-	ssl_stat.c \
-	ssl_rsa.c \
-	ssl_asn1.c \
-	ssl_txt.c \
+	s23_srvr.c \
+	s2_clnt.c \
+	s2_enc.c \
+	s2_lib.c \
+	s2_meth.c \
+	s2_pkt.c \
+	s2_srvr.c \
+	s3_both.c \
+	s3_clnt.c \
+	s3_enc.c \
+	s3_lib.c \
+	s3_meth.c \
+	s3_pkt.c \
+	s3_srvr.c \
 	ssl_algs.c \
-	bio_ssl.c \
+	ssl_asn1.c \
+	ssl_cert.c \
+	ssl_ciph.c \
 	ssl_err.c \
-	kssl.c
+	ssl_err2.c \
+	ssl_lib.c \
+	ssl_rsa.c \
+	ssl_sess.c \
+	ssl_stat.c \
+	ssl_txt.c \
+	t1_clnt.c \
+	t1_enc.c \
+	t1_lib.c \
+	t1_meth.c \
+	t1_reneg.c \
+	t1_srvr.c \
+	tls_srp.c
 
 #######################################
 # target static library