Blame - lib/openssl/crypto/bn/bn_exp.c - kernel/lk

blob: d9b6c737fc82f5f5f8c4bc9a5d7ba23ee8627c73 [file] [log] [blame]

Kinson Chik	a8fa74c	2011-07-29 11:33:41 -0700	[diff] [blame^]	1	/* crypto/bn/bn_exp.c */
				2	/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
				3	* All rights reserved.
				4	*
				5	* This package is an SSL implementation written
				6	* by Eric Young (eay@cryptsoft.com).
				7	* The implementation was written so as to conform with Netscapes SSL.
				8	*
				9	* This library is free for commercial and non-commercial use as long as
				10	* the following conditions are aheared to. The following conditions
				11	* apply to all code found in this distribution, be it the RC4, RSA,
				12	* lhash, DES, etc., code; not just the SSL code. The SSL documentation
				13	* included with this distribution is covered by the same copyright terms
				14	* except that the holder is Tim Hudson (tjh@cryptsoft.com).
				15	*
				16	* Copyright remains Eric Young's, and as such any Copyright notices in
				17	* the code are not to be removed.
				18	* If this package is used in a product, Eric Young should be given attribution
				19	* as the author of the parts of the library used.
				20	* This can be in the form of a textual message at program startup or
				21	* in documentation (online or textual) provided with the package.
				22	*
				23	* Redistribution and use in source and binary forms, with or without
				24	* modification, are permitted provided that the following conditions
				25	* are met:
				26	* 1. Redistributions of source code must retain the copyright
				27	* notice, this list of conditions and the following disclaimer.
				28	* 2. Redistributions in binary form must reproduce the above copyright
				29	* notice, this list of conditions and the following disclaimer in the
				30	* documentation and/or other materials provided with the distribution.
				31	* 3. All advertising materials mentioning features or use of this software
				32	* must display the following acknowledgement:
				33	* "This product includes cryptographic software written by
				34	* Eric Young (eay@cryptsoft.com)"
				35	* The word 'cryptographic' can be left out if the rouines from the library
				36	* being used are not cryptographic related :-).
				37	* 4. If you include any Windows specific code (or a derivative thereof) from
				38	* the apps directory (application code) you must include an acknowledgement:
				39	* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
				40	*
				41	* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
				42	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
				43	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
				44	* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
				45	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
				46	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
				47	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
				48	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
				49	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
				50	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
				51	* SUCH DAMAGE.
				52	*
				53	* The licence and distribution terms for any publically available version or
				54	* derivative of this code cannot be changed. i.e. this code cannot simply be
				55	* copied and put under another distribution licence
				56	* [including the GNU Public Licence.]
				57	*/
				58	/* ====================================================================
				59	* Copyright (c) 1998-2005 The OpenSSL Project. All rights reserved.
				60	*
				61	* Redistribution and use in source and binary forms, with or without
				62	* modification, are permitted provided that the following conditions
				63	* are met:
				64	*
				65	* 1. Redistributions of source code must retain the above copyright
				66	* notice, this list of conditions and the following disclaimer.
				67	*
				68	* 2. Redistributions in binary form must reproduce the above copyright
				69	* notice, this list of conditions and the following disclaimer in
				70	* the documentation and/or other materials provided with the
				71	* distribution.
				72	*
				73	* 3. All advertising materials mentioning features or use of this
				74	* software must display the following acknowledgment:
				75	* "This product includes software developed by the OpenSSL Project
				76	* for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
				77	*
				78	* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
				79	* endorse or promote products derived from this software without
				80	* prior written permission. For written permission, please contact
				81	* openssl-core@openssl.org.
				82	*
				83	* 5. Products derived from this software may not be called "OpenSSL"
				84	* nor may "OpenSSL" appear in their names without prior written
				85	* permission of the OpenSSL Project.
				86	*
				87	* 6. Redistributions of any form whatsoever must retain the following
				88	* acknowledgment:
				89	* "This product includes software developed by the OpenSSL Project
				90	* for use in the OpenSSL Toolkit (http://www.openssl.org/)"
				91	*
				92	* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
				93	* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
				94	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
				95	* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
				96	* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
				97	* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
				98	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
				99	* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
				100	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
				101	* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
				102	* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
				103	* OF THE POSSIBILITY OF SUCH DAMAGE.
				104	* ====================================================================
				105	*
				106	* This product includes cryptographic software written by Eric Young
				107	* (eay@cryptsoft.com). This product includes software written by Tim
				108	* Hudson (tjh@cryptsoft.com).
				109	*
				110	*/
				111
				112
				113	#include "cryptlib.h"
				114	#include "bn_lcl.h"
				115
				116	/* maximum precomputation table size for variable sliding windows */
				117	#define TABLE_SIZE 32
				118
				119	/* this one works - simple but works */
				120	int BN_exp(BIGNUM r, const BIGNUM a, const BIGNUM p, BN_CTX ctx)
				121	{
				122	int i,bits,ret=0;
				123	BIGNUM v,rr;
				124
				125	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)
				126	{
				127	/* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
				128	BNerr(BN_F_BN_EXP,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
				129	return -1;
				130	}
				131
				132	BN_CTX_start(ctx);
				133	if ((r == a) \|\| (r == p))
				134	rr = BN_CTX_get(ctx);
				135	else
				136	rr = r;
				137	v = BN_CTX_get(ctx);
				138	if (rr == NULL \|\| v == NULL) goto err;
				139
				140	if (BN_copy(v,a) == NULL) goto err;
				141	bits=BN_num_bits(p);
				142
				143	if (BN_is_odd(p))
				144	{ if (BN_copy(rr,a) == NULL) goto err; }
				145	else { if (!BN_one(rr)) goto err; }
				146
				147	for (i=1; i<bits; i++)
				148	{
				149	if (!BN_sqr(v,v,ctx)) goto err;
				150	if (BN_is_bit_set(p,i))
				151	{
				152	if (!BN_mul(rr,rr,v,ctx)) goto err;
				153	}
				154	}
				155	ret=1;
				156	err:
				157	if (r != rr) BN_copy(r,rr);
				158	BN_CTX_end(ctx);
				159	bn_check_top(r);
				160	return(ret);
				161	}
				162
				163
				164	int BN_mod_exp(BIGNUM r, const BIGNUM a, const BIGNUM p, const BIGNUM m,
				165	BN_CTX *ctx)
				166	{
				167	int ret;
				168
				169	bn_check_top(a);
				170	bn_check_top(p);
				171	bn_check_top(m);
				172
				173	/* For even modulus m = 2^k*m_odd, it might make sense to compute
				174	* a^p mod m_odd and a^p mod 2^k separately (with Montgomery
				175	* exponentiation for the odd part), using appropriate exponent
				176	* reductions, and combine the results using the CRT.
				177	*
				178	* For now, we use Montgomery only if the modulus is odd; otherwise,
				179	* exponentiation using the reciprocal-based quick remaindering
				180	* algorithm is used.
				181	*
				182	* (Timing obtained with expspeed.c [computations a^p mod m
				183	* where a, p, m are of the same length: 256, 512, 1024, 2048,
				184	* 4096, 8192 bits], compared to the running time of the
				185	* standard algorithm:
				186	*
				187	* BN_mod_exp_mont 33 .. 40 % [AMD K6-2, Linux, debug configuration]
				188	* 55 .. 77 % [UltraSparc processor, but
				189	* debug-solaris-sparcv8-gcc conf.]
				190	*
				191	* BN_mod_exp_recp 50 .. 70 % [AMD K6-2, Linux, debug configuration]
				192	* 62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc]
				193	*
				194	* On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont
				195	* at 2048 and more bits, but at 512 and 1024 bits, it was
				196	* slower even than the standard algorithm!
				197	*
				198	* "Real" timings [linux-elf, solaris-sparcv9-gcc configurations]
				199	* should be obtained when the new Montgomery reduction code
				200	* has been integrated into OpenSSL.)
				201	*/
				202
				203	#define MONT_MUL_MOD
				204	#define MONT_EXP_WORD
				205	#define RECP_MUL_MOD
				206
				207	#ifdef MONT_MUL_MOD
				208	/* I have finally been able to take out this pre-condition of
				209	* the top bit being set. It was caused by an error in BN_div
				210	* with negatives. There was also another problem when for a^b%m
				211	* a >= m. eay 07-May-97 */
				212	/* if ((m->d[m->top-1]&BN_TBIT) && BN_is_odd(m)) */
				213
				214	if (BN_is_odd(m))
				215	{
				216	# ifdef MONT_EXP_WORD
				217	if (a->top == 1 && !a->neg && (BN_get_flags(p, BN_FLG_CONSTTIME) == 0))
				218	{
				219	BN_ULONG A = a->d[0];
				220	ret=BN_mod_exp_mont_word(r,A,p,m,ctx,NULL);
				221	}
				222	else
				223	# endif
				224	ret=BN_mod_exp_mont(r,a,p,m,ctx,NULL);
				225	}
				226	else
				227	#endif
				228	#ifdef RECP_MUL_MOD
				229	{ ret=BN_mod_exp_recp(r,a,p,m,ctx); }
				230	#else
				231	{ ret=BN_mod_exp_simple(r,a,p,m,ctx); }
				232	#endif
				233
				234	bn_check_top(r);
				235	return(ret);
				236	}
				237
				238
				239	int BN_mod_exp_recp(BIGNUM r, const BIGNUM a, const BIGNUM *p,
				240	const BIGNUM m, BN_CTX ctx)
				241	{
				242	int i,j,bits,ret=0,wstart,wend,window,wvalue;
				243	int start=1;
				244	BIGNUM *aa;
				245	/* Table of variables obtained from 'ctx' */
				246	BIGNUM *val[TABLE_SIZE];
				247	BN_RECP_CTX recp;
				248
				249	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)
				250	{
				251	/* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
				252	BNerr(BN_F_BN_MOD_EXP_RECP,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
				253	return -1;
				254	}
				255
				256	bits=BN_num_bits(p);
				257
				258	if (bits == 0)
				259	{
				260	ret = BN_one(r);
				261	return ret;
				262	}
				263
				264	BN_CTX_start(ctx);
				265	aa = BN_CTX_get(ctx);
				266	val[0] = BN_CTX_get(ctx);
				267	if(!aa \|\| !val[0]) goto err;
				268
				269	BN_RECP_CTX_init(&recp);
				270	if (m->neg)
				271	{
				272	/* ignore sign of 'm' */
				273	if (!BN_copy(aa, m)) goto err;
				274	aa->neg = 0;
				275	if (BN_RECP_CTX_set(&recp,aa,ctx) <= 0) goto err;
				276	}
				277	else
				278	{
				279	if (BN_RECP_CTX_set(&recp,m,ctx) <= 0) goto err;
				280	}
				281
				282	if (!BN_nnmod(val[0],a,m,ctx)) goto err; /* 1 */
				283	if (BN_is_zero(val[0]))
				284	{
				285	BN_zero(r);
				286	ret = 1;
				287	goto err;
				288	}
				289
				290	window = BN_window_bits_for_exponent_size(bits);
				291	if (window > 1)
				292	{
				293	if (!BN_mod_mul_reciprocal(aa,val[0],val[0],&recp,ctx))
				294	goto err; /* 2 */
				295	j=1<<(window-1);
				296	for (i=1; i<j; i++)
				297	{
				298	if(((val[i] = BN_CTX_get(ctx)) == NULL) \|\|
				299	!BN_mod_mul_reciprocal(val[i],val[i-1],
				300	aa,&recp,ctx))
				301	goto err;
				302	}
				303	}
				304
				305	start=1; /* This is used to avoid multiplication etc
				306	* when there is only the value '1' in the
				307	* buffer. */
				308	wvalue=0; /* The 'value' of the window */
				309	wstart=bits-1; /* The top bit of the window */
				310	wend=0; /* The bottom bit of the window */
				311
				312	if (!BN_one(r)) goto err;
				313
				314	for (;;)
				315	{
				316	if (BN_is_bit_set(p,wstart) == 0)
				317	{
				318	if (!start)
				319	if (!BN_mod_mul_reciprocal(r,r,r,&recp,ctx))
				320	goto err;
				321	if (wstart == 0) break;
				322	wstart--;
				323	continue;
				324	}
				325	/* We now have wstart on a 'set' bit, we now need to work out
				326	* how bit a window to do. To do this we need to scan
				327	* forward until the last set bit before the end of the
				328	* window */
				329	j=wstart;
				330	wvalue=1;
				331	wend=0;
				332	for (i=1; i<window; i++)
				333	{
				334	if (wstart-i < 0) break;
				335	if (BN_is_bit_set(p,wstart-i))
				336	{
				337	wvalue<<=(i-wend);
				338	wvalue\|=1;
				339	wend=i;
				340	}
				341	}
				342
				343	/* wend is the size of the current window */
				344	j=wend+1;
				345	/* add the 'bytes above' */
				346	if (!start)
				347	for (i=0; i<j; i++)
				348	{
				349	if (!BN_mod_mul_reciprocal(r,r,r,&recp,ctx))
				350	goto err;
				351	}
				352
				353	/* wvalue will be an odd number < 2^window */
				354	if (!BN_mod_mul_reciprocal(r,r,val[wvalue>>1],&recp,ctx))
				355	goto err;
				356
				357	/* move the 'window' down further */
				358	wstart-=wend+1;
				359	wvalue=0;
				360	start=0;
				361	if (wstart < 0) break;
				362	}
				363	ret=1;
				364	err:
				365	BN_CTX_end(ctx);
				366	BN_RECP_CTX_free(&recp);
				367	bn_check_top(r);
				368	return(ret);
				369	}
				370
				371
				372	int BN_mod_exp_mont(BIGNUM rr, const BIGNUM a, const BIGNUM *p,
				373	const BIGNUM m, BN_CTX ctx, BN_MONT_CTX *in_mont)
				374	{
				375	int i,j,bits,ret=0,wstart,wend,window,wvalue;
				376	int start=1;
				377	BIGNUM d,r;
				378	const BIGNUM *aa;
				379	/* Table of variables obtained from 'ctx' */
				380	BIGNUM *val[TABLE_SIZE];
				381	BN_MONT_CTX *mont=NULL;
				382
				383	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)
				384	{
				385	return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont);
				386	}
				387
				388	bn_check_top(a);
				389	bn_check_top(p);
				390	bn_check_top(m);
				391
				392	if (!BN_is_odd(m))
				393	{
				394	BNerr(BN_F_BN_MOD_EXP_MONT,BN_R_CALLED_WITH_EVEN_MODULUS);
				395	return(0);
				396	}
				397	bits=BN_num_bits(p);
				398	if (bits == 0)
				399	{
				400	ret = BN_one(rr);
				401	return ret;
				402	}
				403
				404	BN_CTX_start(ctx);
				405	d = BN_CTX_get(ctx);
				406	r = BN_CTX_get(ctx);
				407	val[0] = BN_CTX_get(ctx);
				408	if (!d \|\| !r \|\| !val[0]) goto err;
				409
				410	/* If this is not done, things will break in the montgomery
				411	* part */
				412
				413	if (in_mont != NULL)
				414	mont=in_mont;
				415	else
				416	{
				417	if ((mont=BN_MONT_CTX_new()) == NULL) goto err;
				418	if (!BN_MONT_CTX_set(mont,m,ctx)) goto err;
				419	}
				420
				421	if (a->neg \|\| BN_ucmp(a,m) >= 0)
				422	{
				423	if (!BN_nnmod(val[0],a,m,ctx))
				424	goto err;
				425	aa= val[0];
				426	}
				427	else
				428	aa=a;
				429	if (BN_is_zero(aa))
				430	{
				431	BN_zero(rr);
				432	ret = 1;
				433	goto err;
				434	}
				435	if (!BN_to_montgomery(val[0],aa,mont,ctx)) goto err; /* 1 */
				436
				437	window = BN_window_bits_for_exponent_size(bits);
				438	if (window > 1)
				439	{
				440	if (!BN_mod_mul_montgomery(d,val[0],val[0],mont,ctx)) goto err; /* 2 */
				441	j=1<<(window-1);
				442	for (i=1; i<j; i++)
				443	{
				444	if(((val[i] = BN_CTX_get(ctx)) == NULL) \|\|
				445	!BN_mod_mul_montgomery(val[i],val[i-1],
				446	d,mont,ctx))
				447	goto err;
				448	}
				449	}
				450
				451	start=1; /* This is used to avoid multiplication etc
				452	* when there is only the value '1' in the
				453	* buffer. */
				454	wvalue=0; /* The 'value' of the window */
				455	wstart=bits-1; /* The top bit of the window */
				456	wend=0; /* The bottom bit of the window */
				457
				458	if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err;
				459	for (;;)
				460	{
				461	if (BN_is_bit_set(p,wstart) == 0)
				462	{
				463	if (!start)
				464	{
				465	if (!BN_mod_mul_montgomery(r,r,r,mont,ctx))
				466	goto err;
				467	}
				468	if (wstart == 0) break;
				469	wstart--;
				470	continue;
				471	}
				472	/* We now have wstart on a 'set' bit, we now need to work out
				473	* how bit a window to do. To do this we need to scan
				474	* forward until the last set bit before the end of the
				475	* window */
				476	j=wstart;
				477	wvalue=1;
				478	wend=0;
				479	for (i=1; i<window; i++)
				480	{
				481	if (wstart-i < 0) break;
				482	if (BN_is_bit_set(p,wstart-i))
				483	{
				484	wvalue<<=(i-wend);
				485	wvalue\|=1;
				486	wend=i;
				487	}
				488	}
				489
				490	/* wend is the size of the current window */
				491	j=wend+1;
				492	/* add the 'bytes above' */
				493	if (!start)
				494	for (i=0; i<j; i++)
				495	{
				496	if (!BN_mod_mul_montgomery(r,r,r,mont,ctx))
				497	goto err;
				498	}
				499
				500	/* wvalue will be an odd number < 2^window */
				501	if (!BN_mod_mul_montgomery(r,r,val[wvalue>>1],mont,ctx))
				502	goto err;
				503
				504	/* move the 'window' down further */
				505	wstart-=wend+1;
				506	wvalue=0;
				507	start=0;
				508	if (wstart < 0) break;
				509	}
				510	if (!BN_from_montgomery(rr,r,mont,ctx)) goto err;
				511	ret=1;
				512	err:
				513	if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
				514	BN_CTX_end(ctx);
				515	bn_check_top(rr);
				516	return(ret);
				517	}
				518
				519
				520	/* BN_mod_exp_mont_consttime() stores the precomputed powers in a specific layout
				521	* so that accessing any of these table values shows the same access pattern as far
				522	* as cache lines are concerned. The following functions are used to transfer a BIGNUM
				523	* from/to that table. */
				524
				525	static int MOD_EXP_CTIME_COPY_TO_PREBUF(BIGNUM b, int top, unsigned char buf, int idx, int width)
				526	{
				527	size_t i, j;
				528
				529	if (bn_wexpand(b, top) == NULL)
				530	return 0;
				531	while (b->top < top)
				532	{
				533	b->d[b->top++] = 0;
				534	}
				535
				536	for (i = 0, j=idx; i < top * sizeof b->d[0]; i++, j+=width)
				537	{
				538	buf[j] = ((unsigned char*)b->d)[i];
				539	}
				540
				541	bn_correct_top(b);
				542	return 1;
				543	}
				544
				545	static int MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM b, int top, unsigned char buf, int idx, int width)
				546	{
				547	size_t i, j;
				548
				549	if (bn_wexpand(b, top) == NULL)
				550	return 0;
				551
				552	for (i=0, j=idx; i < top * sizeof b->d[0]; i++, j+=width)
				553	{
				554	((unsigned char*)b->d)[i] = buf[j];
				555	}
				556
				557	b->top = top;
				558	bn_correct_top(b);
				559	return 1;
				560	}
				561
				562	/* Given a pointer value, compute the next address that is a cache line multiple. */
				563	#define MOD_EXP_CTIME_ALIGN(x_) \
				564	((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((BN_ULONG)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK))))
				565
				566	/* This variant of BN_mod_exp_mont() uses fixed windows and the special
				567	* precomputation memory layout to limit data-dependency to a minimum
				568	* to protect secret exponents (cf. the hyper-threading timing attacks
				569	* pointed out by Colin Percival,
				570	* http://www.daemonology.net/hyperthreading-considered-harmful/)
				571	*/
				572	int BN_mod_exp_mont_consttime(BIGNUM rr, const BIGNUM a, const BIGNUM *p,
				573	const BIGNUM m, BN_CTX ctx, BN_MONT_CTX *in_mont)
				574	{
				575	int i,bits,ret=0,idx,window,wvalue;
				576	int top;
				577	BIGNUM *r;
				578	const BIGNUM *aa;
				579	BN_MONT_CTX *mont=NULL;
				580
				581	int numPowers;
				582	unsigned char *powerbufFree=NULL;
				583	int powerbufLen = 0;
				584	unsigned char *powerbuf=NULL;
				585	BIGNUM computeTemp=NULL, am=NULL;
				586
				587	bn_check_top(a);
				588	bn_check_top(p);
				589	bn_check_top(m);
				590
				591	top = m->top;
				592
				593	if (!(m->d[0] & 1))
				594	{
				595	BNerr(BN_F_BN_MOD_EXP_MONT_CONSTTIME,BN_R_CALLED_WITH_EVEN_MODULUS);
				596	return(0);
				597	}
				598	bits=BN_num_bits(p);
				599	if (bits == 0)
				600	{
				601	ret = BN_one(rr);
				602	return ret;
				603	}
				604
				605	/* Initialize BIGNUM context and allocate intermediate result */
				606	BN_CTX_start(ctx);
				607	r = BN_CTX_get(ctx);
				608	if (r == NULL) goto err;
				609
				610	/* Allocate a montgomery context if it was not supplied by the caller.
				611	* If this is not done, things will break in the montgomery part.
				612	*/
				613	if (in_mont != NULL)
				614	mont=in_mont;
				615	else
				616	{
				617	if ((mont=BN_MONT_CTX_new()) == NULL) goto err;
				618	if (!BN_MONT_CTX_set(mont,m,ctx)) goto err;
				619	}
				620
				621	/* Get the window size to use with size of p. */
				622	window = BN_window_bits_for_ctime_exponent_size(bits);
				623
				624	/* Allocate a buffer large enough to hold all of the pre-computed
				625	* powers of a.
				626	*/
				627	numPowers = 1 << window;
				628	powerbufLen = sizeof(m->d[0])topnumPowers;
				629	if ((powerbufFree=(unsigned char*)OPENSSL_malloc(powerbufLen+MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH)) == NULL)
				630	goto err;
				631
				632	powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree);
				633	memset(powerbuf, 0, powerbufLen);
				634
				635	/* Initialize the intermediate result. Do this early to save double conversion,
				636	* once each for a^0 and intermediate result.
				637	*/
				638	if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err;
				639	if (!MOD_EXP_CTIME_COPY_TO_PREBUF(r, top, powerbuf, 0, numPowers)) goto err;
				640
				641	/* Initialize computeTemp as a^1 with montgomery precalcs */
				642	computeTemp = BN_CTX_get(ctx);
				643	am = BN_CTX_get(ctx);
				644	if (computeTemp==NULL \|\| am==NULL) goto err;
				645
				646	if (a->neg \|\| BN_ucmp(a,m) >= 0)
				647	{
				648	if (!BN_mod(am,a,m,ctx))
				649	goto err;
				650	aa= am;
				651	}
				652	else
				653	aa=a;
				654	if (!BN_to_montgomery(am,aa,mont,ctx)) goto err;
				655	if (!BN_copy(computeTemp, am)) goto err;
				656	if (!MOD_EXP_CTIME_COPY_TO_PREBUF(am, top, powerbuf, 1, numPowers)) goto err;
				657
				658	/* If the window size is greater than 1, then calculate
				659	* val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1)
				660	* (even powers could instead be computed as (a^(i/2))^2
				661	* to use the slight performance advantage of sqr over mul).
				662	*/
				663	if (window > 1)
				664	{
				665	for (i=2; i<numPowers; i++)
				666	{
				667	/* Calculate a^i = a^(i-1) * a */
				668	if (!BN_mod_mul_montgomery(computeTemp,am,computeTemp,mont,ctx))
				669	goto err;
				670	if (!MOD_EXP_CTIME_COPY_TO_PREBUF(computeTemp, top, powerbuf, i, numPowers)) goto err;
				671	}
				672	}
				673
				674	/* Adjust the number of bits up to a multiple of the window size.
				675	* If the exponent length is not a multiple of the window size, then
				676	* this pads the most significant bits with zeros to normalize the
				677	* scanning loop to there's no special cases.
				678	*
				679	* * NOTE: Making the window size a power of two less than the native
				680	* * word size ensures that the padded bits won't go past the last
				681	* * word in the internal BIGNUM structure. Going past the end will
				682	* * still produce the correct result, but causes a different branch
				683	* * to be taken in the BN_is_bit_set function.
				684	*/
				685	bits = ((bits+window-1)/window)*window;
				686	idx=bits-1; /* The top bit of the window */
				687
				688	/* Scan the exponent one window at a time starting from the most
				689	* significant bits.
				690	*/
				691	while (idx >= 0)
				692	{
				693	wvalue=0; /* The 'value' of the window */
				694
				695	/* Scan the window, squaring the result as we go */
				696	for (i=0; i<window; i++,idx--)
				697	{
				698	if (!BN_mod_mul_montgomery(r,r,r,mont,ctx)) goto err;
				699	wvalue = (wvalue<<1)+BN_is_bit_set(p,idx);
				700	}
				701
				702	/* Fetch the appropriate pre-computed value from the pre-buf */
				703	if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(computeTemp, top, powerbuf, wvalue, numPowers)) goto err;
				704
				705	/* Multiply the result into the intermediate result */
				706	if (!BN_mod_mul_montgomery(r,r,computeTemp,mont,ctx)) goto err;
				707	}
				708
				709	/* Convert the final result from montgomery to standard format */
				710	if (!BN_from_montgomery(rr,r,mont,ctx)) goto err;
				711	ret=1;
				712	err:
				713	if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
				714	if (powerbuf!=NULL)
				715	{
				716	OPENSSL_cleanse(powerbuf,powerbufLen);
				717	OPENSSL_free(powerbufFree);
				718	}
				719	if (am!=NULL) BN_clear(am);
				720	if (computeTemp!=NULL) BN_clear(computeTemp);
				721	BN_CTX_end(ctx);
				722	return(ret);
				723	}
				724
				725	int BN_mod_exp_mont_word(BIGNUM rr, BN_ULONG a, const BIGNUM p,
				726	const BIGNUM m, BN_CTX ctx, BN_MONT_CTX *in_mont)
				727	{
				728	BN_MONT_CTX *mont = NULL;
				729	int b, bits, ret=0;
				730	int r_is_one;
				731	BN_ULONG w, next_w;
				732	BIGNUM d, r, *t;
				733	BIGNUM *swap_tmp;
				734	#define BN_MOD_MUL_WORD(r, w, m) \
				735	(BN_mul_word(r, (w)) && \
				736	(/* BN_ucmp(r, (m)) < 0 ? 1 :*/ \
				737	(BN_mod(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1))))
				738	/* BN_MOD_MUL_WORD is only used with 'w' large,
				739	* so the BN_ucmp test is probably more overhead
				740	* than always using BN_mod (which uses BN_copy if
				741	* a similar test returns true). */
				742	/* We can use BN_mod and do not need BN_nnmod because our
				743	* accumulator is never negative (the result of BN_mod does
				744	* not depend on the sign of the modulus).
				745	*/
				746	#define BN_TO_MONTGOMERY_WORD(r, w, mont) \
				747	(BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx))
				748
				749	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)
				750	{
				751	/* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
				752	BNerr(BN_F_BN_MOD_EXP_MONT_WORD,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
				753	return -1;
				754	}
				755
				756	bn_check_top(p);
				757	bn_check_top(m);
				758
				759	if (!BN_is_odd(m))
				760	{
				761	BNerr(BN_F_BN_MOD_EXP_MONT_WORD,BN_R_CALLED_WITH_EVEN_MODULUS);
				762	return(0);
				763	}
				764	if (m->top == 1)
				765	a %= m->d[0]; /* make sure that 'a' is reduced */
				766
				767	bits = BN_num_bits(p);
				768	if (bits == 0)
				769	{
				770	ret = BN_one(rr);
				771	return ret;
				772	}
				773	if (a == 0)
				774	{
				775	BN_zero(rr);
				776	ret = 1;
				777	return ret;
				778	}
				779
				780	BN_CTX_start(ctx);
				781	d = BN_CTX_get(ctx);
				782	r = BN_CTX_get(ctx);
				783	t = BN_CTX_get(ctx);
				784	if (d == NULL \|\| r == NULL \|\| t == NULL) goto err;
				785
				786	if (in_mont != NULL)
				787	mont=in_mont;
				788	else
				789	{
				790	if ((mont = BN_MONT_CTX_new()) == NULL) goto err;
				791	if (!BN_MONT_CTX_set(mont, m, ctx)) goto err;
				792	}
				793
				794	r_is_one = 1; /* except for Montgomery factor */
				795
				796	/* bits-1 >= 0 */
				797
				798	/* The result is accumulated in the product rw. /
				799	w = a; /* bit 'bits-1' of 'p' is always set */
				800	for (b = bits-2; b >= 0; b--)
				801	{
				802	/* First, square rw. /
				803	next_w = w*w;
				804	if ((next_w/w) != w) /* overflow */
				805	{
				806	if (r_is_one)
				807	{
				808	if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err;
				809	r_is_one = 0;
				810	}
				811	else
				812	{
				813	if (!BN_MOD_MUL_WORD(r, w, m)) goto err;
				814	}
				815	next_w = 1;
				816	}
				817	w = next_w;
				818	if (!r_is_one)
				819	{
				820	if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) goto err;
				821	}
				822
				823	/* Second, multiply rw by 'a' if exponent bit is set. /
				824	if (BN_is_bit_set(p, b))
				825	{
				826	next_w = w*a;
				827	if ((next_w/a) != w) /* overflow */
				828	{
				829	if (r_is_one)
				830	{
				831	if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err;
				832	r_is_one = 0;
				833	}
				834	else
				835	{
				836	if (!BN_MOD_MUL_WORD(r, w, m)) goto err;
				837	}
				838	next_w = a;
				839	}
				840	w = next_w;
				841	}
				842	}
				843
				844	/* Finally, set r:=rw. /
				845	if (w != 1)
				846	{
				847	if (r_is_one)
				848	{
				849	if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err;
				850	r_is_one = 0;
				851	}
				852	else
				853	{
				854	if (!BN_MOD_MUL_WORD(r, w, m)) goto err;
				855	}
				856	}
				857
				858	if (r_is_one) /* can happen only if a == 1*/
				859	{
				860	if (!BN_one(rr)) goto err;
				861	}
				862	else
				863	{
				864	if (!BN_from_montgomery(rr, r, mont, ctx)) goto err;
				865	}
				866	ret = 1;
				867	err:
				868	if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
				869	BN_CTX_end(ctx);
				870	bn_check_top(rr);
				871	return(ret);
				872	}
				873
				874
				875	/* The old fallback, simple version :-) */
				876	int BN_mod_exp_simple(BIGNUM r, const BIGNUM a, const BIGNUM *p,
				877	const BIGNUM m, BN_CTX ctx)
				878	{
				879	int i,j,bits,ret=0,wstart,wend,window,wvalue;
				880	int start=1;
				881	BIGNUM *d;
				882	/* Table of variables obtained from 'ctx' */
				883	BIGNUM *val[TABLE_SIZE];
				884
				885	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)
				886	{
				887	/* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
				888	BNerr(BN_F_BN_MOD_EXP_SIMPLE,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
				889	return -1;
				890	}
				891
				892	bits=BN_num_bits(p);
				893
				894	if (bits == 0)
				895	{
				896	ret = BN_one(r);
				897	return ret;
				898	}
				899
				900	BN_CTX_start(ctx);
				901	d = BN_CTX_get(ctx);
				902	val[0] = BN_CTX_get(ctx);
				903	if(!d \|\| !val[0]) goto err;
				904
				905	if (!BN_nnmod(val[0],a,m,ctx)) goto err; /* 1 */
				906	if (BN_is_zero(val[0]))
				907	{
				908	BN_zero(r);
				909	ret = 1;
				910	goto err;
				911	}
				912
				913	window = BN_window_bits_for_exponent_size(bits);
				914	if (window > 1)
				915	{
				916	if (!BN_mod_mul(d,val[0],val[0],m,ctx))
				917	goto err; /* 2 */
				918	j=1<<(window-1);
				919	for (i=1; i<j; i++)
				920	{
				921	if(((val[i] = BN_CTX_get(ctx)) == NULL) \|\|
				922	!BN_mod_mul(val[i],val[i-1],d,m,ctx))
				923	goto err;
				924	}
				925	}
				926
				927	start=1; /* This is used to avoid multiplication etc
				928	* when there is only the value '1' in the
				929	* buffer. */
				930	wvalue=0; /* The 'value' of the window */
				931	wstart=bits-1; /* The top bit of the window */
				932	wend=0; /* The bottom bit of the window */
				933
				934	if (!BN_one(r)) goto err;
				935
				936	for (;;)
				937	{
				938	if (BN_is_bit_set(p,wstart) == 0)
				939	{
				940	if (!start)
				941	if (!BN_mod_mul(r,r,r,m,ctx))
				942	goto err;
				943	if (wstart == 0) break;
				944	wstart--;
				945	continue;
				946	}
				947	/* We now have wstart on a 'set' bit, we now need to work out
				948	* how bit a window to do. To do this we need to scan
				949	* forward until the last set bit before the end of the
				950	* window */
				951	j=wstart;
				952	wvalue=1;
				953	wend=0;
				954	for (i=1; i<window; i++)
				955	{
				956	if (wstart-i < 0) break;
				957	if (BN_is_bit_set(p,wstart-i))
				958	{
				959	wvalue<<=(i-wend);
				960	wvalue\|=1;
				961	wend=i;
				962	}
				963	}
				964
				965	/* wend is the size of the current window */
				966	j=wend+1;
				967	/* add the 'bytes above' */
				968	if (!start)
				969	for (i=0; i<j; i++)
				970	{
				971	if (!BN_mod_mul(r,r,r,m,ctx))
				972	goto err;
				973	}
				974
				975	/* wvalue will be an odd number < 2^window */
				976	if (!BN_mod_mul(r,r,val[wvalue>>1],m,ctx))
				977	goto err;
				978
				979	/* move the 'window' down further */
				980	wstart-=wend+1;
				981	wvalue=0;
				982	start=0;
				983	if (wstart < 0) break;
				984	}
				985	ret=1;
				986	err:
				987	BN_CTX_end(ctx);
				988	bn_check_top(r);
				989	return(ret);
				990	}
				991