/* Copyright (c) 2014, Intel Corporation.
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
 * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */

/* Developers and authors:
 * Shay Gueron (1, 2), and Vlad Krasnov (1)
 * (1) Intel Corporation, Israel Development Center
 * (2) University of Haifa
 * Reference:
 * S.Gueron and V.Krasnov, "Fast Prime Field Elliptic Curve Cryptography with
 *                          256 Bit Primes" */

#include <openssl/ec.h>

#include <assert.h>
#include <stdint.h>
#include <string.h>

#include <openssl/bn.h>
#include <openssl/crypto.h>
#include <openssl/err.h>

#include "../bn/internal.h"
#include "../delocate.h"
#include "../../internal.h"
#include "internal.h"
#include "p256-x86_64.h"


40#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && \
41 !defined(OPENSSL_SMALL)
42
Adam Langleyfad63272015-11-12 12:15:39 -080043typedef P256_POINT_AFFINE PRECOMP256_ROW[64];
44
Adam Langleyfad63272015-11-12 12:15:39 -080045/* One converted into the Montgomery domain */
46static const BN_ULONG ONE[P256_LIMBS] = {
47 TOBN(0x00000000, 0x00000001), TOBN(0xffffffff, 0x00000000),
48 TOBN(0xffffffff, 0xffffffff), TOBN(0x00000000, 0xfffffffe),
49};
50
51/* Precomputed tables for the default generator */
52#include "p256-x86_64-table.h"
53
/* booth_recode_w5 recodes the window |in| to a signed digit, see util-64.c
 * for details.
 *
 * |in| is expected to be at most six bits wide; callers in this file mask it
 * before calling. The low bit of the return value is the sign flag (set when
 * bit 5 of |in| is set) and the remaining bits hold the digit's magnitude, so
 * callers use |result >> 1| as the table index and |result & 1| to decide
 * whether to negate. The computation is branch-free. */
static unsigned booth_recode_w5(unsigned in) {
  unsigned s, d;

  /* |s| is all-ones if bit 5 of |in| is set and zero otherwise. */
  s = ~((in >> 5) - 1);
  /* Select between |in| and its 6-bit complement according to |s|. */
  d = (1 << 6) - in - 1;
  d = (d & s) | (in & ~s);
  /* Halve, rounding up. */
  d = (d >> 1) + (d & 1);

  return (d << 1) + (s & 1);
}

/* booth_recode_w7 recodes the window |in| to a signed digit, see util-64.c
 * for details.
 *
 * |in| is expected to be at most eight bits wide; callers in this file mask
 * it before calling. The low bit of the return value is the sign flag (set
 * when bit 7 of |in| is set) and the remaining bits hold the digit's
 * magnitude. The computation is branch-free. */
static unsigned booth_recode_w7(unsigned in) {
  unsigned s, d;

  /* |s| is all-ones if bit 7 of |in| is set and zero otherwise. */
  s = ~((in >> 7) - 1);
  /* Select between |in| and its 8-bit complement according to |s|. */
  d = (1 << 8) - in - 1;
  d = (d & s) | (in & ~s);
  /* Halve, rounding up. */
  d = (d >> 1) + (d & 1);

  return (d << 1) + (s & 1);
}

Steven Valdez909b19f2016-11-21 15:35:44 -050077/* copy_conditional copies |src| to |dst| if |move| is one and leaves it as-is
78 * if |move| is zero.
79 *
80 * WARNING: this breaks the usual convention of constant-time functions
81 * returning masks. */
Adam Langleyfad63272015-11-12 12:15:39 -080082static void copy_conditional(BN_ULONG dst[P256_LIMBS],
83 const BN_ULONG src[P256_LIMBS], BN_ULONG move) {
84 BN_ULONG mask1 = ((BN_ULONG)0) - move;
85 BN_ULONG mask2 = ~mask1;
86
87 dst[0] = (src[0] & mask1) ^ (dst[0] & mask2);
88 dst[1] = (src[1] & mask1) ^ (dst[1] & mask2);
89 dst[2] = (src[2] & mask1) ^ (dst[2] & mask2);
90 dst[3] = (src[3] & mask1) ^ (dst[3] & mask2);
91 if (P256_LIMBS == 8) {
92 dst[4] = (src[4] & mask1) ^ (dst[4] & mask2);
93 dst[5] = (src[5] & mask1) ^ (dst[5] & mask2);
94 dst[6] = (src[6] & mask1) ^ (dst[6] & mask2);
95 dst[7] = (src[7] & mask1) ^ (dst[7] & mask2);
96 }
97}
98
Steven Valdez909b19f2016-11-21 15:35:44 -050099/* is_not_zero returns one iff in != 0 and zero otherwise.
100 *
101 * WARNING: this breaks the usual convention of constant-time functions
102 * returning masks.
103 *
104 * (define-fun is_not_zero ((in (_ BitVec 64))) (_ BitVec 64)
105 * (bvlshr (bvor in (bvsub #x0000000000000000 in)) #x000000000000003f)
106 * )
107 *
108 * (declare-fun x () (_ BitVec 64))
109 *
110 * (assert (and (= x #x0000000000000000) (= (is_not_zero x) #x0000000000000001)))
111 * (check-sat)
112 *
113 * (assert (and (not (= x #x0000000000000000)) (= (is_not_zero x) #x0000000000000000)))
114 * (check-sat)
115 * */
116static BN_ULONG is_not_zero(BN_ULONG in) {
117 in |= (0 - in);
118 in >>= BN_BITS2 - 1;
119 return in;
120}
Adam Langleyfad63272015-11-12 12:15:39 -0800121
Steven Valdez909b19f2016-11-21 15:35:44 -0500122/* ecp_nistz256_mod_inverse_mont sets |r| to (|in| * 2^-256)^-1 * 2^256 mod p.
123 * That is, |r| is the modular inverse of |in| for input and output in the
124 * Montgomery domain. */
125static void ecp_nistz256_mod_inverse_mont(BN_ULONG r[P256_LIMBS],
126 const BN_ULONG in[P256_LIMBS]) {
Adam Langleyfad63272015-11-12 12:15:39 -0800127 /* The poly is ffffffff 00000001 00000000 00000000 00000000 ffffffff ffffffff
128 ffffffff
129 We use FLT and used poly-2 as exponent */
130 BN_ULONG p2[P256_LIMBS];
131 BN_ULONG p4[P256_LIMBS];
132 BN_ULONG p8[P256_LIMBS];
133 BN_ULONG p16[P256_LIMBS];
134 BN_ULONG p32[P256_LIMBS];
135 BN_ULONG res[P256_LIMBS];
136 int i;
137
138 ecp_nistz256_sqr_mont(res, in);
139 ecp_nistz256_mul_mont(p2, res, in); /* 3*p */
140
141 ecp_nistz256_sqr_mont(res, p2);
142 ecp_nistz256_sqr_mont(res, res);
143 ecp_nistz256_mul_mont(p4, res, p2); /* f*p */
144
145 ecp_nistz256_sqr_mont(res, p4);
146 ecp_nistz256_sqr_mont(res, res);
147 ecp_nistz256_sqr_mont(res, res);
148 ecp_nistz256_sqr_mont(res, res);
149 ecp_nistz256_mul_mont(p8, res, p4); /* ff*p */
150
151 ecp_nistz256_sqr_mont(res, p8);
152 for (i = 0; i < 7; i++) {
153 ecp_nistz256_sqr_mont(res, res);
154 }
155 ecp_nistz256_mul_mont(p16, res, p8); /* ffff*p */
156
157 ecp_nistz256_sqr_mont(res, p16);
158 for (i = 0; i < 15; i++) {
159 ecp_nistz256_sqr_mont(res, res);
160 }
161 ecp_nistz256_mul_mont(p32, res, p16); /* ffffffff*p */
162
163 ecp_nistz256_sqr_mont(res, p32);
164 for (i = 0; i < 31; i++) {
165 ecp_nistz256_sqr_mont(res, res);
166 }
167 ecp_nistz256_mul_mont(res, res, in);
168
169 for (i = 0; i < 32 * 4; i++) {
170 ecp_nistz256_sqr_mont(res, res);
171 }
172 ecp_nistz256_mul_mont(res, res, p32);
173
174 for (i = 0; i < 32; i++) {
175 ecp_nistz256_sqr_mont(res, res);
176 }
177 ecp_nistz256_mul_mont(res, res, p32);
178
179 for (i = 0; i < 16; i++) {
180 ecp_nistz256_sqr_mont(res, res);
181 }
182 ecp_nistz256_mul_mont(res, res, p16);
183
184 for (i = 0; i < 8; i++) {
185 ecp_nistz256_sqr_mont(res, res);
186 }
187 ecp_nistz256_mul_mont(res, res, p8);
188
189 ecp_nistz256_sqr_mont(res, res);
190 ecp_nistz256_sqr_mont(res, res);
191 ecp_nistz256_sqr_mont(res, res);
192 ecp_nistz256_sqr_mont(res, res);
193 ecp_nistz256_mul_mont(res, res, p4);
194
195 ecp_nistz256_sqr_mont(res, res);
196 ecp_nistz256_sqr_mont(res, res);
197 ecp_nistz256_mul_mont(res, res, p2);
198
199 ecp_nistz256_sqr_mont(res, res);
200 ecp_nistz256_sqr_mont(res, res);
David Benjamin4969cc92016-04-22 15:02:23 -0400201 ecp_nistz256_mul_mont(r, res, in);
Adam Langleyfad63272015-11-12 12:15:39 -0800202}
203
204/* ecp_nistz256_bignum_to_field_elem copies the contents of |in| to |out| and
205 * returns one if it fits. Otherwise it returns zero. */
206static int ecp_nistz256_bignum_to_field_elem(BN_ULONG out[P256_LIMBS],
207 const BIGNUM *in) {
208 if (in->top > P256_LIMBS) {
209 return 0;
210 }
211
Robert Sloan69939df2017-01-09 10:53:07 -0800212 OPENSSL_memset(out, 0, sizeof(BN_ULONG) * P256_LIMBS);
213 OPENSSL_memcpy(out, in->d, sizeof(BN_ULONG) * in->top);
Adam Langleyfad63272015-11-12 12:15:39 -0800214 return 1;
215}
216
Adam Langley4139edb2016-01-13 15:00:54 -0800217/* r = p * p_scalar */
218static int ecp_nistz256_windowed_mul(const EC_GROUP *group, P256_POINT *r,
219 const EC_POINT *p, const BIGNUM *p_scalar,
220 BN_CTX *ctx) {
221 assert(p != NULL);
222 assert(p_scalar != NULL);
223
Adam Langleyfad63272015-11-12 12:15:39 -0800224 static const unsigned kWindowSize = 5;
225 static const unsigned kMask = (1 << (5 /* kWindowSize */ + 1)) - 1;
226
Adam Langley4139edb2016-01-13 15:00:54 -0800227 /* A |P256_POINT| is (3 * 32) = 96 bytes, and the 64-byte alignment should
228 * add no more than 63 bytes of overhead. Thus, |table| should require
229 * ~1599 ((96 * 16) + 63) bytes of stack space. */
David Benjamin4969cc92016-04-22 15:02:23 -0400230 alignas(64) P256_POINT table[16];
Adam Langley4139edb2016-01-13 15:00:54 -0800231 uint8_t p_str[33];
Adam Langleyfad63272015-11-12 12:15:39 -0800232
Adam Langley4139edb2016-01-13 15:00:54 -0800233
234 int ret = 0;
235 BN_CTX *new_ctx = NULL;
236 int ctx_started = 0;
237
238 if (BN_num_bits(p_scalar) > 256 || BN_is_negative(p_scalar)) {
239 if (ctx == NULL) {
240 new_ctx = BN_CTX_new();
241 if (new_ctx == NULL) {
242 OPENSSL_PUT_ERROR(EC, ERR_R_MALLOC_FAILURE);
243 goto err;
244 }
245 ctx = new_ctx;
246 }
247 BN_CTX_start(ctx);
248 ctx_started = 1;
249 BIGNUM *mod = BN_CTX_get(ctx);
250 if (mod == NULL) {
251 OPENSSL_PUT_ERROR(EC, ERR_R_MALLOC_FAILURE);
252 goto err;
253 }
254 if (!BN_nnmod(mod, p_scalar, &group->order, ctx)) {
255 OPENSSL_PUT_ERROR(EC, ERR_R_BN_LIB);
256 goto err;
257 }
258 p_scalar = mod;
259 }
260
261 int j;
262 for (j = 0; j < p_scalar->top * BN_BYTES; j += BN_BYTES) {
263 BN_ULONG d = p_scalar->d[j / BN_BYTES];
264
265 p_str[j + 0] = d & 0xff;
266 p_str[j + 1] = (d >> 8) & 0xff;
267 p_str[j + 2] = (d >> 16) & 0xff;
268 p_str[j + 3] = (d >>= 24) & 0xff;
269 if (BN_BYTES == 8) {
270 d >>= 8;
271 p_str[j + 4] = d & 0xff;
272 p_str[j + 5] = (d >> 8) & 0xff;
273 p_str[j + 6] = (d >> 16) & 0xff;
274 p_str[j + 7] = (d >> 24) & 0xff;
275 }
276 }
277
278 for (; j < 33; j++) {
279 p_str[j] = 0;
280 }
281
282 /* table[0] is implicitly (0,0,0) (the point at infinity), therefore it is
283 * not stored. All other values are actually stored with an offset of -1 in
284 * table. */
285 P256_POINT *row = table;
286
287 if (!ecp_nistz256_bignum_to_field_elem(row[1 - 1].X, &p->X) ||
288 !ecp_nistz256_bignum_to_field_elem(row[1 - 1].Y, &p->Y) ||
289 !ecp_nistz256_bignum_to_field_elem(row[1 - 1].Z, &p->Z)) {
290 OPENSSL_PUT_ERROR(EC, EC_R_COORDINATES_OUT_OF_RANGE);
Adam Langleyfad63272015-11-12 12:15:39 -0800291 goto err;
292 }
293
Adam Langley4139edb2016-01-13 15:00:54 -0800294 ecp_nistz256_point_double(&row[2 - 1], &row[1 - 1]);
295 ecp_nistz256_point_add(&row[3 - 1], &row[2 - 1], &row[1 - 1]);
296 ecp_nistz256_point_double(&row[4 - 1], &row[2 - 1]);
297 ecp_nistz256_point_double(&row[6 - 1], &row[3 - 1]);
298 ecp_nistz256_point_double(&row[8 - 1], &row[4 - 1]);
299 ecp_nistz256_point_double(&row[12 - 1], &row[6 - 1]);
300 ecp_nistz256_point_add(&row[5 - 1], &row[4 - 1], &row[1 - 1]);
301 ecp_nistz256_point_add(&row[7 - 1], &row[6 - 1], &row[1 - 1]);
302 ecp_nistz256_point_add(&row[9 - 1], &row[8 - 1], &row[1 - 1]);
303 ecp_nistz256_point_add(&row[13 - 1], &row[12 - 1], &row[1 - 1]);
304 ecp_nistz256_point_double(&row[14 - 1], &row[7 - 1]);
305 ecp_nistz256_point_double(&row[10 - 1], &row[5 - 1]);
306 ecp_nistz256_point_add(&row[15 - 1], &row[14 - 1], &row[1 - 1]);
307 ecp_nistz256_point_add(&row[11 - 1], &row[10 - 1], &row[1 - 1]);
David Benjaminc895d6b2016-08-11 13:26:41 -0400308 ecp_nistz256_point_double(&row[16 - 1], &row[8 - 1]);
Adam Langleyfad63272015-11-12 12:15:39 -0800309
310 BN_ULONG tmp[P256_LIMBS];
David Benjamin4969cc92016-04-22 15:02:23 -0400311 alignas(32) P256_POINT h;
Adam Langleyfad63272015-11-12 12:15:39 -0800312 unsigned index = 255;
Adam Langley4139edb2016-01-13 15:00:54 -0800313 unsigned wvalue = p_str[(index - 1) / 8];
Adam Langleyfad63272015-11-12 12:15:39 -0800314 wvalue = (wvalue >> ((index - 1) % 8)) & kMask;
315
Adam Langley4139edb2016-01-13 15:00:54 -0800316 ecp_nistz256_select_w5(r, table, booth_recode_w5(wvalue) >> 1);
Adam Langleyfad63272015-11-12 12:15:39 -0800317
318 while (index >= 5) {
Adam Langley4139edb2016-01-13 15:00:54 -0800319 if (index != 255) {
Adam Langleyfad63272015-11-12 12:15:39 -0800320 unsigned off = (index - 1) / 8;
321
Adam Langley4139edb2016-01-13 15:00:54 -0800322 wvalue = p_str[off] | p_str[off + 1] << 8;
Adam Langleyfad63272015-11-12 12:15:39 -0800323 wvalue = (wvalue >> ((index - 1) % 8)) & kMask;
324
325 wvalue = booth_recode_w5(wvalue);
326
Adam Langley4139edb2016-01-13 15:00:54 -0800327 ecp_nistz256_select_w5(&h, table, wvalue >> 1);
Adam Langleyfad63272015-11-12 12:15:39 -0800328
329 ecp_nistz256_neg(tmp, h.Y);
330 copy_conditional(h.Y, tmp, (wvalue & 1));
331
332 ecp_nistz256_point_add(r, r, &h);
333 }
334
335 index -= kWindowSize;
336
337 ecp_nistz256_point_double(r, r);
338 ecp_nistz256_point_double(r, r);
339 ecp_nistz256_point_double(r, r);
340 ecp_nistz256_point_double(r, r);
341 ecp_nistz256_point_double(r, r);
342 }
343
344 /* Final window */
Adam Langley4139edb2016-01-13 15:00:54 -0800345 wvalue = p_str[0];
346 wvalue = (wvalue << 1) & kMask;
Adam Langleyfad63272015-11-12 12:15:39 -0800347
Adam Langley4139edb2016-01-13 15:00:54 -0800348 wvalue = booth_recode_w5(wvalue);
Adam Langleyfad63272015-11-12 12:15:39 -0800349
Adam Langley4139edb2016-01-13 15:00:54 -0800350 ecp_nistz256_select_w5(&h, table, wvalue >> 1);
Adam Langleyfad63272015-11-12 12:15:39 -0800351
Adam Langley4139edb2016-01-13 15:00:54 -0800352 ecp_nistz256_neg(tmp, h.Y);
353 copy_conditional(h.Y, tmp, wvalue & 1);
Adam Langleyfad63272015-11-12 12:15:39 -0800354
Adam Langley4139edb2016-01-13 15:00:54 -0800355 ecp_nistz256_point_add(r, r, &h);
356
357 ret = 1;
Adam Langleyfad63272015-11-12 12:15:39 -0800358
359err:
Adam Langley4139edb2016-01-13 15:00:54 -0800360 if (ctx_started) {
361 BN_CTX_end(ctx);
362 }
363 BN_CTX_free(new_ctx);
364 return ret;
Adam Langleyfad63272015-11-12 12:15:39 -0800365}
366
Adam Langleyfad63272015-11-12 12:15:39 -0800367static int ecp_nistz256_points_mul(
Adam Langley4139edb2016-01-13 15:00:54 -0800368 const EC_GROUP *group, EC_POINT *r, const BIGNUM *g_scalar,
369 const EC_POINT *p_, const BIGNUM *p_scalar, BN_CTX *ctx) {
370 assert((p_ != NULL) == (p_scalar != NULL));
371
Adam Langleyfad63272015-11-12 12:15:39 -0800372 static const unsigned kWindowSize = 7;
373 static const unsigned kMask = (1 << (7 /* kWindowSize */ + 1)) - 1;
374
David Benjamin4969cc92016-04-22 15:02:23 -0400375 alignas(32) union {
Adam Langleyfad63272015-11-12 12:15:39 -0800376 P256_POINT p;
377 P256_POINT_AFFINE a;
378 } t, p;
379
Adam Langley4139edb2016-01-13 15:00:54 -0800380 int ret = 0;
381 BN_CTX *new_ctx = NULL;
382 int ctx_started = 0;
Adam Langleyfad63272015-11-12 12:15:39 -0800383
Adam Langley4139edb2016-01-13 15:00:54 -0800384 if (g_scalar != NULL) {
385 if (BN_num_bits(g_scalar) > 256 || BN_is_negative(g_scalar)) {
386 if (ctx == NULL) {
387 new_ctx = BN_CTX_new();
388 if (new_ctx == NULL) {
389 goto err;
390 }
391 ctx = new_ctx;
392 }
393 BN_CTX_start(ctx);
394 ctx_started = 1;
395 BIGNUM *tmp_scalar = BN_CTX_get(ctx);
396 if (tmp_scalar == NULL) {
397 goto err;
398 }
399
400 if (!BN_nnmod(tmp_scalar, g_scalar, &group->order, ctx)) {
401 OPENSSL_PUT_ERROR(EC, ERR_R_BN_LIB);
402 goto err;
403 }
404 g_scalar = tmp_scalar;
Adam Langleyfad63272015-11-12 12:15:39 -0800405 }
406
Adam Langley4139edb2016-01-13 15:00:54 -0800407 uint8_t p_str[33] = {0};
408 int i;
409 for (i = 0; i < g_scalar->top * BN_BYTES; i += BN_BYTES) {
410 BN_ULONG d = g_scalar->d[i / BN_BYTES];
Adam Langleyfad63272015-11-12 12:15:39 -0800411
Adam Langley4139edb2016-01-13 15:00:54 -0800412 p_str[i + 0] = d & 0xff;
413 p_str[i + 1] = (d >> 8) & 0xff;
414 p_str[i + 2] = (d >> 16) & 0xff;
415 p_str[i + 3] = (d >>= 24) & 0xff;
416 if (BN_BYTES == 8) {
417 d >>= 8;
418 p_str[i + 4] = d & 0xff;
419 p_str[i + 5] = (d >> 8) & 0xff;
420 p_str[i + 6] = (d >> 16) & 0xff;
421 p_str[i + 7] = (d >> 24) & 0xff;
Adam Langleyfad63272015-11-12 12:15:39 -0800422 }
Adam Langley4139edb2016-01-13 15:00:54 -0800423 }
Adam Langleyfad63272015-11-12 12:15:39 -0800424
Adam Langley4139edb2016-01-13 15:00:54 -0800425 for (; i < (int) sizeof(p_str); i++) {
426 p_str[i] = 0;
427 }
Adam Langleyfad63272015-11-12 12:15:39 -0800428
Adam Langley4139edb2016-01-13 15:00:54 -0800429 /* First window */
430 unsigned wvalue = (p_str[0] << 1) & kMask;
431 unsigned index = kWindowSize;
Adam Langleyfad63272015-11-12 12:15:39 -0800432
Adam Langley4139edb2016-01-13 15:00:54 -0800433 wvalue = booth_recode_w7(wvalue);
Adam Langleyfad63272015-11-12 12:15:39 -0800434
Adam Langley4139edb2016-01-13 15:00:54 -0800435 const PRECOMP256_ROW *const precomputed_table =
436 (const PRECOMP256_ROW *)ecp_nistz256_precomputed;
437 ecp_nistz256_select_w7(&p.a, precomputed_table[0], wvalue >> 1);
438
439 ecp_nistz256_neg(p.p.Z, p.p.Y);
440 copy_conditional(p.p.Y, p.p.Z, wvalue & 1);
441
Steven Valdez909b19f2016-11-21 15:35:44 -0500442 /* Convert |p| from affine to Jacobian coordinates. We set Z to zero if |p|
443 * is infinity and |ONE| otherwise. |p| was computed from the table, so it
444 * is infinity iff |wvalue >> 1| is zero. */
Robert Sloan69939df2017-01-09 10:53:07 -0800445 OPENSSL_memset(p.p.Z, 0, sizeof(p.p.Z));
Steven Valdez909b19f2016-11-21 15:35:44 -0500446 copy_conditional(p.p.Z, ONE, is_not_zero(wvalue >> 1));
Adam Langley4139edb2016-01-13 15:00:54 -0800447
448 for (i = 1; i < 37; i++) {
449 unsigned off = (index - 1) / 8;
450 wvalue = p_str[off] | p_str[off + 1] << 8;
451 wvalue = (wvalue >> ((index - 1) % 8)) & kMask;
452 index += kWindowSize;
Adam Langleyfad63272015-11-12 12:15:39 -0800453
454 wvalue = booth_recode_w7(wvalue);
455
Adam Langley4139edb2016-01-13 15:00:54 -0800456 ecp_nistz256_select_w7(&t.a, precomputed_table[i], wvalue >> 1);
Adam Langleyfad63272015-11-12 12:15:39 -0800457
Adam Langley4139edb2016-01-13 15:00:54 -0800458 ecp_nistz256_neg(t.p.Z, t.a.Y);
459 copy_conditional(t.a.Y, t.p.Z, wvalue & 1);
Adam Langleyfad63272015-11-12 12:15:39 -0800460
Adam Langley4139edb2016-01-13 15:00:54 -0800461 ecp_nistz256_point_add_affine(&p.p, &p.p, &t.a);
Adam Langleyfad63272015-11-12 12:15:39 -0800462 }
Adam Langleyfad63272015-11-12 12:15:39 -0800463 }
464
Adam Langley4139edb2016-01-13 15:00:54 -0800465 const int p_is_infinity = g_scalar == NULL;
466 if (p_scalar != NULL) {
Adam Langleyfad63272015-11-12 12:15:39 -0800467 P256_POINT *out = &t.p;
468 if (p_is_infinity) {
469 out = &p.p;
470 }
471
Adam Langley4139edb2016-01-13 15:00:54 -0800472 if (!ecp_nistz256_windowed_mul(group, out, p_, p_scalar, ctx)) {
473 goto err;
474 }
Adam Langleyfad63272015-11-12 12:15:39 -0800475
476 if (!p_is_infinity) {
477 ecp_nistz256_point_add(&p.p, &p.p, out);
478 }
479 }
480
Adam Langley4139edb2016-01-13 15:00:54 -0800481 /* Not constant-time, but we're only operating on the public output. */
David Benjamin4969cc92016-04-22 15:02:23 -0400482 if (!bn_set_words(&r->X, p.p.X, P256_LIMBS) ||
483 !bn_set_words(&r->Y, p.p.Y, P256_LIMBS) ||
484 !bn_set_words(&r->Z, p.p.Z, P256_LIMBS)) {
485 return 0;
486 }
Adam Langleyfad63272015-11-12 12:15:39 -0800487
488 ret = 1;
489
490err:
Adam Langley4139edb2016-01-13 15:00:54 -0800491 if (ctx_started) {
492 BN_CTX_end(ctx);
493 }
494 BN_CTX_free(new_ctx);
Adam Langleyfad63272015-11-12 12:15:39 -0800495 return ret;
496}
497
498static int ecp_nistz256_get_affine(const EC_GROUP *group, const EC_POINT *point,
499 BIGNUM *x, BIGNUM *y, BN_CTX *ctx) {
500 BN_ULONG z_inv2[P256_LIMBS];
501 BN_ULONG z_inv3[P256_LIMBS];
Adam Langleyfad63272015-11-12 12:15:39 -0800502 BN_ULONG point_x[P256_LIMBS], point_y[P256_LIMBS], point_z[P256_LIMBS];
503
504 if (EC_POINT_is_at_infinity(group, point)) {
505 OPENSSL_PUT_ERROR(EC, EC_R_POINT_AT_INFINITY);
506 return 0;
507 }
508
509 if (!ecp_nistz256_bignum_to_field_elem(point_x, &point->X) ||
510 !ecp_nistz256_bignum_to_field_elem(point_y, &point->Y) ||
511 !ecp_nistz256_bignum_to_field_elem(point_z, &point->Z)) {
512 OPENSSL_PUT_ERROR(EC, EC_R_COORDINATES_OUT_OF_RANGE);
513 return 0;
514 }
515
Steven Valdez909b19f2016-11-21 15:35:44 -0500516 ecp_nistz256_mod_inverse_mont(z_inv3, point_z);
Adam Langleyfad63272015-11-12 12:15:39 -0800517 ecp_nistz256_sqr_mont(z_inv2, z_inv3);
David Benjamin4969cc92016-04-22 15:02:23 -0400518
Steven Valdezb0b45c62017-01-17 16:23:54 -0500519 /* Instead of using |ecp_nistz256_from_mont| to convert the |x| coordinate
520 * and then calling |ecp_nistz256_from_mont| again to convert the |y|
521 * coordinate below, convert the common factor |z_inv2| once now, saving one
522 * reduction. */
523 ecp_nistz256_from_mont(z_inv2, z_inv2);
Adam Langleyfad63272015-11-12 12:15:39 -0800524
525 if (x != NULL) {
David Benjaminc895d6b2016-08-11 13:26:41 -0400526 BN_ULONG x_aff[P256_LIMBS];
527 ecp_nistz256_mul_mont(x_aff, z_inv2, point_x);
David Benjaminc895d6b2016-08-11 13:26:41 -0400528 if (!bn_set_words(x, x_aff, P256_LIMBS)) {
Adam Langley4139edb2016-01-13 15:00:54 -0800529 OPENSSL_PUT_ERROR(EC, ERR_R_MALLOC_FAILURE);
530 return 0;
531 }
Adam Langleyfad63272015-11-12 12:15:39 -0800532 }
533
534 if (y != NULL) {
David Benjaminc895d6b2016-08-11 13:26:41 -0400535 BN_ULONG y_aff[P256_LIMBS];
Adam Langleyfad63272015-11-12 12:15:39 -0800536 ecp_nistz256_mul_mont(z_inv3, z_inv3, z_inv2);
David Benjaminc895d6b2016-08-11 13:26:41 -0400537 ecp_nistz256_mul_mont(y_aff, z_inv3, point_y);
David Benjaminc895d6b2016-08-11 13:26:41 -0400538 if (!bn_set_words(y, y_aff, P256_LIMBS)) {
Adam Langley4139edb2016-01-13 15:00:54 -0800539 OPENSSL_PUT_ERROR(EC, ERR_R_MALLOC_FAILURE);
540 return 0;
541 }
Adam Langleyfad63272015-11-12 12:15:39 -0800542 }
543
544 return 1;
545}
546
Robert Sloan8ff03552017-06-14 12:40:58 -0700547DEFINE_METHOD_FUNCTION(EC_METHOD, EC_GFp_nistz256_method) {
548 out->group_init = ec_GFp_mont_group_init;
549 out->group_finish = ec_GFp_mont_group_finish;
550 out->group_copy = ec_GFp_mont_group_copy;
551 out->group_set_curve = ec_GFp_mont_group_set_curve;
552 out->point_get_affine_coordinates = ecp_nistz256_get_affine;
553 out->mul = ecp_nistz256_points_mul;
554 out->field_mul = ec_GFp_mont_field_mul;
555 out->field_sqr = ec_GFp_mont_field_sqr;
556 out->field_encode = ec_GFp_mont_field_encode;
557 out->field_decode = ec_GFp_mont_field_decode;
David Benjaminf0c4a6c2016-08-11 13:26:41 -0400558};
Adam Langleyfad63272015-11-12 12:15:39 -0800559
560#endif /* !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && \
561 !defined(OPENSSL_SMALL) */