blob: 10a2ca630f167efa4e0b6ee393dd35a8fdece575 [file] [log] [blame]
Pete Bentleya5c947b2019-08-09 14:24:27 +00001// Copyright (c) 2019, Cloudflare Inc.
2//
3// Permission to use, copy, modify, and/or distribute this software for any
4// purpose with or without fee is hereby granted, provided that the above
5// copyright notice and this permission notice appear in all copies.
6//
7// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
10// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
12// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
13// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14
15package sike
16
17import (
18 "math/bits"
19)
20
21// Compute z = x + y (mod 2*p).
22func fpAddRdc(z, x, y *Fp) {
23 var carry uint64
24
25 // z=x+y % p
26 for i := 0; i < FP_WORDS; i++ {
27 z[i], carry = bits.Add64(x[i], y[i], carry)
28 }
29
30 // z = z - pX2
31 carry = 0
32 for i := 0; i < FP_WORDS; i++ {
33 z[i], carry = bits.Sub64(z[i], pX2[i], carry)
34 }
35
36 // if z<0 add pX2 back
37 mask := uint64(0 - carry)
38 carry = 0
39 for i := 0; i < FP_WORDS; i++ {
40 z[i], carry = bits.Add64(z[i], pX2[i]&mask, carry)
41 }
42}
43
44// Compute z = x - y (mod 2*p).
45func fpSubRdc(z, x, y *Fp) {
46 var borrow uint64
47
48 // z = z - pX2
49 for i := 0; i < FP_WORDS; i++ {
50 z[i], borrow = bits.Sub64(x[i], y[i], borrow)
51 }
52
53 // if z<0 add pX2 back
54 mask := uint64(0 - borrow)
55 borrow = 0
56 for i := 0; i < FP_WORDS; i++ {
57 z[i], borrow = bits.Add64(z[i], pX2[i]&mask, borrow)
58 }
59}
60
61// Reduce a field element in [0, 2*p) to one in [0,p).
62func fpRdcP(x *Fp) {
63 var borrow, mask uint64
64 for i := 0; i < FP_WORDS; i++ {
65 x[i], borrow = bits.Sub64(x[i], p[i], borrow)
66 }
67
68 // Sets all bits if borrow = 1
69 mask = 0 - borrow
70 borrow = 0
71 for i := 0; i < FP_WORDS; i++ {
72 x[i], borrow = bits.Add64(x[i], p[i]&mask, borrow)
73 }
74}
75
76// Implementation doesn't actually depend on a prime field.
77func fpSwapCond(x, y *Fp, mask uint8) {
78 if mask != 0 {
79 var tmp Fp
80 copy(tmp[:], y[:])
81 copy(y[:], x[:])
82 copy(x[:], tmp[:])
83 }
84}
85
86// Compute z = x * y.
87func fpMul(z *FpX2, x, y *Fp) {
88 var carry, t, u, v uint64
89 var hi, lo uint64
90
91 for i := uint64(0); i < FP_WORDS; i++ {
92 for j := uint64(0); j <= i; j++ {
93 hi, lo = bits.Mul64(x[j], y[i-j])
94 v, carry = bits.Add64(lo, v, 0)
95 u, carry = bits.Add64(hi, u, carry)
96 t += carry
97 }
98 z[i] = v
99 v = u
100 u = t
101 t = 0
102 }
103
104 for i := FP_WORDS; i < (2*FP_WORDS)-1; i++ {
105 for j := i - FP_WORDS + 1; j < FP_WORDS; j++ {
106 hi, lo = bits.Mul64(x[j], y[i-j])
107 v, carry = bits.Add64(lo, v, 0)
108 u, carry = bits.Add64(hi, u, carry)
109 t += carry
110 }
111 z[i] = v
112 v = u
113 u = t
114 t = 0
115 }
116 z[2*FP_WORDS-1] = v
117}
118
119// Perform Montgomery reduction: set z = x R^{-1} (mod 2*p)
120// with R=2^512. Destroys the input value.
121func fpMontRdc(z *Fp, x *FpX2) {
122 var carry, t, u, v uint64
123 var hi, lo uint64
124 var count int
125
126 count = 3 // number of 0 digits in the least significat part of p + 1
127
128 for i := 0; i < FP_WORDS; i++ {
129 for j := 0; j < i; j++ {
130 if j < (i - count + 1) {
131 hi, lo = bits.Mul64(z[j], p1[i-j])
132 v, carry = bits.Add64(lo, v, 0)
133 u, carry = bits.Add64(hi, u, carry)
134 t += carry
135 }
136 }
137 v, carry = bits.Add64(v, x[i], 0)
138 u, carry = bits.Add64(u, 0, carry)
139 t += carry
140
141 z[i] = v
142 v = u
143 u = t
144 t = 0
145 }
146
147 for i := FP_WORDS; i < 2*FP_WORDS-1; i++ {
148 if count > 0 {
149 count--
150 }
151 for j := i - FP_WORDS + 1; j < FP_WORDS; j++ {
152 if j < (FP_WORDS - count) {
153 hi, lo = bits.Mul64(z[j], p1[i-j])
154 v, carry = bits.Add64(lo, v, 0)
155 u, carry = bits.Add64(hi, u, carry)
156 t += carry
157 }
158 }
159 v, carry = bits.Add64(v, x[i], 0)
160 u, carry = bits.Add64(u, 0, carry)
161
162 t += carry
163 z[i-FP_WORDS] = v
164 v = u
165 u = t
166 t = 0
167 }
168 v, carry = bits.Add64(v, x[2*FP_WORDS-1], 0)
169 z[FP_WORDS-1] = v
170}
171
172// Compute z = x + y, without reducing mod p.
173func fp2Add(z, x, y *FpX2) {
174 var carry uint64
175 for i := 0; i < 2*FP_WORDS; i++ {
176 z[i], carry = bits.Add64(x[i], y[i], carry)
177 }
178}
179
180// Compute z = x - y, without reducing mod p.
181func fp2Sub(z, x, y *FpX2) {
182 var borrow, mask uint64
183 for i := 0; i < 2*FP_WORDS; i++ {
184 z[i], borrow = bits.Sub64(x[i], y[i], borrow)
185 }
186
187 // Sets all bits if borrow = 1
188 mask = 0 - borrow
189 borrow = 0
190 for i := FP_WORDS; i < 2*FP_WORDS; i++ {
191 z[i], borrow = bits.Add64(z[i], p[i-FP_WORDS]&mask, borrow)
192 }
193}
194
195// Montgomery multiplication. Input values must be already
196// in Montgomery domain.
197func fpMulRdc(dest, lhs, rhs *Fp) {
198 a := lhs // = a*R
199 b := rhs // = b*R
200
201 var ab FpX2
202 fpMul(&ab, a, b) // = a*b*R*R
203 fpMontRdc(dest, &ab) // = a*b*R mod p
204}
205
206// Set dest = x^((p-3)/4). If x is square, this is 1/sqrt(x).
207// Uses variation of sliding-window algorithm from with window size
208// of 5 and least to most significant bit sliding (left-to-right)
209// See HAC 14.85 for general description.
210//
211// Allowed to overlap x with dest.
212// All values in Montgomery domains
213// Set dest = x^(2^k), for k >= 1, by repeated squarings.
214func p34(dest, x *Fp) {
215 var lookup [16]Fp
216
217 // This performs sum(powStrategy) + 1 squarings and len(lookup) + len(mulStrategy)
218 // multiplications.
219 powStrategy := []uint8{
220 0x03, 0x0A, 0x07, 0x05, 0x06, 0x05, 0x03, 0x08, 0x04, 0x07,
221 0x05, 0x06, 0x04, 0x05, 0x09, 0x06, 0x03, 0x0B, 0x05, 0x05,
222 0x02, 0x08, 0x04, 0x07, 0x07, 0x08, 0x05, 0x06, 0x04, 0x08,
223 0x05, 0x02, 0x0A, 0x06, 0x05, 0x04, 0x08, 0x05, 0x05, 0x05,
224 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
225 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
226 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
227 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x01}
228 mulStrategy := []uint8{
229 0x02, 0x0F, 0x09, 0x08, 0x0E, 0x0C, 0x02, 0x08, 0x05, 0x0F,
230 0x08, 0x0F, 0x06, 0x06, 0x03, 0x02, 0x00, 0x0A, 0x09, 0x0D,
231 0x01, 0x0C, 0x03, 0x07, 0x01, 0x0A, 0x08, 0x0B, 0x02, 0x0F,
232 0x0E, 0x01, 0x0B, 0x0C, 0x0E, 0x03, 0x0B, 0x0F, 0x0F, 0x0F,
233 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F,
234 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F,
235 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F,
236 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x00}
237 initialMul := uint8(8)
238
239 // Precompute lookup table of odd multiples of x for window
240 // size k=5.
241 var xx Fp
242 fpMulRdc(&xx, x, x)
243 lookup[0] = *x
244 for i := 1; i < 16; i++ {
245 fpMulRdc(&lookup[i], &lookup[i-1], &xx)
246 }
247
248 // Now lookup = {x, x^3, x^5, ... }
249 // so that lookup[i] = x^{2*i + 1}
250 // so that lookup[k/2] = x^k, for odd k
251 *dest = lookup[initialMul]
252 for i := uint8(0); i < uint8(len(powStrategy)); i++ {
253 fpMulRdc(dest, dest, dest)
254 for j := uint8(1); j < powStrategy[i]; j++ {
255 fpMulRdc(dest, dest, dest)
256 }
257 fpMulRdc(dest, dest, &lookup[mulStrategy[i]])
258 }
259}
260
261func add(dest, lhs, rhs *Fp2) {
262 fpAddRdc(&dest.A, &lhs.A, &rhs.A)
263 fpAddRdc(&dest.B, &lhs.B, &rhs.B)
264}
265
266func sub(dest, lhs, rhs *Fp2) {
267 fpSubRdc(&dest.A, &lhs.A, &rhs.A)
268 fpSubRdc(&dest.B, &lhs.B, &rhs.B)
269}
270
271func mul(dest, lhs, rhs *Fp2) {
272 // Let (a,b,c,d) = (lhs.a,lhs.b,rhs.a,rhs.b).
273 a := &lhs.A
274 b := &lhs.B
275 c := &rhs.A
276 d := &rhs.B
277
278 // We want to compute
279 //
280 // (a + bi)*(c + di) = (a*c - b*d) + (a*d + b*c)i
281 //
282 // Use Karatsuba's trick: note that
283 //
284 // (b - a)*(c - d) = (b*c + a*d) - a*c - b*d
285 //
286 // so (a*d + b*c) = (b-a)*(c-d) + a*c + b*d.
287
288 var ac, bd FpX2
289 fpMul(&ac, a, c) // = a*c*R*R
290 fpMul(&bd, b, d) // = b*d*R*R
291
292 var b_minus_a, c_minus_d Fp
293 fpSubRdc(&b_minus_a, b, a) // = (b-a)*R
294 fpSubRdc(&c_minus_d, c, d) // = (c-d)*R
295
296 var ad_plus_bc FpX2
297 fpMul(&ad_plus_bc, &b_minus_a, &c_minus_d) // = (b-a)*(c-d)*R*R
298 fp2Add(&ad_plus_bc, &ad_plus_bc, &ac) // = ((b-a)*(c-d) + a*c)*R*R
299 fp2Add(&ad_plus_bc, &ad_plus_bc, &bd) // = ((b-a)*(c-d) + a*c + b*d)*R*R
300
301 fpMontRdc(&dest.B, &ad_plus_bc) // = (a*d + b*c)*R mod p
302
303 var ac_minus_bd FpX2
304 fp2Sub(&ac_minus_bd, &ac, &bd) // = (a*c - b*d)*R*R
305 fpMontRdc(&dest.A, &ac_minus_bd) // = (a*c - b*d)*R mod p
306}
307
308func inv(dest, x *Fp2) {
309 var a2PlusB2 Fp
310 var asq, bsq FpX2
311 var ac FpX2
312 var minusB Fp
313 var minusBC FpX2
314
315 a := &x.A
316 b := &x.B
317
318 // We want to compute
319 //
320 // 1 1 (a - bi) (a - bi)
321 // -------- = -------- -------- = -----------
322 // (a + bi) (a + bi) (a - bi) (a^2 + b^2)
323 //
324 // Letting c = 1/(a^2 + b^2), this is
325 //
326 // 1/(a+bi) = a*c - b*ci.
327
328 fpMul(&asq, a, a) // = a*a*R*R
329 fpMul(&bsq, b, b) // = b*b*R*R
330 fp2Add(&asq, &asq, &bsq) // = (a^2 + b^2)*R*R
331 fpMontRdc(&a2PlusB2, &asq) // = (a^2 + b^2)*R mod p
332 // Now a2PlusB2 = a^2 + b^2
333
334 inv := a2PlusB2
335 fpMulRdc(&inv, &a2PlusB2, &a2PlusB2)
336 p34(&inv, &inv)
337 fpMulRdc(&inv, &inv, &inv)
338 fpMulRdc(&inv, &inv, &a2PlusB2)
339
340 fpMul(&ac, a, &inv)
341 fpMontRdc(&dest.A, &ac)
342
343 fpSubRdc(&minusB, &minusB, b)
344 fpMul(&minusBC, &minusB, &inv)
345 fpMontRdc(&dest.B, &minusBC)
346}
347
348func sqr(dest, x *Fp2) {
349 var a2, aPlusB, aMinusB Fp
350 var a2MinB2, ab2 FpX2
351
352 a := &x.A
353 b := &x.B
354
355 // (a + bi)*(a + bi) = (a^2 - b^2) + 2abi.
356 fpAddRdc(&a2, a, a) // = a*R + a*R = 2*a*R
357 fpAddRdc(&aPlusB, a, b) // = a*R + b*R = (a+b)*R
358 fpSubRdc(&aMinusB, a, b) // = a*R - b*R = (a-b)*R
359 fpMul(&a2MinB2, &aPlusB, &aMinusB) // = (a+b)*(a-b)*R*R = (a^2 - b^2)*R*R
360 fpMul(&ab2, &a2, b) // = 2*a*b*R*R
361 fpMontRdc(&dest.A, &a2MinB2) // = (a^2 - b^2)*R mod p
362 fpMontRdc(&dest.B, &ab2) // = 2*a*b*R mod p
363}
364
365// In case choice == 1, performs following swap in constant time:
366// xPx <-> xQx
367// xPz <-> xQz
368// Otherwise returns xPx, xPz, xQx, xQz unchanged
369func condSwap(xPx, xPz, xQx, xQz *Fp2, choice uint8) {
370 fpSwapCond(&xPx.A, &xQx.A, choice)
371 fpSwapCond(&xPx.B, &xQx.B, choice)
372 fpSwapCond(&xPz.A, &xQz.A, choice)
373 fpSwapCond(&xPz.B, &xQz.B, choice)
374}