| use crate::std_alloc::Vec; |
| use core::mem; |
| use core::ops::Shl; |
| use num_traits::{One, Zero}; |
| |
| use crate::big_digit::{self, BigDigit, DoubleBigDigit, SignedDoubleBigDigit}; |
| use crate::biguint::BigUint; |
| |
| struct MontyReducer { |
| n0inv: BigDigit, |
| } |
| |
| // k0 = -m**-1 mod 2**BITS. Algorithm from: Dumas, J.G. "On Newton–Raphson |
| // Iteration for Multiplicative Inverses Modulo Prime Powers". |
| fn inv_mod_alt(b: BigDigit) -> BigDigit { |
| assert_ne!(b & 1, 0); |
| |
| let mut k0 = 2 - b as SignedDoubleBigDigit; |
| let mut t = (b - 1) as SignedDoubleBigDigit; |
| let mut i = 1; |
| while i < big_digit::BITS { |
| t = t.wrapping_mul(t); |
| k0 = k0.wrapping_mul(t + 1); |
| |
| i <<= 1; |
| } |
| -k0 as BigDigit |
| } |
| |
| impl MontyReducer { |
| fn new(n: &BigUint) -> Self { |
| let n0inv = inv_mod_alt(n.data[0]); |
| MontyReducer { n0inv } |
| } |
| } |
| |
| /// Computes z mod m = x * y * 2 ** (-n*_W) mod m |
| /// assuming k = -1/m mod 2**_W |
| /// See Gueron, "Efficient Software Implementations of Modular Exponentiation". |
| /// https://eprint.iacr.org/2011/239.pdf |
| /// In the terminology of that paper, this is an "Almost Montgomery Multiplication": |
| /// x and y are required to satisfy 0 <= z < 2**(n*_W) and then the result |
| /// z is guaranteed to satisfy 0 <= z < 2**(n*_W), but it may not be < m. |
| #[allow(clippy::many_single_char_names)] |
| fn montgomery(x: &BigUint, y: &BigUint, m: &BigUint, k: BigDigit, n: usize) -> BigUint { |
| // This code assumes x, y, m are all the same length, n. |
| // (required by addMulVVW and the for loop). |
| // It also assumes that x, y are already reduced mod m, |
| // or else the result will not be properly reduced. |
| assert!( |
| x.data.len() == n && y.data.len() == n && m.data.len() == n, |
| "{:?} {:?} {:?} {}", |
| x, |
| y, |
| m, |
| n |
| ); |
| |
| let mut z = BigUint::zero(); |
| z.data.resize(n * 2, 0); |
| |
| let mut c: BigDigit = 0; |
| for i in 0..n { |
| let c2 = add_mul_vvw(&mut z.data[i..n + i], &x.data, y.data[i]); |
| let t = z.data[i].wrapping_mul(k); |
| let c3 = add_mul_vvw(&mut z.data[i..n + i], &m.data, t); |
| let cx = c.wrapping_add(c2); |
| let cy = cx.wrapping_add(c3); |
| z.data[n + i] = cy; |
| if cx < c2 || cy < c3 { |
| c = 1; |
| } else { |
| c = 0; |
| } |
| } |
| |
| if c == 0 { |
| z.data = z.data[n..].to_vec(); |
| } else { |
| { |
| let (mut first, second) = z.data.split_at_mut(n); |
| sub_vv(&mut first, &second, &m.data); |
| } |
| z.data = z.data[..n].to_vec(); |
| } |
| |
| z |
| } |
| |
| #[inline(always)] |
| fn add_mul_vvw(z: &mut [BigDigit], x: &[BigDigit], y: BigDigit) -> BigDigit { |
| let mut c = 0; |
| for (zi, xi) in z.iter_mut().zip(x.iter()) { |
| let (z1, z0) = mul_add_www(*xi, y, *zi); |
| let (c_, zi_) = add_ww(z0, c, 0); |
| *zi = zi_; |
| c = c_ + z1; |
| } |
| |
| c |
| } |
| |
| /// The resulting carry c is either 0 or 1. |
| #[inline(always)] |
| fn sub_vv(z: &mut [BigDigit], x: &[BigDigit], y: &[BigDigit]) -> BigDigit { |
| let mut c = 0; |
| for (i, (xi, yi)) in x.iter().zip(y.iter()).enumerate().take(z.len()) { |
| let zi = xi.wrapping_sub(*yi).wrapping_sub(c); |
| z[i] = zi; |
| // see "Hacker's Delight", section 2-12 (overflow detection) |
| c = ((yi & !xi) | ((yi | !xi) & zi)) >> (big_digit::BITS - 1) |
| } |
| |
| c |
| } |
| |
| /// z1<<_W + z0 = x+y+c, with c == 0 or 1 |
| #[inline(always)] |
| fn add_ww(x: BigDigit, y: BigDigit, c: BigDigit) -> (BigDigit, BigDigit) { |
| let yc = y.wrapping_add(c); |
| let z0 = x.wrapping_add(yc); |
| let z1 = if z0 < x || yc < y { 1 } else { 0 }; |
| |
| (z1, z0) |
| } |
| |
| /// z1 << _W + z0 = x * y + c |
| #[inline(always)] |
| fn mul_add_www(x: BigDigit, y: BigDigit, c: BigDigit) -> (BigDigit, BigDigit) { |
| let z = x as DoubleBigDigit * y as DoubleBigDigit + c as DoubleBigDigit; |
| ((z >> big_digit::BITS) as BigDigit, z as BigDigit) |
| } |
| |
| /// Calculates x ** y mod m using a fixed, 4-bit window. |
| #[allow(clippy::many_single_char_names)] |
| pub(super) fn monty_modpow(x: &BigUint, y: &BigUint, m: &BigUint) -> BigUint { |
| assert!(m.data[0] & 1 == 1); |
| let mr = MontyReducer::new(m); |
| let num_words = m.data.len(); |
| |
| let mut x = x.clone(); |
| |
| // We want the lengths of x and m to be equal. |
| // It is OK if x >= m as long as len(x) == len(m). |
| if x.data.len() > num_words { |
| x %= m; |
| // Note: now len(x) <= numWords, not guaranteed ==. |
| } |
| if x.data.len() < num_words { |
| x.data.resize(num_words, 0); |
| } |
| |
| // rr = 2**(2*_W*len(m)) mod m |
| let mut rr = BigUint::one(); |
| rr = (rr.shl(2 * num_words as u64 * u64::from(big_digit::BITS))) % m; |
| if rr.data.len() < num_words { |
| rr.data.resize(num_words, 0); |
| } |
| // one = 1, with equal length to that of m |
| let mut one = BigUint::one(); |
| one.data.resize(num_words, 0); |
| |
| let n = 4; |
| // powers[i] contains x^i |
| let mut powers = Vec::with_capacity(1 << n); |
| powers.push(montgomery(&one, &rr, m, mr.n0inv, num_words)); |
| powers.push(montgomery(&x, &rr, m, mr.n0inv, num_words)); |
| for i in 2..1 << n { |
| let r = montgomery(&powers[i - 1], &powers[1], m, mr.n0inv, num_words); |
| powers.push(r); |
| } |
| |
| // initialize z = 1 (Montgomery 1) |
| let mut z = powers[0].clone(); |
| z.data.resize(num_words, 0); |
| let mut zz = BigUint::zero(); |
| zz.data.resize(num_words, 0); |
| |
| // same windowed exponent, but with Montgomery multiplications |
| for i in (0..y.data.len()).rev() { |
| let mut yi = y.data[i]; |
| let mut j = 0; |
| while j < big_digit::BITS { |
| if i != y.data.len() - 1 || j != 0 { |
| zz = montgomery(&z, &z, m, mr.n0inv, num_words); |
| z = montgomery(&zz, &zz, m, mr.n0inv, num_words); |
| zz = montgomery(&z, &z, m, mr.n0inv, num_words); |
| z = montgomery(&zz, &zz, m, mr.n0inv, num_words); |
| } |
| zz = montgomery( |
| &z, |
| &powers[(yi >> (big_digit::BITS - n)) as usize], |
| m, |
| mr.n0inv, |
| num_words, |
| ); |
| mem::swap(&mut z, &mut zz); |
| yi <<= n; |
| j += n; |
| } |
| } |
| |
| // convert to regular number |
| zz = montgomery(&z, &one, m, mr.n0inv, num_words); |
| |
| zz.normalize(); |
| // One last reduction, just in case. |
| // See golang.org/issue/13907. |
| if zz >= *m { |
| // Common case is m has high bit set; in that case, |
| // since zz is the same length as m, there can be just |
| // one multiple of m to remove. Just subtract. |
| // We think that the subtract should be sufficient in general, |
| // so do that unconditionally, but double-check, |
| // in case our beliefs are wrong. |
| // The div is not expected to be reached. |
| zz -= m; |
| if zz >= *m { |
| zz %= m; |
| } |
| } |
| |
| zz.normalize(); |
| zz |
| } |