/mpz/powm.c
C | 428 lines | 322 code | 36 blank | 70 comment | 77 complexity | 05849d582c6814599b7013bea79bb4b4 MD5 | raw file
Possible License(s): GPL-3.0, LGPL-3.0, GPL-2.0, LGPL-2.1, BSD-3-Clause, LGPL-2.0
- /* mpz_powm(res,base,exp,mod) -- Set RES to (base**exp) mod MOD.
- Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2005 Free Software
- Foundation, Inc. Contributed by Paul Zimmermann.
- This file is part of the GNU MP Library.
- The GNU MP Library is free software; you can redistribute it and/or modify
- it under the terms of the GNU Lesser General Public License as published by
- the Free Software Foundation; either version 2.1 of the License, or (at your
- option) any later version.
- The GNU MP Library is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
- License for more details.
- You should have received a copy of the GNU Lesser General Public License
- along with the GNU MP Library; see the file COPYING.LIB. If not, write to
- the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
- MA 02110-1301, USA. */
- #include "mpir.h"
- #include "gmp-impl.h"
- #include "longlong.h"
- /* Compute t = a mod m, a is defined by (ap,an), m is defined by (mp,mn), and
- t is defined by (tp,mn). */
- static void
- reduce (mp_ptr tp, mp_srcptr ap, mp_size_t an, mp_srcptr mp, mp_size_t mn)
- {
- mp_ptr qp;
- TMP_DECL;
- TMP_MARK;
- qp = TMP_ALLOC_LIMBS (an - mn + 1);
- mpn_tdiv_qr (qp, tp, 0L, ap, an, mp, mn);
- TMP_FREE;
- }
- #if REDUCE_EXPONENT
- /* Return the group order of the ring mod m. */
- static mp_limb_t
- phi (mp_limb_t t)
- {
- mp_limb_t d, m, go;
- go = 1;
- if (t % 2 == 0)
- {
- t = t / 2;
- while (t % 2 == 0)
- {
- go *= 2;
- t = t / 2;
- }
- }
- for (d = 3;; d += 2)
- {
- m = d - 1;
- for (;;)
- {
- mpir_ui q = t / d;
- if (q < d)
- {
- if (t <= 1)
- return go;
- if (t == d)
- return go * m;
- return go * (t - 1);
- }
- if (t != q * d)
- break;
- go *= m;
- m = d;
- t = q;
- }
- }
- }
- #endif
- /* average number of calls to redc for an exponent of n bits
- with the sliding window algorithm of base 2^k: the optimal is
- obtained for the value of k which minimizes 2^(k-1)+n/(k+1):
- n\k 4 5 6 7 8
- 128 156* 159 171 200 261
- 256 309 307* 316 343 403
- 512 617 607* 610 632 688
- 1024 1231 1204 1195* 1207 1256
- 2048 2461 2399 2366 2360* 2396
- 4096 4918 4787 4707 4665* 4670
- */
- /* Use REDC instead of usual reduction for sizes < POWM_THRESHOLD. In REDC
- each modular multiplication costs about 2*n^2 limbs operations, whereas
- using usual reduction it costs 3*K(n), where K(n) is the cost of a
- multiplication using Karatsuba, and a division is assumed to cost 2*K(n),
- for example using Burnikel-Ziegler's algorithm. This gives a theoretical
- threshold of a*SQR_KARATSUBA_THRESHOLD, with a=(3/2)^(1/(2-ln(3)/ln(2))) ~
- 2.66. */
- /* For now, also disable REDC when MOD is even, as the inverse can't handle
- that. At some point, we might want to make the code faster for that case,
- perhaps using CRR. */
- #ifndef POWM_THRESHOLD
- #define POWM_THRESHOLD ((8 * SQR_KARATSUBA_THRESHOLD) / 3)
- #endif
- #define HANDLE_NEGATIVE_EXPONENT 1
- #undef REDUCE_EXPONENT
- void
- mpz_powm (mpz_ptr r, mpz_srcptr b, mpz_srcptr e, mpz_srcptr m)
- {
- mp_ptr xp, tp, qp, gp, this_gp;
- mp_srcptr bp, ep, mp;
- mp_size_t bn, es, en, mn, xn;
- mp_limb_t invm, c;
- mpir_ui enb;
- mp_size_t i, K, j, l, k;
- int m_zero_cnt, e_zero_cnt;
- int sh;
- int use_redc;
- #if HANDLE_NEGATIVE_EXPONENT
- mpz_t new_b;
- #endif
- #if REDUCE_EXPONENT
- mpz_t new_e;
- #endif
- TMP_DECL;
- mp = PTR(m);
- mn = ABSIZ (m);
- if (mn == 0)
- DIVIDE_BY_ZERO;
- TMP_MARK;
- es = SIZ (e);
- if (es <= 0)
- {
- if (es == 0)
- {
- /* Exponent is zero, result is 1 mod m, i.e., 1 or 0 depending on if
- m equals 1. */
- SIZ(r) = (mn == 1 && mp[0] == 1) ? 0 : 1;
- PTR(r)[0] = 1;
- TMP_FREE; /* we haven't really allocated anything here */
- return;
- }
- #if HANDLE_NEGATIVE_EXPONENT
- MPZ_TMP_INIT (new_b, mn + 1);
- if (! mpz_invert (new_b, b, m))
- DIVIDE_BY_ZERO;
- b = new_b;
- es = -es;
- #else
- DIVIDE_BY_ZERO;
- #endif
- }
- en = es;
- #if REDUCE_EXPONENT
- /* Reduce exponent by dividing it by phi(m) when m small. */
- if (mn == 1 && mp[0] < 0x7fffffffL && en * GMP_NUMB_BITS > 150)
- {
- MPZ_TMP_INIT (new_e, 2);
- mpz_mod_ui (new_e, e, phi (mp[0]));
- e = new_e;
- }
- #endif
- use_redc = mn < POWM_THRESHOLD && mp[0] % 2 != 0;
- if (use_redc)
- {
- /* invm = -1/m mod 2^BITS_PER_MP_LIMB, must have m odd */
- modlimb_invert (invm, mp[0]);
- invm = -invm;
- }
- else
- {
- /* Normalize m (i.e. make its most significant bit set) as required by
- division functions below. */
- count_leading_zeros (m_zero_cnt, mp[mn - 1]);
- m_zero_cnt -= GMP_NAIL_BITS;
- if (m_zero_cnt != 0)
- {
- mp_ptr new_mp;
- new_mp = TMP_ALLOC_LIMBS (mn);
- mpn_lshift (new_mp, mp, mn, m_zero_cnt);
- mp = new_mp;
- }
- }
- /* Determine optimal value of k, the number of exponent bits we look at
- at a time. */
- count_leading_zeros (e_zero_cnt, PTR(e)[en - 1]);
- e_zero_cnt -= GMP_NAIL_BITS;
- enb = en * GMP_NUMB_BITS - e_zero_cnt; /* number of bits of exponent */
- k = 1;
- K = 2;
- while (2 * enb > K * (2 + k * (3 + k)))
- {
- k++;
- K *= 2;
- if (k == 10) /* cap allocation */
- break;
- }
- tp = TMP_ALLOC_LIMBS (2 * mn);
- qp = TMP_ALLOC_LIMBS (mn + 1);
- gp = __GMP_ALLOCATE_FUNC_LIMBS (K / 2 * mn);
- /* Compute x*R^n where R=2^BITS_PER_MP_LIMB. */
- bn = ABSIZ (b);
- bp = PTR(b);
- /* Handle |b| >= m by computing b mod m. FIXME: It is not strictly necessary
- for speed or correctness to do this when b and m have the same number of
- limbs, perhaps remove mpn_cmp call. */
- if (bn > mn || (bn == mn && mpn_cmp (bp, mp, mn) >= 0))
- {
- /* Reduce possibly huge base while moving it to gp[0]. Use a function
- call to reduce, since we don't want the quotient allocation to
- live until function return. */
- if (use_redc)
- {
- reduce (tp + mn, bp, bn, mp, mn); /* b mod m */
- MPN_ZERO (tp, mn);
- mpn_tdiv_qr (qp, gp, 0L, tp, 2 * mn, mp, mn); /* unnormnalized! */
- }
- else
- {
- reduce (gp, bp, bn, mp, mn);
- }
- }
- else
- {
- /* |b| < m. We pad out operands to become mn limbs, which simplifies
- the rest of the function, but slows things down when the |b| << m. */
- if (use_redc)
- {
- MPN_ZERO (tp, mn);
- MPN_COPY (tp + mn, bp, bn);
- MPN_ZERO (tp + mn + bn, mn - bn);
- mpn_tdiv_qr (qp, gp, 0L, tp, 2 * mn, mp, mn);
- }
- else
- {
- MPN_COPY (gp, bp, bn);
- MPN_ZERO (gp + bn, mn - bn);
- }
- }
- /* Compute xx^i for odd g < 2^i. */
- xp = TMP_ALLOC_LIMBS (mn);
- mpn_sqr (tp, gp, mn);
- if (use_redc)
- mpn_redc_1 (xp, tp, mp, mn, invm); /* xx = x^2*R^n */
- else
- mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
- this_gp = gp;
- for (i = 1; i < K / 2; i++)
- {
- mpn_mul_n (tp, this_gp, xp, mn);
- this_gp += mn;
- if (use_redc)
- mpn_redc_1 (this_gp,tp, mp, mn, invm); /* g[i] = x^(2i+1)*R^n */
- else
- mpn_tdiv_qr (qp, this_gp, 0L, tp, 2 * mn, mp, mn);
- }
- /* Start the real stuff. */
- ep = PTR (e);
- i = en - 1; /* current index */
- c = ep[i]; /* current limb */
- sh = GMP_NUMB_BITS - e_zero_cnt; /* significant bits in ep[i] */
- sh -= k; /* index of lower bit of ep[i] to take into account */
- if (sh < 0)
- { /* k-sh extra bits are needed */
- if (i > 0)
- {
- i--;
- c <<= (-sh);
- sh += GMP_NUMB_BITS;
- c |= ep[i] >> sh;
- }
- }
- else
- c >>= sh;
- for (j = 0; c % 2 == 0; j++)
- c >>= 1;
- MPN_COPY (xp, gp + mn * (c >> 1), mn);
- while (--j >= 0)
- {
- mpn_sqr (tp, xp, mn);
- if (use_redc)
- mpn_redc_1 (xp,tp, mp, mn, invm);
- else
- mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
- }
- while (i > 0 || sh > 0)
- {
- c = ep[i];
- l = k; /* number of bits treated */
- sh -= l;
- if (sh < 0)
- {
- if (i > 0)
- {
- i--;
- c <<= (-sh);
- sh += GMP_NUMB_BITS;
- c |= ep[i] >> sh;
- }
- else
- {
- l += sh; /* last chunk of bits from e; l < k */
- }
- }
- else
- c >>= sh;
- c &= ((mp_limb_t) 1 << l) - 1;
- /* This while loop implements the sliding window improvement--loop while
- the most significant bit of c is zero, squaring xx as we go. */
- while ((c >> (l - 1)) == 0 && (i > 0 || sh > 0))
- {
- mpn_sqr (tp, xp, mn);
- if (use_redc)
- mpn_redc_1 (xp, tp,mp, mn, invm);
- else
- mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
- if (sh != 0)
- {
- sh--;
- c = (c << 1) + ((ep[i] >> sh) & 1);
- }
- else
- {
- i--;
- sh = GMP_NUMB_BITS - 1;
- c = (c << 1) + (ep[i] >> sh);
- }
- }
- /* Replace xx by xx^(2^l)*x^c. */
- if (c != 0)
- {
- for (j = 0; c % 2 == 0; j++)
- c >>= 1;
- /* c0 = c * 2^j, i.e. xx^(2^l)*x^c = (A^(2^(l - j))*c)^(2^j) */
- l -= j;
- while (--l >= 0)
- {
- mpn_sqr (tp, xp, mn);
- if (use_redc)
- mpn_redc_1 (xp, tp,mp, mn, invm);
- else
- mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
- }
- mpn_mul_n (tp, xp, gp + mn * (c >> 1), mn);
- if (use_redc)
- mpn_redc_1 (xp,tp, mp, mn, invm);
- else
- mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
- }
- else
- j = l; /* case c=0 */
- while (--j >= 0)
- {
- mpn_sqr (tp, xp, mn);
- if (use_redc)
- mpn_redc_1 (xp, tp, mp, mn, invm);
- else
- mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
- }
- }
- if (use_redc)
- {
- /* Convert back xx to xx/R^n. */
- MPN_COPY (tp, xp, mn);
- MPN_ZERO (tp + mn, mn);
- mpn_redc_1 (xp,tp, mp, mn, invm);
- if (mpn_cmp (xp, mp, mn) >= 0)
- mpn_sub_n (xp, xp, mp, mn);
- }
- else
- {
- if (m_zero_cnt != 0)
- {
- mp_limb_t cy;
- cy = mpn_lshift (tp, xp, mn, m_zero_cnt);
- tp[mn] = cy;
- mpn_tdiv_qr (qp, xp, 0L, tp, mn + (cy != 0), mp, mn);
- mpn_rshift (xp, xp, mn, m_zero_cnt);
- }
- }
- xn = mn;
- MPN_NORMALIZE (xp, xn);
- if ((ep[0] & 1) && SIZ(b) < 0 && xn != 0)
- {
- mp = PTR(m); /* want original, unnormalized m */
- mpn_sub (xp, mp, mn, xp, xn);
- xn = mn;
- MPN_NORMALIZE (xp, xn);
- }
- MPZ_REALLOC (r, xn);
- SIZ (r) = xn;
- MPN_COPY (PTR(r), xp, xn);
- __GMP_FREE_FUNC_LIMBS (gp, K / 2 * mn);
- TMP_FREE;
- }