/ecm-6.4.2/median.c

# · C · 767 lines · 518 code · 117 blank · 132 comment · 79 complexity · 8dc7b9ad0c45b95a695ca702376a62e8 MD5 · raw file

  1. /* Median/middle product.
  2. Copyright 2003, 2004, 2005, 2006, 2007, 2008 Laurent Fousse, Paul Zimmermann,
  3. Alexander Kruppa, Dave Newman.
  4. This file is part of the ECM Library.
  5. The ECM Library is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU Lesser General Public License as published by
  7. the Free Software Foundation; either version 3 of the License, or (at your
  8. option) any later version.
  9. The ECM Library is distributed in the hope that it will be useful, but
  10. WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  11. or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
  12. License for more details.
  13. You should have received a copy of the GNU Lesser General Public License
  14. along with the ECM Library; see the file COPYING.LIB. If not, see
  15. http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
  16. 51 Franklin St, Suite 500, Boston, MA 02110-1301, USA. */
  17. /* Reference:
  18. [1] Tellegen's Principle into Practice, by A. Bostan, G. Lecerf and E. Schost,
  19. Proc. of ISSAC'03, Philadelphia, 2003.
  20. */
  21. #include <stdio.h>
  22. #include "ecm-impl.h"
  23. #ifndef MAX
  24. #define MAX(a,b) (((a) > (b)) ? (a) : (b))
  25. #endif
  26. #ifndef MIN
  27. #define MIN(a,b) (((a) < (b)) ? (a) : (b))
  28. #endif
  29. extern unsigned int Fermat;
  30. static void list_add_wrapper (listz_t, listz_t, listz_t, unsigned int,
  31. unsigned int);
  32. static void list_sub_wrapper (listz_t, listz_t, listz_t, unsigned int,
  33. unsigned int);
  34. static unsigned int TKarMul (listz_t, unsigned int, listz_t, unsigned int,
  35. listz_t, unsigned int, listz_t);
  36. static void list_sub_safe (listz_t, listz_t, listz_t, unsigned int,
  37. unsigned int, unsigned int);
  38. static void list_add_safe (listz_t, listz_t, listz_t, unsigned int,
  39. unsigned int, unsigned int);
  40. static unsigned int TToomCookMul (listz_t, unsigned int, listz_t, unsigned int,
  41. listz_t, unsigned int, listz_t);
  42. static unsigned int TToomCookMul_space (unsigned int, unsigned int,
  43. unsigned int);
  44. static void
  45. list_add_wrapper (listz_t p, listz_t q, listz_t r, unsigned int n,
  46. unsigned int max_r)
  47. {
  48. list_add (p, q, r, MIN (n, max_r));
  49. if (n > max_r)
  50. list_set (p + max_r, q + max_r, n - max_r);
  51. }
  52. static void
  53. list_sub_wrapper (listz_t p, listz_t q, listz_t r, unsigned int n,
  54. unsigned int max_r)
  55. {
  56. list_sub (p, q, r, MIN (n, max_r));
  57. if (n > max_r)
  58. list_set (p + max_r, q + max_r, n - max_r);
  59. }
  60. /* Given a[0..m] and c[0..l], puts in b[0..n] the coefficients
  61. of degree m to n+m of rev(a)*c, i.e.
  62. b[0] = a[0]*c[0] + ... + a[i]*c[i] with i = min(m, l)
  63. ...
  64. b[k] = a[0]*c[k] + ... + a[i]*c[i+k] with i = min(m, l-k)
  65. ...
  66. b[n] = a[0]*c[n] + ... + a[i]*c[i+n] with i = min(m, l-n) [=l-n].
  67. Using auxiliary memory in t.
  68. Implements algorithm TKarMul of [1].
  69. Assumes deg(c) = l <= m+n.
  70. */
  71. static unsigned int
  72. TKarMul (listz_t b, unsigned int n,
  73. listz_t a, unsigned int m, listz_t c, unsigned int l, listz_t t)
  74. {
  75. unsigned int k, mu, nu, h;
  76. unsigned int s1;
  77. unsigned tot_muls = 0;
  78. #ifdef DEBUG
  79. fprintf (ECM_STDOUT, "Enter TKarMul.\nm = %d\nn = %d\nl = %d\n", m, n, l);
  80. fprintf (ECM_STDOUT, "a = ");
  81. print_list (a, m + 1);
  82. fprintf (ECM_STDOUT, "\nc = ");
  83. print_list (c, l + 1);
  84. fprintf (ECM_STDOUT, "\n");
  85. #endif
  86. if (n == 0)
  87. {
  88. #ifdef DEBUG
  89. fprintf (ECM_STDOUT, "Case n = 0.\n");
  90. #endif
  91. mpz_mul (b[0], a[0], c[0]);
  92. for (k = 1; (k <= m) && (k <= l); k++)
  93. mpz_addmul (b[0], a[k], c[k]);
  94. #ifdef DEBUG
  95. fprintf (ECM_STDOUT, "Exit TKarMul.\n");
  96. #endif
  97. return MIN (m, l) + 1;
  98. }
  99. if (m == 0)
  100. {
  101. #ifdef DEBUG
  102. fprintf (ECM_STDOUT, "Case m = 0.\n");
  103. #endif
  104. for (k = 0; (k <= l) && (k <= n); k++)
  105. mpz_mul (b[k], a[0], c[k]);
  106. for (k = l + 1; k <= n; k++)
  107. mpz_set_ui (b[k], 0);
  108. #ifdef DEBUG
  109. fprintf (ECM_STDOUT, "Exit TKarMul.\n");
  110. #endif
  111. return MIN (n, l) + 1;
  112. }
  113. mu = (m / 2) + 1; /* 1 <= mu <= m */
  114. nu = (n / 2) + 1; /* 1 <= nu <= n */
  115. h = MAX (mu, nu); /* h >= 1 */
  116. #ifdef DEBUG
  117. fprintf (ECM_STDOUT, "mu = %d\nnu = %d\nh = %d\n", mu, nu, h);
  118. #endif
  119. if (mu > n)
  120. {
  121. #ifdef DEBUG
  122. fprintf (ECM_STDOUT, "Case mu > n.\n");
  123. #endif
  124. tot_muls += TKarMul (b, n, a, mu - 1, c, l, t);
  125. if (l >= mu)
  126. {
  127. /* we have to check l-mu <= n + (m-mu), i.e. l <= n+m */
  128. tot_muls += TKarMul (t, n, a + mu, m - mu, c + mu, l - mu, t + n + 1);
  129. list_add (b, b, t, n + 1);
  130. }
  131. #ifdef DEBUG
  132. fprintf (ECM_STDOUT, "Exit TKarMul.\n");
  133. #endif
  134. return tot_muls;
  135. }
  136. if (nu > m)
  137. {
  138. #ifdef DEBUG
  139. fprintf (ECM_STDOUT, "Case nu > m.\n");
  140. #endif
  141. /* we have to check MIN(l,m+nu-1) <= nu-1+m: trivial */
  142. tot_muls += TKarMul (b, nu - 1, a, m, c, MIN (l, m + nu - 1), t);
  143. /* Description broken in reference. Should be a list
  144. * concatenation, not an addition.
  145. * Fixed now.
  146. */
  147. if (l >= nu)
  148. {
  149. /* we have to check l-nu <= n-nu+m, i.e. l <= n+m: trivial */
  150. tot_muls += TKarMul (b + nu, n - nu, a, m, c + nu, l - nu, t);
  151. }
  152. else
  153. list_zero (b + nu, n - nu + 1);
  154. #ifdef DEBUG
  155. fprintf (ECM_STDOUT, "Exit TKarMul.\n");
  156. #endif
  157. return tot_muls;
  158. }
  159. /* We want nu = mu */
  160. mu = nu = h;
  161. #ifdef DEBUG
  162. fprintf (ECM_STDOUT, "Base Case.\n");
  163. #endif
  164. s1 = MIN (l + 1, n + mu);
  165. if (l + 1 > nu)
  166. list_sub_wrapper (t, c, c + nu, s1, l - nu + 1);
  167. else
  168. list_set (t, c, s1);
  169. #ifdef DEBUG
  170. fprintf (ECM_STDOUT, "DEBUG c - c[nu].\n");
  171. print_list (t, s1);
  172. fprintf (ECM_STDOUT, "We compute (1) - (3)\n");
  173. #endif
  174. tot_muls += TKarMul (b, nu - 1, a, mu - 1, t, s1 - 1, t + s1);
  175. /* (1) - (3) */
  176. #ifdef DEBUG
  177. print_list (b, nu);
  178. fprintf (ECM_STDOUT, "We compute (2) - (4)\n");
  179. #endif
  180. if (s1 >= nu + 1) { /* nu - 1 */
  181. tot_muls += TKarMul (b + nu, n - nu, a + mu, m - mu,
  182. t + nu, s1 - nu - 1, t + s1);
  183. /* (2) - (4) */
  184. }
  185. else {
  186. list_zero (b + nu, n - nu + 1);
  187. }
  188. #ifdef DEBUG
  189. print_list (b + nu, n - nu + 1);
  190. #endif
  191. list_add_wrapper (t, a, a + mu, mu, m + 1 - mu);
  192. #ifdef DEBUG
  193. fprintf (ECM_STDOUT, "We compute (2) + (3)\n");
  194. #endif
  195. if (l >= nu) {
  196. tot_muls += TKarMul (t + mu, nu - 1, t, mu - 1, c + nu, l - nu,
  197. t + mu + nu);
  198. }
  199. else
  200. list_zero (t + mu, nu);
  201. /* (2) + (3) */
  202. #ifdef DEBUG
  203. print_list (t + mu, nu);
  204. #endif
  205. list_add (b, b, t + mu, nu);
  206. list_sub (b + nu, t + mu, b + nu, n - nu + 1);
  207. return tot_muls;
  208. }
  209. /* Computes the space needed for TKarMul of b[0..n],
  210. * a[0..m] and c[0..l]
  211. */
  212. static unsigned int
  213. TKarMul_space (unsigned int n, unsigned int m, unsigned int l)
  214. {
  215. unsigned int mu, nu, h;
  216. unsigned int s1;
  217. unsigned int r1, r2;
  218. if (n == 0)
  219. return 0;
  220. if (m == 0)
  221. return 0;
  222. mu = (m / 2) + 1;
  223. nu = (n / 2) + 1;
  224. h = MAX (mu, nu);
  225. if (mu > n)
  226. {
  227. r1 = TKarMul_space (n, mu - 1, l);
  228. if (l >= mu)
  229. {
  230. r2 = TKarMul_space (n, m - mu, l - mu) + n + 1;
  231. r1 = MAX (r1, r2);
  232. }
  233. return r1;
  234. }
  235. if (nu > m)
  236. {
  237. r1 = TKarMul_space (nu - 1, m, MIN (l, m + nu - 1));
  238. if (l >= nu)
  239. {
  240. r2 = TKarMul_space (n - nu, m,l - nu);
  241. r1 = MAX (r1, r2);
  242. }
  243. return r1;
  244. }
  245. mu = nu = h;
  246. s1 = MIN (l + 1, n + mu);
  247. r1 = TKarMul_space (nu - 1, mu - 1, s1 - 1) + s1;
  248. if (s1 >= nu + 1) {
  249. r2 = TKarMul_space (n - nu, m - mu, s1 - nu - 1) + s1;
  250. r1 = MAX (r1, r2);
  251. }
  252. if (l >= nu) {
  253. r2 = TKarMul_space (nu - 1, mu - 1, l - nu) + mu + nu;
  254. r1 = MAX (r1, r2);
  255. }
  256. return r1;
  257. }
  258. /* list_sub with bound checking
  259. */
  260. static void
  261. list_sub_safe (listz_t ret, listz_t a, listz_t b,
  262. unsigned int sizea, unsigned int sizeb,
  263. unsigned int needed)
  264. {
  265. unsigned int i;
  266. unsigned int safe;
  267. safe = MIN(sizea, sizeb);
  268. safe = MIN(safe, needed);
  269. list_sub (ret, a, b, safe);
  270. i = safe;
  271. while (i < needed)
  272. {
  273. if (i < sizea)
  274. {
  275. if (i < sizeb)
  276. mpz_sub (ret[i], a[i], b[i]);
  277. else
  278. mpz_set (ret[i], a[i]);
  279. }
  280. else
  281. {
  282. if (i < sizeb)
  283. mpz_neg (ret[i], b[i]);
  284. else
  285. mpz_set_ui (ret[i], 0);
  286. }
  287. i++;
  288. }
  289. }
  290. /* list_add with bound checking
  291. */
  292. static void
  293. list_add_safe (listz_t ret, listz_t a, listz_t b,
  294. unsigned int sizea, unsigned int sizeb,
  295. unsigned int needed)
  296. {
  297. unsigned int i;
  298. unsigned int safe;
  299. safe = MIN(sizea, sizeb);
  300. safe = MIN(safe, needed);
  301. list_add (ret, a, b, i = safe);
  302. while (i < needed)
  303. {
  304. if (i < sizea)
  305. {
  306. if (i < sizeb)
  307. mpz_add (ret[i], a[i], b[i]);
  308. else
  309. mpz_set (ret[i], a[i]);
  310. }
  311. else
  312. {
  313. if (i < sizeb)
  314. mpz_set (ret[i], b[i]);
  315. else
  316. mpz_set_ui (ret[i], 0);
  317. }
  318. i++;
  319. }
  320. }
  321. static unsigned int
  322. TToomCookMul (listz_t b, unsigned int n,
  323. listz_t a, unsigned int m, listz_t c, unsigned int l,
  324. listz_t tmp)
  325. {
  326. unsigned int nu, mu, h;
  327. unsigned int i;
  328. unsigned int btmp;
  329. unsigned int tot_muls = 0;
  330. nu = n / 3 + 1;
  331. mu = m / 3 + 1;
  332. /* ensures n + 1 > 2 * nu */
  333. if ((n < 2 * nu) || (m < 2 * mu))
  334. {
  335. #ifdef DEBUG
  336. fprintf (ECM_STDOUT, "Too small operands, calling TKara.\n");
  337. #endif
  338. return TKarMul (b, n, a, m, c, l, tmp);
  339. }
  340. /* First strip unnecessary trailing coefficients of c:
  341. */
  342. l = MIN(l, n + m);
  343. /* Now the degenerate cases. We want 2 * nu <= m.
  344. *
  345. */
  346. if (m < 2 * nu)
  347. {
  348. #ifdef DEBUG
  349. fprintf (ECM_STDOUT, "Degenerate Case 1.\n");
  350. #endif
  351. tot_muls += TToomCookMul (b, nu - 1, a, m, c, l, tmp);
  352. if (l >= nu)
  353. tot_muls += TToomCookMul (b + nu, nu - 1, a, m,
  354. c + nu, l - nu, tmp);
  355. else
  356. list_zero (b + nu, nu);
  357. if (l >= 2 * nu) /* n >= 2 * nu is assured. Hopefully */
  358. tot_muls += TToomCookMul (b + 2 * nu, n - 2 * nu, a, m,
  359. c + 2 * nu, l - 2 * nu, tmp);
  360. else
  361. list_zero (b + 2 * nu, n - 2 * nu + 1);
  362. return tot_muls;
  363. }
  364. /* Second degenerate case. We want 2 * mu <= n.
  365. */
  366. if (n < 2 * mu)
  367. {
  368. #ifdef DEBUG
  369. fprintf (ECM_STDOUT, "Degenerate Case 2.\n");
  370. #endif
  371. tot_muls += TToomCookMul (b, n, a, mu - 1, c, l, tmp);
  372. if (l >= mu)
  373. {
  374. tot_muls += TToomCookMul (tmp, n, a + mu, mu - 1,
  375. c + mu, l - mu, tmp + n + 1);
  376. list_add (b, b, tmp, n + 1);
  377. }
  378. if (l >= 2 * mu)
  379. {
  380. tot_muls += TToomCookMul (tmp, n, a + 2 * mu, m - 2 * mu,
  381. c + 2 * mu, l - 2 * mu, tmp + n + 1);
  382. list_add (b, b, tmp, n + 1);
  383. }
  384. return tot_muls;
  385. }
  386. #ifdef DEBUG
  387. fprintf (ECM_STDOUT, "Base Case.\n");
  388. fprintf (ECM_STDOUT, "a = ");
  389. print_list (a, m + 1);
  390. fprintf (ECM_STDOUT, "\nc = ");
  391. print_list (c, l + 1);
  392. #endif
  393. h = MAX(nu, mu);
  394. nu = mu = h;
  395. list_sub_safe (tmp, c + 3 * h, c + h,
  396. (l + 1 > 3 * h ? l + 1 - 3 * h : 0),
  397. (l + 1 > h ? l + 1 - h : 0), 2 * h - 1);
  398. list_sub_safe (tmp + 2 * h - 1, c, c + 2 * h,
  399. l + 1, (l + 1 > 2 * h ? l + 1 - 2 * h : 0),
  400. 2 * h - 1);
  401. for (i = 0; i < 2 * h - 1; i++)
  402. mpz_mul_2exp (tmp[2 * h - 1 + i], tmp[2 * h - 1 + i], 1);
  403. #ifdef DEBUG
  404. print_list (tmp, 4 * h - 2);
  405. #endif
  406. /* --------------------------------
  407. * | 0 .. 2*h-2 | 2*h-1 .. 4*h-3 |
  408. * --------------------------------
  409. * | c3 - c1 | 2(c0 - c2) |
  410. * --------------------------------
  411. */
  412. list_add (tmp + 2 * h - 1, tmp + 2 * h - 1, tmp, 2 * h - 1);
  413. tot_muls += TToomCookMul (b, h - 1, a, h - 1, tmp + 2 * h - 1,
  414. 2 * h - 2, tmp + 4 * h - 2);
  415. /* b[0 .. h - 1] = 2 * m0 */
  416. #ifdef DEBUG
  417. fprintf (ECM_STDOUT, "2 * m0 = ");
  418. print_list (b, h);
  419. #endif
  420. list_add (tmp + 2 * h - 1, a, a + h, h);
  421. list_add (tmp + 2 * h - 1, tmp + 2 * h - 1, a + 2 * h,
  422. MIN(h, m + 1 - 2 * h));
  423. /* tmp[2*h-1 .. 3*h-2] = a0 + a1 + a2 */
  424. #ifdef DEBUG
  425. fprintf (ECM_STDOUT, "\na0 + a1 + a2 = ");
  426. print_list (tmp + 2 * h - 1, h);
  427. #endif
  428. list_sub_safe (tmp + 3 * h - 1, c + 2 * h, c + 3 * h,
  429. (l + 1 > 2 * h ? l + 1 - 2 * h : 0),
  430. (l + 1 > 3 * h ? l + 1 - 3 * h : 0),
  431. 2 * h - 1);
  432. /* -------------------------------------------------
  433. * | 0 .. 2*h-2 | 2*h-1 .. 3*h-2 | 3*h-1 .. 5*h-3 |
  434. * -------------------------------------------------
  435. * | c3 - c1 | a0 + a1 + a2 | c2 - c3 |
  436. * -------------------------------------------------
  437. */
  438. btmp = (l + 1 > h ? l + 1 - h : 0);
  439. btmp = MIN(btmp, 2 * h - 1);
  440. for (i = 0; i < btmp; i++)
  441. {
  442. mpz_mul_2exp (tmp[5 * h - 2 + i], c[h + i], 1);
  443. mpz_add (tmp[5 * h - 2 + i], tmp[5 * h - 2 + i], tmp[3 * h - 1 + i]);
  444. }
  445. while (i < 2 * h - 1)
  446. {
  447. mpz_set (tmp[5 * h - 2 + i], tmp[3 * h - 1 + i]);
  448. i++;
  449. }
  450. tot_muls += TToomCookMul (b + h, h - 1, tmp + 2 * h - 1, h - 1,
  451. tmp + 5 * h - 2, 2 * h - 2,
  452. tmp + 7 * h - 3);
  453. /* b[h .. 2 * h - 1] = 2 * m1 */
  454. #ifdef DEBUG
  455. fprintf (ECM_STDOUT, "\n2 * m1 = ");
  456. print_list (b + h, h);
  457. #endif
  458. /* ------------------------------------------------------------------
  459. * | 0 .. 2*h-2 | 2*h-1 .. 3*h-2 | 3*h-1 .. 5*h-3 | 5*h-2 .. 7*h-4 |
  460. * ------------------------------------------------------------------
  461. * | c3 - c1 | a0 + a1 + a2 | c2 - c3 | c2 - c3 + 2c1 |
  462. * ------------------------------------------------------------------
  463. */
  464. for (i = 0; i < h; i++)
  465. {
  466. mpz_add (tmp[2 * h - 1 + i], tmp[2 * h - 1 + i], a[i + h]);
  467. if (2 * h + i <= m)
  468. mpz_addmul_ui (tmp[2 * h - 1 + i], a[2 * h + i], 3);
  469. }
  470. tot_muls += TToomCookMul (tmp + 5 * h - 2, h - 1,
  471. tmp + 2 * h - 1, h - 1,
  472. tmp, 2 * h - 2, tmp + 6 * h - 2);
  473. /* tmp[5*h-2 .. 6*h - 3] = 6 * m2 */
  474. #ifdef DEBUG
  475. fprintf (ECM_STDOUT, "\n6 * m2 = ");
  476. print_list (tmp + 5 * h - 2, h);
  477. #endif
  478. for (i = 0; i < h; i++)
  479. {
  480. mpz_sub (tmp[2 * h - 1 + i], a[i], a[h + i]);
  481. if (i + 2 * h <= m)
  482. mpz_add (tmp[2 * h - 1 + i], tmp[2 * h - 1 + i], a[2 * h + i]);
  483. }
  484. for (i = 0; i < 2 * h - 1; i++)
  485. {
  486. mpz_mul_ui (tmp[3 * h - 1 + i], tmp[3 * h - 1 + i], 3);
  487. mpz_mul_2exp (tmp[i], tmp[i], 1);
  488. }
  489. list_add (tmp + 3 * h - 1, tmp + 3 * h - 1, tmp, 2 * h - 1);
  490. tot_muls += TToomCookMul (tmp + 6 * h - 2, h - 1,
  491. tmp + 2 * h - 1, h - 1,
  492. tmp + 3 * h - 1, 2 * h - 2,
  493. tmp + 7 * h - 2);
  494. /* tmp[6h-2 .. 7h - 3] = 6 * mm1 */
  495. #ifdef DEBUG
  496. fprintf (ECM_STDOUT, "\n6 * mm1 = ");
  497. print_list (tmp + 6 * h - 2, h);
  498. #endif
  499. list_add_safe (tmp, tmp, c + 2 * h,
  500. 2 * h,
  501. (l + 1 > 2 * h ? l + 1 - 2 * h : 0),
  502. 2 * h - 1);
  503. list_sub_safe (tmp, c + 4 * h, tmp,
  504. (l + 1 > 4 * h ? l + 1 - 4 * h : 0),
  505. 2 * h - 1, 2 * h - 1);
  506. tot_muls += TToomCookMul (b + 2 * h, n - 2 * h, a + 2 * h, m - 2 * h,
  507. tmp, 2 * h - 1, tmp + 7 * h - 2);
  508. /* b[2 * h .. n] = minf */
  509. #ifdef DEBUG
  510. fprintf (ECM_STDOUT, "\nminf = ");
  511. print_list (b + 2 * h, n + 1 - 2 * h);
  512. #endif
  513. /* Layout of b :
  514. * ---------------------------------------
  515. * | 0 ... h-1 | h ... 2*h-1 | 2*h ... n |
  516. * ---------------------------------------
  517. * | 2 * m0 | 2 * m1 | minf |
  518. * ---------------------------------------
  519. *
  520. * Layout of tmp :
  521. * ---------------------------------------------------
  522. * | 0 ... 5*h-1 | 5*h-2 ... 6*h-3 | 6*h-2 ... 7*h-3 |
  523. * ---------------------------------------------------
  524. * | ?????? | 6 * m2 | 6 * mm1 |
  525. * ---------------------------------------------------
  526. */
  527. list_add (tmp, tmp + 5 * h - 2, tmp + 6 * h - 2, h);
  528. for (i = 0; i < h; i++)
  529. mpz_divby3_1op (tmp[i]);
  530. /* t1 = 2 (m2 + mm1)
  531. * tmp[0 .. h - 1] = t1
  532. */
  533. list_add (b, b, b + h, h);
  534. list_add (b, b, tmp, h);
  535. for (i = 0; i < h; i++)
  536. mpz_tdiv_q_2exp (b[i], b[i], 1);
  537. /* b_{low} should be correct */
  538. list_add (tmp + h, b + h, tmp, h);
  539. /* t2 = t1 + 2 m1
  540. * tmp[h .. 2h - 1] = t2
  541. */
  542. list_add (b + h, tmp, tmp + h, h);
  543. list_sub (b + h, b + h, tmp + 6 * h - 2, h);
  544. for (i = 0; i < h; i++)
  545. mpz_tdiv_q_2exp (b[h + i], b[h + i], 1);
  546. /* b_{mid} should be correct */
  547. list_add (tmp + h, tmp + h, tmp + 5 * h - 2, n + 1 - 2 * h);
  548. for (i = 0; i < n + 1 - 2 * h; i++)
  549. mpz_tdiv_q_2exp (tmp[h + i], tmp[h + i], 1);
  550. list_add (b + 2 * h, b + 2 * h, tmp + h, n + 1 - 2 * h);
  551. /* b_{high} should be correct */
  552. return tot_muls;
  553. }
  554. /* Returns space needed by TToomCookMul */
  555. unsigned int
  556. TToomCookMul_space (unsigned int n, unsigned int m, unsigned int l)
  557. {
  558. unsigned int nu, mu, h;
  559. unsigned int stmp1, stmp2;
  560. nu = n / 3 + 1;
  561. mu = m / 3 + 1;
  562. stmp1 = stmp2 = 0;
  563. /* ensures n + 1 > 2 * nu */
  564. if ((n < 2 * nu) || (m < 2 * mu))
  565. return TKarMul_space (n, m, l);
  566. /* First strip unnecessary trailing coefficients of c:
  567. */
  568. l = MIN(l, n + m);
  569. /* Now the degenerate cases. We want 2 * nu < m.
  570. *
  571. */
  572. if (m <= 2 * nu)
  573. {
  574. stmp1 = TToomCookMul_space (nu - 1, m, l);
  575. if (l >= 2 * nu)
  576. stmp2 = TToomCookMul_space (n - 2 * nu, m, l - 2 * nu);
  577. else if (l >= nu)
  578. stmp2 = TToomCookMul_space (nu - 1, m, l - nu);
  579. return MAX(stmp1, stmp2);
  580. }
  581. /* Second degenerate case. We want 2 * mu < n.
  582. */
  583. if (n <= 2 * mu)
  584. {
  585. stmp1 += TToomCookMul_space (n, mu - 1, l);
  586. if (l >= 2 * mu)
  587. stmp2 = TToomCookMul_space (n, m - 2 * mu, l - 2 * mu) + n + 1;
  588. else if (l >= mu)
  589. stmp2 = TToomCookMul_space (n, mu - 1, l - mu) + n + 1;
  590. return MAX(stmp1, stmp2);
  591. }
  592. h = MAX(nu, mu);
  593. stmp1 = TToomCookMul_space (h - 1, h - 1, 2 * h - 2);
  594. stmp2 = stmp1 + 7 * h - 2;
  595. stmp1 = stmp1 + 6 * h - 2;
  596. stmp1 = MAX(stmp1, stmp2);
  597. stmp2 = TToomCookMul_space (n - 2 * h, m - 2 * h, 2 * h - 1) + 7*h-2;
  598. return MAX(stmp1, stmp2);
  599. }
  600. /* Given a[0..m] and c[0..l], puts in b[0..n] the coefficients
  601. of degree m to n+m of rev(a)*c, i.e.
  602. b[0] = a[0]*c[0] + ... + a[i]*c[i] with i = min(m, l)
  603. ...
  604. b[k] = a[0]*c[k] + ... + a[i]*c[i+k] with i = min(m, l-k)
  605. ...
  606. b[n] = a[0]*c[n] + ... + a[i]*c[i+n] with i = min(m, l-n) [=l-n].
  607. Using auxiliary memory in tmp.
  608. Assumes n <= l.
  609. Returns number of multiplications if known, 0 if not known,
  610. and -1 for error.
  611. */
  612. int
  613. TMulGen (listz_t b, unsigned int n, listz_t a, unsigned int m,
  614. listz_t c, unsigned int l, listz_t tmp, mpz_t modulus)
  615. {
  616. ASSERT (n <= l);
  617. if (Fermat)
  618. {
  619. unsigned int i;
  620. for (i = l + 1; i > 1 && (i&1) == 0; i >>= 1);
  621. ASSERT(i == 1);
  622. ASSERT(n + 1 == (l + 1) / 2);
  623. ASSERT(m == l - n || m + 1 == l - n);
  624. return F_mul_trans (b, a, c, m + 1, l + 1, Fermat, tmp);
  625. }
  626. #ifdef KS_MULTIPLY
  627. if ((double) n * (double) mpz_sizeinbase (modulus, 2) >= KS_TMUL_THRESHOLD)
  628. {
  629. if (TMulKS (b, n, a, m, c, l, modulus, 1)) /* Non-zero means error */
  630. return -1;
  631. return 0; /* We have no mul count so we return 0 */
  632. }
  633. #endif
  634. return TToomCookMul (b, n, a, m, c, l, tmp);
  635. }
  636. unsigned int
  637. TMulGen_space (unsigned int n, unsigned int m, unsigned int l)
  638. {
  639. if (Fermat)
  640. return 2 * (l + 1);
  641. else
  642. return TToomCookMul_space (n, m, l);
  643. }