/std/internal/math/biguintnoasm.d

http://github.com/jcd/phobos · D · 360 lines · 270 code · 31 blank · 59 comment · 74 complexity · 65728d2d08a4409d5ccca9ab6fe3adca MD5 · raw file

  1. /** Arbitrary precision arithmetic ('bignum') for processors with no asm support
  2. *
  3. * All functions operate on arrays of uints, stored LSB first.
  4. * If there is a destination array, it will be the first parameter.
  5. * Currently, all of these functions are subject to change, and are
  6. * intended for internal use only.
  7. * This module is intended only to assist development of high-speed routines
  8. * on currently unsupported processors.
  9. * The X86 asm version is about 30 times faster than the D version(DMD).
  10. */
  11. /* Copyright Don Clugston 2008 - 2010.
  12. * Distributed under the Boost Software License, Version 1.0.
  13. * (See accompanying file LICENSE_1_0.txt or copy at
  14. * http://www.boost.org/LICENSE_1_0.txt)
  15. */
  16. module std.internal.math.biguintnoasm;
  17. public:
  18. alias uint BigDigit; // A Bignum is an array of BigDigits.
  19. // Limits for when to switch between multiplication algorithms.
  20. enum : int { KARATSUBALIMIT = 10 }; // Minimum value for which Karatsuba is worthwhile.
  21. enum : int { KARATSUBASQUARELIMIT=12 }; // Minimum value for which square Karatsuba is worthwhile
  22. /** Multi-byte addition or subtraction
  23. * dest[] = src1[] + src2[] + carry (0 or 1).
  24. * or dest[] = src1[] - src2[] - carry (0 or 1).
  25. * Returns carry or borrow (0 or 1).
  26. * Set op == '+' for addition, '-' for subtraction.
  27. */
  28. uint multibyteAddSub(char op)(uint[] dest, const(uint) [] src1,
  29. const (uint) [] src2, uint carry) pure
  30. {
  31. ulong c = carry;
  32. for (size_t i = 0; i < src2.length; ++i)
  33. {
  34. static if (op=='+') c = c + src1[i] + src2[i];
  35. else c = cast(ulong)src1[i] - src2[i] - c;
  36. dest[i] = cast(uint)c;
  37. c = (c > 0xFFFF_FFFF);
  38. }
  39. return cast(uint)c;
  40. }
  41. unittest
  42. {
  43. uint [] a = new uint[40];
  44. uint [] b = new uint[40];
  45. uint [] c = new uint[40];
  46. for (size_t i = 0; i < a.length; ++i)
  47. {
  48. if (i&1) a[i]=cast(uint)(0x8000_0000 + i);
  49. else a[i]=cast(uint)i;
  50. b[i]= 0x8000_0003;
  51. }
  52. c[19]=0x3333_3333;
  53. uint carry = multibyteAddSub!('+')(c[0..18], b[0..18], a[0..18], 0);
  54. assert(c[0]==0x8000_0003);
  55. assert(c[1]==4);
  56. assert(c[19]==0x3333_3333); // check for overrun
  57. assert(carry==1);
  58. for (size_t i = 0; i < a.length; ++i)
  59. {
  60. a[i] = b[i] = c[i] = 0;
  61. }
  62. a[8]=0x048D159E;
  63. b[8]=0x048D159E;
  64. a[10]=0x1D950C84;
  65. b[10]=0x1D950C84;
  66. a[5] =0x44444444;
  67. carry = multibyteAddSub!('-')(a[0..12], a[0..12], b[0..12], 0);
  68. assert(a[11] == 0);
  69. for (size_t i = 0; i < 10; ++i)
  70. if (i != 5)
  71. assert(a[i] == 0);
  72. for (size_t q = 3; q < 36; ++q)
  73. {
  74. for (size_t i = 0; i< a.length; ++i)
  75. {
  76. a[i] = b[i] = c[i] = 0;
  77. }
  78. a[q-2]=0x040000;
  79. b[q-2]=0x040000;
  80. carry = multibyteAddSub!('-')(a[0..q], a[0..q], b[0..q], 0);
  81. assert(a[q-2]==0);
  82. }
  83. }
  84. /** dest[] += carry, or dest[] -= carry.
  85. * op must be '+' or '-'
  86. * Returns final carry or borrow (0 or 1)
  87. */
  88. uint multibyteIncrementAssign(char op)(uint[] dest, uint carry) pure
  89. {
  90. static if (op=='+')
  91. {
  92. ulong c = carry;
  93. c += dest[0];
  94. dest[0] = cast(uint)c;
  95. if (c<=0xFFFF_FFFF)
  96. return 0;
  97. for (size_t i = 1; i < dest.length; ++i)
  98. {
  99. ++dest[i];
  100. if (dest[i] != 0)
  101. return 0;
  102. }
  103. return 1;
  104. }
  105. else
  106. {
  107. ulong c = carry;
  108. c = dest[0] - c;
  109. dest[0] = cast(uint)c;
  110. if (c<=0xFFFF_FFFF)
  111. return 0;
  112. for (size_t i = 1; i < dest.length; ++i)
  113. {
  114. --dest[i];
  115. if (dest[i] != 0xFFFF_FFFF)
  116. return 0;
  117. }
  118. return 1;
  119. }
  120. }
  121. /** dest[] = src[] << numbits
  122. * numbits must be in the range 1..31
  123. */
  124. uint multibyteShl(uint [] dest, const(uint) [] src, uint numbits) pure
  125. {
  126. ulong c = 0;
  127. for (size_t i = 0; i < dest.length; ++i)
  128. {
  129. c += (cast(ulong)(src[i]) << numbits);
  130. dest[i] = cast(uint)c;
  131. c >>>= 32;
  132. }
  133. return cast(uint)c;
  134. }
  135. /** dest[] = src[] >> numbits
  136. * numbits must be in the range 1..31
  137. */
  138. void multibyteShr(uint [] dest, const(uint) [] src, uint numbits) pure
  139. {
  140. ulong c = 0;
  141. for(ptrdiff_t i = dest.length; i!=0; --i)
  142. {
  143. c += (src[i-1] >>numbits) + (cast(ulong)(src[i-1]) << (64 - numbits));
  144. dest[i-1] = cast(uint)c;
  145. c >>>= 32;
  146. }
  147. }
  148. unittest
  149. {
  150. uint [] aa = [0x1222_2223, 0x4555_5556, 0x8999_999A, 0xBCCC_CCCD, 0xEEEE_EEEE];
  151. multibyteShr(aa[0..$-2], aa, 4);
  152. assert(aa[0] == 0x6122_2222 && aa[1] == 0xA455_5555 && aa[2] == 0x0899_9999);
  153. assert(aa[3] == 0xBCCC_CCCD);
  154. aa = [0x1222_2223, 0x4555_5556, 0x8999_999A, 0xBCCC_CCCD, 0xEEEE_EEEE];
  155. multibyteShr(aa[0..$-1], aa, 4);
  156. assert(aa[0] == 0x6122_2222 && aa[1] == 0xA455_5555
  157. && aa[2] == 0xD899_9999 && aa[3] == 0x0BCC_CCCC);
  158. aa = [0xF0FF_FFFF, 0x1222_2223, 0x4555_5556, 0x8999_999A, 0xBCCC_CCCD,
  159. 0xEEEE_EEEE];
  160. multibyteShl(aa[1..4], aa[1..$], 4);
  161. assert(aa[0] == 0xF0FF_FFFF && aa[1] == 0x2222_2230
  162. && aa[2]==0x5555_5561 && aa[3]==0x9999_99A4 && aa[4]==0x0BCCC_CCCD);
  163. }
  164. /** dest[] = src[] * multiplier + carry.
  165. * Returns carry.
  166. */
  167. uint multibyteMul(uint[] dest, const(uint)[] src, uint multiplier, uint carry)
  168. pure
  169. {
  170. assert(dest.length == src.length);
  171. ulong c = carry;
  172. for(size_t i = 0; i < src.length; ++i)
  173. {
  174. c += cast(ulong)(src[i]) * multiplier;
  175. dest[i] = cast(uint)c;
  176. c>>=32;
  177. }
  178. return cast(uint)c;
  179. }
  180. unittest
  181. {
  182. uint [] aa = [0xF0FF_FFFF, 0x1222_2223, 0x4555_5556, 0x8999_999A,
  183. 0xBCCC_CCCD, 0xEEEE_EEEE];
  184. multibyteMul(aa[1..4], aa[1..4], 16, 0);
  185. assert(aa[0] == 0xF0FF_FFFF && aa[1] == 0x2222_2230 && aa[2]==0x5555_5561
  186. && aa[3]==0x9999_99A4 && aa[4]==0x0BCCC_CCCD);
  187. }
  188. /**
  189. * dest[] += src[] * multiplier + carry(0..FFFF_FFFF).
  190. * Returns carry out of MSB (0..FFFF_FFFF).
  191. */
  192. uint multibyteMulAdd(char op)(uint [] dest, const(uint)[] src,
  193. uint multiplier, uint carry) pure
  194. {
  195. assert(dest.length == src.length);
  196. ulong c = carry;
  197. for(size_t i = 0; i < src.length; ++i)
  198. {
  199. static if(op=='+')
  200. {
  201. c += cast(ulong)(multiplier) * src[i] + dest[i];
  202. dest[i] = cast(uint)c;
  203. c >>= 32;
  204. }
  205. else
  206. {
  207. c += cast(ulong)multiplier * src[i];
  208. ulong t = cast(ulong)dest[i] - cast(uint)c;
  209. dest[i] = cast(uint)t;
  210. c = cast(uint)((c>>32) - (t>>32));
  211. }
  212. }
  213. return cast(uint)c;
  214. }
  215. unittest
  216. {
  217. uint [] aa = [0xF0FF_FFFF, 0x1222_2223, 0x4555_5556, 0x8999_999A,
  218. 0xBCCC_CCCD, 0xEEEE_EEEE];
  219. uint [] bb = [0x1234_1234, 0xF0F0_F0F0, 0x00C0_C0C0, 0xF0F0_F0F0,
  220. 0xC0C0_C0C0];
  221. multibyteMulAdd!('+')(bb[1..$-1], aa[1..$-2], 16, 5);
  222. assert(bb[0] == 0x1234_1234 && bb[4] == 0xC0C0_C0C0);
  223. assert(bb[1] == 0x2222_2230 + 0xF0F0_F0F0 + 5
  224. && bb[2] == 0x5555_5561 + 0x00C0_C0C0 + 1
  225. && bb[3] == 0x9999_99A4 + 0xF0F0_F0F0 );
  226. }
  227. /**
  228. Sets result = result[0..left.length] + left * right
  229. It is defined in this way to allow cache-efficient multiplication.
  230. This function is equivalent to:
  231. ----
  232. for (size_t i = 0; i< right.length; ++i) {
  233. dest[left.length + i] = multibyteMulAdd(dest[i..left.length+i],
  234. left, right[i], 0);
  235. }
  236. ----
  237. */
  238. void multibyteMultiplyAccumulate(uint [] dest, const(uint)[] left, const(uint)
  239. [] right) pure
  240. {
  241. for (size_t i = 0; i < right.length; ++i)
  242. {
  243. dest[left.length + i] = multibyteMulAdd!('+')(dest[i..left.length+i],
  244. left, right[i], 0);
  245. }
  246. }
  247. /** dest[] /= divisor.
  248. * overflow is the initial remainder, and must be in the range 0..divisor-1.
  249. */
  250. uint multibyteDivAssign(uint [] dest, uint divisor, uint overflow) pure
  251. {
  252. ulong c = cast(ulong)overflow;
  253. for(ptrdiff_t i = dest.length-1; i>= 0; --i)
  254. {
  255. c = (c<<32) + cast(ulong)(dest[i]);
  256. uint q = cast(uint)(c/divisor);
  257. c -= divisor * q;
  258. dest[i] = q;
  259. }
  260. return cast(uint)c;
  261. }
  262. unittest
  263. {
  264. uint [] aa = new uint[101];
  265. for (uint i = 0; i < aa.length; ++i)
  266. aa[i] = 0x8765_4321 * (i+3);
  267. uint overflow = multibyteMul(aa, aa, 0x8EFD_FCFB, 0x33FF_7461);
  268. uint r = multibyteDivAssign(aa, 0x8EFD_FCFB, overflow);
  269. for (uint i=0; i<aa.length; ++i)
  270. {
  271. assert(aa[i] == 0x8765_4321 * (i+3));
  272. }
  273. assert(r == 0x33FF_7461);
  274. }
  275. // Set dest[2*i..2*i+1]+=src[i]*src[i]
  276. void multibyteAddDiagonalSquares(uint[] dest, const(uint)[] src) pure
  277. {
  278. ulong c = 0;
  279. for(size_t i = 0; i < src.length; ++i)
  280. {
  281. // At this point, c is 0 or 1, since FFFF*FFFF+FFFF_FFFF = 1_0000_0000.
  282. c += cast(ulong)(src[i]) * src[i] + dest[2*i];
  283. dest[2*i] = cast(uint)c;
  284. c = (c>>=32) + dest[2*i+1];
  285. dest[2*i+1] = cast(uint)c;
  286. c >>= 32;
  287. }
  288. }
  289. // Does half a square multiply. (square = diagonal + 2*triangle)
  290. void multibyteTriangleAccumulate(uint[] dest, const(uint)[] x) pure
  291. {
  292. // x[0]*x[1...$] + x[1]*x[2..$] + ... + x[$-2]x[$-1..$]
  293. dest[x.length] = multibyteMul(dest[1 .. x.length], x[1..$], x[0], 0);
  294. if (x.length < 4)
  295. {
  296. if (x.length == 3)
  297. {
  298. ulong c = cast(ulong)(x[$-1]) * x[$-2] + dest[2*x.length-3];
  299. dest[2*x.length - 3] = cast(uint)c;
  300. c >>= 32;
  301. dest[2*x.length - 2] = cast(uint)c;
  302. }
  303. return;
  304. }
  305. for (size_t i = 2; i < x.length - 2; ++i)
  306. {
  307. dest[i-1+ x.length] = multibyteMulAdd!('+')(
  308. dest[i+i-1 .. i+x.length-1], x[i..$], x[i-1], 0);
  309. }
  310. // Unroll the last two entries, to reduce loop overhead:
  311. ulong c = cast(ulong)(x[$-3]) * x[$-2] + dest[2*x.length-5];
  312. dest[2*x.length-5] = cast(uint)c;
  313. c >>= 32;
  314. c += cast(ulong)(x[$-3]) * x[$-1] + dest[2*x.length-4];
  315. dest[2*x.length-4] = cast(uint)c;
  316. c >>= 32;
  317. c += cast(ulong)(x[$-1]) * x[$-2];
  318. dest[2*x.length-3] = cast(uint)c;
  319. c >>= 32;
  320. dest[2*x.length-2] = cast(uint)c;
  321. }
  322. void multibyteSquare(BigDigit[] result, const(BigDigit) [] x) pure
  323. {
  324. multibyteTriangleAccumulate(result, x);
  325. result[$-1] = multibyteShl(result[1..$-1], result[1..$-1], 1); // mul by 2
  326. result[0] = 0;
  327. multibyteAddDiagonalSquares(result, x);
  328. }