PageRenderTime 86ms CodeModel.GetById 11ms app.highlight 70ms RepoModel.GetById 1ms app.codeStats 1ms

/std/internal/math/biguintnoasm.d

http://github.com/jcd/phobos
D | 360 lines | 270 code | 31 blank | 59 comment | 74 complexity | 65728d2d08a4409d5ccca9ab6fe3adca MD5 | raw file
  1/** Arbitrary precision arithmetic ('bignum') for processors with no asm support
  2 *
  3 * All functions operate on arrays of uints, stored LSB first.
  4 * If there is a destination array, it will be the first parameter.
  5 * Currently, all of these functions are subject to change, and are
  6 * intended for internal use only.
  7 * This module is intended only to assist development of high-speed routines
  8 * on currently unsupported processors.
  9 * The X86 asm version is about 30 times faster than the D version(DMD).
 10 */
 11
 12/*          Copyright Don Clugston 2008 - 2010.
 13 * Distributed under the Boost Software License, Version 1.0.
 14 *    (See accompanying file LICENSE_1_0.txt or copy at
 15 *          http://www.boost.org/LICENSE_1_0.txt)
 16 */
 17
 18module std.internal.math.biguintnoasm;
 19
 20public:
 21alias uint BigDigit; // A Bignum is an array of BigDigits.
 22
 23    // Limits for when to switch between multiplication algorithms.
 24enum : int { KARATSUBALIMIT = 10 }; // Minimum value for which Karatsuba is worthwhile.
 25enum : int { KARATSUBASQUARELIMIT=12 }; // Minimum value for which square Karatsuba is worthwhile
 26
 27
 28/** Multi-byte addition or subtraction
 29 *    dest[] = src1[] + src2[] + carry (0 or 1).
 30 * or dest[] = src1[] - src2[] - carry (0 or 1).
 31 * Returns carry or borrow (0 or 1).
 32 * Set op == '+' for addition, '-' for subtraction.
 33 */
 34uint multibyteAddSub(char op)(uint[] dest, const(uint) [] src1,
 35    const (uint) [] src2, uint carry) pure
 36{
 37    ulong c = carry;
 38    for (size_t i = 0; i < src2.length; ++i)
 39    {
 40        static if (op=='+') c = c  + src1[i] + src2[i];
 41             else           c = cast(ulong)src1[i] - src2[i] - c;
 42        dest[i] = cast(uint)c;
 43        c = (c > 0xFFFF_FFFF);
 44    }
 45    return cast(uint)c;
 46}
 47
 48unittest
 49{
 50    uint [] a = new uint[40];
 51    uint [] b = new uint[40];
 52    uint [] c = new uint[40];
 53    for (size_t i = 0; i < a.length; ++i)
 54    {
 55        if (i&1) a[i]=cast(uint)(0x8000_0000 + i);
 56        else a[i]=cast(uint)i;
 57        b[i]= 0x8000_0003;
 58    }
 59    c[19]=0x3333_3333;
 60    uint carry = multibyteAddSub!('+')(c[0..18], b[0..18], a[0..18], 0);
 61    assert(c[0]==0x8000_0003);
 62    assert(c[1]==4);
 63    assert(c[19]==0x3333_3333); // check for overrun
 64    assert(carry==1);
 65    for (size_t i = 0; i < a.length; ++i)
 66    {
 67        a[i] = b[i] = c[i] = 0;
 68    }
 69    a[8]=0x048D159E;
 70    b[8]=0x048D159E;
 71    a[10]=0x1D950C84;
 72    b[10]=0x1D950C84;
 73    a[5] =0x44444444;
 74    carry = multibyteAddSub!('-')(a[0..12], a[0..12], b[0..12], 0);
 75    assert(a[11] == 0);
 76    for (size_t i = 0; i < 10; ++i)
 77        if (i != 5)
 78            assert(a[i] == 0);
 79
 80    for (size_t q = 3; q < 36; ++q)
 81    {
 82        for (size_t i = 0; i< a.length; ++i)
 83        {
 84            a[i] = b[i] = c[i] = 0;
 85        }
 86        a[q-2]=0x040000;
 87        b[q-2]=0x040000;
 88       carry = multibyteAddSub!('-')(a[0..q], a[0..q], b[0..q], 0);
 89       assert(a[q-2]==0);
 90    }
 91}
 92
 93
 94
 95/** dest[] += carry, or dest[] -= carry.
 96 *  op must be '+' or '-'
 97 *  Returns final carry or borrow (0 or 1)
 98 */
 99uint multibyteIncrementAssign(char op)(uint[] dest, uint carry) pure
100{
101    static if (op=='+')
102    {
103        ulong c = carry;
104        c += dest[0];
105        dest[0] = cast(uint)c;
106        if (c<=0xFFFF_FFFF)
107            return 0;
108
109        for (size_t i = 1; i < dest.length; ++i)
110        {
111            ++dest[i];
112            if (dest[i] != 0)
113                return 0;
114        }
115        return 1;
116    }
117    else
118    {
119        ulong c = carry;
120        c = dest[0] - c;
121        dest[0] = cast(uint)c;
122        if (c<=0xFFFF_FFFF)
123            return 0;
124        for (size_t i = 1; i < dest.length; ++i)
125        {
126            --dest[i];
127            if (dest[i] != 0xFFFF_FFFF)
128                return 0;
129        }
130        return 1;
131    }
132}
133
134/** dest[] = src[] << numbits
135 *  numbits must be in the range 1..31
136 */
137uint multibyteShl(uint [] dest, const(uint) [] src, uint numbits) pure
138{
139    ulong c = 0;
140    for (size_t i = 0; i < dest.length; ++i)
141    {
142        c += (cast(ulong)(src[i]) << numbits);
143        dest[i] = cast(uint)c;
144        c >>>= 32;
145   }
146   return cast(uint)c;
147}
148
149
150/** dest[] = src[] >> numbits
151 *  numbits must be in the range 1..31
152 */
153void multibyteShr(uint [] dest, const(uint) [] src, uint numbits) pure
154{
155    ulong c = 0;
156    for(ptrdiff_t i = dest.length; i!=0; --i)
157    {
158        c += (src[i-1] >>numbits) + (cast(ulong)(src[i-1]) << (64 - numbits));
159        dest[i-1] = cast(uint)c;
160        c >>>= 32;
161   }
162}
163
164unittest
165{
166
167    uint [] aa = [0x1222_2223, 0x4555_5556, 0x8999_999A, 0xBCCC_CCCD, 0xEEEE_EEEE];
168    multibyteShr(aa[0..$-2], aa, 4);
169    assert(aa[0] == 0x6122_2222 && aa[1] == 0xA455_5555 && aa[2] == 0x0899_9999);
170    assert(aa[3] == 0xBCCC_CCCD);
171
172    aa = [0x1222_2223, 0x4555_5556, 0x8999_999A, 0xBCCC_CCCD, 0xEEEE_EEEE];
173    multibyteShr(aa[0..$-1], aa, 4);
174    assert(aa[0] == 0x6122_2222 && aa[1] == 0xA455_5555
175        && aa[2] == 0xD899_9999 && aa[3] == 0x0BCC_CCCC);
176
177    aa = [0xF0FF_FFFF, 0x1222_2223, 0x4555_5556, 0x8999_999A, 0xBCCC_CCCD,
178        0xEEEE_EEEE];
179    multibyteShl(aa[1..4], aa[1..$], 4);
180    assert(aa[0] == 0xF0FF_FFFF && aa[1] == 0x2222_2230
181        && aa[2]==0x5555_5561 && aa[3]==0x9999_99A4 && aa[4]==0x0BCCC_CCCD);
182}
183
184/** dest[] = src[] * multiplier + carry.
185 * Returns carry.
186 */
187uint multibyteMul(uint[] dest, const(uint)[] src, uint multiplier, uint carry)
188	pure
189{
190    assert(dest.length == src.length);
191    ulong c = carry;
192    for(size_t i = 0; i < src.length; ++i)
193    {
194        c += cast(ulong)(src[i]) * multiplier;
195        dest[i] = cast(uint)c;
196        c>>=32;
197    }
198    return cast(uint)c;
199}
200
201unittest
202{
203    uint [] aa = [0xF0FF_FFFF, 0x1222_2223, 0x4555_5556, 0x8999_999A,
204        0xBCCC_CCCD, 0xEEEE_EEEE];
205    multibyteMul(aa[1..4], aa[1..4], 16, 0);
206    assert(aa[0] == 0xF0FF_FFFF && aa[1] == 0x2222_2230 && aa[2]==0x5555_5561
207        && aa[3]==0x9999_99A4 && aa[4]==0x0BCCC_CCCD);
208}
209
210/**
211 * dest[] += src[] * multiplier + carry(0..FFFF_FFFF).
212 * Returns carry out of MSB (0..FFFF_FFFF).
213 */
214uint multibyteMulAdd(char op)(uint [] dest, const(uint)[] src,
215    uint multiplier, uint carry) pure
216{
217    assert(dest.length == src.length);
218    ulong c = carry;
219    for(size_t i = 0; i < src.length; ++i)
220    {
221        static if(op=='+')
222        {
223            c += cast(ulong)(multiplier) * src[i]  + dest[i];
224            dest[i] = cast(uint)c;
225            c >>= 32;
226        }
227        else
228        {
229            c += cast(ulong)multiplier * src[i];
230            ulong t = cast(ulong)dest[i] - cast(uint)c;
231            dest[i] = cast(uint)t;
232            c = cast(uint)((c>>32) - (t>>32));
233        }
234    }
235    return cast(uint)c;
236}
237
238unittest
239{
240
241    uint [] aa = [0xF0FF_FFFF, 0x1222_2223, 0x4555_5556, 0x8999_999A,
242        0xBCCC_CCCD, 0xEEEE_EEEE];
243    uint [] bb = [0x1234_1234, 0xF0F0_F0F0, 0x00C0_C0C0, 0xF0F0_F0F0,
244        0xC0C0_C0C0];
245    multibyteMulAdd!('+')(bb[1..$-1], aa[1..$-2], 16, 5);
246    assert(bb[0] == 0x1234_1234 && bb[4] == 0xC0C0_C0C0);
247    assert(bb[1] == 0x2222_2230 + 0xF0F0_F0F0 + 5
248        && bb[2] == 0x5555_5561 + 0x00C0_C0C0 + 1
249        && bb[3] == 0x9999_99A4 + 0xF0F0_F0F0 );
250}
251
252
253/**
254   Sets result = result[0..left.length] + left * right
255
256   It is defined in this way to allow cache-efficient multiplication.
257   This function is equivalent to:
258    ----
259    for (size_t i = 0; i< right.length; ++i) {
260        dest[left.length + i] = multibyteMulAdd(dest[i..left.length+i],
261                left, right[i], 0);
262    }
263    ----
264 */
265void multibyteMultiplyAccumulate(uint [] dest, const(uint)[] left, const(uint)
266		[] right) pure
267{
268    for (size_t i = 0; i < right.length; ++i)
269    {
270        dest[left.length + i] = multibyteMulAdd!('+')(dest[i..left.length+i],
271                left, right[i], 0);
272    }
273}
274
275/**  dest[] /= divisor.
276 * overflow is the initial remainder, and must be in the range 0..divisor-1.
277 */
278uint multibyteDivAssign(uint [] dest, uint divisor, uint overflow) pure
279{
280    ulong c = cast(ulong)overflow;
281    for(ptrdiff_t i = dest.length-1; i>= 0; --i)
282    {
283        c = (c<<32) + cast(ulong)(dest[i]);
284        uint q = cast(uint)(c/divisor);
285        c -= divisor * q;
286        dest[i] = q;
287    }
288    return cast(uint)c;
289}
290
291unittest
292{
293    uint [] aa = new uint[101];
294    for (uint i = 0; i < aa.length; ++i)
295        aa[i] = 0x8765_4321 * (i+3);
296    uint overflow = multibyteMul(aa, aa, 0x8EFD_FCFB, 0x33FF_7461);
297    uint r = multibyteDivAssign(aa, 0x8EFD_FCFB, overflow);
298    for (uint i=0; i<aa.length; ++i)
299    {
300        assert(aa[i] == 0x8765_4321 * (i+3));
301    }
302    assert(r == 0x33FF_7461);
303
304}
305// Set dest[2*i..2*i+1]+=src[i]*src[i]
306void multibyteAddDiagonalSquares(uint[] dest, const(uint)[] src) pure
307{
308    ulong c = 0;
309    for(size_t i = 0; i < src.length; ++i)
310    {
311        // At this point, c is 0 or 1, since FFFF*FFFF+FFFF_FFFF = 1_0000_0000.
312        c += cast(ulong)(src[i]) * src[i] + dest[2*i];
313        dest[2*i] = cast(uint)c;
314        c = (c>>=32) + dest[2*i+1];
315        dest[2*i+1] = cast(uint)c;
316        c >>= 32;
317    }
318}
319
320// Does half a square multiply. (square = diagonal + 2*triangle)
321void multibyteTriangleAccumulate(uint[] dest, const(uint)[] x) pure
322{
323    // x[0]*x[1...$] + x[1]*x[2..$] + ... + x[$-2]x[$-1..$]
324    dest[x.length] = multibyteMul(dest[1 .. x.length], x[1..$], x[0], 0);
325    if (x.length < 4)
326    {
327        if (x.length == 3)
328        {
329            ulong c = cast(ulong)(x[$-1]) * x[$-2]  + dest[2*x.length-3];
330            dest[2*x.length - 3] = cast(uint)c;
331            c >>= 32;
332            dest[2*x.length - 2] = cast(uint)c;
333        }
334        return;
335    }
336    for (size_t i = 2; i < x.length - 2; ++i)
337    {
338        dest[i-1+ x.length] = multibyteMulAdd!('+')(
339             dest[i+i-1 .. i+x.length-1], x[i..$], x[i-1], 0);
340    }
341        // Unroll the last two entries, to reduce loop overhead:
342    ulong  c = cast(ulong)(x[$-3]) * x[$-2] + dest[2*x.length-5];
343    dest[2*x.length-5] = cast(uint)c;
344    c >>= 32;
345    c += cast(ulong)(x[$-3]) * x[$-1] + dest[2*x.length-4];
346    dest[2*x.length-4] = cast(uint)c;
347    c >>= 32;
348    c += cast(ulong)(x[$-1]) * x[$-2];
349    dest[2*x.length-3] = cast(uint)c;
350    c >>= 32;
351    dest[2*x.length-2] = cast(uint)c;
352}
353
354void multibyteSquare(BigDigit[] result, const(BigDigit) [] x) pure
355{
356    multibyteTriangleAccumulate(result, x);
357    result[$-1] = multibyteShl(result[1..$-1], result[1..$-1], 1); // mul by 2
358    result[0] = 0;
359    multibyteAddDiagonalSquares(result, x);
360}