biguintnoasm.d - This D code implements a set of mathematic…

/std/internal/math/biguintnoasm.d

http://github.com/jcd/phobos · D · 360 lines · 270 code · 31 blank · 59 comment · 74 complexity · 65728d2d08a4409d5ccca9ab6fe3adca MD5 · raw file

/** Arbitrary precision arithmetic ('bignum') for processors with no asm support
 *
 * All functions operate on arrays of uints, stored LSB first.
 * If there is a destination array, it will be the first parameter.
 * Currently, all of these functions are subject to change, and are
 * intended for internal use only.
 * This module is intended only to assist development of high-speed routines
 * on currently unsupported processors.
 * The X86 asm version is about 30 times faster than the D version(DMD).
 */

/*          Copyright Don Clugston 2008 - 2010.
 * Distributed under the Boost Software License, Version 1.0.
 *    (See accompanying file LICENSE_1_0.txt or copy at
 *          http://www.boost.org/LICENSE_1_0.txt)
 */

module std.internal.math.biguintnoasm;

public:
alias uint BigDigit; // A Bignum is an array of BigDigits.

    // Limits for when to switch between multiplication algorithms.
enum : int { KARATSUBALIMIT = 10 }; // Minimum value for which Karatsuba is worthwhile.
enum : int { KARATSUBASQUARELIMIT=12 }; // Minimum value for which square Karatsuba is worthwhile


/** Multi-byte addition or subtraction
 *    dest[] = src1[] + src2[] + carry (0 or 1).
 * or dest[] = src1[] - src2[] - carry (0 or 1).
 * Returns carry or borrow (0 or 1).
 * Set op == '+' for addition, '-' for subtraction.
 */
uint multibyteAddSub(char op)(uint[] dest, const(uint) [] src1,
    const (uint) [] src2, uint carry) pure
{
    ulong c = carry;
    for (size_t i = 0; i < src2.length; ++i)
    {
        static if (op=='+') c = c  + src1[i] + src2[i];
             else           c = cast(ulong)src1[i] - src2[i] - c;
        dest[i] = cast(uint)c;
        c = (c > 0xFFFF_FFFF);
    }
    return cast(uint)c;
}

unittest
{
    uint [] a = new uint[40];
    uint [] b = new uint[40];
    uint [] c = new uint[40];
    for (size_t i = 0; i < a.length; ++i)
    {
        if (i&1) a[i]=cast(uint)(0x8000_0000 + i);
        else a[i]=cast(uint)i;
        b[i]= 0x8000_0003;
    }
    c[19]=0x3333_3333;
    uint carry = multibyteAddSub!('+')(c[0..18], b[0..18], a[0..18], 0);
    assert(c[0]==0x8000_0003);
    assert(c[1]==4);
    assert(c[19]==0x3333_3333); // check for overrun
    assert(carry==1);
    for (size_t i = 0; i < a.length; ++i)
    {
        a[i] = b[i] = c[i] = 0;
    }
    a[8]=0x048D159E;
    b[8]=0x048D159E;
    a[10]=0x1D950C84;
    b[10]=0x1D950C84;
    a[5] =0x44444444;
    carry = multibyteAddSub!('-')(a[0..12], a[0..12], b[0..12], 0);
    assert(a[11] == 0);
    for (size_t i = 0; i < 10; ++i)
        if (i != 5)
            assert(a[i] == 0);

    for (size_t q = 3; q < 36; ++q)
    {
        for (size_t i = 0; i< a.length; ++i)
        {
            a[i] = b[i] = c[i] = 0;
        }
        a[q-2]=0x040000;
        b[q-2]=0x040000;
       carry = multibyteAddSub!('-')(a[0..q], a[0..q], b[0..q], 0);
       assert(a[q-2]==0);
    }
}



/** dest[] += carry, or dest[] -= carry.
 *  op must be '+' or '-'
 *  Returns final carry or borrow (0 or 1)
 */
uint multibyteIncrementAssign(char op)(uint[] dest, uint carry) pure
{
    static if (op=='+')
    {
        ulong c = carry;
        c += dest[0];
        dest[0] = cast(uint)c;
        if (c<=0xFFFF_FFFF)
            return 0;

        for (size_t i = 1; i < dest.length; ++i)
        {
            ++dest[i];
            if (dest[i] != 0)
                return 0;
        }
        return 1;
    }
    else
    {
        ulong c = carry;
        c = dest[0] - c;
        dest[0] = cast(uint)c;
        if (c<=0xFFFF_FFFF)
            return 0;
        for (size_t i = 1; i < dest.length; ++i)
        {
            --dest[i];
            if (dest[i] != 0xFFFF_FFFF)
                return 0;
        }
        return 1;
    }
}

/** dest[] = src[] << numbits
 *  numbits must be in the range 1..31
 */
uint multibyteShl(uint [] dest, const(uint) [] src, uint numbits) pure
{
    ulong c = 0;
    for (size_t i = 0; i < dest.length; ++i)
    {
        c += (cast(ulong)(src[i]) << numbits);
        dest[i] = cast(uint)c;
        c >>>= 32;
   }
   return cast(uint)c;
}


/** dest[] = src[] >> numbits
 *  numbits must be in the range 1..31
 */
void multibyteShr(uint [] dest, const(uint) [] src, uint numbits) pure
{
    ulong c = 0;
    for(ptrdiff_t i = dest.length; i!=0; --i)
    {
        c += (src[i-1] >>numbits) + (cast(ulong)(src[i-1]) << (64 - numbits));
        dest[i-1] = cast(uint)c;
        c >>>= 32;
   }
}

unittest
{

    uint [] aa = [0x1222_2223, 0x4555_5556, 0x8999_999A, 0xBCCC_CCCD, 0xEEEE_EEEE];
    multibyteShr(aa[0..$-2], aa, 4);
    assert(aa[0] == 0x6122_2222 && aa[1] == 0xA455_5555 && aa[2] == 0x0899_9999);
    assert(aa[3] == 0xBCCC_CCCD);

    aa = [0x1222_2223, 0x4555_5556, 0x8999_999A, 0xBCCC_CCCD, 0xEEEE_EEEE];
    multibyteShr(aa[0..$-1], aa, 4);
    assert(aa[0] == 0x6122_2222 && aa[1] == 0xA455_5555
        && aa[2] == 0xD899_9999 && aa[3] == 0x0BCC_CCCC);

    aa = [0xF0FF_FFFF, 0x1222_2223, 0x4555_5556, 0x8999_999A, 0xBCCC_CCCD,
        0xEEEE_EEEE];
    multibyteShl(aa[1..4], aa[1..$], 4);
    assert(aa[0] == 0xF0FF_FFFF && aa[1] == 0x2222_2230
        && aa[2]==0x5555_5561 && aa[3]==0x9999_99A4 && aa[4]==0x0BCCC_CCCD);
}

/** dest[] = src[] * multiplier + carry.
 * Returns carry.
 */
uint multibyteMul(uint[] dest, const(uint)[] src, uint multiplier, uint carry)
	pure
{
    assert(dest.length == src.length);
    ulong c = carry;
    for(size_t i = 0; i < src.length; ++i)
    {
        c += cast(ulong)(src[i]) * multiplier;
        dest[i] = cast(uint)c;
        c>>=32;
    }
    return cast(uint)c;
}

unittest
{
    uint [] aa = [0xF0FF_FFFF, 0x1222_2223, 0x4555_5556, 0x8999_999A,
        0xBCCC_CCCD, 0xEEEE_EEEE];
    multibyteMul(aa[1..4], aa[1..4], 16, 0);
    assert(aa[0] == 0xF0FF_FFFF && aa[1] == 0x2222_2230 && aa[2]==0x5555_5561
        && aa[3]==0x9999_99A4 && aa[4]==0x0BCCC_CCCD);
}

/**
 * dest[] += src[] * multiplier + carry(0..FFFF_FFFF).
 * Returns carry out of MSB (0..FFFF_FFFF).
 */
uint multibyteMulAdd(char op)(uint [] dest, const(uint)[] src,
    uint multiplier, uint carry) pure
{
    assert(dest.length == src.length);
    ulong c = carry;
    for(size_t i = 0; i < src.length; ++i)
    {
        static if(op=='+')
        {
            c += cast(ulong)(multiplier) * src[i]  + dest[i];
            dest[i] = cast(uint)c;
            c >>= 32;
        }
        else
        {
            c += cast(ulong)multiplier * src[i];
            ulong t = cast(ulong)dest[i] - cast(uint)c;
            dest[i] = cast(uint)t;
            c = cast(uint)((c>>32) - (t>>32));
        }
    }
    return cast(uint)c;
}

unittest
{

    uint [] aa = [0xF0FF_FFFF, 0x1222_2223, 0x4555_5556, 0x8999_999A,
        0xBCCC_CCCD, 0xEEEE_EEEE];
    uint [] bb = [0x1234_1234, 0xF0F0_F0F0, 0x00C0_C0C0, 0xF0F0_F0F0,
        0xC0C0_C0C0];
    multibyteMulAdd!('+')(bb[1..$-1], aa[1..$-2], 16, 5);
    assert(bb[0] == 0x1234_1234 && bb[4] == 0xC0C0_C0C0);
    assert(bb[1] == 0x2222_2230 + 0xF0F0_F0F0 + 5
        && bb[2] == 0x5555_5561 + 0x00C0_C0C0 + 1
        && bb[3] == 0x9999_99A4 + 0xF0F0_F0F0 );
}


/**
   Sets result = result[0..left.length] + left * right

   It is defined in this way to allow cache-efficient multiplication.
   This function is equivalent to:
    ----
    for (size_t i = 0; i< right.length; ++i) {
        dest[left.length + i] = multibyteMulAdd(dest[i..left.length+i],
                left, right[i], 0);
    }
    ----
 */
void multibyteMultiplyAccumulate(uint [] dest, const(uint)[] left, const(uint)
		[] right) pure
{
    for (size_t i = 0; i < right.length; ++i)
    {
        dest[left.length + i] = multibyteMulAdd!('+')(dest[i..left.length+i],
                left, right[i], 0);
    }
}

/**  dest[] /= divisor.
 * overflow is the initial remainder, and must be in the range 0..divisor-1.
 */
uint multibyteDivAssign(uint [] dest, uint divisor, uint overflow) pure
{
    ulong c = cast(ulong)overflow;
    for(ptrdiff_t i = dest.length-1; i>= 0; --i)
    {
        c = (c<<32) + cast(ulong)(dest[i]);
        uint q = cast(uint)(c/divisor);
        c -= divisor * q;
        dest[i] = q;
    }
    return cast(uint)c;
}

unittest
{
    uint [] aa = new uint[101];
    for (uint i = 0; i < aa.length; ++i)
        aa[i] = 0x8765_4321 * (i+3);
    uint overflow = multibyteMul(aa, aa, 0x8EFD_FCFB, 0x33FF_7461);
    uint r = multibyteDivAssign(aa, 0x8EFD_FCFB, overflow);
    for (uint i=0; i<aa.length; ++i)
    {
        assert(aa[i] == 0x8765_4321 * (i+3));
    }
    assert(r == 0x33FF_7461);

}
// Set dest[2*i..2*i+1]+=src[i]*src[i]
void multibyteAddDiagonalSquares(uint[] dest, const(uint)[] src) pure
{
    ulong c = 0;
    for(size_t i = 0; i < src.length; ++i)
    {
        // At this point, c is 0 or 1, since FFFF*FFFF+FFFF_FFFF = 1_0000_0000.
        c += cast(ulong)(src[i]) * src[i] + dest[2*i];
        dest[2*i] = cast(uint)c;
        c = (c>>=32) + dest[2*i+1];
        dest[2*i+1] = cast(uint)c;
        c >>= 32;
    }
}

// Does half a square multiply. (square = diagonal + 2*triangle)
void multibyteTriangleAccumulate(uint[] dest, const(uint)[] x) pure
{
    // x[0]*x[1...$] + x[1]*x[2..$] + ... + x[$-2]x[$-1..$]
    dest[x.length] = multibyteMul(dest[1 .. x.length], x[1..$], x[0], 0);
    if (x.length < 4)
    {
        if (x.length == 3)
        {
            ulong c = cast(ulong)(x[$-1]) * x[$-2]  + dest[2*x.length-3];
            dest[2*x.length - 3] = cast(uint)c;
            c >>= 32;
            dest[2*x.length - 2] = cast(uint)c;
        }
        return;
    }
    for (size_t i = 2; i < x.length - 2; ++i)
    {
        dest[i-1+ x.length] = multibyteMulAdd!('+')(
             dest[i+i-1 .. i+x.length-1], x[i..$], x[i-1], 0);
    }
        // Unroll the last two entries, to reduce loop overhead:
    ulong  c = cast(ulong)(x[$-3]) * x[$-2] + dest[2*x.length-5];
    dest[2*x.length-5] = cast(uint)c;
    c >>= 32;
    c += cast(ulong)(x[$-3]) * x[$-1] + dest[2*x.length-4];
    dest[2*x.length-4] = cast(uint)c;
    c >>= 32;
    c += cast(ulong)(x[$-1]) * x[$-2];
    dest[2*x.length-3] = cast(uint)c;
    c >>= 32;
    dest[2*x.length-2] = cast(uint)c;
}

void multibyteSquare(BigDigit[] result, const(BigDigit) [] x) pure
{
    multibyteTriangleAccumulate(result, x);
    result[$-1] = multibyteShl(result[1..$-1], result[1..$-1], 1); // mul by 2
    result[0] = 0;
    multibyteAddDiagonalSquares(result, x);
}
Summary ✨

This D code implements a set of mathematical functions for efficient multiplication and division of large numbers, optimized for performance on microcontrollers with limited memory. It includes functions for multiplying two numbers, dividing one number by another, adding diagonal squares, accumulating triangles, and calculating squares. The code is designed to minimize memory usage and optimize execution speed.
Alerts (4)

Complexity hotspot; lines 246 to 249 (total complexity: 8)
246 247 248 249