PageRenderTime 105ms CodeModel.GetById 18ms app.highlight 74ms RepoModel.GetById 2ms app.codeStats 0ms

/arch/arm/nwfpe/softfloat.c

https://bitbucket.org/evzijst/gittest
C | 3443 lines | 2402 code | 265 blank | 776 comment | 983 complexity | 05b1f4fec436b37e94e664db23000f47 MD5 | raw file

Large files files are truncated, but you can click here to view the full file

   1/*
   2===============================================================================
   3
   4This C source file is part of the SoftFloat IEC/IEEE Floating-point
   5Arithmetic Package, Release 2.
   6
   7Written by John R. Hauser.  This work was made possible in part by the
   8International Computer Science Institute, located at Suite 600, 1947 Center
   9Street, Berkeley, California 94704.  Funding was partially provided by the
  10National Science Foundation under grant MIP-9311980.  The original version
  11of this code was written as part of a project to build a fixed-point vector
  12processor in collaboration with the University of California at Berkeley,
  13overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
  14is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
  15arithmetic/softfloat.html'.
  16
  17THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
  18has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
  19TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
  20PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
  21AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
  22
  23Derivative works are acceptable, even for commercial purposes, so long as
  24(1) they include prominent notice that the work is derivative, and (2) they
  25include prominent notice akin to these three paragraphs for those parts of
  26this code that are retained.
  27
  28===============================================================================
  29*/
  30
  31#include "fpa11.h"
  32//#include "milieu.h"
  33//#include "softfloat.h"
  34
  35/*
  36-------------------------------------------------------------------------------
  37Floating-point rounding mode, extended double-precision rounding precision,
  38and exception flags.
  39-------------------------------------------------------------------------------
  40*/
  41int8 float_rounding_mode = float_round_nearest_even;
  42int8 floatx80_rounding_precision = 80;
  43int8 float_exception_flags;
  44
  45/*
  46-------------------------------------------------------------------------------
  47Primitive arithmetic functions, including multi-word arithmetic, and
  48division and square root approximations.  (Can be specialized to target if
  49desired.)
  50-------------------------------------------------------------------------------
  51*/
  52#include "softfloat-macros"
  53
  54/*
  55-------------------------------------------------------------------------------
  56Functions and definitions to determine:  (1) whether tininess for underflow
  57is detected before or after rounding by default, (2) what (if anything)
  58happens when exceptions are raised, (3) how signaling NaNs are distinguished
  59from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
  60are propagated from function inputs to output.  These details are target-
  61specific.
  62-------------------------------------------------------------------------------
  63*/
  64#include "softfloat-specialize"
  65
  66/*
  67-------------------------------------------------------------------------------
  68Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
  69and 7, and returns the properly rounded 32-bit integer corresponding to the
  70input.  If `zSign' is nonzero, the input is negated before being converted
  71to an integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point
  72input is simply rounded to an integer, with the inexact exception raised if
  73the input cannot be represented exactly as an integer.  If the fixed-point
  74input is too large, however, the invalid exception is raised and the largest
  75positive or negative integer is returned.
  76-------------------------------------------------------------------------------
  77*/
  78static int32 roundAndPackInt32( flag zSign, bits64 absZ )
  79{
  80    int8 roundingMode;
  81    flag roundNearestEven;
  82    int8 roundIncrement, roundBits;
  83    int32 z;
  84
  85    roundingMode = float_rounding_mode;
  86    roundNearestEven = ( roundingMode == float_round_nearest_even );
  87    roundIncrement = 0x40;
  88    if ( ! roundNearestEven ) {
  89        if ( roundingMode == float_round_to_zero ) {
  90            roundIncrement = 0;
  91        }
  92        else {
  93            roundIncrement = 0x7F;
  94            if ( zSign ) {
  95                if ( roundingMode == float_round_up ) roundIncrement = 0;
  96            }
  97            else {
  98                if ( roundingMode == float_round_down ) roundIncrement = 0;
  99            }
 100        }
 101    }
 102    roundBits = absZ & 0x7F;
 103    absZ = ( absZ + roundIncrement )>>7;
 104    absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
 105    z = absZ;
 106    if ( zSign ) z = - z;
 107    if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
 108        float_exception_flags |= float_flag_invalid;
 109        return zSign ? 0x80000000 : 0x7FFFFFFF;
 110    }
 111    if ( roundBits ) float_exception_flags |= float_flag_inexact;
 112    return z;
 113
 114}
 115
 116/*
 117-------------------------------------------------------------------------------
 118Returns the fraction bits of the single-precision floating-point value `a'.
 119-------------------------------------------------------------------------------
 120*/
 121INLINE bits32 extractFloat32Frac( float32 a )
 122{
 123
 124    return a & 0x007FFFFF;
 125
 126}
 127
 128/*
 129-------------------------------------------------------------------------------
 130Returns the exponent bits of the single-precision floating-point value `a'.
 131-------------------------------------------------------------------------------
 132*/
 133INLINE int16 extractFloat32Exp( float32 a )
 134{
 135
 136    return ( a>>23 ) & 0xFF;
 137
 138}
 139
 140/*
 141-------------------------------------------------------------------------------
 142Returns the sign bit of the single-precision floating-point value `a'.
 143-------------------------------------------------------------------------------
 144*/
 145#if 0	/* in softfloat.h */
 146INLINE flag extractFloat32Sign( float32 a )
 147{
 148
 149    return a>>31;
 150
 151}
 152#endif
 153
 154/*
 155-------------------------------------------------------------------------------
 156Normalizes the subnormal single-precision floating-point value represented
 157by the denormalized significand `aSig'.  The normalized exponent and
 158significand are stored at the locations pointed to by `zExpPtr' and
 159`zSigPtr', respectively.
 160-------------------------------------------------------------------------------
 161*/
 162static void
 163 normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
 164{
 165    int8 shiftCount;
 166
 167    shiftCount = countLeadingZeros32( aSig ) - 8;
 168    *zSigPtr = aSig<<shiftCount;
 169    *zExpPtr = 1 - shiftCount;
 170
 171}
 172
 173/*
 174-------------------------------------------------------------------------------
 175Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
 176single-precision floating-point value, returning the result.  After being
 177shifted into the proper positions, the three fields are simply added
 178together to form the result.  This means that any integer portion of `zSig'
 179will be added into the exponent.  Since a properly normalized significand
 180will have an integer portion equal to 1, the `zExp' input should be 1 less
 181than the desired result exponent whenever `zSig' is a complete, normalized
 182significand.
 183-------------------------------------------------------------------------------
 184*/
 185INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
 186{
 187#if 0
 188   float32 f;
 189   __asm__("@ packFloat32				\n\
 190   	    mov %0, %1, asl #31				\n\
 191   	    orr %0, %2, asl #23				\n\
 192   	    orr %0, %3"
 193   	    : /* no outputs */
 194   	    : "g" (f), "g" (zSign), "g" (zExp), "g" (zSig)
 195   	    : "cc");
 196   return f;
 197#else
 198    return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
 199#endif 
 200}
 201
 202/*
 203-------------------------------------------------------------------------------
 204Takes an abstract floating-point value having sign `zSign', exponent `zExp',
 205and significand `zSig', and returns the proper single-precision floating-
 206point value corresponding to the abstract input.  Ordinarily, the abstract
 207value is simply rounded and packed into the single-precision format, with
 208the inexact exception raised if the abstract input cannot be represented
 209exactly.  If the abstract value is too large, however, the overflow and
 210inexact exceptions are raised and an infinity or maximal finite value is
 211returned.  If the abstract value is too small, the input value is rounded to
 212a subnormal number, and the underflow and inexact exceptions are raised if
 213the abstract input cannot be represented exactly as a subnormal single-
 214precision floating-point number.
 215    The input significand `zSig' has its binary point between bits 30
 216and 29, which is 7 bits to the left of the usual location.  This shifted
 217significand must be normalized or smaller.  If `zSig' is not normalized,
 218`zExp' must be 0; in that case, the result returned is a subnormal number,
 219and it must not require rounding.  In the usual case that `zSig' is
 220normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
 221The handling of underflow and overflow follows the IEC/IEEE Standard for
 222Binary Floating-point Arithmetic.
 223-------------------------------------------------------------------------------
 224*/
 225static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
 226{
 227    int8 roundingMode;
 228    flag roundNearestEven;
 229    int8 roundIncrement, roundBits;
 230    flag isTiny;
 231
 232    roundingMode = float_rounding_mode;
 233    roundNearestEven = ( roundingMode == float_round_nearest_even );
 234    roundIncrement = 0x40;
 235    if ( ! roundNearestEven ) {
 236        if ( roundingMode == float_round_to_zero ) {
 237            roundIncrement = 0;
 238        }
 239        else {
 240            roundIncrement = 0x7F;
 241            if ( zSign ) {
 242                if ( roundingMode == float_round_up ) roundIncrement = 0;
 243            }
 244            else {
 245                if ( roundingMode == float_round_down ) roundIncrement = 0;
 246            }
 247        }
 248    }
 249    roundBits = zSig & 0x7F;
 250    if ( 0xFD <= (bits16) zExp ) {
 251        if (    ( 0xFD < zExp )
 252             || (    ( zExp == 0xFD )
 253                  && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
 254           ) {
 255            float_raise( float_flag_overflow | float_flag_inexact );
 256            return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
 257        }
 258        if ( zExp < 0 ) {
 259            isTiny =
 260                   ( float_detect_tininess == float_tininess_before_rounding )
 261                || ( zExp < -1 )
 262                || ( zSig + roundIncrement < 0x80000000 );
 263            shift32RightJamming( zSig, - zExp, &zSig );
 264            zExp = 0;
 265            roundBits = zSig & 0x7F;
 266            if ( isTiny && roundBits ) float_raise( float_flag_underflow );
 267        }
 268    }
 269    if ( roundBits ) float_exception_flags |= float_flag_inexact;
 270    zSig = ( zSig + roundIncrement )>>7;
 271    zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
 272    if ( zSig == 0 ) zExp = 0;
 273    return packFloat32( zSign, zExp, zSig );
 274
 275}
 276
 277/*
 278-------------------------------------------------------------------------------
 279Takes an abstract floating-point value having sign `zSign', exponent `zExp',
 280and significand `zSig', and returns the proper single-precision floating-
 281point value corresponding to the abstract input.  This routine is just like
 282`roundAndPackFloat32' except that `zSig' does not have to be normalized in
 283any way.  In all cases, `zExp' must be 1 less than the ``true'' floating-
 284point exponent.
 285-------------------------------------------------------------------------------
 286*/
 287static float32
 288 normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
 289{
 290    int8 shiftCount;
 291
 292    shiftCount = countLeadingZeros32( zSig ) - 1;
 293    return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount );
 294
 295}
 296
 297/*
 298-------------------------------------------------------------------------------
 299Returns the fraction bits of the double-precision floating-point value `a'.
 300-------------------------------------------------------------------------------
 301*/
 302INLINE bits64 extractFloat64Frac( float64 a )
 303{
 304
 305    return a & LIT64( 0x000FFFFFFFFFFFFF );
 306
 307}
 308
 309/*
 310-------------------------------------------------------------------------------
 311Returns the exponent bits of the double-precision floating-point value `a'.
 312-------------------------------------------------------------------------------
 313*/
 314INLINE int16 extractFloat64Exp( float64 a )
 315{
 316
 317    return ( a>>52 ) & 0x7FF;
 318
 319}
 320
 321/*
 322-------------------------------------------------------------------------------
 323Returns the sign bit of the double-precision floating-point value `a'.
 324-------------------------------------------------------------------------------
 325*/
 326#if 0	/* in softfloat.h */
 327INLINE flag extractFloat64Sign( float64 a )
 328{
 329
 330    return a>>63;
 331
 332}
 333#endif
 334
 335/*
 336-------------------------------------------------------------------------------
 337Normalizes the subnormal double-precision floating-point value represented
 338by the denormalized significand `aSig'.  The normalized exponent and
 339significand are stored at the locations pointed to by `zExpPtr' and
 340`zSigPtr', respectively.
 341-------------------------------------------------------------------------------
 342*/
 343static void
 344 normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr )
 345{
 346    int8 shiftCount;
 347
 348    shiftCount = countLeadingZeros64( aSig ) - 11;
 349    *zSigPtr = aSig<<shiftCount;
 350    *zExpPtr = 1 - shiftCount;
 351
 352}
 353
 354/*
 355-------------------------------------------------------------------------------
 356Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
 357double-precision floating-point value, returning the result.  After being
 358shifted into the proper positions, the three fields are simply added
 359together to form the result.  This means that any integer portion of `zSig'
 360will be added into the exponent.  Since a properly normalized significand
 361will have an integer portion equal to 1, the `zExp' input should be 1 less
 362than the desired result exponent whenever `zSig' is a complete, normalized
 363significand.
 364-------------------------------------------------------------------------------
 365*/
 366INLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig )
 367{
 368
 369    return ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<52 ) + zSig;
 370
 371}
 372
 373/*
 374-------------------------------------------------------------------------------
 375Takes an abstract floating-point value having sign `zSign', exponent `zExp',
 376and significand `zSig', and returns the proper double-precision floating-
 377point value corresponding to the abstract input.  Ordinarily, the abstract
 378value is simply rounded and packed into the double-precision format, with
 379the inexact exception raised if the abstract input cannot be represented
 380exactly.  If the abstract value is too large, however, the overflow and
 381inexact exceptions are raised and an infinity or maximal finite value is
 382returned.  If the abstract value is too small, the input value is rounded to
 383a subnormal number, and the underflow and inexact exceptions are raised if
 384the abstract input cannot be represented exactly as a subnormal double-
 385precision floating-point number.
 386    The input significand `zSig' has its binary point between bits 62
 387and 61, which is 10 bits to the left of the usual location.  This shifted
 388significand must be normalized or smaller.  If `zSig' is not normalized,
 389`zExp' must be 0; in that case, the result returned is a subnormal number,
 390and it must not require rounding.  In the usual case that `zSig' is
 391normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
 392The handling of underflow and overflow follows the IEC/IEEE Standard for
 393Binary Floating-point Arithmetic.
 394-------------------------------------------------------------------------------
 395*/
 396static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
 397{
 398    int8 roundingMode;
 399    flag roundNearestEven;
 400    int16 roundIncrement, roundBits;
 401    flag isTiny;
 402
 403    roundingMode = float_rounding_mode;
 404    roundNearestEven = ( roundingMode == float_round_nearest_even );
 405    roundIncrement = 0x200;
 406    if ( ! roundNearestEven ) {
 407        if ( roundingMode == float_round_to_zero ) {
 408            roundIncrement = 0;
 409        }
 410        else {
 411            roundIncrement = 0x3FF;
 412            if ( zSign ) {
 413                if ( roundingMode == float_round_up ) roundIncrement = 0;
 414            }
 415            else {
 416                if ( roundingMode == float_round_down ) roundIncrement = 0;
 417            }
 418        }
 419    }
 420    roundBits = zSig & 0x3FF;
 421    if ( 0x7FD <= (bits16) zExp ) {
 422        if (    ( 0x7FD < zExp )
 423             || (    ( zExp == 0x7FD )
 424                  && ( (sbits64) ( zSig + roundIncrement ) < 0 ) )
 425           ) {
 426            //register int lr = __builtin_return_address(0);
 427            //printk("roundAndPackFloat64 called from 0x%08x\n",lr);
 428            float_raise( float_flag_overflow | float_flag_inexact );
 429            return packFloat64( zSign, 0x7FF, 0 ) - ( roundIncrement == 0 );
 430        }
 431        if ( zExp < 0 ) {
 432            isTiny =
 433                   ( float_detect_tininess == float_tininess_before_rounding )
 434                || ( zExp < -1 )
 435                || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
 436            shift64RightJamming( zSig, - zExp, &zSig );
 437            zExp = 0;
 438            roundBits = zSig & 0x3FF;
 439            if ( isTiny && roundBits ) float_raise( float_flag_underflow );
 440        }
 441    }
 442    if ( roundBits ) float_exception_flags |= float_flag_inexact;
 443    zSig = ( zSig + roundIncrement )>>10;
 444    zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
 445    if ( zSig == 0 ) zExp = 0;
 446    return packFloat64( zSign, zExp, zSig );
 447
 448}
 449
 450/*
 451-------------------------------------------------------------------------------
 452Takes an abstract floating-point value having sign `zSign', exponent `zExp',
 453and significand `zSig', and returns the proper double-precision floating-
 454point value corresponding to the abstract input.  This routine is just like
 455`roundAndPackFloat64' except that `zSig' does not have to be normalized in
 456any way.  In all cases, `zExp' must be 1 less than the ``true'' floating-
 457point exponent.
 458-------------------------------------------------------------------------------
 459*/
 460static float64
 461 normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
 462{
 463    int8 shiftCount;
 464
 465    shiftCount = countLeadingZeros64( zSig ) - 1;
 466    return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount );
 467
 468}
 469
 470#ifdef FLOATX80
 471
 472/*
 473-------------------------------------------------------------------------------
 474Returns the fraction bits of the extended double-precision floating-point
 475value `a'.
 476-------------------------------------------------------------------------------
 477*/
 478INLINE bits64 extractFloatx80Frac( floatx80 a )
 479{
 480
 481    return a.low;
 482
 483}
 484
 485/*
 486-------------------------------------------------------------------------------
 487Returns the exponent bits of the extended double-precision floating-point
 488value `a'.
 489-------------------------------------------------------------------------------
 490*/
 491INLINE int32 extractFloatx80Exp( floatx80 a )
 492{
 493
 494    return a.high & 0x7FFF;
 495
 496}
 497
 498/*
 499-------------------------------------------------------------------------------
 500Returns the sign bit of the extended double-precision floating-point value
 501`a'.
 502-------------------------------------------------------------------------------
 503*/
 504INLINE flag extractFloatx80Sign( floatx80 a )
 505{
 506
 507    return a.high>>15;
 508
 509}
 510
 511/*
 512-------------------------------------------------------------------------------
 513Normalizes the subnormal extended double-precision floating-point value
 514represented by the denormalized significand `aSig'.  The normalized exponent
 515and significand are stored at the locations pointed to by `zExpPtr' and
 516`zSigPtr', respectively.
 517-------------------------------------------------------------------------------
 518*/
 519static void
 520 normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr )
 521{
 522    int8 shiftCount;
 523
 524    shiftCount = countLeadingZeros64( aSig );
 525    *zSigPtr = aSig<<shiftCount;
 526    *zExpPtr = 1 - shiftCount;
 527
 528}
 529
 530/*
 531-------------------------------------------------------------------------------
 532Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
 533extended double-precision floating-point value, returning the result.
 534-------------------------------------------------------------------------------
 535*/
 536INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig )
 537{
 538    floatx80 z;
 539
 540    z.low = zSig;
 541    z.high = ( ( (bits16) zSign )<<15 ) + zExp;
 542    return z;
 543
 544}
 545
 546/*
 547-------------------------------------------------------------------------------
 548Takes an abstract floating-point value having sign `zSign', exponent `zExp',
 549and extended significand formed by the concatenation of `zSig0' and `zSig1',
 550and returns the proper extended double-precision floating-point value
 551corresponding to the abstract input.  Ordinarily, the abstract value is
 552rounded and packed into the extended double-precision format, with the
 553inexact exception raised if the abstract input cannot be represented
 554exactly.  If the abstract value is too large, however, the overflow and
 555inexact exceptions are raised and an infinity or maximal finite value is
 556returned.  If the abstract value is too small, the input value is rounded to
 557a subnormal number, and the underflow and inexact exceptions are raised if
 558the abstract input cannot be represented exactly as a subnormal extended
 559double-precision floating-point number.
 560    If `roundingPrecision' is 32 or 64, the result is rounded to the same
 561number of bits as single or double precision, respectively.  Otherwise, the
 562result is rounded to the full precision of the extended double-precision
 563format.
 564    The input significand must be normalized or smaller.  If the input
 565significand is not normalized, `zExp' must be 0; in that case, the result
 566returned is a subnormal number, and it must not require rounding.  The
 567handling of underflow and overflow follows the IEC/IEEE Standard for Binary
 568Floating-point Arithmetic.
 569-------------------------------------------------------------------------------
 570*/
 571static floatx80
 572 roundAndPackFloatx80(
 573     int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
 574 )
 575{
 576    int8 roundingMode;
 577    flag roundNearestEven, increment, isTiny;
 578    int64 roundIncrement, roundMask, roundBits;
 579
 580    roundingMode = float_rounding_mode;
 581    roundNearestEven = ( roundingMode == float_round_nearest_even );
 582    if ( roundingPrecision == 80 ) goto precision80;
 583    if ( roundingPrecision == 64 ) {
 584        roundIncrement = LIT64( 0x0000000000000400 );
 585        roundMask = LIT64( 0x00000000000007FF );
 586    }
 587    else if ( roundingPrecision == 32 ) {
 588        roundIncrement = LIT64( 0x0000008000000000 );
 589        roundMask = LIT64( 0x000000FFFFFFFFFF );
 590    }
 591    else {
 592        goto precision80;
 593    }
 594    zSig0 |= ( zSig1 != 0 );
 595    if ( ! roundNearestEven ) {
 596        if ( roundingMode == float_round_to_zero ) {
 597            roundIncrement = 0;
 598        }
 599        else {
 600            roundIncrement = roundMask;
 601            if ( zSign ) {
 602                if ( roundingMode == float_round_up ) roundIncrement = 0;
 603            }
 604            else {
 605                if ( roundingMode == float_round_down ) roundIncrement = 0;
 606            }
 607        }
 608    }
 609    roundBits = zSig0 & roundMask;
 610    if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
 611        if (    ( 0x7FFE < zExp )
 612             || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
 613           ) {
 614            goto overflow;
 615        }
 616        if ( zExp <= 0 ) {
 617            isTiny =
 618                   ( float_detect_tininess == float_tininess_before_rounding )
 619                || ( zExp < 0 )
 620                || ( zSig0 <= zSig0 + roundIncrement );
 621            shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
 622            zExp = 0;
 623            roundBits = zSig0 & roundMask;
 624            if ( isTiny && roundBits ) float_raise( float_flag_underflow );
 625            if ( roundBits ) float_exception_flags |= float_flag_inexact;
 626            zSig0 += roundIncrement;
 627            if ( (sbits64) zSig0 < 0 ) zExp = 1;
 628            roundIncrement = roundMask + 1;
 629            if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
 630                roundMask |= roundIncrement;
 631            }
 632            zSig0 &= ~ roundMask;
 633            return packFloatx80( zSign, zExp, zSig0 );
 634        }
 635    }
 636    if ( roundBits ) float_exception_flags |= float_flag_inexact;
 637    zSig0 += roundIncrement;
 638    if ( zSig0 < roundIncrement ) {
 639        ++zExp;
 640        zSig0 = LIT64( 0x8000000000000000 );
 641    }
 642    roundIncrement = roundMask + 1;
 643    if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
 644        roundMask |= roundIncrement;
 645    }
 646    zSig0 &= ~ roundMask;
 647    if ( zSig0 == 0 ) zExp = 0;
 648    return packFloatx80( zSign, zExp, zSig0 );
 649 precision80:
 650    increment = ( (sbits64) zSig1 < 0 );
 651    if ( ! roundNearestEven ) {
 652        if ( roundingMode == float_round_to_zero ) {
 653            increment = 0;
 654        }
 655        else {
 656            if ( zSign ) {
 657                increment = ( roundingMode == float_round_down ) && zSig1;
 658            }
 659            else {
 660                increment = ( roundingMode == float_round_up ) && zSig1;
 661            }
 662        }
 663    }
 664    if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
 665        if (    ( 0x7FFE < zExp )
 666             || (    ( zExp == 0x7FFE )
 667                  && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
 668                  && increment
 669                )
 670           ) {
 671            roundMask = 0;
 672 overflow:
 673            float_raise( float_flag_overflow | float_flag_inexact );
 674            if (    ( roundingMode == float_round_to_zero )
 675                 || ( zSign && ( roundingMode == float_round_up ) )
 676                 || ( ! zSign && ( roundingMode == float_round_down ) )
 677               ) {
 678                return packFloatx80( zSign, 0x7FFE, ~ roundMask );
 679            }
 680            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
 681        }
 682        if ( zExp <= 0 ) {
 683            isTiny =
 684                   ( float_detect_tininess == float_tininess_before_rounding )
 685                || ( zExp < 0 )
 686                || ! increment
 687                || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
 688            shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
 689            zExp = 0;
 690            if ( isTiny && zSig1 ) float_raise( float_flag_underflow );
 691            if ( zSig1 ) float_exception_flags |= float_flag_inexact;
 692            if ( roundNearestEven ) {
 693                increment = ( (sbits64) zSig1 < 0 );
 694            }
 695            else {
 696                if ( zSign ) {
 697                    increment = ( roundingMode == float_round_down ) && zSig1;
 698                }
 699                else {
 700                    increment = ( roundingMode == float_round_up ) && zSig1;
 701                }
 702            }
 703            if ( increment ) {
 704                ++zSig0;
 705                zSig0 &= ~ ( ( zSig1 + zSig1 == 0 ) & roundNearestEven );
 706                if ( (sbits64) zSig0 < 0 ) zExp = 1;
 707            }
 708            return packFloatx80( zSign, zExp, zSig0 );
 709        }
 710    }
 711    if ( zSig1 ) float_exception_flags |= float_flag_inexact;
 712    if ( increment ) {
 713        ++zSig0;
 714        if ( zSig0 == 0 ) {
 715            ++zExp;
 716            zSig0 = LIT64( 0x8000000000000000 );
 717        }
 718        else {
 719            zSig0 &= ~ ( ( zSig1 + zSig1 == 0 ) & roundNearestEven );
 720        }
 721    }
 722    else {
 723        if ( zSig0 == 0 ) zExp = 0;
 724    }
 725    
 726    return packFloatx80( zSign, zExp, zSig0 );
 727}
 728
 729/*
 730-------------------------------------------------------------------------------
 731Takes an abstract floating-point value having sign `zSign', exponent
 732`zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
 733and returns the proper extended double-precision floating-point value
 734corresponding to the abstract input.  This routine is just like
 735`roundAndPackFloatx80' except that the input significand does not have to be
 736normalized.
 737-------------------------------------------------------------------------------
 738*/
 739static floatx80
 740 normalizeRoundAndPackFloatx80(
 741     int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
 742 )
 743{
 744    int8 shiftCount;
 745
 746    if ( zSig0 == 0 ) {
 747        zSig0 = zSig1;
 748        zSig1 = 0;
 749        zExp -= 64;
 750    }
 751    shiftCount = countLeadingZeros64( zSig0 );
 752    shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
 753    zExp -= shiftCount;
 754    return
 755        roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 );
 756
 757}
 758
 759#endif
 760
 761/*
 762-------------------------------------------------------------------------------
 763Returns the result of converting the 32-bit two's complement integer `a' to
 764the single-precision floating-point format.  The conversion is performed
 765according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
 766-------------------------------------------------------------------------------
 767*/
 768float32 int32_to_float32( int32 a )
 769{
 770    flag zSign;
 771
 772    if ( a == 0 ) return 0;
 773    if ( a == 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
 774    zSign = ( a < 0 );
 775    return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a );
 776
 777}
 778
 779/*
 780-------------------------------------------------------------------------------
 781Returns the result of converting the 32-bit two's complement integer `a' to
 782the double-precision floating-point format.  The conversion is performed
 783according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
 784-------------------------------------------------------------------------------
 785*/
 786float64 int32_to_float64( int32 a )
 787{
 788    flag aSign;
 789    uint32 absA;
 790    int8 shiftCount;
 791    bits64 zSig;
 792
 793    if ( a == 0 ) return 0;
 794    aSign = ( a < 0 );
 795    absA = aSign ? - a : a;
 796    shiftCount = countLeadingZeros32( absA ) + 21;
 797    zSig = absA;
 798    return packFloat64( aSign, 0x432 - shiftCount, zSig<<shiftCount );
 799
 800}
 801
 802#ifdef FLOATX80
 803
 804/*
 805-------------------------------------------------------------------------------
 806Returns the result of converting the 32-bit two's complement integer `a'
 807to the extended double-precision floating-point format.  The conversion
 808is performed according to the IEC/IEEE Standard for Binary Floating-point
 809Arithmetic.
 810-------------------------------------------------------------------------------
 811*/
 812floatx80 int32_to_floatx80( int32 a )
 813{
 814    flag zSign;
 815    uint32 absA;
 816    int8 shiftCount;
 817    bits64 zSig;
 818
 819    if ( a == 0 ) return packFloatx80( 0, 0, 0 );
 820    zSign = ( a < 0 );
 821    absA = zSign ? - a : a;
 822    shiftCount = countLeadingZeros32( absA ) + 32;
 823    zSig = absA;
 824    return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
 825
 826}
 827
 828#endif
 829
 830/*
 831-------------------------------------------------------------------------------
 832Returns the result of converting the single-precision floating-point value
 833`a' to the 32-bit two's complement integer format.  The conversion is
 834performed according to the IEC/IEEE Standard for Binary Floating-point
 835Arithmetic---which means in particular that the conversion is rounded
 836according to the current rounding mode.  If `a' is a NaN, the largest
 837positive integer is returned.  Otherwise, if the conversion overflows, the
 838largest integer with the same sign as `a' is returned.
 839-------------------------------------------------------------------------------
 840*/
 841int32 float32_to_int32( float32 a )
 842{
 843    flag aSign;
 844    int16 aExp, shiftCount;
 845    bits32 aSig;
 846    bits64 zSig;
 847
 848    aSig = extractFloat32Frac( a );
 849    aExp = extractFloat32Exp( a );
 850    aSign = extractFloat32Sign( a );
 851    if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
 852    if ( aExp ) aSig |= 0x00800000;
 853    shiftCount = 0xAF - aExp;
 854    zSig = aSig;
 855    zSig <<= 32;
 856    if ( 0 < shiftCount ) shift64RightJamming( zSig, shiftCount, &zSig );
 857    return roundAndPackInt32( aSign, zSig );
 858
 859}
 860
 861/*
 862-------------------------------------------------------------------------------
 863Returns the result of converting the single-precision floating-point value
 864`a' to the 32-bit two's complement integer format.  The conversion is
 865performed according to the IEC/IEEE Standard for Binary Floating-point
 866Arithmetic, except that the conversion is always rounded toward zero.  If
 867`a' is a NaN, the largest positive integer is returned.  Otherwise, if the
 868conversion overflows, the largest integer with the same sign as `a' is
 869returned.
 870-------------------------------------------------------------------------------
 871*/
 872int32 float32_to_int32_round_to_zero( float32 a )
 873{
 874    flag aSign;
 875    int16 aExp, shiftCount;
 876    bits32 aSig;
 877    int32 z;
 878
 879    aSig = extractFloat32Frac( a );
 880    aExp = extractFloat32Exp( a );
 881    aSign = extractFloat32Sign( a );
 882    shiftCount = aExp - 0x9E;
 883    if ( 0 <= shiftCount ) {
 884        if ( a == 0xCF000000 ) return 0x80000000;
 885        float_raise( float_flag_invalid );
 886        if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
 887        return 0x80000000;
 888    }
 889    else if ( aExp <= 0x7E ) {
 890        if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
 891        return 0;
 892    }
 893    aSig = ( aSig | 0x00800000 )<<8;
 894    z = aSig>>( - shiftCount );
 895    if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
 896        float_exception_flags |= float_flag_inexact;
 897    }
 898    return aSign ? - z : z;
 899
 900}
 901
 902/*
 903-------------------------------------------------------------------------------
 904Returns the result of converting the single-precision floating-point value
 905`a' to the double-precision floating-point format.  The conversion is
 906performed according to the IEC/IEEE Standard for Binary Floating-point
 907Arithmetic.
 908-------------------------------------------------------------------------------
 909*/
 910float64 float32_to_float64( float32 a )
 911{
 912    flag aSign;
 913    int16 aExp;
 914    bits32 aSig;
 915
 916    aSig = extractFloat32Frac( a );
 917    aExp = extractFloat32Exp( a );
 918    aSign = extractFloat32Sign( a );
 919    if ( aExp == 0xFF ) {
 920        if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) );
 921        return packFloat64( aSign, 0x7FF, 0 );
 922    }
 923    if ( aExp == 0 ) {
 924        if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
 925        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
 926        --aExp;
 927    }
 928    return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 );
 929
 930}
 931
 932#ifdef FLOATX80
 933
 934/*
 935-------------------------------------------------------------------------------
 936Returns the result of converting the single-precision floating-point value
 937`a' to the extended double-precision floating-point format.  The conversion
 938is performed according to the IEC/IEEE Standard for Binary Floating-point
 939Arithmetic.
 940-------------------------------------------------------------------------------
 941*/
 942floatx80 float32_to_floatx80( float32 a )
 943{
 944    flag aSign;
 945    int16 aExp;
 946    bits32 aSig;
 947
 948    aSig = extractFloat32Frac( a );
 949    aExp = extractFloat32Exp( a );
 950    aSign = extractFloat32Sign( a );
 951    if ( aExp == 0xFF ) {
 952        if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a ) );
 953        return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
 954    }
 955    if ( aExp == 0 ) {
 956        if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
 957        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
 958    }
 959    aSig |= 0x00800000;
 960    return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 );
 961
 962}
 963
 964#endif
 965
 966/*
 967-------------------------------------------------------------------------------
 968Rounds the single-precision floating-point value `a' to an integer, and
 969returns the result as a single-precision floating-point value.  The
 970operation is performed according to the IEC/IEEE Standard for Binary
 971Floating-point Arithmetic.
 972-------------------------------------------------------------------------------
 973*/
 974float32 float32_round_to_int( float32 a )
 975{
 976    flag aSign;
 977    int16 aExp;
 978    bits32 lastBitMask, roundBitsMask;
 979    int8 roundingMode;
 980    float32 z;
 981
 982    aExp = extractFloat32Exp( a );
 983    if ( 0x96 <= aExp ) {
 984        if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
 985            return propagateFloat32NaN( a, a );
 986        }
 987        return a;
 988    }
 989    if ( aExp <= 0x7E ) {
 990        if ( (bits32) ( a<<1 ) == 0 ) return a;
 991        float_exception_flags |= float_flag_inexact;
 992        aSign = extractFloat32Sign( a );
 993        switch ( float_rounding_mode ) {
 994         case float_round_nearest_even:
 995            if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
 996                return packFloat32( aSign, 0x7F, 0 );
 997            }
 998            break;
 999         case float_round_down:
1000            return aSign ? 0xBF800000 : 0;
1001         case float_round_up:
1002            return aSign ? 0x80000000 : 0x3F800000;
1003        }
1004        return packFloat32( aSign, 0, 0 );
1005    }
1006    lastBitMask = 1;
1007    lastBitMask <<= 0x96 - aExp;
1008    roundBitsMask = lastBitMask - 1;
1009    z = a;
1010    roundingMode = float_rounding_mode;
1011    if ( roundingMode == float_round_nearest_even ) {
1012        z += lastBitMask>>1;
1013        if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
1014    }
1015    else if ( roundingMode != float_round_to_zero ) {
1016        if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
1017            z += roundBitsMask;
1018        }
1019    }
1020    z &= ~ roundBitsMask;
1021    if ( z != a ) float_exception_flags |= float_flag_inexact;
1022    return z;
1023
1024}
1025
1026/*
1027-------------------------------------------------------------------------------
1028Returns the result of adding the absolute values of the single-precision
1029floating-point values `a' and `b'.  If `zSign' is true, the sum is negated
1030before being returned.  `zSign' is ignored if the result is a NaN.  The
1031addition is performed according to the IEC/IEEE Standard for Binary
1032Floating-point Arithmetic.
1033-------------------------------------------------------------------------------
1034*/
1035static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
1036{
1037    int16 aExp, bExp, zExp;
1038    bits32 aSig, bSig, zSig;
1039    int16 expDiff;
1040
1041    aSig = extractFloat32Frac( a );
1042    aExp = extractFloat32Exp( a );
1043    bSig = extractFloat32Frac( b );
1044    bExp = extractFloat32Exp( b );
1045    expDiff = aExp - bExp;
1046    aSig <<= 6;
1047    bSig <<= 6;
1048    if ( 0 < expDiff ) {
1049        if ( aExp == 0xFF ) {
1050            if ( aSig ) return propagateFloat32NaN( a, b );
1051            return a;
1052        }
1053        if ( bExp == 0 ) {
1054            --expDiff;
1055        }
1056        else {
1057            bSig |= 0x20000000;
1058        }
1059        shift32RightJamming( bSig, expDiff, &bSig );
1060        zExp = aExp;
1061    }
1062    else if ( expDiff < 0 ) {
1063        if ( bExp == 0xFF ) {
1064            if ( bSig ) return propagateFloat32NaN( a, b );
1065            return packFloat32( zSign, 0xFF, 0 );
1066        }
1067        if ( aExp == 0 ) {
1068            ++expDiff;
1069        }
1070        else {
1071            aSig |= 0x20000000;
1072        }
1073        shift32RightJamming( aSig, - expDiff, &aSig );
1074        zExp = bExp;
1075    }
1076    else {
1077        if ( aExp == 0xFF ) {
1078            if ( aSig | bSig ) return propagateFloat32NaN( a, b );
1079            return a;
1080        }
1081        if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
1082        zSig = 0x40000000 + aSig + bSig;
1083        zExp = aExp;
1084        goto roundAndPack;
1085    }
1086    aSig |= 0x20000000;
1087    zSig = ( aSig + bSig )<<1;
1088    --zExp;
1089    if ( (sbits32) zSig < 0 ) {
1090        zSig = aSig + bSig;
1091        ++zExp;
1092    }
1093 roundAndPack:
1094    return roundAndPackFloat32( zSign, zExp, zSig );
1095
1096}
1097
1098/*
1099-------------------------------------------------------------------------------
1100Returns the result of subtracting the absolute values of the single-
1101precision floating-point values `a' and `b'.  If `zSign' is true, the
1102difference is negated before being returned.  `zSign' is ignored if the
1103result is a NaN.  The subtraction is performed according to the IEC/IEEE
1104Standard for Binary Floating-point Arithmetic.
1105-------------------------------------------------------------------------------
1106*/
1107static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
1108{
1109    int16 aExp, bExp, zExp;
1110    bits32 aSig, bSig, zSig;
1111    int16 expDiff;
1112
1113    aSig = extractFloat32Frac( a );
1114    aExp = extractFloat32Exp( a );
1115    bSig = extractFloat32Frac( b );
1116    bExp = extractFloat32Exp( b );
1117    expDiff = aExp - bExp;
1118    aSig <<= 7;
1119    bSig <<= 7;
1120    if ( 0 < expDiff ) goto aExpBigger;
1121    if ( expDiff < 0 ) goto bExpBigger;
1122    if ( aExp == 0xFF ) {
1123        if ( aSig | bSig ) return propagateFloat32NaN( a, b );
1124        float_raise( float_flag_invalid );
1125        return float32_default_nan;
1126    }
1127    if ( aExp == 0 ) {
1128        aExp = 1;
1129        bExp = 1;
1130    }
1131    if ( bSig < aSig ) goto aBigger;
1132    if ( aSig < bSig ) goto bBigger;
1133    return packFloat32( float_rounding_mode == float_round_down, 0, 0 );
1134 bExpBigger:
1135    if ( bExp == 0xFF ) {
1136        if ( bSig ) return propagateFloat32NaN( a, b );
1137        return packFloat32( zSign ^ 1, 0xFF, 0 );
1138    }
1139    if ( aExp == 0 ) {
1140        ++expDiff;
1141    }
1142    else {
1143        aSig |= 0x40000000;
1144    }
1145    shift32RightJamming( aSig, - expDiff, &aSig );
1146    bSig |= 0x40000000;
1147 bBigger:
1148    zSig = bSig - aSig;
1149    zExp = bExp;
1150    zSign ^= 1;
1151    goto normalizeRoundAndPack;
1152 aExpBigger:
1153    if ( aExp == 0xFF ) {
1154        if ( aSig ) return propagateFloat32NaN( a, b );
1155        return a;
1156    }
1157    if ( bExp == 0 ) {
1158        --expDiff;
1159    }
1160    else {
1161        bSig |= 0x40000000;
1162    }
1163    shift32RightJamming( bSig, expDiff, &bSig );
1164    aSig |= 0x40000000;
1165 aBigger:
1166    zSig = aSig - bSig;
1167    zExp = aExp;
1168 normalizeRoundAndPack:
1169    --zExp;
1170    return normalizeRoundAndPackFloat32( zSign, zExp, zSig );
1171
1172}
1173
1174/*
1175-------------------------------------------------------------------------------
1176Returns the result of adding the single-precision floating-point values `a'
1177and `b'.  The operation is performed according to the IEC/IEEE Standard for
1178Binary Floating-point Arithmetic.
1179-------------------------------------------------------------------------------
1180*/
1181float32 float32_add( float32 a, float32 b )
1182{
1183    flag aSign, bSign;
1184
1185    aSign = extractFloat32Sign( a );
1186    bSign = extractFloat32Sign( b );
1187    if ( aSign == bSign ) {
1188        return addFloat32Sigs( a, b, aSign );
1189    }
1190    else {
1191        return subFloat32Sigs( a, b, aSign );
1192    }
1193
1194}
1195
1196/*
1197-------------------------------------------------------------------------------
1198Returns the result of subtracting the single-precision floating-point values
1199`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
1200for Binary Floating-point Arithmetic.
1201-------------------------------------------------------------------------------
1202*/
1203float32 float32_sub( float32 a, float32 b )
1204{
1205    flag aSign, bSign;
1206
1207    aSign = extractFloat32Sign( a );
1208    bSign = extractFloat32Sign( b );
1209    if ( aSign == bSign ) {
1210        return subFloat32Sigs( a, b, aSign );
1211    }
1212    else {
1213        return addFloat32Sigs( a, b, aSign );
1214    }
1215
1216}
1217
1218/*
1219-------------------------------------------------------------------------------
1220Returns the result of multiplying the single-precision floating-point values
1221`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
1222for Binary Floating-point Arithmetic.
1223-------------------------------------------------------------------------------
1224*/
1225float32 float32_mul( float32 a, float32 b )
1226{
1227    flag aSign, bSign, zSign;
1228    int16 aExp, bExp, zExp;
1229    bits32 aSig, bSig;
1230    bits64 zSig64;
1231    bits32 zSig;
1232
1233    aSig = extractFloat32Frac( a );
1234    aExp = extractFloat32Exp( a );
1235    aSign = extractFloat32Sign( a );
1236    bSig = extractFloat32Frac( b );
1237    bExp = extractFloat32Exp( b );
1238    bSign = extractFloat32Sign( b );
1239    zSign = aSign ^ bSign;
1240    if ( aExp == 0xFF ) {
1241        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
1242            return propagateFloat32NaN( a, b );
1243        }
1244        if ( ( bExp | bSig ) == 0 ) {
1245            float_raise( float_flag_invalid );
1246            return float32_default_nan;
1247        }
1248        return packFloat32( zSign, 0xFF, 0 );
1249    }
1250    if ( bExp == 0xFF ) {
1251        if ( bSig ) return propagateFloat32NaN( a, b );
1252        if ( ( aExp | aSig ) == 0 ) {
1253            float_raise( float_flag_invalid );
1254            return float32_default_nan;
1255        }
1256        return packFloat32( zSign, 0xFF, 0 );
1257    }
1258    if ( aExp == 0 ) {
1259        if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1260        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1261    }
1262    if ( bExp == 0 ) {
1263        if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
1264        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1265    }
1266    zExp = aExp + bExp - 0x7F;
1267    aSig = ( aSig | 0x00800000 )<<7;
1268    bSig = ( bSig | 0x00800000 )<<8;
1269    shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 );
1270    zSig = zSig64;
1271    if ( 0 <= (sbits32) ( zSig<<1 ) ) {
1272        zSig <<= 1;
1273        --zExp;
1274    }
1275    return roundAndPackFloat32( zSign, zExp, zSig );
1276
1277}
1278
1279/*
1280-------------------------------------------------------------------------------
1281Returns the result of dividing the single-precision floating-point value `a'
1282by the corresponding value `b'.  The operation is performed according to the
1283IEC/IEEE Standard for Binary Floating-point Arithmetic.
1284-------------------------------------------------------------------------------
1285*/
1286float32 float32_div( float32 a, float32 b )
1287{
1288    flag aSign, bSign, zSign;
1289    int16 aExp, bExp, zExp;
1290    bits32 aSig, bSig, zSig;
1291
1292    aSig = extractFloat32Frac( a );
1293    aExp = extractFloat32Exp( a );
1294    aSign = extractFloat32Sign( a );
1295    bSig = extractFloat32Frac( b );
1296    bExp = extractFloat32Exp( b );
1297    bSign = extractFloat32Sign( b );
1298    zSign = aSign ^ bSign;
1299    if ( aExp == 0xFF ) {
1300        if ( aSig ) return propagateFloat32NaN( a, b );
1301        if ( bExp == 0xFF ) {
1302            if ( bSig ) return propagateFloat32NaN( a, b );
1303            float_raise( float_flag_invalid );
1304            return float32_default_nan;
1305        }
1306        return packFloat32( zSign, 0xFF, 0 );
1307    }
1308    if ( bExp == 0xFF ) {
1309        if ( bSig ) return propagateFloat32NaN( a, b );
1310        return packFloat32( zSign, 0, 0 );
1311    }
1312    if ( bExp == 0 ) {
1313        if ( bSig == 0 ) {
1314            if ( ( aExp | aSig ) == 0 ) {
1315                float_raise( float_flag_invalid );
1316                return float32_default_nan;
1317            }
1318            float_raise( float_flag_divbyzero );
1319            return packFloat32( zSign, 0xFF, 0 );
1320        }
1321        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1322    }
1323    if ( aExp == 0 ) {
1324        if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1325        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1326    }
1327    zExp = aExp - bExp + 0x7D;
1328    aSig = ( aSig | 0x00800000 )<<7;
1329    bSig = ( bSig | 0x00800000 )<<8;
1330    if ( bSig <= ( aSig + aSig ) ) {
1331        aSig >>= 1;
1332        ++zExp;
1333    }
1334    zSig = ( ( (bits64) aSig )<<32 ) / bSig;
1335    if ( ( zSig & 0x3F ) == 0 ) {
1336        zSig |= ( ( (bits64) bSig ) * zSig != ( (bits64) aSig )<<32 );
1337    }
1338    return roundAndPackFloat32( zSign, zExp, zSig );
1339
1340}
1341
1342/*
1343-------------------------------------------------------------------------------
1344Returns the remainder of the single-precision floating-point value `a'
1345with respect to the corresponding value `b'.  The operation is performed
1346according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
1347-------------------------------------------------------------------------------
1348*/
1349float32 float32_rem( float32 a, float32 b )
1350{
1351    flag aSign, bSign, zSign;
1352    int16 aExp, bExp, expDiff;
1353    bits32 aSig, bSig;
1354    bits32 q;
1355    bits64 aSig64, bSig64, q64;
1356    bits32 alternateASig;
1357    sbits32 sigMean;
1358
1359    aSig = extractFloat32Frac( a );
1360    aExp = extractFloat32Exp( a );
1361    aSign = extractFloat32Sign( a );
1362    bSig = extractFloat32Frac( b );
1363    bExp = extractFloat32Exp( b );
1364    bSign = extractFloat32Sign( b );
1365    if ( aExp == 0xFF ) {
1366        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
1367            return propagateFloat32NaN( a, b );
1368        }
1369        float_raise( float_flag_invalid );
1370        return float32_default_nan;
1371    }
1372    if ( bExp == 0xFF ) {
1373        if ( bSig ) return propagateFloat32NaN( a, b );
1374        return a;
1375    }
1376    if ( bExp == 0 ) {
1377        if ( bSig == 0 ) {
1378            float_raise( float_flag_invalid );
1379            return float32_default_nan;
1380        }
1381        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1382    }
1383    if ( aExp == 0 ) {
1384        if ( aSig == 0 ) return a;
1385        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1386    }
1387    expDiff = aExp - bExp;
1388    aSig |= 0x00800000;
1389    bSig |= 0x00800000;
1390    if ( expDiff < 32 ) {
1391        aSig <<= 8;
1392        bSig <<= 8;
1393        if ( expDiff < 0 ) {
1394            if ( expDiff < -1 ) return a;
1395            aSig >>= 1;
1396        }
1397        q = ( bSig <= aSig );
1398        if ( q ) aSig -= bSig;
1399        if ( 0 < expDiff ) {
1400            q = ( ( (bits64) aSig )<<32 ) / bSig;
1401            q >>= 32 - expDiff;
1402            bSig >>= 2;
1403            aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
1404        }
1405        else {
1406            aSig >>= 2;
1407            bSig >>= 2;
1408        }
1409    }
1410    else {
1411        if ( bSig <= aSig ) aSig -= bSig;
1412        aSig64 = ( (bits64) aSig )<<40;
1413        bSig64 = ( (bits64) bSig )<<40;
1414        expDiff -= 64;
1415        while ( 0 < expDiff ) {
1416            q64 = estimateDiv128To64( aSig64, 0, bSig64 );
1417            q64 = ( 2 < q64 ) ? q64 - 2 : 0;
1418            aSig64 = - ( ( bSig * q64 )<<38 );
1419            expDiff -= 62;
1420        }
1421        expDiff += 64;
1422        q64 = estimateDiv128To64( aSig64, 0, bSig64 );
1423        q64 = ( 2 < q64 ) ? q64 - 2 : 0;
1424        q = q64>>( 64 - expDiff );
1425        bSig <<= 6;
1426        aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
1427    }
1428    do {
1429        alternateASig = aSig;
1430        ++q;
1431        aSig -= bSig;
1432    } while ( 0 <= (sbits32) aSig );
1433    sigMean = aSig + alternateASig;
1434    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
1435        aSig = alternateASig;
1436    }
1437    zSign = ( (sbits32) aSig < 0 );
1438    if ( zSign ) aSig = - aSig;
1439    return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig );
1440
1441}
1442
1443/*
1444-------------------------------------------------------------------------------
1445Returns the square root of the singleā€¦

Large files files are truncated, but you can click here to view the full file