PageRenderTime 55ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 1ms

/third_party/gofrontend/libgo/go/math/big/float.go

http://github.com/axw/llgo
Go | 1693 lines | 1027 code | 182 blank | 484 comment | 324 complexity | 682cb687f09abd799c0f0ceb49e21d6b MD5 | raw file
Possible License(s): BSD-3-Clause, MIT
  1. // Copyright 2014 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // This file implements multi-precision floating-point numbers.
  5. // Like in the GNU MPFR library (http://www.mpfr.org/), operands
  6. // can be of mixed precision. Unlike MPFR, the rounding mode is
  7. // not specified with each operation, but with each operand. The
  8. // rounding mode of the result operand determines the rounding
  9. // mode of an operation. This is a from-scratch implementation.
  10. package big
  11. import (
  12. "fmt"
  13. "math"
  14. )
  15. const debugFloat = false // enable for debugging
  16. // A nonzero finite Float represents a multi-precision floating point number
  17. //
  18. // sign × mantissa × 2**exponent
  19. //
  20. // with 0.5 <= mantissa < 1.0, and MinExp <= exponent <= MaxExp.
  21. // A Float may also be zero (+0, -0) or infinite (+Inf, -Inf).
  22. // All Floats are ordered, and the ordering of two Floats x and y
  23. // is defined by x.Cmp(y).
  24. //
  25. // Each Float value also has a precision, rounding mode, and accuracy.
  26. // The precision is the maximum number of mantissa bits available to
  27. // represent the value. The rounding mode specifies how a result should
  28. // be rounded to fit into the mantissa bits, and accuracy describes the
  29. // rounding error with respect to the exact result.
  30. //
  31. // Unless specified otherwise, all operations (including setters) that
  32. // specify a *Float variable for the result (usually via the receiver
  33. // with the exception of MantExp), round the numeric result according
  34. // to the precision and rounding mode of the result variable.
  35. //
  36. // If the provided result precision is 0 (see below), it is set to the
  37. // precision of the argument with the largest precision value before any
  38. // rounding takes place, and the rounding mode remains unchanged. Thus,
  39. // uninitialized Floats provided as result arguments will have their
  40. // precision set to a reasonable value determined by the operands and
  41. // their mode is the zero value for RoundingMode (ToNearestEven).
  42. //
  43. // By setting the desired precision to 24 or 53 and using matching rounding
  44. // mode (typically ToNearestEven), Float operations produce the same results
  45. // as the corresponding float32 or float64 IEEE-754 arithmetic for operands
  46. // that correspond to normal (i.e., not denormal) float32 or float64 numbers.
  47. // Exponent underflow and overflow lead to a 0 or an Infinity for different
  48. // values than IEEE-754 because Float exponents have a much larger range.
  49. //
  50. // The zero (uninitialized) value for a Float is ready to use and represents
  51. // the number +0.0 exactly, with precision 0 and rounding mode ToNearestEven.
  52. //
  53. type Float struct {
  54. prec uint32
  55. mode RoundingMode
  56. acc Accuracy
  57. form form
  58. neg bool
  59. mant nat
  60. exp int32
  61. }
  62. // An ErrNaN panic is raised by a Float operation that would lead to
  63. // a NaN under IEEE-754 rules. An ErrNaN implements the error interface.
  64. type ErrNaN struct {
  65. msg string
  66. }
  67. func (err ErrNaN) Error() string {
  68. return err.msg
  69. }
  70. // NewFloat allocates and returns a new Float set to x,
  71. // with precision 53 and rounding mode ToNearestEven.
  72. // NewFloat panics with ErrNaN if x is a NaN.
  73. func NewFloat(x float64) *Float {
  74. if math.IsNaN(x) {
  75. panic(ErrNaN{"NewFloat(NaN)"})
  76. }
  77. return new(Float).SetFloat64(x)
  78. }
  79. // Exponent and precision limits.
  80. const (
  81. MaxExp = math.MaxInt32 // largest supported exponent
  82. MinExp = math.MinInt32 // smallest supported exponent
  83. MaxPrec = math.MaxUint32 // largest (theoretically) supported precision; likely memory-limited
  84. )
  85. // Internal representation: The mantissa bits x.mant of a nonzero finite
  86. // Float x are stored in a nat slice long enough to hold up to x.prec bits;
  87. // the slice may (but doesn't have to) be shorter if the mantissa contains
  88. // trailing 0 bits. x.mant is normalized if the msb of x.mant == 1 (i.e.,
  89. // the msb is shifted all the way "to the left"). Thus, if the mantissa has
  90. // trailing 0 bits or x.prec is not a multiple of the the Word size _W,
  91. // x.mant[0] has trailing zero bits. The msb of the mantissa corresponds
  92. // to the value 0.5; the exponent x.exp shifts the binary point as needed.
  93. //
  94. // A zero or non-finite Float x ignores x.mant and x.exp.
  95. //
  96. // x form neg mant exp
  97. // ----------------------------------------------------------
  98. // ±0 zero sign - -
  99. // 0 < |x| < +Inf finite sign mantissa exponent
  100. // ±Inf inf sign - -
  101. // A form value describes the internal representation.
  102. type form byte
  103. // The form value order is relevant - do not change!
  104. const (
  105. zero form = iota
  106. finite
  107. inf
  108. )
  109. // RoundingMode determines how a Float value is rounded to the
  110. // desired precision. Rounding may change the Float value; the
  111. // rounding error is described by the Float's Accuracy.
  112. type RoundingMode byte
  113. // The following rounding modes are supported.
  114. const (
  115. ToNearestEven RoundingMode = iota // == IEEE 754-2008 roundTiesToEven
  116. ToNearestAway // == IEEE 754-2008 roundTiesToAway
  117. ToZero // == IEEE 754-2008 roundTowardZero
  118. AwayFromZero // no IEEE 754-2008 equivalent
  119. ToNegativeInf // == IEEE 754-2008 roundTowardNegative
  120. ToPositiveInf // == IEEE 754-2008 roundTowardPositive
  121. )
  122. //go:generate stringer -type=RoundingMode
  123. // Accuracy describes the rounding error produced by the most recent
  124. // operation that generated a Float value, relative to the exact value.
  125. type Accuracy int8
  126. // Constants describing the Accuracy of a Float.
  127. const (
  128. Below Accuracy = -1
  129. Exact Accuracy = 0
  130. Above Accuracy = +1
  131. )
  132. //go:generate stringer -type=Accuracy
  133. // SetPrec sets z's precision to prec and returns the (possibly) rounded
  134. // value of z. Rounding occurs according to z's rounding mode if the mantissa
  135. // cannot be represented in prec bits without loss of precision.
  136. // SetPrec(0) maps all finite values to ±0; infinite values remain unchanged.
  137. // If prec > MaxPrec, it is set to MaxPrec.
  138. func (z *Float) SetPrec(prec uint) *Float {
  139. z.acc = Exact // optimistically assume no rounding is needed
  140. // special case
  141. if prec == 0 {
  142. z.prec = 0
  143. if z.form == finite {
  144. // truncate z to 0
  145. z.acc = makeAcc(z.neg)
  146. z.form = zero
  147. }
  148. return z
  149. }
  150. // general case
  151. if prec > MaxPrec {
  152. prec = MaxPrec
  153. }
  154. old := z.prec
  155. z.prec = uint32(prec)
  156. if z.prec < old {
  157. z.round(0)
  158. }
  159. return z
  160. }
  161. func makeAcc(above bool) Accuracy {
  162. if above {
  163. return Above
  164. }
  165. return Below
  166. }
  167. // SetMode sets z's rounding mode to mode and returns an exact z.
  168. // z remains unchanged otherwise.
  169. // z.SetMode(z.Mode()) is a cheap way to set z's accuracy to Exact.
  170. func (z *Float) SetMode(mode RoundingMode) *Float {
  171. z.mode = mode
  172. z.acc = Exact
  173. return z
  174. }
  175. // Prec returns the mantissa precision of x in bits.
  176. // The result may be 0 for |x| == 0 and |x| == Inf.
  177. func (x *Float) Prec() uint {
  178. return uint(x.prec)
  179. }
  180. // MinPrec returns the minimum precision required to represent x exactly
  181. // (i.e., the smallest prec before x.SetPrec(prec) would start rounding x).
  182. // The result is 0 for |x| == 0 and |x| == Inf.
  183. func (x *Float) MinPrec() uint {
  184. if x.form != finite {
  185. return 0
  186. }
  187. return uint(len(x.mant))*_W - x.mant.trailingZeroBits()
  188. }
  189. // Mode returns the rounding mode of x.
  190. func (x *Float) Mode() RoundingMode {
  191. return x.mode
  192. }
  193. // Acc returns the accuracy of x produced by the most recent operation.
  194. func (x *Float) Acc() Accuracy {
  195. return x.acc
  196. }
  197. // Sign returns:
  198. //
  199. // -1 if x < 0
  200. // 0 if x is ±0
  201. // +1 if x > 0
  202. //
  203. func (x *Float) Sign() int {
  204. if debugFloat {
  205. x.validate()
  206. }
  207. if x.form == zero {
  208. return 0
  209. }
  210. if x.neg {
  211. return -1
  212. }
  213. return 1
  214. }
  215. // MantExp breaks x into its mantissa and exponent components
  216. // and returns the exponent. If a non-nil mant argument is
  217. // provided its value is set to the mantissa of x, with the
  218. // same precision and rounding mode as x. The components
  219. // satisfy x == mant × 2**exp, with 0.5 <= |mant| < 1.0.
  220. // Calling MantExp with a nil argument is an efficient way to
  221. // get the exponent of the receiver.
  222. //
  223. // Special cases are:
  224. //
  225. // ( ±0).MantExp(mant) = 0, with mant set to ±0
  226. // (±Inf).MantExp(mant) = 0, with mant set to ±Inf
  227. //
  228. // x and mant may be the same in which case x is set to its
  229. // mantissa value.
  230. func (x *Float) MantExp(mant *Float) (exp int) {
  231. if debugFloat {
  232. x.validate()
  233. }
  234. if x.form == finite {
  235. exp = int(x.exp)
  236. }
  237. if mant != nil {
  238. mant.Copy(x)
  239. if mant.form == finite {
  240. mant.exp = 0
  241. }
  242. }
  243. return
  244. }
  245. func (z *Float) setExpAndRound(exp int64, sbit uint) {
  246. if exp < MinExp {
  247. // underflow
  248. z.acc = makeAcc(z.neg)
  249. z.form = zero
  250. return
  251. }
  252. if exp > MaxExp {
  253. // overflow
  254. z.acc = makeAcc(!z.neg)
  255. z.form = inf
  256. return
  257. }
  258. z.form = finite
  259. z.exp = int32(exp)
  260. z.round(sbit)
  261. }
  262. // SetMantExp sets z to mant × 2**exp and and returns z.
  263. // The result z has the same precision and rounding mode
  264. // as mant. SetMantExp is an inverse of MantExp but does
  265. // not require 0.5 <= |mant| < 1.0. Specifically:
  266. //
  267. // mant := new(Float)
  268. // new(Float).SetMantExp(mant, x.SetMantExp(mant)).Cmp(x).Eql() is true
  269. //
  270. // Special cases are:
  271. //
  272. // z.SetMantExp( ±0, exp) = ±0
  273. // z.SetMantExp(±Inf, exp) = ±Inf
  274. //
  275. // z and mant may be the same in which case z's exponent
  276. // is set to exp.
  277. func (z *Float) SetMantExp(mant *Float, exp int) *Float {
  278. if debugFloat {
  279. z.validate()
  280. mant.validate()
  281. }
  282. z.Copy(mant)
  283. if z.form != finite {
  284. return z
  285. }
  286. z.setExpAndRound(int64(z.exp)+int64(exp), 0)
  287. return z
  288. }
  289. // Signbit returns true if x is negative or negative zero.
  290. func (x *Float) Signbit() bool {
  291. return x.neg
  292. }
  293. // IsInf reports whether x is +Inf or -Inf.
  294. func (x *Float) IsInf() bool {
  295. return x.form == inf
  296. }
  297. // IsInt reports whether x is an integer.
  298. // ±Inf values are not integers.
  299. func (x *Float) IsInt() bool {
  300. if debugFloat {
  301. x.validate()
  302. }
  303. // special cases
  304. if x.form != finite {
  305. return x.form == zero
  306. }
  307. // x.form == finite
  308. if x.exp <= 0 {
  309. return false
  310. }
  311. // x.exp > 0
  312. return x.prec <= uint32(x.exp) || x.MinPrec() <= uint(x.exp) // not enough bits for fractional mantissa
  313. }
  314. // debugging support
  315. func (x *Float) validate() {
  316. if !debugFloat {
  317. // avoid performance bugs
  318. panic("validate called but debugFloat is not set")
  319. }
  320. if x.form != finite {
  321. return
  322. }
  323. m := len(x.mant)
  324. if m == 0 {
  325. panic("nonzero finite number with empty mantissa")
  326. }
  327. const msb = 1 << (_W - 1)
  328. if x.mant[m-1]&msb == 0 {
  329. panic(fmt.Sprintf("msb not set in last word %#x of %s", x.mant[m-1], x.Text('p', 0)))
  330. }
  331. if x.prec == 0 {
  332. panic("zero precision finite number")
  333. }
  334. }
  335. // round rounds z according to z.mode to z.prec bits and sets z.acc accordingly.
  336. // sbit must be 0 or 1 and summarizes any "sticky bit" information one might
  337. // have before calling round. z's mantissa must be normalized (with the msb set)
  338. // or empty.
  339. //
  340. // CAUTION: The rounding modes ToNegativeInf, ToPositiveInf are affected by the
  341. // sign of z. For correct rounding, the sign of z must be set correctly before
  342. // calling round.
  343. func (z *Float) round(sbit uint) {
  344. if debugFloat {
  345. z.validate()
  346. }
  347. z.acc = Exact
  348. if z.form != finite {
  349. // ±0 or ±Inf => nothing left to do
  350. return
  351. }
  352. // z.form == finite && len(z.mant) > 0
  353. // m > 0 implies z.prec > 0 (checked by validate)
  354. m := uint32(len(z.mant)) // present mantissa length in words
  355. bits := m * _W // present mantissa bits
  356. if bits <= z.prec {
  357. // mantissa fits => nothing to do
  358. return
  359. }
  360. // bits > z.prec
  361. n := (z.prec + (_W - 1)) / _W // mantissa length in words for desired precision
  362. // Rounding is based on two bits: the rounding bit (rbit) and the
  363. // sticky bit (sbit). The rbit is the bit immediately before the
  364. // z.prec leading mantissa bits (the "0.5"). The sbit is set if any
  365. // of the bits before the rbit are set (the "0.25", "0.125", etc.):
  366. //
  367. // rbit sbit => "fractional part"
  368. //
  369. // 0 0 == 0
  370. // 0 1 > 0 , < 0.5
  371. // 1 0 == 0.5
  372. // 1 1 > 0.5, < 1.0
  373. // bits > z.prec: mantissa too large => round
  374. r := uint(bits - z.prec - 1) // rounding bit position; r >= 0
  375. rbit := z.mant.bit(r) // rounding bit
  376. if sbit == 0 {
  377. sbit = z.mant.sticky(r)
  378. }
  379. if debugFloat && sbit&^1 != 0 {
  380. panic(fmt.Sprintf("invalid sbit %#x", sbit))
  381. }
  382. // convert ToXInf rounding modes
  383. mode := z.mode
  384. switch mode {
  385. case ToNegativeInf:
  386. mode = ToZero
  387. if z.neg {
  388. mode = AwayFromZero
  389. }
  390. case ToPositiveInf:
  391. mode = AwayFromZero
  392. if z.neg {
  393. mode = ToZero
  394. }
  395. }
  396. // cut off extra words
  397. if m > n {
  398. copy(z.mant, z.mant[m-n:]) // move n last words to front
  399. z.mant = z.mant[:n]
  400. }
  401. // determine number of trailing zero bits t
  402. t := n*_W - z.prec // 0 <= t < _W
  403. lsb := Word(1) << t
  404. // make rounding decision
  405. // TODO(gri) This can be simplified (see Bits.round in bits_test.go).
  406. switch mode {
  407. case ToZero:
  408. // nothing to do
  409. case ToNearestEven, ToNearestAway:
  410. if rbit == 0 {
  411. // rounding bits == 0b0x
  412. mode = ToZero
  413. } else if sbit == 1 {
  414. // rounding bits == 0b11
  415. mode = AwayFromZero
  416. }
  417. case AwayFromZero:
  418. if rbit|sbit == 0 {
  419. mode = ToZero
  420. }
  421. default:
  422. // ToXInf modes have been converted to ToZero or AwayFromZero
  423. panic("unreachable")
  424. }
  425. // round and determine accuracy
  426. switch mode {
  427. case ToZero:
  428. if rbit|sbit != 0 {
  429. z.acc = Below
  430. }
  431. case ToNearestEven, ToNearestAway:
  432. if debugFloat && rbit != 1 {
  433. panic("internal error in rounding")
  434. }
  435. if mode == ToNearestEven && sbit == 0 && z.mant[0]&lsb == 0 {
  436. z.acc = Below
  437. break
  438. }
  439. // mode == ToNearestAway || sbit == 1 || z.mant[0]&lsb != 0
  440. fallthrough
  441. case AwayFromZero:
  442. // add 1 to mantissa
  443. if addVW(z.mant, z.mant, lsb) != 0 {
  444. // overflow => shift mantissa right by 1 and add msb
  445. shrVU(z.mant, z.mant, 1)
  446. z.mant[n-1] |= 1 << (_W - 1)
  447. // adjust exponent
  448. if z.exp < MaxExp {
  449. z.exp++
  450. } else {
  451. // exponent overflow
  452. z.acc = makeAcc(!z.neg)
  453. z.form = inf
  454. return
  455. }
  456. }
  457. z.acc = Above
  458. }
  459. // zero out trailing bits in least-significant word
  460. z.mant[0] &^= lsb - 1
  461. // update accuracy
  462. if z.acc != Exact && z.neg {
  463. z.acc = -z.acc
  464. }
  465. if debugFloat {
  466. z.validate()
  467. }
  468. return
  469. }
  470. func (z *Float) setBits64(neg bool, x uint64) *Float {
  471. if z.prec == 0 {
  472. z.prec = 64
  473. }
  474. z.acc = Exact
  475. z.neg = neg
  476. if x == 0 {
  477. z.form = zero
  478. return z
  479. }
  480. // x != 0
  481. z.form = finite
  482. s := nlz64(x)
  483. z.mant = z.mant.setUint64(x << s)
  484. z.exp = int32(64 - s) // always fits
  485. if z.prec < 64 {
  486. z.round(0)
  487. }
  488. return z
  489. }
  490. // SetUint64 sets z to the (possibly rounded) value of x and returns z.
  491. // If z's precision is 0, it is changed to 64 (and rounding will have
  492. // no effect).
  493. func (z *Float) SetUint64(x uint64) *Float {
  494. return z.setBits64(false, x)
  495. }
  496. // SetInt64 sets z to the (possibly rounded) value of x and returns z.
  497. // If z's precision is 0, it is changed to 64 (and rounding will have
  498. // no effect).
  499. func (z *Float) SetInt64(x int64) *Float {
  500. u := x
  501. if u < 0 {
  502. u = -u
  503. }
  504. // We cannot simply call z.SetUint64(uint64(u)) and change
  505. // the sign afterwards because the sign affects rounding.
  506. return z.setBits64(x < 0, uint64(u))
  507. }
  508. // SetFloat64 sets z to the (possibly rounded) value of x and returns z.
  509. // If z's precision is 0, it is changed to 53 (and rounding will have
  510. // no effect). SetFloat64 panics with ErrNaN if x is a NaN.
  511. func (z *Float) SetFloat64(x float64) *Float {
  512. if z.prec == 0 {
  513. z.prec = 53
  514. }
  515. if math.IsNaN(x) {
  516. panic(ErrNaN{"Float.SetFloat64(NaN)"})
  517. }
  518. z.acc = Exact
  519. z.neg = math.Signbit(x) // handle -0, -Inf correctly
  520. if x == 0 {
  521. z.form = zero
  522. return z
  523. }
  524. if math.IsInf(x, 0) {
  525. z.form = inf
  526. return z
  527. }
  528. // normalized x != 0
  529. z.form = finite
  530. fmant, exp := math.Frexp(x) // get normalized mantissa
  531. z.mant = z.mant.setUint64(1<<63 | math.Float64bits(fmant)<<11)
  532. z.exp = int32(exp) // always fits
  533. if z.prec < 53 {
  534. z.round(0)
  535. }
  536. return z
  537. }
  538. // fnorm normalizes mantissa m by shifting it to the left
  539. // such that the msb of the most-significant word (msw) is 1.
  540. // It returns the shift amount. It assumes that len(m) != 0.
  541. func fnorm(m nat) int64 {
  542. if debugFloat && (len(m) == 0 || m[len(m)-1] == 0) {
  543. panic("msw of mantissa is 0")
  544. }
  545. s := nlz(m[len(m)-1])
  546. if s > 0 {
  547. c := shlVU(m, m, s)
  548. if debugFloat && c != 0 {
  549. panic("nlz or shlVU incorrect")
  550. }
  551. }
  552. return int64(s)
  553. }
  554. // SetInt sets z to the (possibly rounded) value of x and returns z.
  555. // If z's precision is 0, it is changed to the larger of x.BitLen()
  556. // or 64 (and rounding will have no effect).
  557. func (z *Float) SetInt(x *Int) *Float {
  558. // TODO(gri) can be more efficient if z.prec > 0
  559. // but small compared to the size of x, or if there
  560. // are many trailing 0's.
  561. bits := uint32(x.BitLen())
  562. if z.prec == 0 {
  563. z.prec = umax32(bits, 64)
  564. }
  565. z.acc = Exact
  566. z.neg = x.neg
  567. if len(x.abs) == 0 {
  568. z.form = zero
  569. return z
  570. }
  571. // x != 0
  572. z.mant = z.mant.set(x.abs)
  573. fnorm(z.mant)
  574. z.setExpAndRound(int64(bits), 0)
  575. return z
  576. }
  577. // SetRat sets z to the (possibly rounded) value of x and returns z.
  578. // If z's precision is 0, it is changed to the largest of a.BitLen(),
  579. // b.BitLen(), or 64; with x = a/b.
  580. func (z *Float) SetRat(x *Rat) *Float {
  581. if x.IsInt() {
  582. return z.SetInt(x.Num())
  583. }
  584. var a, b Float
  585. a.SetInt(x.Num())
  586. b.SetInt(x.Denom())
  587. if z.prec == 0 {
  588. z.prec = umax32(a.prec, b.prec)
  589. }
  590. return z.Quo(&a, &b)
  591. }
  592. // SetInf sets z to the infinite Float -Inf if signbit is
  593. // set, or +Inf if signbit is not set, and returns z. The
  594. // precision of z is unchanged and the result is always
  595. // Exact.
  596. func (z *Float) SetInf(signbit bool) *Float {
  597. z.acc = Exact
  598. z.form = inf
  599. z.neg = signbit
  600. return z
  601. }
  602. // Set sets z to the (possibly rounded) value of x and returns z.
  603. // If z's precision is 0, it is changed to the precision of x
  604. // before setting z (and rounding will have no effect).
  605. // Rounding is performed according to z's precision and rounding
  606. // mode; and z's accuracy reports the result error relative to the
  607. // exact (not rounded) result.
  608. func (z *Float) Set(x *Float) *Float {
  609. if debugFloat {
  610. x.validate()
  611. }
  612. z.acc = Exact
  613. if z != x {
  614. z.form = x.form
  615. z.neg = x.neg
  616. if x.form == finite {
  617. z.exp = x.exp
  618. z.mant = z.mant.set(x.mant)
  619. }
  620. if z.prec == 0 {
  621. z.prec = x.prec
  622. } else if z.prec < x.prec {
  623. z.round(0)
  624. }
  625. }
  626. return z
  627. }
  628. // Copy sets z to x, with the same precision, rounding mode, and
  629. // accuracy as x, and returns z. x is not changed even if z and
  630. // x are the same.
  631. func (z *Float) Copy(x *Float) *Float {
  632. if debugFloat {
  633. x.validate()
  634. }
  635. if z != x {
  636. z.prec = x.prec
  637. z.mode = x.mode
  638. z.acc = x.acc
  639. z.form = x.form
  640. z.neg = x.neg
  641. if z.form == finite {
  642. z.mant = z.mant.set(x.mant)
  643. z.exp = x.exp
  644. }
  645. }
  646. return z
  647. }
  648. // msb32 returns the 32 most significant bits of x.
  649. func msb32(x nat) uint32 {
  650. i := len(x) - 1
  651. if i < 0 {
  652. return 0
  653. }
  654. if debugFloat && x[i]&(1<<(_W-1)) == 0 {
  655. panic("x not normalized")
  656. }
  657. switch _W {
  658. case 32:
  659. return uint32(x[i])
  660. case 64:
  661. return uint32(x[i] >> 32)
  662. }
  663. panic("unreachable")
  664. }
  665. // msb64 returns the 64 most significant bits of x.
  666. func msb64(x nat) uint64 {
  667. i := len(x) - 1
  668. if i < 0 {
  669. return 0
  670. }
  671. if debugFloat && x[i]&(1<<(_W-1)) == 0 {
  672. panic("x not normalized")
  673. }
  674. switch _W {
  675. case 32:
  676. v := uint64(x[i]) << 32
  677. if i > 0 {
  678. v |= uint64(x[i-1])
  679. }
  680. return v
  681. case 64:
  682. return uint64(x[i])
  683. }
  684. panic("unreachable")
  685. }
  686. // Uint64 returns the unsigned integer resulting from truncating x
  687. // towards zero. If 0 <= x <= math.MaxUint64, the result is Exact
  688. // if x is an integer and Below otherwise.
  689. // The result is (0, Above) for x < 0, and (math.MaxUint64, Below)
  690. // for x > math.MaxUint64.
  691. func (x *Float) Uint64() (uint64, Accuracy) {
  692. if debugFloat {
  693. x.validate()
  694. }
  695. switch x.form {
  696. case finite:
  697. if x.neg {
  698. return 0, Above
  699. }
  700. // 0 < x < +Inf
  701. if x.exp <= 0 {
  702. // 0 < x < 1
  703. return 0, Below
  704. }
  705. // 1 <= x < Inf
  706. if x.exp <= 64 {
  707. // u = trunc(x) fits into a uint64
  708. u := msb64(x.mant) >> (64 - uint32(x.exp))
  709. if x.MinPrec() <= 64 {
  710. return u, Exact
  711. }
  712. return u, Below // x truncated
  713. }
  714. // x too large
  715. return math.MaxUint64, Below
  716. case zero:
  717. return 0, Exact
  718. case inf:
  719. if x.neg {
  720. return 0, Above
  721. }
  722. return math.MaxUint64, Below
  723. }
  724. panic("unreachable")
  725. }
  726. // Int64 returns the integer resulting from truncating x towards zero.
  727. // If math.MinInt64 <= x <= math.MaxInt64, the result is Exact if x is
  728. // an integer, and Above (x < 0) or Below (x > 0) otherwise.
  729. // The result is (math.MinInt64, Above) for x < math.MinInt64,
  730. // and (math.MaxInt64, Below) for x > math.MaxInt64.
  731. func (x *Float) Int64() (int64, Accuracy) {
  732. if debugFloat {
  733. x.validate()
  734. }
  735. switch x.form {
  736. case finite:
  737. // 0 < |x| < +Inf
  738. acc := makeAcc(x.neg)
  739. if x.exp <= 0 {
  740. // 0 < |x| < 1
  741. return 0, acc
  742. }
  743. // x.exp > 0
  744. // 1 <= |x| < +Inf
  745. if x.exp <= 63 {
  746. // i = trunc(x) fits into an int64 (excluding math.MinInt64)
  747. i := int64(msb64(x.mant) >> (64 - uint32(x.exp)))
  748. if x.neg {
  749. i = -i
  750. }
  751. if x.MinPrec() <= uint(x.exp) {
  752. return i, Exact
  753. }
  754. return i, acc // x truncated
  755. }
  756. if x.neg {
  757. // check for special case x == math.MinInt64 (i.e., x == -(0.5 << 64))
  758. if x.exp == 64 && x.MinPrec() == 1 {
  759. acc = Exact
  760. }
  761. return math.MinInt64, acc
  762. }
  763. // x too large
  764. return math.MaxInt64, Below
  765. case zero:
  766. return 0, Exact
  767. case inf:
  768. if x.neg {
  769. return math.MinInt64, Above
  770. }
  771. return math.MaxInt64, Below
  772. }
  773. panic("unreachable")
  774. }
  775. // Float32 returns the float32 value nearest to x. If x is too small to be
  776. // represented by a float32 (|x| < math.SmallestNonzeroFloat32), the result
  777. // is (0, Below) or (-0, Above), respectively, depending on the sign of x.
  778. // If x is too large to be represented by a float32 (|x| > math.MaxFloat32),
  779. // the result is (+Inf, Above) or (-Inf, Below), depending on the sign of x.
  780. func (x *Float) Float32() (float32, Accuracy) {
  781. if debugFloat {
  782. x.validate()
  783. }
  784. switch x.form {
  785. case finite:
  786. // 0 < |x| < +Inf
  787. const (
  788. fbits = 32 // float size
  789. mbits = 23 // mantissa size (excluding implicit msb)
  790. ebits = fbits - mbits - 1 // 8 exponent size
  791. bias = 1<<(ebits-1) - 1 // 127 exponent bias
  792. dmin = 1 - bias - mbits // -149 smallest unbiased exponent (denormal)
  793. emin = 1 - bias // -126 smallest unbiased exponent (normal)
  794. emax = bias // 127 largest unbiased exponent (normal)
  795. )
  796. // Float mantissa m is 0.5 <= m < 1.0; compute exponent for floatxx mantissa.
  797. e := x.exp - 1 // exponent for mantissa m with 1.0 <= m < 2.0
  798. p := mbits + 1 // precision of normal float
  799. // If the exponent is too small, we may have a denormal number
  800. // in which case we have fewer mantissa bits available: reduce
  801. // precision accordingly.
  802. if e < emin {
  803. p -= emin - int(e)
  804. // Make sure we have at least 1 bit so that we don't
  805. // lose numbers rounded up to the smallest denormal.
  806. if p < 1 {
  807. p = 1
  808. }
  809. }
  810. // round
  811. var r Float
  812. r.prec = uint32(p)
  813. r.Set(x)
  814. e = r.exp - 1
  815. // Rounding may have caused r to overflow to ±Inf
  816. // (rounding never causes underflows to 0).
  817. if r.form == inf {
  818. e = emax + 1 // cause overflow below
  819. }
  820. // If the exponent is too large, overflow to ±Inf.
  821. if e > emax {
  822. // overflow
  823. if x.neg {
  824. return float32(math.Inf(-1)), Below
  825. }
  826. return float32(math.Inf(+1)), Above
  827. }
  828. // e <= emax
  829. // Determine sign, biased exponent, and mantissa.
  830. var sign, bexp, mant uint32
  831. if x.neg {
  832. sign = 1 << (fbits - 1)
  833. }
  834. // Rounding may have caused a denormal number to
  835. // become normal. Check again.
  836. if e < emin {
  837. // denormal number
  838. if e < dmin {
  839. // underflow to ±0
  840. if x.neg {
  841. var z float32
  842. return -z, Above
  843. }
  844. return 0.0, Below
  845. }
  846. // bexp = 0
  847. mant = msb32(r.mant) >> (fbits - r.prec)
  848. } else {
  849. // normal number: emin <= e <= emax
  850. bexp = uint32(e+bias) << mbits
  851. mant = msb32(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit)
  852. }
  853. return math.Float32frombits(sign | bexp | mant), r.acc
  854. case zero:
  855. if x.neg {
  856. var z float32
  857. return -z, Exact
  858. }
  859. return 0.0, Exact
  860. case inf:
  861. if x.neg {
  862. return float32(math.Inf(-1)), Exact
  863. }
  864. return float32(math.Inf(+1)), Exact
  865. }
  866. panic("unreachable")
  867. }
  868. // Float64 returns the float64 value nearest to x. If x is too small to be
  869. // represented by a float64 (|x| < math.SmallestNonzeroFloat64), the result
  870. // is (0, Below) or (-0, Above), respectively, depending on the sign of x.
  871. // If x is too large to be represented by a float64 (|x| > math.MaxFloat64),
  872. // the result is (+Inf, Above) or (-Inf, Below), depending on the sign of x.
  873. func (x *Float) Float64() (float64, Accuracy) {
  874. if debugFloat {
  875. x.validate()
  876. }
  877. switch x.form {
  878. case finite:
  879. // 0 < |x| < +Inf
  880. const (
  881. fbits = 64 // float size
  882. mbits = 52 // mantissa size (excluding implicit msb)
  883. ebits = fbits - mbits - 1 // 11 exponent size
  884. bias = 1<<(ebits-1) - 1 // 1023 exponent bias
  885. dmin = 1 - bias - mbits // -1074 smallest unbiased exponent (denormal)
  886. emin = 1 - bias // -1022 smallest unbiased exponent (normal)
  887. emax = bias // 1023 largest unbiased exponent (normal)
  888. )
  889. // Float mantissa m is 0.5 <= m < 1.0; compute exponent for floatxx mantissa.
  890. e := x.exp - 1 // exponent for mantissa m with 1.0 <= m < 2.0
  891. p := mbits + 1 // precision of normal float
  892. // If the exponent is too small, we may have a denormal number
  893. // in which case we have fewer mantissa bits available: reduce
  894. // precision accordingly.
  895. if e < emin {
  896. p -= emin - int(e)
  897. // Make sure we have at least 1 bit so that we don't
  898. // lose numbers rounded up to the smallest denormal.
  899. if p < 1 {
  900. p = 1
  901. }
  902. }
  903. // round
  904. var r Float
  905. r.prec = uint32(p)
  906. r.Set(x)
  907. e = r.exp - 1
  908. // Rounding may have caused r to overflow to ±Inf
  909. // (rounding never causes underflows to 0).
  910. if r.form == inf {
  911. e = emax + 1 // cause overflow below
  912. }
  913. // If the exponent is too large, overflow to ±Inf.
  914. if e > emax {
  915. // overflow
  916. if x.neg {
  917. return math.Inf(-1), Below
  918. }
  919. return math.Inf(+1), Above
  920. }
  921. // e <= emax
  922. // Determine sign, biased exponent, and mantissa.
  923. var sign, bexp, mant uint64
  924. if x.neg {
  925. sign = 1 << (fbits - 1)
  926. }
  927. // Rounding may have caused a denormal number to
  928. // become normal. Check again.
  929. if e < emin {
  930. // denormal number
  931. if e < dmin {
  932. // underflow to ±0
  933. if x.neg {
  934. var z float64
  935. return -z, Above
  936. }
  937. return 0.0, Below
  938. }
  939. // bexp = 0
  940. mant = msb64(r.mant) >> (fbits - r.prec)
  941. } else {
  942. // normal number: emin <= e <= emax
  943. bexp = uint64(e+bias) << mbits
  944. mant = msb64(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit)
  945. }
  946. return math.Float64frombits(sign | bexp | mant), r.acc
  947. case zero:
  948. if x.neg {
  949. var z float64
  950. return -z, Exact
  951. }
  952. return 0.0, Exact
  953. case inf:
  954. if x.neg {
  955. return math.Inf(-1), Exact
  956. }
  957. return math.Inf(+1), Exact
  958. }
  959. panic("unreachable")
  960. }
  961. // Int returns the result of truncating x towards zero;
  962. // or nil if x is an infinity.
  963. // The result is Exact if x.IsInt(); otherwise it is Below
  964. // for x > 0, and Above for x < 0.
  965. // If a non-nil *Int argument z is provided, Int stores
  966. // the result in z instead of allocating a new Int.
  967. func (x *Float) Int(z *Int) (*Int, Accuracy) {
  968. if debugFloat {
  969. x.validate()
  970. }
  971. if z == nil && x.form <= finite {
  972. z = new(Int)
  973. }
  974. switch x.form {
  975. case finite:
  976. // 0 < |x| < +Inf
  977. acc := makeAcc(x.neg)
  978. if x.exp <= 0 {
  979. // 0 < |x| < 1
  980. return z.SetInt64(0), acc
  981. }
  982. // x.exp > 0
  983. // 1 <= |x| < +Inf
  984. // determine minimum required precision for x
  985. allBits := uint(len(x.mant)) * _W
  986. exp := uint(x.exp)
  987. if x.MinPrec() <= exp {
  988. acc = Exact
  989. }
  990. // shift mantissa as needed
  991. if z == nil {
  992. z = new(Int)
  993. }
  994. z.neg = x.neg
  995. switch {
  996. case exp > allBits:
  997. z.abs = z.abs.shl(x.mant, exp-allBits)
  998. default:
  999. z.abs = z.abs.set(x.mant)
  1000. case exp < allBits:
  1001. z.abs = z.abs.shr(x.mant, allBits-exp)
  1002. }
  1003. return z, acc
  1004. case zero:
  1005. return z.SetInt64(0), Exact
  1006. case inf:
  1007. return nil, makeAcc(x.neg)
  1008. }
  1009. panic("unreachable")
  1010. }
  1011. // Rat returns the rational number corresponding to x;
  1012. // or nil if x is an infinity.
  1013. // The result is Exact is x is not an Inf.
  1014. // If a non-nil *Rat argument z is provided, Rat stores
  1015. // the result in z instead of allocating a new Rat.
  1016. func (x *Float) Rat(z *Rat) (*Rat, Accuracy) {
  1017. if debugFloat {
  1018. x.validate()
  1019. }
  1020. if z == nil && x.form <= finite {
  1021. z = new(Rat)
  1022. }
  1023. switch x.form {
  1024. case finite:
  1025. // 0 < |x| < +Inf
  1026. allBits := int32(len(x.mant)) * _W
  1027. // build up numerator and denominator
  1028. z.a.neg = x.neg
  1029. switch {
  1030. case x.exp > allBits:
  1031. z.a.abs = z.a.abs.shl(x.mant, uint(x.exp-allBits))
  1032. z.b.abs = z.b.abs[:0] // == 1 (see Rat)
  1033. // z already in normal form
  1034. default:
  1035. z.a.abs = z.a.abs.set(x.mant)
  1036. z.b.abs = z.b.abs[:0] // == 1 (see Rat)
  1037. // z already in normal form
  1038. case x.exp < allBits:
  1039. z.a.abs = z.a.abs.set(x.mant)
  1040. t := z.b.abs.setUint64(1)
  1041. z.b.abs = t.shl(t, uint(allBits-x.exp))
  1042. z.norm()
  1043. }
  1044. return z, Exact
  1045. case zero:
  1046. return z.SetInt64(0), Exact
  1047. case inf:
  1048. return nil, makeAcc(x.neg)
  1049. }
  1050. panic("unreachable")
  1051. }
  1052. // Abs sets z to the (possibly rounded) value |x| (the absolute value of x)
  1053. // and returns z.
  1054. func (z *Float) Abs(x *Float) *Float {
  1055. z.Set(x)
  1056. z.neg = false
  1057. return z
  1058. }
  1059. // Neg sets z to the (possibly rounded) value of x with its sign negated,
  1060. // and returns z.
  1061. func (z *Float) Neg(x *Float) *Float {
  1062. z.Set(x)
  1063. z.neg = !z.neg
  1064. return z
  1065. }
  1066. func validateBinaryOperands(x, y *Float) {
  1067. if !debugFloat {
  1068. // avoid performance bugs
  1069. panic("validateBinaryOperands called but debugFloat is not set")
  1070. }
  1071. if len(x.mant) == 0 {
  1072. panic("empty mantissa for x")
  1073. }
  1074. if len(y.mant) == 0 {
  1075. panic("empty mantissa for y")
  1076. }
  1077. }
  1078. // z = x + y, ignoring signs of x and y for the addition
  1079. // but using the sign of z for rounding the result.
  1080. // x and y must have a non-empty mantissa and valid exponent.
  1081. func (z *Float) uadd(x, y *Float) {
  1082. // Note: This implementation requires 2 shifts most of the
  1083. // time. It is also inefficient if exponents or precisions
  1084. // differ by wide margins. The following article describes
  1085. // an efficient (but much more complicated) implementation
  1086. // compatible with the internal representation used here:
  1087. //
  1088. // Vincent Lefèvre: "The Generic Multiple-Precision Floating-
  1089. // Point Addition With Exact Rounding (as in the MPFR Library)"
  1090. // http://www.vinc17.net/research/papers/rnc6.pdf
  1091. if debugFloat {
  1092. validateBinaryOperands(x, y)
  1093. }
  1094. // compute exponents ex, ey for mantissa with "binary point"
  1095. // on the right (mantissa.0) - use int64 to avoid overflow
  1096. ex := int64(x.exp) - int64(len(x.mant))*_W
  1097. ey := int64(y.exp) - int64(len(y.mant))*_W
  1098. // TODO(gri) having a combined add-and-shift primitive
  1099. // could make this code significantly faster
  1100. switch {
  1101. case ex < ey:
  1102. // cannot re-use z.mant w/o testing for aliasing
  1103. t := nat(nil).shl(y.mant, uint(ey-ex))
  1104. z.mant = z.mant.add(x.mant, t)
  1105. default:
  1106. // ex == ey, no shift needed
  1107. z.mant = z.mant.add(x.mant, y.mant)
  1108. case ex > ey:
  1109. // cannot re-use z.mant w/o testing for aliasing
  1110. t := nat(nil).shl(x.mant, uint(ex-ey))
  1111. z.mant = z.mant.add(t, y.mant)
  1112. ex = ey
  1113. }
  1114. // len(z.mant) > 0
  1115. z.setExpAndRound(ex+int64(len(z.mant))*_W-fnorm(z.mant), 0)
  1116. }
  1117. // z = x - y for |x| > |y|, ignoring signs of x and y for the subtraction
  1118. // but using the sign of z for rounding the result.
  1119. // x and y must have a non-empty mantissa and valid exponent.
  1120. func (z *Float) usub(x, y *Float) {
  1121. // This code is symmetric to uadd.
  1122. // We have not factored the common code out because
  1123. // eventually uadd (and usub) should be optimized
  1124. // by special-casing, and the code will diverge.
  1125. if debugFloat {
  1126. validateBinaryOperands(x, y)
  1127. }
  1128. ex := int64(x.exp) - int64(len(x.mant))*_W
  1129. ey := int64(y.exp) - int64(len(y.mant))*_W
  1130. switch {
  1131. case ex < ey:
  1132. // cannot re-use z.mant w/o testing for aliasing
  1133. t := nat(nil).shl(y.mant, uint(ey-ex))
  1134. z.mant = t.sub(x.mant, t)
  1135. default:
  1136. // ex == ey, no shift needed
  1137. z.mant = z.mant.sub(x.mant, y.mant)
  1138. case ex > ey:
  1139. // cannot re-use z.mant w/o testing for aliasing
  1140. t := nat(nil).shl(x.mant, uint(ex-ey))
  1141. z.mant = t.sub(t, y.mant)
  1142. ex = ey
  1143. }
  1144. // operands may have cancelled each other out
  1145. if len(z.mant) == 0 {
  1146. z.acc = Exact
  1147. z.form = zero
  1148. z.neg = false
  1149. return
  1150. }
  1151. // len(z.mant) > 0
  1152. z.setExpAndRound(ex+int64(len(z.mant))*_W-fnorm(z.mant), 0)
  1153. }
  1154. // z = x * y, ignoring signs of x and y for the multiplication
  1155. // but using the sign of z for rounding the result.
  1156. // x and y must have a non-empty mantissa and valid exponent.
  1157. func (z *Float) umul(x, y *Float) {
  1158. if debugFloat {
  1159. validateBinaryOperands(x, y)
  1160. }
  1161. // Note: This is doing too much work if the precision
  1162. // of z is less than the sum of the precisions of x
  1163. // and y which is often the case (e.g., if all floats
  1164. // have the same precision).
  1165. // TODO(gri) Optimize this for the common case.
  1166. e := int64(x.exp) + int64(y.exp)
  1167. z.mant = z.mant.mul(x.mant, y.mant)
  1168. z.setExpAndRound(e-fnorm(z.mant), 0)
  1169. }
  1170. // z = x / y, ignoring signs of x and y for the division
  1171. // but using the sign of z for rounding the result.
  1172. // x and y must have a non-empty mantissa and valid exponent.
  1173. func (z *Float) uquo(x, y *Float) {
  1174. if debugFloat {
  1175. validateBinaryOperands(x, y)
  1176. }
  1177. // mantissa length in words for desired result precision + 1
  1178. // (at least one extra bit so we get the rounding bit after
  1179. // the division)
  1180. n := int(z.prec/_W) + 1
  1181. // compute adjusted x.mant such that we get enough result precision
  1182. xadj := x.mant
  1183. if d := n - len(x.mant) + len(y.mant); d > 0 {
  1184. // d extra words needed => add d "0 digits" to x
  1185. xadj = make(nat, len(x.mant)+d)
  1186. copy(xadj[d:], x.mant)
  1187. }
  1188. // TODO(gri): If we have too many digits (d < 0), we should be able
  1189. // to shorten x for faster division. But we must be extra careful
  1190. // with rounding in that case.
  1191. // Compute d before division since there may be aliasing of x.mant
  1192. // (via xadj) or y.mant with z.mant.
  1193. d := len(xadj) - len(y.mant)
  1194. // divide
  1195. var r nat
  1196. z.mant, r = z.mant.div(nil, xadj, y.mant)
  1197. e := int64(x.exp) - int64(y.exp) - int64(d-len(z.mant))*_W
  1198. // The result is long enough to include (at least) the rounding bit.
  1199. // If there's a non-zero remainder, the corresponding fractional part
  1200. // (if it were computed), would have a non-zero sticky bit (if it were
  1201. // zero, it couldn't have a non-zero remainder).
  1202. var sbit uint
  1203. if len(r) > 0 {
  1204. sbit = 1
  1205. }
  1206. z.setExpAndRound(e-fnorm(z.mant), sbit)
  1207. }
  1208. // ucmp returns -1, 0, or +1, depending on whether
  1209. // |x| < |y|, |x| == |y|, or |x| > |y|.
  1210. // x and y must have a non-empty mantissa and valid exponent.
  1211. func (x *Float) ucmp(y *Float) int {
  1212. if debugFloat {
  1213. validateBinaryOperands(x, y)
  1214. }
  1215. switch {
  1216. case x.exp < y.exp:
  1217. return -1
  1218. case x.exp > y.exp:
  1219. return +1
  1220. }
  1221. // x.exp == y.exp
  1222. // compare mantissas
  1223. i := len(x.mant)
  1224. j := len(y.mant)
  1225. for i > 0 || j > 0 {
  1226. var xm, ym Word
  1227. if i > 0 {
  1228. i--
  1229. xm = x.mant[i]
  1230. }
  1231. if j > 0 {
  1232. j--
  1233. ym = y.mant[j]
  1234. }
  1235. switch {
  1236. case xm < ym:
  1237. return -1
  1238. case xm > ym:
  1239. return +1
  1240. }
  1241. }
  1242. return 0
  1243. }
  1244. // Handling of sign bit as defined by IEEE 754-2008, section 6.3:
  1245. //
  1246. // When neither the inputs nor result are NaN, the sign of a product or
  1247. // quotient is the exclusive OR of the operands’ signs; the sign of a sum,
  1248. // or of a difference x−y regarded as a sum x+(−y), differs from at most
  1249. // one of the addends’ signs; and the sign of the result of conversions,
  1250. // the quantize operation, the roundToIntegral operations, and the
  1251. // roundToIntegralExact (see 5.3.1) is the sign of the first or only operand.
  1252. // These rules shall apply even when operands or results are zero or infinite.
  1253. //
  1254. // When the sum of two operands with opposite signs (or the difference of
  1255. // two operands with like signs) is exactly zero, the sign of that sum (or
  1256. // difference) shall be +0 in all rounding-direction attributes except
  1257. // roundTowardNegative; under that attribute, the sign of an exact zero
  1258. // sum (or difference) shall be −0. However, x+x = x−(−x) retains the same
  1259. // sign as x even when x is zero.
  1260. //
  1261. // See also: https://play.golang.org/p/RtH3UCt5IH
  1262. // Add sets z to the rounded sum x+y and returns z. If z's precision is 0,
  1263. // it is changed to the larger of x's or y's precision before the operation.
  1264. // Rounding is performed according to z's precision and rounding mode; and
  1265. // z's accuracy reports the result error relative to the exact (not rounded)
  1266. // result. Add panics with ErrNaN if x and y are infinities with opposite
  1267. // signs. The value of z is undefined in that case.
  1268. //
  1269. // BUG(gri) When rounding ToNegativeInf, the sign of Float values rounded to 0 is incorrect.
  1270. func (z *Float) Add(x, y *Float) *Float {
  1271. if debugFloat {
  1272. x.validate()
  1273. y.validate()
  1274. }
  1275. if z.prec == 0 {
  1276. z.prec = umax32(x.prec, y.prec)
  1277. }
  1278. if x.form == finite && y.form == finite {
  1279. // x + y (commom case)
  1280. z.neg = x.neg
  1281. if x.neg == y.neg {
  1282. // x + y == x + y
  1283. // (-x) + (-y) == -(x + y)
  1284. z.uadd(x, y)
  1285. } else {
  1286. // x + (-y) == x - y == -(y - x)
  1287. // (-x) + y == y - x == -(x - y)
  1288. if x.ucmp(y) > 0 {
  1289. z.usub(x, y)
  1290. } else {
  1291. z.neg = !z.neg
  1292. z.usub(y, x)
  1293. }
  1294. }
  1295. return z
  1296. }
  1297. if x.form == inf && y.form == inf && x.neg != y.neg {
  1298. // +Inf + -Inf
  1299. // -Inf + +Inf
  1300. // value of z is undefined but make sure it's valid
  1301. z.acc = Exact
  1302. z.form = zero
  1303. z.neg = false
  1304. panic(ErrNaN{"addition of infinities with opposite signs"})
  1305. }
  1306. if x.form == zero && y.form == zero {
  1307. // ±0 + ±0
  1308. z.acc = Exact
  1309. z.form = zero
  1310. z.neg = x.neg && y.neg // -0 + -0 == -0
  1311. return z
  1312. }
  1313. if x.form == inf || y.form == zero {
  1314. // ±Inf + y
  1315. // x + ±0
  1316. return z.Set(x)
  1317. }
  1318. // ±0 + y
  1319. // x + ±Inf
  1320. return z.Set(y)
  1321. }
  1322. // Sub sets z to the rounded difference x-y and returns z.
  1323. // Precision, rounding, and accuracy reporting are as for Add.
  1324. // Sub panics with ErrNaN if x and y are infinities with equal
  1325. // signs. The value of z is undefined in that case.
  1326. func (z *Float) Sub(x, y *Float) *Float {
  1327. if debugFloat {
  1328. x.validate()
  1329. y.validate()
  1330. }
  1331. if z.prec == 0 {
  1332. z.prec = umax32(x.prec, y.prec)
  1333. }
  1334. if x.form == finite && y.form == finite {
  1335. // x - y (common case)
  1336. z.neg = x.neg
  1337. if x.neg != y.neg {
  1338. // x - (-y) == x + y
  1339. // (-x) - y == -(x + y)
  1340. z.uadd(x, y)
  1341. } else {
  1342. // x - y == x - y == -(y - x)
  1343. // (-x) - (-y) == y - x == -(x - y)
  1344. if x.ucmp(y) > 0 {
  1345. z.usub(x, y)
  1346. } else {
  1347. z.neg = !z.neg
  1348. z.usub(y, x)
  1349. }
  1350. }
  1351. return z
  1352. }
  1353. if x.form == inf && y.form == inf && x.neg == y.neg {
  1354. // +Inf - +Inf
  1355. // -Inf - -Inf
  1356. // value of z is undefined but make sure it's valid
  1357. z.acc = Exact
  1358. z.form = zero
  1359. z.neg = false
  1360. panic(ErrNaN{"subtraction of infinities with equal signs"})
  1361. }
  1362. if x.form == zero && y.form == zero {
  1363. // ±0 - ±0
  1364. z.acc = Exact
  1365. z.form = zero
  1366. z.neg = x.neg && !y.neg // -0 - +0 == -0
  1367. return z
  1368. }
  1369. if x.form == inf || y.form == zero {
  1370. // ±Inf - y
  1371. // x - ±0
  1372. return z.Set(x)
  1373. }
  1374. // ±0 - y
  1375. // x - ±Inf
  1376. return z.Neg(y)
  1377. }
  1378. // Mul sets z to the rounded product x*y and returns z.
  1379. // Precision, rounding, and accuracy reporting are as for Add.
  1380. // Mul panics with ErrNaN if one operand is zero and the other
  1381. // operand an infinity. The value of z is undefined in that case.
  1382. func (z *Float) Mul(x, y *Float) *Float {
  1383. if debugFloat {
  1384. x.validate()
  1385. y.validate()
  1386. }
  1387. if z.prec == 0 {
  1388. z.prec = umax32(x.prec, y.prec)
  1389. }
  1390. z.neg = x.neg != y.neg
  1391. if x.form == finite && y.form == finite {
  1392. // x * y (common case)
  1393. z.umul(x, y)
  1394. return z
  1395. }
  1396. z.acc = Exact
  1397. if x.form == zero && y.form == inf || x.form == inf && y.form == zero {
  1398. // ±0 * ±Inf
  1399. // ±Inf * ±0
  1400. // value of z is undefined but make sure it's valid
  1401. z.form = zero
  1402. z.neg = false
  1403. panic(ErrNaN{"multiplication of zero with infinity"})
  1404. }
  1405. if x.form == inf || y.form == inf {
  1406. // ±Inf * y
  1407. // x * ±Inf
  1408. z.form = inf
  1409. return z
  1410. }
  1411. // ±0 * y
  1412. // x * ±0
  1413. z.form = zero
  1414. return z
  1415. }
  1416. // Quo sets z to the rounded quotient x/y and returns z.
  1417. // Precision, rounding, and accuracy reporting are as for Add.
  1418. // Quo panics with ErrNaN if both operands are zero or infinities.
  1419. // The value of z is undefined in that case.
  1420. func (z *Float) Quo(x, y *Float) *Float {
  1421. if debugFloat {
  1422. x.validate()
  1423. y.validate()
  1424. }
  1425. if z.prec == 0 {
  1426. z.prec = umax32(x.prec, y.prec)
  1427. }
  1428. z.neg = x.neg != y.neg
  1429. if x.form == finite && y.form == finite {
  1430. // x / y (common case)
  1431. z.uquo(x, y)
  1432. return z
  1433. }
  1434. z.acc = Exact
  1435. if x.form == zero && y.form == zero || x.form == inf && y.form == inf {
  1436. // ±0 / ±0
  1437. // ±Inf / ±Inf
  1438. // value of z is undefined but make sure it's valid
  1439. z.form = zero
  1440. z.neg = false
  1441. panic(ErrNaN{"division of zero by zero or infinity by infinity"})
  1442. }
  1443. if x.form == zero || y.form == inf {
  1444. // ±0 / y
  1445. // x / ±Inf
  1446. z.form = zero
  1447. return z
  1448. }
  1449. // x / ±0
  1450. // ±Inf / y
  1451. z.form = inf
  1452. return z
  1453. }
  1454. // Cmp compares x and y and returns:
  1455. //
  1456. // -1 if x < y
  1457. // 0 if x == y (incl. -0 == 0, -Inf == -Inf, and +Inf == +Inf)
  1458. // +1 if x > y
  1459. //
  1460. func (x *Float) Cmp(y *Float) int {
  1461. if debugFloat {
  1462. x.validate()
  1463. y.validate()
  1464. }
  1465. mx := x.ord()
  1466. my := y.ord()
  1467. switch {
  1468. case mx < my:
  1469. return -1
  1470. case mx > my:
  1471. return +1
  1472. }
  1473. // mx == my
  1474. // only if |mx| == 1 we have to compare the mantissae
  1475. switch mx {
  1476. case -1:
  1477. return y.ucmp(x)
  1478. case +1:
  1479. return x.ucmp(y)
  1480. }
  1481. return 0
  1482. }
  1483. // ord classifies x and returns:
  1484. //
  1485. // -2 if -Inf == x
  1486. // -1 if -Inf < x < 0
  1487. // 0 if x == 0 (signed or unsigned)
  1488. // +1 if 0 < x < +Inf
  1489. // +2 if x == +Inf
  1490. //
  1491. func (x *Float) ord() int {
  1492. var m int
  1493. switch x.form {
  1494. case finite:
  1495. m = 1
  1496. case zero:
  1497. return 0
  1498. case inf:
  1499. m = 2
  1500. }
  1501. if x.neg {
  1502. m = -m
  1503. }
  1504. return m
  1505. }
  1506. func umax32(x, y uint32) uint32 {
  1507. if x > y {
  1508. return x
  1509. }
  1510. return y
  1511. }