src/pkg/strconv/extfloat.go - The Go Programming Language

Golang

previous page next page
Source file src/pkg/strconv/extfloat.go

     1	// Copyright 2011 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package strconv
     6	
     7	import "math"
     8	
// An extFloat represents an extended floating-point number, with more
// precision than a float64. It does not try to save bits: the
// number represented by the structure is mant*(2^exp), with a negative
// sign if neg is true.
type extFloat struct {
	mant uint64 // significand; the magnitude of the value is mant * 2^exp
	exp  int    // binary exponent applied to mant
	neg  bool   // true when the represented value is negative
}
    18	
// Powers of ten taken from double-conversion library.
// http://code.google.com/p/double-conversion/
const (
	firstPowerOfTen = -348 // decimal exponent of powersOfTen[0]
	stepPowerOfTen  = 8    // decimal exponent gap between consecutive powersOfTen entries
)
    25	
// smallPowersOfTen holds 10^0 through 10^7 as extFloat values; entry n is
// 10^n, stored with the top bit of the mantissa set. It covers every
// remainder below stepPowerOfTen that is left over after picking an entry
// of powersOfTen.
var smallPowersOfTen = [...]extFloat{
	{1 << 63, -63, false},        // 1
	{0xa << 60, -60, false},      // 1e1
	{0x64 << 57, -57, false},     // 1e2
	{0x3e8 << 54, -54, false},    // 1e3
	{0x2710 << 50, -50, false},   // 1e4
	{0x186a0 << 47, -47, false},  // 1e5
	{0xf4240 << 44, -44, false},  // 1e6
	{0x989680 << 40, -40, false}, // 1e7
}
    36	
// powersOfTen holds 64-bit extFloat approximations of powers of ten from
// 10^-348 to 10^340 in steps of eight decimal orders: entry i approximates
// 10^(firstPowerOfTen + i*stepPowerOfTen). Every mantissa has its top bit
// set.
var powersOfTen = [...]extFloat{
	{0xfa8fd5a0081c0288, -1220, false}, // 10^-348
	{0xbaaee17fa23ebf76, -1193, false}, // 10^-340
	{0x8b16fb203055ac76, -1166, false}, // 10^-332
	{0xcf42894a5dce35ea, -1140, false}, // 10^-324
	{0x9a6bb0aa55653b2d, -1113, false}, // 10^-316
	{0xe61acf033d1a45df, -1087, false}, // 10^-308
	{0xab70fe17c79ac6ca, -1060, false}, // 10^-300
	{0xff77b1fcbebcdc4f, -1034, false}, // 10^-292
	{0xbe5691ef416bd60c, -1007, false}, // 10^-284
	{0x8dd01fad907ffc3c, -980, false},  // 10^-276
	{0xd3515c2831559a83, -954, false},  // 10^-268
	{0x9d71ac8fada6c9b5, -927, false},  // 10^-260
	{0xea9c227723ee8bcb, -901, false},  // 10^-252
	{0xaecc49914078536d, -874, false},  // 10^-244
	{0x823c12795db6ce57, -847, false},  // 10^-236
	{0xc21094364dfb5637, -821, false},  // 10^-228
	{0x9096ea6f3848984f, -794, false},  // 10^-220
	{0xd77485cb25823ac7, -768, false},  // 10^-212
	{0xa086cfcd97bf97f4, -741, false},  // 10^-204
	{0xef340a98172aace5, -715, false},  // 10^-196
	{0xb23867fb2a35b28e, -688, false},  // 10^-188
	{0x84c8d4dfd2c63f3b, -661, false},  // 10^-180
	{0xc5dd44271ad3cdba, -635, false},  // 10^-172
	{0x936b9fcebb25c996, -608, false},  // 10^-164
	{0xdbac6c247d62a584, -582, false},  // 10^-156
	{0xa3ab66580d5fdaf6, -555, false},  // 10^-148
	{0xf3e2f893dec3f126, -529, false},  // 10^-140
	{0xb5b5ada8aaff80b8, -502, false},  // 10^-132
	{0x87625f056c7c4a8b, -475, false},  // 10^-124
	{0xc9bcff6034c13053, -449, false},  // 10^-116
	{0x964e858c91ba2655, -422, false},  // 10^-108
	{0xdff9772470297ebd, -396, false},  // 10^-100
	{0xa6dfbd9fb8e5b88f, -369, false},  // 10^-92
	{0xf8a95fcf88747d94, -343, false},  // 10^-84
	{0xb94470938fa89bcf, -316, false},  // 10^-76
	{0x8a08f0f8bf0f156b, -289, false},  // 10^-68
	{0xcdb02555653131b6, -263, false},  // 10^-60
	{0x993fe2c6d07b7fac, -236, false},  // 10^-52
	{0xe45c10c42a2b3b06, -210, false},  // 10^-44
	{0xaa242499697392d3, -183, false},  // 10^-36
	{0xfd87b5f28300ca0e, -157, false},  // 10^-28
	{0xbce5086492111aeb, -130, false},  // 10^-20
	{0x8cbccc096f5088cc, -103, false},  // 10^-12
	{0xd1b71758e219652c, -77, false},   // 10^-4
	{0x9c40000000000000, -50, false},   // 10^4
	{0xe8d4a51000000000, -24, false},   // 10^12
	{0xad78ebc5ac620000, 3, false},     // 10^20
	{0x813f3978f8940984, 30, false},    // 10^28
	{0xc097ce7bc90715b3, 56, false},    // 10^36
	{0x8f7e32ce7bea5c70, 83, false},    // 10^44
	{0xd5d238a4abe98068, 109, false},   // 10^52
	{0x9f4f2726179a2245, 136, false},   // 10^60
	{0xed63a231d4c4fb27, 162, false},   // 10^68
	{0xb0de65388cc8ada8, 189, false},   // 10^76
	{0x83c7088e1aab65db, 216, false},   // 10^84
	{0xc45d1df942711d9a, 242, false},   // 10^92
	{0x924d692ca61be758, 269, false},   // 10^100
	{0xda01ee641a708dea, 295, false},   // 10^108
	{0xa26da3999aef774a, 322, false},   // 10^116
	{0xf209787bb47d6b85, 348, false},   // 10^124
	{0xb454e4a179dd1877, 375, false},   // 10^132
	{0x865b86925b9bc5c2, 402, false},   // 10^140
	{0xc83553c5c8965d3d, 428, false},   // 10^148
	{0x952ab45cfa97a0b3, 455, false},   // 10^156
	{0xde469fbd99a05fe3, 481, false},   // 10^164
	{0xa59bc234db398c25, 508, false},   // 10^172
	{0xf6c69a72a3989f5c, 534, false},   // 10^180
	{0xb7dcbf5354e9bece, 561, false},   // 10^188
	{0x88fcf317f22241e2, 588, false},   // 10^196
	{0xcc20ce9bd35c78a5, 614, false},   // 10^204
	{0x98165af37b2153df, 641, false},   // 10^212
	{0xe2a0b5dc971f303a, 667, false},   // 10^220
	{0xa8d9d1535ce3b396, 694, false},   // 10^228
	{0xfb9b7cd9a4a7443c, 720, false},   // 10^236
	{0xbb764c4ca7a44410, 747, false},   // 10^244
	{0x8bab8eefb6409c1a, 774, false},   // 10^252
	{0xd01fef10a657842c, 800, false},   // 10^260
	{0x9b10a4e5e9913129, 827, false},   // 10^268
	{0xe7109bfba19c0c9d, 853, false},   // 10^276
	{0xac2820d9623bf429, 880, false},   // 10^284
	{0x80444b5e7aa7cf85, 907, false},   // 10^292
	{0xbf21e44003acdd2d, 933, false},   // 10^300
	{0x8e679c2f5e44ff8f, 960, false},   // 10^308
	{0xd433179d9c8cb841, 986, false},   // 10^316
	{0x9e19db92b4e31ba9, 1013, false},  // 10^324
	{0xeb96bf6ebadf77d9, 1039, false},  // 10^332
	{0xaf87023b9bf0ee6b, 1066, false},  // 10^340
}
   126	
   127	// floatBits returns the bits of the float64 that best approximates
   128	// the extFloat passed as receiver. Overflow is set to true if
   129	// the resulting float64 is ±Inf.
   130	func (f *extFloat) floatBits() (bits uint64, overflow bool) {
   131		flt := &float64info
   132		f.Normalize()
   133	
   134		exp := f.exp + 63
   135	
   136		// Exponent too small.
   137		if exp < flt.bias+1 {
   138			n := flt.bias + 1 - exp
   139			f.mant >>= uint(n)
   140			exp += n
   141		}
   142	
   143		// Extract 1+flt.mantbits bits.
   144		mant := f.mant >> (63 - flt.mantbits)
   145		if f.mant&(1<<(62-flt.mantbits)) != 0 {
   146			// Round up.
   147			mant += 1
   148		}
   149	
   150		// Rounding might have added a bit; shift down.
   151		if mant == 2<<flt.mantbits {
   152			mant >>= 1
   153			exp++
   154		}
   155	
   156		// Infinities.
   157		if exp-flt.bias >= 1<<flt.expbits-1 {
   158			goto overflow
   159		}
   160	
   161		// Denormalized?
   162		if mant&(1<<flt.mantbits) == 0 {
   163			exp = flt.bias
   164		}
   165		goto out
   166	
   167	overflow:
   168		// ±Inf
   169		mant = 0
   170		exp = 1<<flt.expbits - 1 + flt.bias
   171		overflow = true
   172	
   173	out:
   174		// Assemble bits.
   175		bits = mant & (uint64(1)<<flt.mantbits - 1)
   176		bits |= uint64((exp-flt.bias)&(1<<flt.expbits-1)) << flt.mantbits
   177		if f.neg {
   178			bits |= 1 << (flt.mantbits + flt.expbits)
   179		}
   180		return
   181	}
   182	
   183	// Assign sets f to the value of x.
   184	func (f *extFloat) Assign(x float64) {
   185		if x < 0 {
   186			x = -x
   187			f.neg = true
   188		}
   189		x, f.exp = math.Frexp(x)
   190		f.mant = uint64(x * float64(1<<64))
   191		f.exp -= 64
   192	}
   193	
   194	// AssignComputeBounds sets f to the value of x and returns
   195	// lower, upper such that any number in the closed interval
   196	// [lower, upper] is converted back to x.
   197	func (f *extFloat) AssignComputeBounds(x float64) (lower, upper extFloat) {
   198		// Special cases.
   199		bits := math.Float64bits(x)
   200		flt := &float64info
   201		neg := bits>>(flt.expbits+flt.mantbits) != 0
   202		expBiased := int(bits>>flt.mantbits) & (1<<flt.expbits - 1)
   203		mant := bits & (uint64(1)<<flt.mantbits - 1)
   204	
   205		if expBiased == 0 {
   206			// denormalized.
   207			f.mant = mant
   208			f.exp = 1 + flt.bias - int(flt.mantbits)
   209		} else {
   210			f.mant = mant | 1<<flt.mantbits
   211			f.exp = expBiased + flt.bias - int(flt.mantbits)
   212		}
   213		f.neg = neg
   214	
   215		upper = extFloat{mant: 2*f.mant + 1, exp: f.exp - 1, neg: f.neg}
   216		if mant != 0 || expBiased == 1 {
   217			lower = extFloat{mant: 2*f.mant - 1, exp: f.exp - 1, neg: f.neg}
   218		} else {
   219			lower = extFloat{mant: 4*f.mant - 1, exp: f.exp - 2, neg: f.neg}
   220		}
   221		return
   222	}
   223	
   224	// Normalize normalizes f so that the highest bit of the mantissa is
   225	// set, and returns the number by which the mantissa was left-shifted.
   226	func (f *extFloat) Normalize() uint {
   227		if f.mant == 0 {
   228			return 0
   229		}
   230		exp_before := f.exp
   231		for f.mant < (1 << 55) {
   232			f.mant <<= 8
   233			f.exp -= 8
   234		}
   235		for f.mant < (1 << 63) {
   236			f.mant <<= 1
   237			f.exp -= 1
   238		}
   239		return uint(exp_before - f.exp)
   240	}
   241	
   242	// Multiply sets f to the product f*g: the result is correctly rounded,
   243	// but not normalized.
   244	func (f *extFloat) Multiply(g extFloat) {
   245		fhi, flo := f.mant>>32, uint64(uint32(f.mant))
   246		ghi, glo := g.mant>>32, uint64(uint32(g.mant))
   247	
   248		// Cross products.
   249		cross1 := fhi * glo
   250		cross2 := flo * ghi
   251	
   252		// f.mant*g.mant is fhi*ghi << 64 + (cross1+cross2) << 32 + flo*glo
   253		f.mant = fhi*ghi + (cross1 >> 32) + (cross2 >> 32)
   254		rem := uint64(uint32(cross1)) + uint64(uint32(cross2)) + ((flo * glo) >> 32)
   255		// Round up.
   256		rem += (1 << 31)
   257	
   258		f.mant += (rem >> 32)
   259		f.exp = f.exp + g.exp + 64
   260	}
   261	
// uint64pow10 lists the powers of ten from 10^0 to 10^19 as uint64
// values; entry n is 10^n.
var uint64pow10 = [...]uint64{
	1, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
	1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
}
   266	
   267	// AssignDecimal sets f to an approximate value of the decimal d. It
   268	// returns true if the value represented by f is guaranteed to be the
   269	// best approximation of d after being rounded to a float64. 
   270	func (f *extFloat) AssignDecimal(d *decimal) (ok bool) {
   271		const uint64digits = 19
   272		const errorscale = 8
   273		mant10, digits := d.atou64()
   274		exp10 := d.dp - digits
   275		errors := 0 // An upper bound for error, computed in errorscale*ulp.
   276	
   277		if digits < d.nd {
   278			// the decimal number was truncated.
   279			errors += errorscale / 2
   280		}
   281	
   282		f.mant = mant10
   283		f.exp = 0
   284		f.neg = d.neg
   285	
   286		// Multiply by powers of ten.
   287		i := (exp10 - firstPowerOfTen) / stepPowerOfTen
   288		if exp10 < firstPowerOfTen || i >= len(powersOfTen) {
   289			return false
   290		}
   291		adjExp := (exp10 - firstPowerOfTen) % stepPowerOfTen
   292	
   293		// We multiply by exp%step
   294		if digits+adjExp <= uint64digits {
   295			// We can multiply the mantissa
   296			f.mant *= uint64(float64pow10[adjExp])
   297			f.Normalize()
   298		} else {
   299			f.Normalize()
   300			f.Multiply(smallPowersOfTen[adjExp])
   301			errors += errorscale / 2
   302		}
   303	
   304		// We multiply by 10 to the exp - exp%step.
   305		f.Multiply(powersOfTen[i])
   306		if errors > 0 {
   307			errors += 1
   308		}
   309		errors += errorscale / 2
   310	
   311		// Normalize
   312		shift := f.Normalize()
   313		errors <<= shift
   314	
   315		// Now f is a good approximation of the decimal.
   316		// Check whether the error is too large: that is, if the mantissa
   317		// is perturbated by the error, the resulting float64 will change.
   318		// The 64 bits mantissa is 1 + 52 bits for float64 + 11 extra bits.
   319		//
   320		// In many cases the approximation will be good enough.
   321		const denormalExp = -1023 - 63
   322		flt := &float64info
   323		var extrabits uint
   324		if f.exp <= denormalExp {
   325			extrabits = uint(63 - flt.mantbits + 1 + uint(denormalExp-f.exp))
   326		} else {
   327			extrabits = uint(63 - flt.mantbits)
   328		}
   329	
   330		halfway := uint64(1) << (extrabits - 1)
   331		mant_extra := f.mant & (1<<extrabits - 1)
   332	
   333		// Do a signed comparison here! If the error estimate could make
   334		// the mantissa round differently for the conversion to double,
   335		// then we can't give a definite answer.
   336		if int64(halfway)-int64(errors) < int64(mant_extra) &&
   337			int64(mant_extra) < int64(halfway)+int64(errors) {
   338			return false
   339		}
   340		return true
   341	}
   342	
   343	// Frexp10 is an analogue of math.Frexp for decimal powers. It scales
   344	// f by an approximate power of ten 10^-exp, and returns exp10, so
   345	// that f*10^exp10 has the same value as the old f, up to an ulp,
   346	// as well as the index of 10^-exp in the powersOfTen table.
   347	// The arguments expMin and expMax constrain the final value of the
   348	// binary exponent of f.
   349	func (f *extFloat) frexp10(expMin, expMax int) (exp10, index int) {
   350		// it is illegal to call this function with a too restrictive exponent range.
   351		if expMax-expMin <= 25 {
   352			panic("strconv: invalid exponent range")
   353		}
   354		// Find power of ten such that x * 10^n has a binary exponent
   355		// between expMin and expMax
   356		approxExp10 := -(f.exp + 100) * 28 / 93 // log(10)/log(2) is close to 93/28.
   357		i := (approxExp10 - firstPowerOfTen) / stepPowerOfTen
   358	Loop:
   359		for {
   360			exp := f.exp + powersOfTen[i].exp + 64
   361			switch {
   362			case exp < expMin:
   363				i++
   364			case exp > expMax:
   365				i--
   366			default:
   367				break Loop
   368			}
   369		}
   370		// Apply the desired decimal shift on f. It will have exponent
   371		// in the desired range. This is multiplication by 10^-exp10.
   372		f.Multiply(powersOfTen[i])
   373	
   374		return -(firstPowerOfTen + i*stepPowerOfTen), i
   375	}
   376	
   377	// frexp10Many applies a common shift by a power of ten to a, b, c.
   378	func frexp10Many(expMin, expMax int, a, b, c *extFloat) (exp10 int) {
   379		exp10, i := c.frexp10(expMin, expMax)
   380		a.Multiply(powersOfTen[i])
   381		b.Multiply(powersOfTen[i])
   382		return
   383	}
   384	
   385	// ShortestDecimal stores in d the shortest decimal representation of f
   386	// which belongs to the open interval (lower, upper), where f is supposed
   387	// to lie. It returns false whenever the result is unsure. The implementation
   388	// uses the Grisu3 algorithm.
   389	func (f *extFloat) ShortestDecimal(d *decimal, lower, upper *extFloat) bool {
   390		if f.mant == 0 {
   391			d.d[0] = '0'
   392			d.nd = 1
   393			d.dp = 0
   394			d.neg = f.neg
   395		}
   396		const minExp = -60
   397		const maxExp = -32
   398		upper.Normalize()
   399		// Uniformize exponents.
   400		if f.exp > upper.exp {
   401			f.mant <<= uint(f.exp - upper.exp)
   402			f.exp = upper.exp
   403		}
   404		if lower.exp > upper.exp {
   405			lower.mant <<= uint(lower.exp - upper.exp)
   406			lower.exp = upper.exp
   407		}
   408	
   409		exp10 := frexp10Many(minExp, maxExp, lower, f, upper)
   410		// Take a safety margin due to rounding in frexp10Many, but we lose precision.
   411		upper.mant++
   412		lower.mant--
   413	
   414		// The shortest representation of f is either rounded up or down, but
   415		// in any case, it is a truncation of upper.
   416		shift := uint(-upper.exp)
   417		integer := uint32(upper.mant >> shift)
   418		fraction := upper.mant - (uint64(integer) << shift)
   419	
   420		// How far we can go down from upper until the result is wrong.
   421		allowance := upper.mant - lower.mant
   422		// How far we should go to get a very precise result.
   423		targetDiff := upper.mant - f.mant
   424	
   425		// Count integral digits: there are at most 10.
   426		var integerDigits int
   427		for i, pow := range uint64pow10 {
   428			if uint64(integer) >= pow {
   429				integerDigits = i + 1
   430			}
   431		}
   432		for i := 0; i < integerDigits; i++ {
   433			pow := uint64pow10[integerDigits-i-1]
   434			digit := integer / uint32(pow)
   435			d.d[i] = byte(digit + '0')
   436			integer -= digit * uint32(pow)
   437			// evaluate whether we should stop.
   438			if currentDiff := uint64(integer)<<shift + fraction; currentDiff < allowance {
   439				d.nd = i + 1
   440				d.dp = integerDigits + exp10
   441				d.neg = f.neg
   442				// Sometimes allowance is so large the last digit might need to be
   443				// decremented to get closer to f.
   444				return adjustLastDigit(d, currentDiff, targetDiff, allowance, pow<<shift, 2)
   445			}
   446		}
   447		d.nd = integerDigits
   448		d.dp = d.nd + exp10
   449		d.neg = f.neg
   450	
   451		// Compute digits of the fractional part. At each step fraction does not
   452		// overflow. The choice of minExp implies that fraction is less than 2^60.
   453		var digit int
   454		multiplier := uint64(1)
   455		for {
   456			fraction *= 10
   457			multiplier *= 10
   458			digit = int(fraction >> shift)
   459			d.d[d.nd] = byte(digit + '0')
   460			d.nd++
   461			fraction -= uint64(digit) << shift
   462			if fraction < allowance*multiplier {
   463				// We are in the admissible range. Note that if allowance is about to
   464				// overflow, that is, allowance > 2^64/10, the condition is automatically
   465				// true due to the limited range of fraction.
   466				return adjustLastDigit(d,
   467					fraction, targetDiff*multiplier, allowance*multiplier,
   468					1<<shift, multiplier*2)
   469			}
   470		}
   471		return false
   472	}
   473	
   474	// adjustLastDigit modifies d = x-currentDiff*ε, to get closest to 
   475	// d = x-targetDiff*ε, without becoming smaller than x-maxDiff*ε.
   476	// It assumes that a decimal digit is worth ulpDecimal*ε, and that
   477	// all data is known with a error estimate of ulpBinary*ε.
   478	func adjustLastDigit(d *decimal, currentDiff, targetDiff, maxDiff, ulpDecimal, ulpBinary uint64) bool {
   479		if ulpDecimal < 2*ulpBinary {
   480			// Approximation is too wide.
   481			return false
   482		}
   483		for currentDiff+ulpDecimal/2+ulpBinary < targetDiff {
   484			d.d[d.nd-1]--
   485			currentDiff += ulpDecimal
   486		}
   487		if currentDiff+ulpDecimal <= targetDiff+ulpDecimal/2+ulpBinary {
   488			// we have two choices, and don't know what to do.
   489			return false
   490		}
   491		if currentDiff < ulpBinary || currentDiff > maxDiff-ulpBinary {
   492			// we went too far
   493			return false
   494		}
   495		if d.nd == 1 && d.d[0] == '0' {
   496			// the number has actually reached zero.
   497			d.nd = 0
   498			d.dp = 0
   499		}
   500		return true
   501	}
previous page start next page