src/pkg/math/expm1.go - The Go Programming Language

Golang

previous page next page
Source file src/pkg/math/expm1.go

     1	// Copyright 2010 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package math
     6	
     7	// The original C code, the long comment, and the constants
     8	// below are from FreeBSD's /usr/src/lib/msun/src/s_expm1.c
     9	// and came with this notice.  The go code is a simplified
    10	// version of the original C.
    11	//
    12	// ====================================================
    13	// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
    14	//
    15	// Developed at SunPro, a Sun Microsystems, Inc. business.
    16	// Permission to use, copy, modify, and distribute this
    17	// software is freely granted, provided that this notice
    18	// is preserved.
    19	// ====================================================
    20	//
    21	// expm1(x)
    22	// Returns exp(x)-1, the exponential of x minus 1.
    23	//
    24	// Method
    25	//   1. Argument reduction:
    26	//      Given x, find r and integer k such that
    27	//
    28	//               x = k*ln2 + r,  |r| <= 0.5*ln2 ~ 0.34658
    29	//
    30	//      Here a correction term c will be computed to compensate
    31	//      the error in r when rounded to a floating-point number.
    32	//
    33	//   2. Approximating expm1(r) by a special rational function on
    34	//      the interval [0,0.34658]:
    35	//      Since
    36	//          r*(exp(r)+1)/(exp(r)-1) = 2+ r**2/6 - r**4/360 + ...
    37	//      we define R1(r*r) by
    38	//          r*(exp(r)+1)/(exp(r)-1) = 2+ r**2/6 * R1(r*r)
    39	//      That is,
    40	//          R1(r**2) = 6/r *((exp(r)+1)/(exp(r)-1) - 2/r)
    41	//                   = 6/r * ( 1 + 2.0*(1/(exp(r)-1) - 1/r))
    42	//                   = 1 - r**2/60 + r**4/2520 - r**6/100800 + ...
    43	//      We use a special Reme algorithm on [0,0.347] to generate
    44	//      a polynomial of degree 5 in r*r to approximate R1. The
    45	//      maximum error of this polynomial approximation is bounded
    46	//      by 2**-61. In other words,
    47	//          R1(z) ~ 1.0 + Q1*z + Q2*z**2 + Q3*z**3 + Q4*z**4 + Q5*z**5
    48	//      where   Q1  =  -1.6666666666666567384E-2,
    49	//              Q2  =   3.9682539681370365873E-4,
    50	//              Q3  =  -9.9206344733435987357E-6,
    51	//              Q4  =   2.5051361420808517002E-7,
    52	//              Q5  =  -6.2843505682382617102E-9;
    53	//      (where z=r*r, and the values of Q1 to Q5 are listed below)
    54	//      with error bounded by
    55	//          |                  5           |     -61
    56	//          | 1.0+Q1*z+...+Q5*z   -  R1(z) | <= 2
    57	//          |                              |
    58	//
    59	//      expm1(r) = exp(r)-1 is then computed by the following
    60	//      specific way which minimize the accumulation rounding error:
    61	//                             2     3
    62	//                            r     r    [ 3 - (R1 + R1*r/2)  ]
    63	//            expm1(r) = r + --- + --- * [--------------------]
    64	//                            2     2    [ 6 - r*(3 - R1*r/2) ]
    65	//
    66	//      To compensate the error in the argument reduction, we use
    67	//              expm1(r+c) = expm1(r) + c + expm1(r)*c
    68	//                         ~ expm1(r) + c + r*c
    69	//      Thus c+r*c will be added in as the correction terms for
    70	//      expm1(r+c). Now rearrange the term to avoid optimization
    71	//      screw up:
    72	//                      (      2                                    2 )
    73	//                      ({  ( r    [ R1 -  (3 - R1*r/2) ]  )  }    r  )
    74	//       expm1(r+c)~r - ({r*(--- * [--------------------]-c)-c} - --- )
    75	//                      ({  ( 2    [ 6 - r*(3 - R1*r/2) ]  )  }    2  )
    76	//                      (                                             )
    77	//
    78	//                 = r - E
    79	//   3. Scale back to obtain expm1(x):
    80	//      From step 1, we have
    81	//         expm1(x) = either 2**k*[expm1(r)+1] - 1
    82	//                  = or     2**k*[expm1(r) + (1-2**-k)]
    83	//   4. Implementation notes:
    84	//      (A). To save one multiplication, we scale the coefficient Qi
    85	//           to Qi*2**i, and replace z by (x**2)/2.
    86	//      (B). To achieve maximum accuracy, we compute expm1(x) by
    87	//        (i)   if x < -56*ln2, return -1.0, (raise inexact if x!=inf)
    88	//        (ii)  if k=0, return r-E
    89	//        (iii) if k=-1, return 0.5*(r-E)-0.5
    90	//        (iv)  if k=1 if r < -0.25, return 2*((r+0.5)- E)
    91	//                     else          return  1.0+2.0*(r-E);
    92	//        (v)   if (k<-2||k>56) return 2**k(1-(E-r)) - 1 (or exp(x)-1)
    93	//        (vi)  if k <= 20, return 2**k((1-2**-k)-(E-r)), else
    94	//        (vii) return 2**k(1-((E+2**-k)-r))
    95	//
    96	// Special cases:
    97	//      expm1(INF) is INF, expm1(NaN) is NaN;
    98	//      expm1(-INF) is -1, and
    99	//      for finite argument, only expm1(0)=0 is exact.
   100	//
   101	// Accuracy:
   102	//      according to an error analysis, the error is always less than
   103	//      1 ulp (unit in the last place).
   104	//
   105	// Misc. info.
   106	//      For IEEE double
   107	//          if x >  7.09782712893383973096e+02 then expm1(x) overflow
   108	//
   109	// Constants:
   110	// The hexadecimal values are the intended ones for the following
   111	// constants. The decimal values may be used, provided that the
   112	// compiler will convert from decimal to binary accurately enough
   113	// to produce the hexadecimal values shown.
   114	//
   115	
   116	// Expm1 returns e**x - 1, the base-e exponential of x minus 1.
   117	// It is more accurate than Exp(x) - 1 when x is near zero.
   118	//
   119	// Special cases are:
   120	//	Expm1(+Inf) = +Inf
   121	//	Expm1(-Inf) = -1
   122	//	Expm1(NaN) = NaN
   123	// Very large values overflow to -1 or +Inf.
   124	func Expm1(x float64) float64
   125	
   126	func expm1(x float64) float64 {
   127		const (
   128			Othreshold = 7.09782712893383973096e+02 // 0x40862E42FEFA39EF
   129			Ln2X56     = 3.88162421113569373274e+01 // 0x4043687a9f1af2b1
   130			Ln2HalfX3  = 1.03972077083991796413e+00 // 0x3ff0a2b23f3bab73
   131			Ln2Half    = 3.46573590279972654709e-01 // 0x3fd62e42fefa39ef
   132			Ln2Hi      = 6.93147180369123816490e-01 // 0x3fe62e42fee00000
   133			Ln2Lo      = 1.90821492927058770002e-10 // 0x3dea39ef35793c76
   134			InvLn2     = 1.44269504088896338700e+00 // 0x3ff71547652b82fe
   135			Tiny       = 1.0 / (1 << 54)            // 2**-54 = 0x3c90000000000000
   136			// scaled coefficients related to expm1
   137			Q1 = -3.33333333333331316428e-02 // 0xBFA11111111110F4
   138			Q2 = 1.58730158725481460165e-03  // 0x3F5A01A019FE5585
   139			Q3 = -7.93650757867487942473e-05 // 0xBF14CE199EAADBB7
   140			Q4 = 4.00821782732936239552e-06  // 0x3ED0CFCA86E65239
   141			Q5 = -2.01099218183624371326e-07 // 0xBE8AFDB76E09C32D
   142		)
   143	
   144		// special cases
   145		switch {
   146		case IsInf(x, 1) || IsNaN(x):
   147			return x
   148		case IsInf(x, -1):
   149			return -1
   150		}
   151	
   152		absx := x
   153		sign := false
   154		if x < 0 {
   155			absx = -absx
   156			sign = true
   157		}
   158	
   159		// filter out huge argument
   160		if absx >= Ln2X56 { // if |x| >= 56 * ln2
   161			if absx >= Othreshold { // if |x| >= 709.78...
   162				return Inf(1) // overflow
   163			}
   164			if sign {
   165				return -1 // x < -56*ln2, return -1.0
   166			}
   167		}
   168	
   169		// argument reduction
   170		var c float64
   171		var k int
   172		if absx > Ln2Half { // if  |x| > 0.5 * ln2
   173			var hi, lo float64
   174			if absx < Ln2HalfX3 { // and |x| < 1.5 * ln2
   175				if !sign {
   176					hi = x - Ln2Hi
   177					lo = Ln2Lo
   178					k = 1
   179				} else {
   180					hi = x + Ln2Hi
   181					lo = -Ln2Lo
   182					k = -1
   183				}
   184			} else {
   185				if !sign {
   186					k = int(InvLn2*x + 0.5)
   187				} else {
   188					k = int(InvLn2*x - 0.5)
   189				}
   190				t := float64(k)
   191				hi = x - t*Ln2Hi // t * Ln2Hi is exact here
   192				lo = t * Ln2Lo
   193			}
   194			x = hi - lo
   195			c = (hi - x) - lo
   196		} else if absx < Tiny { // when |x| < 2**-54, return x
   197			return x
   198		} else {
   199			k = 0
   200		}
   201	
   202		// x is now in primary range
   203		hfx := 0.5 * x
   204		hxs := x * hfx
   205		r1 := 1 + hxs*(Q1+hxs*(Q2+hxs*(Q3+hxs*(Q4+hxs*Q5))))
   206		t := 3 - r1*hfx
   207		e := hxs * ((r1 - t) / (6.0 - x*t))
   208		if k != 0 {
   209			e = (x*(e-c) - c)
   210			e -= hxs
   211			switch {
   212			case k == -1:
   213				return 0.5*(x-e) - 0.5
   214			case k == 1:
   215				if x < -0.25 {
   216					return -2 * (e - (x + 0.5))
   217				}
   218				return 1 + 2*(x-e)
   219			case k <= -2 || k > 56: // suffice to return exp(x)-1
   220				y := 1 - (e - x)
   221				y = Float64frombits(Float64bits(y) + uint64(k)<<52) // add k to y's exponent
   222				return y - 1
   223			}
   224			if k < 20 {
   225				t := Float64frombits(0x3ff0000000000000 - (0x20000000000000 >> uint(k))) // t=1-2**-k
   226				y := t - (e - x)
   227				y = Float64frombits(Float64bits(y) + uint64(k)<<52) // add k to y's exponent
   228				return y
   229			}
   230			t := Float64frombits(uint64((0x3ff - k) << 52)) // 2**-k
   231			y := x - (e + t)
   232			y += 1
   233			y = Float64frombits(Float64bits(y) + uint64(k)<<52) // add k to y's exponent
   234			return y
   235		}
   236		return x - (x*e - hxs) // c is 0
   237	}
previous page start next page