src/pkg/math/big/nat.go - The Go Programming Language

Golang

previous page next page
Source file src/pkg/math/big/nat.go

     1	// Copyright 2009 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// Package big implements multi-precision arithmetic (big numbers).
     6	// The following numeric types are supported:
     7	//
     8	//	- Int	signed integers
     9	//	- Rat	rational numbers
    10	//
    11	// Methods are typically of the form:
    12	//
    13	//	func (z *Int) Op(x, y *Int) *Int	(similar for *Rat)
    14	//
    15	// and implement operations z = x Op y with the result as receiver; if it
    16	// is one of the operands it may be overwritten (and its memory reused).
    17	// To enable chaining of operations, the result is also returned. Methods
    18	// returning a result other than *Int or *Rat take one of the operands as
    19	// the receiver.
    20	//
    21	package big
    22	
    23	// This file contains operations on unsigned multi-precision integers.
    24	// These are the building blocks for the operations on signed integers
    25	// and rationals.
    26	
    27	import (
    28		"errors"
    29		"io"
    30		"math"
    31		"math/rand"
    32		"sync"
    33	)
    34	
    35	// An unsigned integer x of the form
    36	//
    37	//   x = x[n-1]*_B^(n-1) + x[n-2]*_B^(n-2) + ... + x[1]*_B + x[0]
    38	//
    39	// with 0 <= x[i] < _B and 0 <= i < n is stored in a slice of length n,
    40	// with the digits x[i] as the slice elements.
    41	//
    42	// A number is normalized if the slice contains no leading 0 digits.
    43	// During arithmetic operations, denormalized values may occur but are
    44	// always normalized before returning the final result. The normalized
    45	// representation of 0 is the empty or nil slice (length = 0).
    46	//
    47	type nat []Word
    48	
    49	var (
    50		natOne = nat{1}
    51		natTwo = nat{2}
    52		natTen = nat{10}
    53	)
    54	
    55	func (z nat) clear() {
    56		for i := range z {
    57			z[i] = 0
    58		}
    59	}
    60	
    61	func (z nat) norm() nat {
    62		i := len(z)
    63		for i > 0 && z[i-1] == 0 {
    64			i--
    65		}
    66		return z[0:i]
    67	}
    68	
    69	func (z nat) make(n int) nat {
    70		if n <= cap(z) {
    71			return z[0:n] // reuse z
    72		}
    73		// Choosing a good value for e has significant performance impact
    74		// because it increases the chance that a value can be reused.
    75		const e = 4 // extra capacity
    76		return make(nat, n, n+e)
    77	}
    78	
    79	func (z nat) setWord(x Word) nat {
    80		if x == 0 {
    81			return z.make(0)
    82		}
    83		z = z.make(1)
    84		z[0] = x
    85		return z
    86	}
    87	
    88	func (z nat) setUint64(x uint64) nat {
    89		// single-digit values
    90		if w := Word(x); uint64(w) == x {
    91			return z.setWord(w)
    92		}
    93	
    94		// compute number of words n required to represent x
    95		n := 0
    96		for t := x; t > 0; t >>= _W {
    97			n++
    98		}
    99	
   100		// split x into n words
   101		z = z.make(n)
   102		for i := range z {
   103			z[i] = Word(x & _M)
   104			x >>= _W
   105		}
   106	
   107		return z
   108	}
   109	
   110	func (z nat) set(x nat) nat {
   111		z = z.make(len(x))
   112		copy(z, x)
   113		return z
   114	}
   115	
   116	func (z nat) add(x, y nat) nat {
   117		m := len(x)
   118		n := len(y)
   119	
   120		switch {
   121		case m < n:
   122			return z.add(y, x)
   123		case m == 0:
   124			// n == 0 because m >= n; result is 0
   125			return z.make(0)
   126		case n == 0:
   127			// result is x
   128			return z.set(x)
   129		}
   130		// m > 0
   131	
   132		z = z.make(m + 1)
   133		c := addVV(z[0:n], x, y)
   134		if m > n {
   135			c = addVW(z[n:m], x[n:], c)
   136		}
   137		z[m] = c
   138	
   139		return z.norm()
   140	}
   141	
   142	func (z nat) sub(x, y nat) nat {
   143		m := len(x)
   144		n := len(y)
   145	
   146		switch {
   147		case m < n:
   148			panic("underflow")
   149		case m == 0:
   150			// n == 0 because m >= n; result is 0
   151			return z.make(0)
   152		case n == 0:
   153			// result is x
   154			return z.set(x)
   155		}
   156		// m > 0
   157	
   158		z = z.make(m)
   159		c := subVV(z[0:n], x, y)
   160		if m > n {
   161			c = subVW(z[n:], x[n:], c)
   162		}
   163		if c != 0 {
   164			panic("underflow")
   165		}
   166	
   167		return z.norm()
   168	}
   169	
   170	func (x nat) cmp(y nat) (r int) {
   171		m := len(x)
   172		n := len(y)
   173		if m != n || m == 0 {
   174			switch {
   175			case m < n:
   176				r = -1
   177			case m > n:
   178				r = 1
   179			}
   180			return
   181		}
   182	
   183		i := m - 1
   184		for i > 0 && x[i] == y[i] {
   185			i--
   186		}
   187	
   188		switch {
   189		case x[i] < y[i]:
   190			r = -1
   191		case x[i] > y[i]:
   192			r = 1
   193		}
   194		return
   195	}
   196	
   197	func (z nat) mulAddWW(x nat, y, r Word) nat {
   198		m := len(x)
   199		if m == 0 || y == 0 {
   200			return z.setWord(r) // result is r
   201		}
   202		// m > 0
   203	
   204		z = z.make(m + 1)
   205		z[m] = mulAddVWW(z[0:m], x, y, r)
   206	
   207		return z.norm()
   208	}
   209	
   210	// basicMul multiplies x and y and leaves the result in z.
   211	// The (non-normalized) result is placed in z[0 : len(x) + len(y)].
   212	func basicMul(z, x, y nat) {
   213		z[0 : len(x)+len(y)].clear() // initialize z
   214		for i, d := range y {
   215			if d != 0 {
   216				z[len(x)+i] = addMulVVW(z[i:i+len(x)], x, d)
   217			}
   218		}
   219	}
   220	
   221	// Fast version of z[0:n+n>>1].add(z[0:n+n>>1], x[0:n]) w/o bounds checks.
   222	// Factored out for readability - do not use outside karatsuba.
   223	func karatsubaAdd(z, x nat, n int) {
   224		if c := addVV(z[0:n], z, x); c != 0 {
   225			addVW(z[n:n+n>>1], z[n:], c)
   226		}
   227	}
   228	
   229	// Like karatsubaAdd, but does subtract.
   230	func karatsubaSub(z, x nat, n int) {
   231		if c := subVV(z[0:n], z, x); c != 0 {
   232			subVW(z[n:n+n>>1], z[n:], c)
   233		}
   234	}
   235	
   236	// Operands that are shorter than karatsubaThreshold are multiplied using
   237	// "grade school" multiplication; for longer operands the Karatsuba algorithm
   238	// is used.
   239	var karatsubaThreshold int = 32 // computed by calibrate.go
   240	
   241	// karatsuba multiplies x and y and leaves the result in z.
   242	// Both x and y must have the same length n and n must be a
   243	// power of 2. The result vector z must have len(z) >= 6*n.
   244	// The (non-normalized) result is placed in z[0 : 2*n].
   245	func karatsuba(z, x, y nat) {
   246		n := len(y)
   247	
   248		// Switch to basic multiplication if numbers are odd or small.
   249		// (n is always even if karatsubaThreshold is even, but be
   250		// conservative)
   251		if n&1 != 0 || n < karatsubaThreshold || n < 2 {
   252			basicMul(z, x, y)
   253			return
   254		}
   255		// n&1 == 0 && n >= karatsubaThreshold && n >= 2
   256	
   257		// Karatsuba multiplication is based on the observation that
   258		// for two numbers x and y with:
   259		//
   260		//   x = x1*b + x0
   261		//   y = y1*b + y0
   262		//
   263		// the product x*y can be obtained with 3 products z2, z1, z0
   264		// instead of 4:
   265		//
   266		//   x*y = x1*y1*b*b + (x1*y0 + x0*y1)*b + x0*y0
   267		//       =    z2*b*b +              z1*b +    z0
   268		//
   269		// with:
   270		//
   271		//   xd = x1 - x0
   272		//   yd = y0 - y1
   273		//
   274		//   z1 =      xd*yd                    + z1 + z0
   275		//      = (x1-x0)*(y0 - y1)             + z1 + z0
   276		//      = x1*y0 - x1*y1 - x0*y0 + x0*y1 + z1 + z0
   277		//      = x1*y0 -    z1 -    z0 + x0*y1 + z1 + z0
   278		//      = x1*y0                 + x0*y1
   279	
   280		// split x, y into "digits"
   281		n2 := n >> 1              // n2 >= 1
   282		x1, x0 := x[n2:], x[0:n2] // x = x1*b + y0
   283		y1, y0 := y[n2:], y[0:n2] // y = y1*b + y0
   284	
   285		// z is used for the result and temporary storage:
   286		//
   287		//   6*n     5*n     4*n     3*n     2*n     1*n     0*n
   288		// z = [z2 copy|z0 copy| xd*yd | yd:xd | x1*y1 | x0*y0 ]
   289		//
   290		// For each recursive call of karatsuba, an unused slice of
   291		// z is passed in that has (at least) half the length of the
   292		// caller's z.
   293	
   294		// compute z0 and z2 with the result "in place" in z
   295		karatsuba(z, x0, y0)     // z0 = x0*y0
   296		karatsuba(z[n:], x1, y1) // z2 = x1*y1
   297	
   298		// compute xd (or the negative value if underflow occurs)
   299		s := 1 // sign of product xd*yd
   300		xd := z[2*n : 2*n+n2]
   301		if subVV(xd, x1, x0) != 0 { // x1-x0
   302			s = -s
   303			subVV(xd, x0, x1) // x0-x1
   304		}
   305	
   306		// compute yd (or the negative value if underflow occurs)
   307		yd := z[2*n+n2 : 3*n]
   308		if subVV(yd, y0, y1) != 0 { // y0-y1
   309			s = -s
   310			subVV(yd, y1, y0) // y1-y0
   311		}
   312	
   313		// p = (x1-x0)*(y0-y1) == x1*y0 - x1*y1 - x0*y0 + x0*y1 for s > 0
   314		// p = (x0-x1)*(y0-y1) == x0*y0 - x0*y1 - x1*y0 + x1*y1 for s < 0
   315		p := z[n*3:]
   316		karatsuba(p, xd, yd)
   317	
   318		// save original z2:z0
   319		// (ok to use upper half of z since we're done recursing)
   320		r := z[n*4:]
   321		copy(r, z)
   322	
   323		// add up all partial products
   324		//
   325		//   2*n     n     0
   326		// z = [ z2  | z0  ]
   327		//   +    [ z0  ]
   328		//   +    [ z2  ]
   329		//   +    [  p  ]
   330		//
   331		karatsubaAdd(z[n2:], r, n)
   332		karatsubaAdd(z[n2:], r[n:], n)
   333		if s > 0 {
   334			karatsubaAdd(z[n2:], p, n)
   335		} else {
   336			karatsubaSub(z[n2:], p, n)
   337		}
   338	}
   339	
   340	// alias returns true if x and y share the same base array.
   341	func alias(x, y nat) bool {
   342		return cap(x) > 0 && cap(y) > 0 && &x[0:cap(x)][cap(x)-1] == &y[0:cap(y)][cap(y)-1]
   343	}
   344	
   345	// addAt implements z += x*(1<<(_W*i)); z must be long enough.
   346	// (we don't use nat.add because we need z to stay the same
   347	// slice, and we don't need to normalize z after each addition)
   348	func addAt(z, x nat, i int) {
   349		if n := len(x); n > 0 {
   350			if c := addVV(z[i:i+n], z[i:], x); c != 0 {
   351				j := i + n
   352				if j < len(z) {
   353					addVW(z[j:], z[j:], c)
   354				}
   355			}
   356		}
   357	}
   358	
   359	func max(x, y int) int {
   360		if x > y {
   361			return x
   362		}
   363		return y
   364	}
   365	
   366	// karatsubaLen computes an approximation to the maximum k <= n such that
   367	// k = p<<i for a number p <= karatsubaThreshold and an i >= 0. Thus, the
   368	// result is the largest number that can be divided repeatedly by 2 before
   369	// becoming about the value of karatsubaThreshold.
   370	func karatsubaLen(n int) int {
   371		i := uint(0)
   372		for n > karatsubaThreshold {
   373			n >>= 1
   374			i++
   375		}
   376		return n << i
   377	}
   378	
   379	func (z nat) mul(x, y nat) nat {
   380		m := len(x)
   381		n := len(y)
   382	
   383		switch {
   384		case m < n:
   385			return z.mul(y, x)
   386		case m == 0 || n == 0:
   387			return z.make(0)
   388		case n == 1:
   389			return z.mulAddWW(x, y[0], 0)
   390		}
   391		// m >= n > 1
   392	
   393		// determine if z can be reused
   394		if alias(z, x) || alias(z, y) {
   395			z = nil // z is an alias for x or y - cannot reuse
   396		}
   397	
   398		// use basic multiplication if the numbers are small
   399		if n < karatsubaThreshold || n < 2 {
   400			z = z.make(m + n)
   401			basicMul(z, x, y)
   402			return z.norm()
   403		}
   404		// m >= n && n >= karatsubaThreshold && n >= 2
   405	
   406		// determine Karatsuba length k such that
   407		//
   408		//   x = x1*b + x0
   409		//   y = y1*b + y0  (and k <= len(y), which implies k <= len(x))
   410		//   b = 1<<(_W*k)  ("base" of digits xi, yi)
   411		//
   412		k := karatsubaLen(n)
   413		// k <= n
   414	
   415		// multiply x0 and y0 via Karatsuba
   416		x0 := x[0:k]              // x0 is not normalized
   417		y0 := y[0:k]              // y0 is not normalized
   418		z = z.make(max(6*k, m+n)) // enough space for karatsuba of x0*y0 and full result of x*y
   419		karatsuba(z, x0, y0)
   420		z = z[0 : m+n] // z has final length but may be incomplete, upper portion is garbage
   421	
   422		// If x1 and/or y1 are not 0, add missing terms to z explicitly:
   423		//
   424		//     m+n       2*k       0
   425		//   z = [   ...   | x0*y0 ]
   426		//     +   [ x1*y1 ]
   427		//     +   [ x1*y0 ]
   428		//     +   [ x0*y1 ]
   429		//
   430		if k < n || m != n {
   431			x1 := x[k:] // x1 is normalized because x is
   432			y1 := y[k:] // y1 is normalized because y is
   433			var t nat
   434			t = t.mul(x1, y1)
   435			copy(z[2*k:], t)
   436			z[2*k+len(t):].clear() // upper portion of z is garbage
   437			t = t.mul(x1, y0.norm())
   438			addAt(z, t, k)
   439			t = t.mul(x0.norm(), y1)
   440			addAt(z, t, k)
   441		}
   442	
   443		return z.norm()
   444	}
   445	
   446	// mulRange computes the product of all the unsigned integers in the
   447	// range [a, b] inclusively. If a > b (empty range), the result is 1.
   448	func (z nat) mulRange(a, b uint64) nat {
   449		switch {
   450		case a == 0:
   451			// cut long ranges short (optimization)
   452			return z.setUint64(0)
   453		case a > b:
   454			return z.setUint64(1)
   455		case a == b:
   456			return z.setUint64(a)
   457		case a+1 == b:
   458			return z.mul(nat(nil).setUint64(a), nat(nil).setUint64(b))
   459		}
   460		m := (a + b) / 2
   461		return z.mul(nat(nil).mulRange(a, m), nat(nil).mulRange(m+1, b))
   462	}
   463	
   464	// q = (x-r)/y, with 0 <= r < y
   465	func (z nat) divW(x nat, y Word) (q nat, r Word) {
   466		m := len(x)
   467		switch {
   468		case y == 0:
   469			panic("division by zero")
   470		case y == 1:
   471			q = z.set(x) // result is x
   472			return
   473		case m == 0:
   474			q = z.make(0) // result is 0
   475			return
   476		}
   477		// m > 0
   478		z = z.make(m)
   479		r = divWVW(z, 0, x, y)
   480		q = z.norm()
   481		return
   482	}
   483	
   484	func (z nat) div(z2, u, v nat) (q, r nat) {
   485		if len(v) == 0 {
   486			panic("division by zero")
   487		}
   488	
   489		if u.cmp(v) < 0 {
   490			q = z.make(0)
   491			r = z2.set(u)
   492			return
   493		}
   494	
   495		if len(v) == 1 {
   496			var rprime Word
   497			q, rprime = z.divW(u, v[0])
   498			if rprime > 0 {
   499				r = z2.make(1)
   500				r[0] = rprime
   501			} else {
   502				r = z2.make(0)
   503			}
   504			return
   505		}
   506	
   507		q, r = z.divLarge(z2, u, v)
   508		return
   509	}
   510	
   511	// q = (uIn-r)/v, with 0 <= r < y
   512	// Uses z as storage for q, and u as storage for r if possible.
   513	// See Knuth, Volume 2, section 4.3.1, Algorithm D.
   514	// Preconditions:
   515	//    len(v) >= 2
   516	//    len(uIn) >= len(v)
   517	func (z nat) divLarge(u, uIn, v nat) (q, r nat) {
   518		n := len(v)
   519		m := len(uIn) - n
   520	
   521		// determine if z can be reused
   522		// TODO(gri) should find a better solution - this if statement
   523		//           is very costly (see e.g. time pidigits -s -n 10000)
   524		if alias(z, uIn) || alias(z, v) {
   525			z = nil // z is an alias for uIn or v - cannot reuse
   526		}
   527		q = z.make(m + 1)
   528	
   529		qhatv := make(nat, n+1)
   530		if alias(u, uIn) || alias(u, v) {
   531			u = nil // u is an alias for uIn or v - cannot reuse
   532		}
   533		u = u.make(len(uIn) + 1)
   534		u.clear()
   535	
   536		// D1.
   537		shift := leadingZeros(v[n-1])
   538		if shift > 0 {
   539			// do not modify v, it may be used by another goroutine simultaneously
   540			v1 := make(nat, n)
   541			shlVU(v1, v, shift)
   542			v = v1
   543		}
   544		u[len(uIn)] = shlVU(u[0:len(uIn)], uIn, shift)
   545	
   546		// D2.
   547		for j := m; j >= 0; j-- {
   548			// D3.
   549			qhat := Word(_M)
   550			if u[j+n] != v[n-1] {
   551				var rhat Word
   552				qhat, rhat = divWW(u[j+n], u[j+n-1], v[n-1])
   553	
   554				// x1 | x2 = q̂v_{n-2}
   555				x1, x2 := mulWW(qhat, v[n-2])
   556				// test if q̂v_{n-2} > br̂ + u_{j+n-2}
   557				for greaterThan(x1, x2, rhat, u[j+n-2]) {
   558					qhat--
   559					prevRhat := rhat
   560					rhat += v[n-1]
   561					// v[n-1] >= 0, so this tests for overflow.
   562					if rhat < prevRhat {
   563						break
   564					}
   565					x1, x2 = mulWW(qhat, v[n-2])
   566				}
   567			}
   568	
   569			// D4.
   570			qhatv[n] = mulAddVWW(qhatv[0:n], v, qhat, 0)
   571	
   572			c := subVV(u[j:j+len(qhatv)], u[j:], qhatv)
   573			if c != 0 {
   574				c := addVV(u[j:j+n], u[j:], v)
   575				u[j+n] += c
   576				qhat--
   577			}
   578	
   579			q[j] = qhat
   580		}
   581	
   582		q = q.norm()
   583		shrVU(u, u, shift)
   584		r = u.norm()
   585	
   586		return q, r
   587	}
   588	
   589	// Length of x in bits. x must be normalized.
   590	func (x nat) bitLen() int {
   591		if i := len(x) - 1; i >= 0 {
   592			return i*_W + bitLen(x[i])
   593		}
   594		return 0
   595	}
   596	
   597	// MaxBase is the largest number base accepted for string conversions.
   598	const MaxBase = 'z' - 'a' + 10 + 1 // = hexValue('z') + 1
   599	
   600	func hexValue(ch rune) Word {
   601		d := int(MaxBase + 1) // illegal base
   602		switch {
   603		case '0' <= ch && ch <= '9':
   604			d = int(ch - '0')
   605		case 'a' <= ch && ch <= 'z':
   606			d = int(ch - 'a' + 10)
   607		case 'A' <= ch && ch <= 'Z':
   608			d = int(ch - 'A' + 10)
   609		}
   610		return Word(d)
   611	}
   612	
   613	// scan sets z to the natural number corresponding to the longest possible prefix
   614	// read from r representing an unsigned integer in a given conversion base.
   615	// It returns z, the actual conversion base used, and an error, if any. In the
   616	// error case, the value of z is undefined. The syntax follows the syntax of
   617	// unsigned integer literals in Go.
   618	//
   619	// The base argument must be 0 or a value from 2 through MaxBase. If the base
   620	// is 0, the string prefix determines the actual conversion base. A prefix of
   621	// ``0x'' or ``0X'' selects base 16; the ``0'' prefix selects base 8, and a
   622	// ``0b'' or ``0B'' prefix selects base 2. Otherwise the selected base is 10.
   623	//
   624	func (z nat) scan(r io.RuneScanner, base int) (nat, int, error) {
   625		// reject illegal bases
   626		if base < 0 || base == 1 || MaxBase < base {
   627			return z, 0, errors.New("illegal number base")
   628		}
   629	
   630		// one char look-ahead
   631		ch, _, err := r.ReadRune()
   632		if err != nil {
   633			return z, 0, err
   634		}
   635	
   636		// determine base if necessary
   637		b := Word(base)
   638		if base == 0 {
   639			b = 10
   640			if ch == '0' {
   641				switch ch, _, err = r.ReadRune(); err {
   642				case nil:
   643					b = 8
   644					switch ch {
   645					case 'x', 'X':
   646						b = 16
   647					case 'b', 'B':
   648						b = 2
   649					}
   650					if b == 2 || b == 16 {
   651						if ch, _, err = r.ReadRune(); err != nil {
   652							return z, 0, err
   653						}
   654					}
   655				case io.EOF:
   656					return z.make(0), 10, nil
   657				default:
   658					return z, 10, err
   659				}
   660			}
   661		}
   662	
   663		// convert string
   664		// - group as many digits d as possible together into a "super-digit" dd with "super-base" bb
   665		// - only when bb does not fit into a word anymore, do a full number mulAddWW using bb and dd
   666		z = z.make(0)
   667		bb := Word(1)
   668		dd := Word(0)
   669		for max := _M / b; ; {
   670			d := hexValue(ch)
   671			if d >= b {
   672				r.UnreadRune() // ch does not belong to number anymore
   673				break
   674			}
   675	
   676			if bb <= max {
   677				bb *= b
   678				dd = dd*b + d
   679			} else {
   680				// bb * b would overflow
   681				z = z.mulAddWW(z, bb, dd)
   682				bb = b
   683				dd = d
   684			}
   685	
   686			if ch, _, err = r.ReadRune(); err != nil {
   687				if err != io.EOF {
   688					return z, int(b), err
   689				}
   690				break
   691			}
   692		}
   693	
   694		switch {
   695		case bb > 1:
   696			// there was at least one mantissa digit
   697			z = z.mulAddWW(z, bb, dd)
   698		case base == 0 && b == 8:
   699			// there was only the octal prefix 0 (possibly followed by digits > 7);
   700			// return base 10, not 8
   701			return z, 10, nil
   702		case base != 0 || b != 8:
   703			// there was neither a mantissa digit nor the octal prefix 0
   704			return z, int(b), errors.New("syntax error scanning number")
   705		}
   706	
   707		return z.norm(), int(b), nil
   708	}
   709	
   710	// Character sets for string conversion.
   711	const (
   712		lowercaseDigits = "0123456789abcdefghijklmnopqrstuvwxyz"
   713		uppercaseDigits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
   714	)
   715	
   716	// decimalString returns a decimal representation of x.
   717	// It calls x.string with the charset "0123456789".
   718	func (x nat) decimalString() string {
   719		return x.string(lowercaseDigits[0:10])
   720	}
   721	
   722	// string converts x to a string using digits from a charset; a digit with
   723	// value d is represented by charset[d]. The conversion base is determined
   724	// by len(charset), which must be >= 2 and <= 256.
   725	func (x nat) string(charset string) string {
   726		b := Word(len(charset))
   727	
   728		// special cases
   729		switch {
   730		case b < 2 || MaxBase > 256:
   731			panic("illegal base")
   732		case len(x) == 0:
   733			return string(charset[0])
   734		}
   735	
   736		// allocate buffer for conversion
   737		i := int(float64(x.bitLen())/math.Log2(float64(b))) + 1 // off by one at most
   738		s := make([]byte, i)
   739	
   740		// convert power of two and non power of two bases separately
   741		if b == b&-b {
   742			// shift is base-b digit size in bits
   743			shift := uint(trailingZeroBits(b)) // shift > 0 because b >= 2
   744			mask := Word(1)<<shift - 1
   745			w := x[0]
   746			nbits := uint(_W) // number of unprocessed bits in w
   747	
   748			// convert less-significant words
   749			for k := 1; k < len(x); k++ {
   750				// convert full digits
   751				for nbits >= shift {
   752					i--
   753					s[i] = charset[w&mask]
   754					w >>= shift
   755					nbits -= shift
   756				}
   757	
   758				// convert any partial leading digit and advance to next word
   759				if nbits == 0 {
   760					// no partial digit remaining, just advance
   761					w = x[k]
   762					nbits = _W
   763				} else {
   764					// partial digit in current (k-1) and next (k) word
   765					w |= x[k] << nbits
   766					i--
   767					s[i] = charset[w&mask]
   768	
   769					// advance
   770					w = x[k] >> (shift - nbits)
   771					nbits = _W - (shift - nbits)
   772				}
   773			}
   774	
   775			// convert digits of most-significant word (omit leading zeros)
   776			for nbits >= 0 && w != 0 {
   777				i--
   778				s[i] = charset[w&mask]
   779				w >>= shift
   780				nbits -= shift
   781			}
   782	
   783		} else {
   784			// determine "big base"; i.e., the largest possible value bb
   785			// that is a power of base b and still fits into a Word
   786			// (as in 10^19 for 19 decimal digits in a 64bit Word)
   787			bb := b      // big base is b**ndigits
   788			ndigits := 1 // number of base b digits
   789			for max := Word(_M / b); bb <= max; bb *= b {
   790				ndigits++ // maximize ndigits where bb = b**ndigits, bb <= _M
   791			}
   792	
   793			// construct table of successive squares of bb*leafSize to use in subdivisions
   794			// result (table != nil) <=> (len(x) > leafSize > 0)
   795			table := divisors(len(x), b, ndigits, bb)
   796	
   797			// preserve x, create local copy for use by convertWords
   798			q := nat(nil).set(x)
   799	
   800			// convert q to string s in base b
   801			q.convertWords(s, charset, b, ndigits, bb, table)
   802	
   803			// strip leading zeros
   804			// (x != 0; thus s must contain at least one non-zero digit
   805			// and the loop will terminate)
   806			i = 0
   807			for zero := charset[0]; s[i] == zero; {
   808				i++
   809			}
   810		}
   811	
   812		return string(s[i:])
   813	}
   814	
   815	// Convert words of q to base b digits in s. If q is large, it is recursively "split in half"
   816	// by nat/nat division using tabulated divisors. Otherwise, it is converted iteratively using
   817	// repeated nat/Word divison.
   818	//
   819	// The iterative method processes n Words by n divW() calls, each of which visits every Word in the 
   820	// incrementally shortened q for a total of n + (n-1) + (n-2) ... + 2 + 1, or n(n+1)/2 divW()'s. 
   821	// Recursive conversion divides q by its approximate square root, yielding two parts, each half 
   822	// the size of q. Using the iterative method on both halves means 2 * (n/2)(n/2 + 1)/2 divW()'s
   823	// plus the expensive long div(). Asymptotically, the ratio is favorable at 1/2 the divW()'s, and
   824	// is made better by splitting the subblocks recursively. Best is to split blocks until one more 
   825	// split would take longer (because of the nat/nat div()) than the twice as many divW()'s of the 
   826	// iterative approach. This threshold is represented by leafSize. Benchmarking of leafSize in the 
   827	// range 2..64 shows that values of 8 and 16 work well, with a 4x speedup at medium lengths and 
   828	// ~30x for 20000 digits. Use nat_test.go's BenchmarkLeafSize tests to optimize leafSize for 
   829	// specific hardware.
   830	//
   831	func (q nat) convertWords(s []byte, charset string, b Word, ndigits int, bb Word, table []divisor) {
   832		// split larger blocks recursively
   833		if table != nil {
   834			// len(q) > leafSize > 0
   835			var r nat
   836			index := len(table) - 1
   837			for len(q) > leafSize {
   838				// find divisor close to sqrt(q) if possible, but in any case < q
   839				maxLength := q.bitLen()     // ~= log2 q, or at of least largest possible q of this bit length
   840				minLength := maxLength >> 1 // ~= log2 sqrt(q)
   841				for index > 0 && table[index-1].nbits > minLength {
   842					index-- // desired
   843				}
   844				if table[index].nbits >= maxLength && table[index].bbb.cmp(q) >= 0 {
   845					index--
   846					if index < 0 {
   847						panic("internal inconsistency")
   848					}
   849				}
   850	
   851				// split q into the two digit number (q'*bbb + r) to form independent subblocks
   852				q, r = q.div(r, q, table[index].bbb)
   853	
   854				// convert subblocks and collect results in s[:h] and s[h:]
   855				h := len(s) - table[index].ndigits
   856				r.convertWords(s[h:], charset, b, ndigits, bb, table[0:index])
   857				s = s[:h] // == q.convertWords(s, charset, b, ndigits, bb, table[0:index+1])
   858			}
   859		}
   860	
   861		// having split any large blocks now process the remaining (small) block iteratively
   862		i := len(s)
   863		var r Word
   864		if b == 10 {
   865			// hard-coding for 10 here speeds this up by 1.25x (allows for / and % by constants)
   866			for len(q) > 0 {
   867				// extract least significant, base bb "digit"
   868				q, r = q.divW(q, bb)
   869				for j := 0; j < ndigits && i > 0; j++ {
   870					i--
   871					// avoid % computation since r%10 == r - int(r/10)*10;
   872					// this appears to be faster for BenchmarkString10000Base10
   873					// and smaller strings (but a bit slower for larger ones)
   874					t := r / 10
   875					s[i] = charset[r-t<<3-t-t] // TODO(gri) replace w/ t*10 once compiler produces better code
   876					r = t
   877				}
   878			}
   879		} else {
   880			for len(q) > 0 {
   881				// extract least significant, base bb "digit"
   882				q, r = q.divW(q, bb)
   883				for j := 0; j < ndigits && i > 0; j++ {
   884					i--
   885					s[i] = charset[r%b]
   886					r /= b
   887				}
   888			}
   889		}
   890	
   891		// prepend high-order zeroes
   892		zero := charset[0]
   893		for i > 0 { // while need more leading zeroes
   894			i--
   895			s[i] = zero
   896		}
   897	}
   898	
   899	// Split blocks greater than leafSize Words (or set to 0 to disable recursive conversion)
   900	// Benchmark and configure leafSize using: go test -bench="Leaf"
   901	//   8 and 16 effective on 3.0 GHz Xeon "Clovertown" CPU (128 byte cache lines)
   902	//   8 and 16 effective on 2.66 GHz Core 2 Duo "Penryn" CPU
   903	var leafSize int = 8 // number of Word-size binary values treat as a monolithic block
   904	
   905	type divisor struct {
   906		bbb     nat // divisor
   907		nbits   int // bit length of divisor (discounting leading zeroes) ~= log2(bbb)
   908		ndigits int // digit length of divisor in terms of output base digits
   909	}
   910	
   911	var cacheBase10 [64]divisor // cached divisors for base 10
   912	var cacheLock sync.Mutex    // protects cacheBase10
   913	
   914	// expWW computes x**y
   915	func (z nat) expWW(x, y Word) nat {
   916		return z.expNN(nat(nil).setWord(x), nat(nil).setWord(y), nil)
   917	}
   918	
   919	// construct table of powers of bb*leafSize to use in subdivisions
   920	func divisors(m int, b Word, ndigits int, bb Word) []divisor {
   921		// only compute table when recursive conversion is enabled and x is large
   922		if leafSize == 0 || m <= leafSize {
   923			return nil
   924		}
   925	
   926		// determine k where (bb**leafSize)**(2**k) >= sqrt(x)
   927		k := 1
   928		for words := leafSize; words < m>>1 && k < len(cacheBase10); words <<= 1 {
   929			k++
   930		}
   931	
   932		// create new table of divisors or extend and reuse existing table as appropriate
   933		var table []divisor
   934		var cached bool
   935		switch b {
   936		case 10:
   937			table = cacheBase10[0:k] // reuse old table for this conversion
   938			cached = true
   939		default:
   940			table = make([]divisor, k) // new table for this conversion
   941		}
   942	
   943		// extend table
   944		if table[k-1].ndigits == 0 {
   945			if cached {
   946				cacheLock.Lock() // begin critical section
   947			}
   948	
   949			// add new entries as needed
   950			var larger nat
   951			for i := 0; i < k; i++ {
   952				if table[i].ndigits == 0 {
   953					if i == 0 {
   954						table[i].bbb = nat(nil).expWW(bb, Word(leafSize))
   955						table[i].ndigits = ndigits * leafSize
   956					} else {
   957						table[i].bbb = nat(nil).mul(table[i-1].bbb, table[i-1].bbb)
   958						table[i].ndigits = 2 * table[i-1].ndigits
   959					}
   960	
   961					// optimization: exploit aggregated extra bits in macro blocks
   962					larger = nat(nil).set(table[i].bbb)
   963					for mulAddVWW(larger, larger, b, 0) == 0 {
   964						table[i].bbb = table[i].bbb.set(larger)
   965						table[i].ndigits++
   966					}
   967	
   968					table[i].nbits = table[i].bbb.bitLen()
   969				}
   970			}
   971	
   972			if cached {
   973				cacheLock.Unlock() // end critical section
   974			}
   975		}
   976	
   977		return table
   978	}
   979	
   980	const deBruijn32 = 0x077CB531
   981	
   982	var deBruijn32Lookup = []byte{
   983		0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
   984		31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9,
   985	}
   986	
   987	const deBruijn64 = 0x03f79d71b4ca8b09
   988	
   989	var deBruijn64Lookup = []byte{
   990		0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4,
   991		62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5,
   992		63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11,
   993		54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6,
   994	}
   995	
   996	// trailingZeroBits returns the number of consecutive zero bits on the right
   997	// side of the given Word.
   998	// See Knuth, volume 4, section 7.3.1
   999	func trailingZeroBits(x Word) int {
  1000		// x & -x leaves only the right-most bit set in the word. Let k be the
  1001		// index of that bit. Since only a single bit is set, the value is two
  1002		// to the power of k. Multiplying by a power of two is equivalent to
  1003		// left shifting, in this case by k bits.  The de Bruijn constant is
  1004		// such that all six bit, consecutive substrings are distinct.
  1005		// Therefore, if we have a left shifted version of this constant we can
  1006		// find by how many bits it was shifted by looking at which six bit
  1007		// substring ended up at the top of the word.
  1008		switch _W {
  1009		case 32:
  1010			return int(deBruijn32Lookup[((x&-x)*deBruijn32)>>27])
  1011		case 64:
  1012			return int(deBruijn64Lookup[((x&-x)*(deBruijn64&_M))>>58])
  1013		default:
  1014			panic("Unknown word size")
  1015		}
  1016	
  1017		return 0
  1018	}
  1019	
  1020	// z = x << s
  1021	func (z nat) shl(x nat, s uint) nat {
  1022		m := len(x)
  1023		if m == 0 {
  1024			return z.make(0)
  1025		}
  1026		// m > 0
  1027	
  1028		n := m + int(s/_W)
  1029		z = z.make(n + 1)
  1030		z[n] = shlVU(z[n-m:n], x, s%_W)
  1031		z[0 : n-m].clear()
  1032	
  1033		return z.norm()
  1034	}
  1035	
  1036	// z = x >> s
  1037	func (z nat) shr(x nat, s uint) nat {
  1038		m := len(x)
  1039		n := m - int(s/_W)
  1040		if n <= 0 {
  1041			return z.make(0)
  1042		}
  1043		// n > 0
  1044	
  1045		z = z.make(n)
  1046		shrVU(z, x[m-n:], s%_W)
  1047	
  1048		return z.norm()
  1049	}
  1050	
  1051	func (z nat) setBit(x nat, i uint, b uint) nat {
  1052		j := int(i / _W)
  1053		m := Word(1) << (i % _W)
  1054		n := len(x)
  1055		switch b {
  1056		case 0:
  1057			z = z.make(n)
  1058			copy(z, x)
  1059			if j >= n {
  1060				// no need to grow
  1061				return z
  1062			}
  1063			z[j] &^= m
  1064			return z.norm()
  1065		case 1:
  1066			if j >= n {
  1067				z = z.make(j + 1)
  1068				z[n:].clear()
  1069			} else {
  1070				z = z.make(n)
  1071			}
  1072			copy(z, x)
  1073			z[j] |= m
  1074			// no need to normalize
  1075			return z
  1076		}
  1077		panic("set bit is not 0 or 1")
  1078	}
  1079	
  1080	func (z nat) bit(i uint) uint {
  1081		j := int(i / _W)
  1082		if j >= len(z) {
  1083			return 0
  1084		}
  1085		return uint(z[j] >> (i % _W) & 1)
  1086	}
  1087	
  1088	func (z nat) and(x, y nat) nat {
  1089		m := len(x)
  1090		n := len(y)
  1091		if m > n {
  1092			m = n
  1093		}
  1094		// m <= n
  1095	
  1096		z = z.make(m)
  1097		for i := 0; i < m; i++ {
  1098			z[i] = x[i] & y[i]
  1099		}
  1100	
  1101		return z.norm()
  1102	}
  1103	
  1104	func (z nat) andNot(x, y nat) nat {
  1105		m := len(x)
  1106		n := len(y)
  1107		if n > m {
  1108			n = m
  1109		}
  1110		// m >= n
  1111	
  1112		z = z.make(m)
  1113		for i := 0; i < n; i++ {
  1114			z[i] = x[i] &^ y[i]
  1115		}
  1116		copy(z[n:m], x[n:m])
  1117	
  1118		return z.norm()
  1119	}
  1120	
  1121	func (z nat) or(x, y nat) nat {
  1122		m := len(x)
  1123		n := len(y)
  1124		s := x
  1125		if m < n {
  1126			n, m = m, n
  1127			s = y
  1128		}
  1129		// m >= n
  1130	
  1131		z = z.make(m)
  1132		for i := 0; i < n; i++ {
  1133			z[i] = x[i] | y[i]
  1134		}
  1135		copy(z[n:m], s[n:m])
  1136	
  1137		return z.norm()
  1138	}
  1139	
  1140	func (z nat) xor(x, y nat) nat {
  1141		m := len(x)
  1142		n := len(y)
  1143		s := x
  1144		if m < n {
  1145			n, m = m, n
  1146			s = y
  1147		}
  1148		// m >= n
  1149	
  1150		z = z.make(m)
  1151		for i := 0; i < n; i++ {
  1152			z[i] = x[i] ^ y[i]
  1153		}
  1154		copy(z[n:m], s[n:m])
  1155	
  1156		return z.norm()
  1157	}
  1158	
  1159	// greaterThan returns true iff (x1<<_W + x2) > (y1<<_W + y2)
  1160	func greaterThan(x1, x2, y1, y2 Word) bool {
  1161		return x1 > y1 || x1 == y1 && x2 > y2
  1162	}
  1163	
  1164	// modW returns x % d.
  1165	func (x nat) modW(d Word) (r Word) {
  1166		// TODO(agl): we don't actually need to store the q value.
  1167		var q nat
  1168		q = q.make(len(x))
  1169		return divWVW(q, 0, x, d)
  1170	}
  1171	
  1172	// powersOfTwoDecompose finds q and k with x = q * 1<<k and q is odd, or q and k are 0.
  1173	func (x nat) powersOfTwoDecompose() (q nat, k int) {
  1174		if len(x) == 0 {
  1175			return x, 0
  1176		}
  1177	
  1178		// One of the words must be non-zero by definition,
  1179		// so this loop will terminate with i < len(x), and
  1180		// i is the number of 0 words.
  1181		i := 0
  1182		for x[i] == 0 {
  1183			i++
  1184		}
  1185		n := trailingZeroBits(x[i]) // x[i] != 0
  1186	
  1187		q = make(nat, len(x)-i)
  1188		shrVU(q, x[i:], uint(n))
  1189	
  1190		q = q.norm()
  1191		k = i*_W + n
  1192		return
  1193	}
  1194	
  1195	// random creates a random integer in [0..limit), using the space in z if
  1196	// possible. n is the bit length of limit.
  1197	func (z nat) random(rand *rand.Rand, limit nat, n int) nat {
  1198		if alias(z, limit) {
  1199			z = nil // z is an alias for limit - cannot reuse
  1200		}
  1201		z = z.make(len(limit))
  1202	
  1203		bitLengthOfMSW := uint(n % _W)
  1204		if bitLengthOfMSW == 0 {
  1205			bitLengthOfMSW = _W
  1206		}
  1207		mask := Word((1 << bitLengthOfMSW) - 1)
  1208	
  1209		for {
  1210			for i := range z {
  1211				switch _W {
  1212				case 32:
  1213					z[i] = Word(rand.Uint32())
  1214				case 64:
  1215					z[i] = Word(rand.Uint32()) | Word(rand.Uint32())<<32
  1216				}
  1217			}
  1218	
  1219			z[len(limit)-1] &= mask
  1220	
  1221			if z.cmp(limit) < 0 {
  1222				break
  1223			}
  1224		}
  1225	
  1226		return z.norm()
  1227	}
  1228	
  1229	// If m != nil, expNN calculates x**y mod m. Otherwise it calculates x**y. It
  1230	// reuses the storage of z if possible.
  1231	func (z nat) expNN(x, y, m nat) nat {
  1232		if alias(z, x) || alias(z, y) {
  1233			// We cannot allow in place modification of x or y.
  1234			z = nil
  1235		}
  1236	
  1237		if len(y) == 0 {
  1238			z = z.make(1)
  1239			z[0] = 1
  1240			return z
  1241		}
  1242	
  1243		if m != nil {
  1244			// We likely end up being as long as the modulus.
  1245			z = z.make(len(m))
  1246		}
  1247		z = z.set(x)
  1248		v := y[len(y)-1]
  1249		// It's invalid for the most significant word to be zero, therefore we
  1250		// will find a one bit.
  1251		shift := leadingZeros(v) + 1
  1252		v <<= shift
  1253		var q nat
  1254	
  1255		const mask = 1 << (_W - 1)
  1256	
  1257		// We walk through the bits of the exponent one by one. Each time we
  1258		// see a bit, we square, thus doubling the power. If the bit is a one,
  1259		// we also multiply by x, thus adding one to the power.
  1260	
  1261		w := _W - int(shift)
  1262		for j := 0; j < w; j++ {
  1263			z = z.mul(z, z)
  1264	
  1265			if v&mask != 0 {
  1266				z = z.mul(z, x)
  1267			}
  1268	
  1269			if m != nil {
  1270				q, z = q.div(z, z, m)
  1271			}
  1272	
  1273			v <<= 1
  1274		}
  1275	
  1276		for i := len(y) - 2; i >= 0; i-- {
  1277			v = y[i]
  1278	
  1279			for j := 0; j < _W; j++ {
  1280				z = z.mul(z, z)
  1281	
  1282				if v&mask != 0 {
  1283					z = z.mul(z, x)
  1284				}
  1285	
  1286				if m != nil {
  1287					q, z = q.div(z, z, m)
  1288				}
  1289	
  1290				v <<= 1
  1291			}
  1292		}
  1293	
  1294		return z.norm()
  1295	}
  1296	
  1297	// probablyPrime performs reps Miller-Rabin tests to check whether n is prime.
  1298	// If it returns true, n is prime with probability 1 - 1/4^reps.
  1299	// If it returns false, n is not prime.
  1300	func (n nat) probablyPrime(reps int) bool {
  1301		if len(n) == 0 {
  1302			return false
  1303		}
  1304	
  1305		if len(n) == 1 {
  1306			if n[0] < 2 {
  1307				return false
  1308			}
  1309	
  1310			if n[0]%2 == 0 {
  1311				return n[0] == 2
  1312			}
  1313	
  1314			// We have to exclude these cases because we reject all
  1315			// multiples of these numbers below.
  1316			switch n[0] {
  1317			case 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53:
  1318				return true
  1319			}
  1320		}
  1321	
  1322		const primesProduct32 = 0xC0CFD797         // Π {p ∈ primes, 2 < p <= 29}
  1323		const primesProduct64 = 0xE221F97C30E94E1D // Π {p ∈ primes, 2 < p <= 53}
  1324	
  1325		var r Word
  1326		switch _W {
  1327		case 32:
  1328			r = n.modW(primesProduct32)
  1329		case 64:
  1330			r = n.modW(primesProduct64 & _M)
  1331		default:
  1332			panic("Unknown word size")
  1333		}
  1334	
  1335		if r%3 == 0 || r%5 == 0 || r%7 == 0 || r%11 == 0 ||
  1336			r%13 == 0 || r%17 == 0 || r%19 == 0 || r%23 == 0 || r%29 == 0 {
  1337			return false
  1338		}
  1339	
  1340		if _W == 64 && (r%31 == 0 || r%37 == 0 || r%41 == 0 ||
  1341			r%43 == 0 || r%47 == 0 || r%53 == 0) {
  1342			return false
  1343		}
  1344	
  1345		nm1 := nat(nil).sub(n, natOne)
  1346		// 1<<k * q = nm1;
  1347		q, k := nm1.powersOfTwoDecompose()
  1348	
  1349		nm3 := nat(nil).sub(nm1, natTwo)
  1350		rand := rand.New(rand.NewSource(int64(n[0])))
  1351	
  1352		var x, y, quotient nat
  1353		nm3Len := nm3.bitLen()
  1354	
  1355	NextRandom:
  1356		for i := 0; i < reps; i++ {
  1357			x = x.random(rand, nm3, nm3Len)
  1358			x = x.add(x, natTwo)
  1359			y = y.expNN(x, q, n)
  1360			if y.cmp(natOne) == 0 || y.cmp(nm1) == 0 {
  1361				continue
  1362			}
  1363			for j := 1; j < k; j++ {
  1364				y = y.mul(y, y)
  1365				quotient, y = quotient.div(y, y, n)
  1366				if y.cmp(nm1) == 0 {
  1367					continue NextRandom
  1368				}
  1369				if y.cmp(natOne) == 0 {
  1370					return false
  1371				}
  1372			}
  1373			return false
  1374		}
  1375	
  1376		return true
  1377	}
  1378	
  1379	// bytes writes the value of z into buf using big-endian encoding.
  1380	// len(buf) must be >= len(z)*_S. The value of z is encoded in the
  1381	// slice buf[i:]. The number i of unused bytes at the beginning of
  1382	// buf is returned as result.
  1383	func (z nat) bytes(buf []byte) (i int) {
  1384		i = len(buf)
  1385		for _, d := range z {
  1386			for j := 0; j < _S; j++ {
  1387				i--
  1388				buf[i] = byte(d)
  1389				d >>= 8
  1390			}
  1391		}
  1392	
  1393		for i < len(buf) && buf[i] == 0 {
  1394			i++
  1395		}
  1396	
  1397		return
  1398	}
  1399	
  1400	// setBytes interprets buf as the bytes of a big-endian unsigned
  1401	// integer, sets z to that value, and returns z.
  1402	func (z nat) setBytes(buf []byte) nat {
  1403		z = z.make((len(buf) + _S - 1) / _S)
  1404	
  1405		k := 0
  1406		s := uint(0)
  1407		var d Word
  1408		for i := len(buf); i > 0; i-- {
  1409			d |= Word(buf[i-1]) << s
  1410			if s += 8; s == _S*8 {
  1411				z[k] = d
  1412				k++
  1413				s = 0
  1414				d = 0
  1415			}
  1416		}
  1417		if k < len(z) {
  1418			z[k] = d
  1419		}
  1420	
  1421		return z.norm()
  1422	}
previous page start next page