Source file src/pkg/math/big/nat.go
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 // Package big implements multi-precision arithmetic (big numbers).
6 // The following numeric types are supported:
7 //
8 // - Int signed integers
9 // - Rat rational numbers
10 //
11 // Methods are typically of the form:
12 //
13 // func (z *Int) Op(x, y *Int) *Int (similar for *Rat)
14 //
15 // and implement operations z = x Op y with the result as receiver; if it
16 // is one of the operands it may be overwritten (and its memory reused).
17 // To enable chaining of operations, the result is also returned. Methods
18 // returning a result other than *Int or *Rat take one of the operands as
19 // the receiver.
20 //
21 package big
22
23 // This file contains operations on unsigned multi-precision integers.
24 // These are the building blocks for the operations on signed integers
25 // and rationals.
26
27 import (
28 "errors"
29 "io"
30 "math"
31 "math/rand"
32 "sync"
33 )
34
35 // An unsigned integer x of the form
36 //
37 // x = x[n-1]*_B^(n-1) + x[n-2]*_B^(n-2) + ... + x[1]*_B + x[0]
38 //
39 // with 0 <= x[i] < _B and 0 <= i < n is stored in a slice of length n,
40 // with the digits x[i] as the slice elements.
41 //
42 // A number is normalized if the slice contains no leading 0 digits.
43 // During arithmetic operations, denormalized values may occur but are
44 // always normalized before returning the final result. The normalized
45 // representation of 0 is the empty or nil slice (length = 0).
46 //
47 type nat []Word
48
49 var (
50 natOne = nat{1}
51 natTwo = nat{2}
52 natTen = nat{10}
53 )
54
55 func (z nat) clear() {
56 for i := range z {
57 z[i] = 0
58 }
59 }
60
61 func (z nat) norm() nat {
62 i := len(z)
63 for i > 0 && z[i-1] == 0 {
64 i--
65 }
66 return z[0:i]
67 }
68
69 func (z nat) make(n int) nat {
70 if n <= cap(z) {
71 return z[0:n] // reuse z
72 }
73 // Choosing a good value for e has significant performance impact
74 // because it increases the chance that a value can be reused.
75 const e = 4 // extra capacity
76 return make(nat, n, n+e)
77 }
78
79 func (z nat) setWord(x Word) nat {
80 if x == 0 {
81 return z.make(0)
82 }
83 z = z.make(1)
84 z[0] = x
85 return z
86 }
87
88 func (z nat) setUint64(x uint64) nat {
89 // single-digit values
90 if w := Word(x); uint64(w) == x {
91 return z.setWord(w)
92 }
93
94 // compute number of words n required to represent x
95 n := 0
96 for t := x; t > 0; t >>= _W {
97 n++
98 }
99
100 // split x into n words
101 z = z.make(n)
102 for i := range z {
103 z[i] = Word(x & _M)
104 x >>= _W
105 }
106
107 return z
108 }
109
110 func (z nat) set(x nat) nat {
111 z = z.make(len(x))
112 copy(z, x)
113 return z
114 }
115
116 func (z nat) add(x, y nat) nat {
117 m := len(x)
118 n := len(y)
119
120 switch {
121 case m < n:
122 return z.add(y, x)
123 case m == 0:
124 // n == 0 because m >= n; result is 0
125 return z.make(0)
126 case n == 0:
127 // result is x
128 return z.set(x)
129 }
130 // m > 0
131
132 z = z.make(m + 1)
133 c := addVV(z[0:n], x, y)
134 if m > n {
135 c = addVW(z[n:m], x[n:], c)
136 }
137 z[m] = c
138
139 return z.norm()
140 }
141
142 func (z nat) sub(x, y nat) nat {
143 m := len(x)
144 n := len(y)
145
146 switch {
147 case m < n:
148 panic("underflow")
149 case m == 0:
150 // n == 0 because m >= n; result is 0
151 return z.make(0)
152 case n == 0:
153 // result is x
154 return z.set(x)
155 }
156 // m > 0
157
158 z = z.make(m)
159 c := subVV(z[0:n], x, y)
160 if m > n {
161 c = subVW(z[n:], x[n:], c)
162 }
163 if c != 0 {
164 panic("underflow")
165 }
166
167 return z.norm()
168 }
169
170 func (x nat) cmp(y nat) (r int) {
171 m := len(x)
172 n := len(y)
173 if m != n || m == 0 {
174 switch {
175 case m < n:
176 r = -1
177 case m > n:
178 r = 1
179 }
180 return
181 }
182
183 i := m - 1
184 for i > 0 && x[i] == y[i] {
185 i--
186 }
187
188 switch {
189 case x[i] < y[i]:
190 r = -1
191 case x[i] > y[i]:
192 r = 1
193 }
194 return
195 }
196
197 func (z nat) mulAddWW(x nat, y, r Word) nat {
198 m := len(x)
199 if m == 0 || y == 0 {
200 return z.setWord(r) // result is r
201 }
202 // m > 0
203
204 z = z.make(m + 1)
205 z[m] = mulAddVWW(z[0:m], x, y, r)
206
207 return z.norm()
208 }
209
210 // basicMul multiplies x and y and leaves the result in z.
211 // The (non-normalized) result is placed in z[0 : len(x) + len(y)].
212 func basicMul(z, x, y nat) {
213 z[0 : len(x)+len(y)].clear() // initialize z
214 for i, d := range y {
215 if d != 0 {
216 z[len(x)+i] = addMulVVW(z[i:i+len(x)], x, d)
217 }
218 }
219 }
220
221 // Fast version of z[0:n+n>>1].add(z[0:n+n>>1], x[0:n]) w/o bounds checks.
222 // Factored out for readability - do not use outside karatsuba.
223 func karatsubaAdd(z, x nat, n int) {
224 if c := addVV(z[0:n], z, x); c != 0 {
225 addVW(z[n:n+n>>1], z[n:], c)
226 }
227 }
228
229 // Like karatsubaAdd, but does subtract.
230 func karatsubaSub(z, x nat, n int) {
231 if c := subVV(z[0:n], z, x); c != 0 {
232 subVW(z[n:n+n>>1], z[n:], c)
233 }
234 }
235
236 // Operands that are shorter than karatsubaThreshold are multiplied using
237 // "grade school" multiplication; for longer operands the Karatsuba algorithm
238 // is used.
239 var karatsubaThreshold int = 32 // computed by calibrate.go
240
241 // karatsuba multiplies x and y and leaves the result in z.
242 // Both x and y must have the same length n and n must be a
243 // power of 2. The result vector z must have len(z) >= 6*n.
244 // The (non-normalized) result is placed in z[0 : 2*n].
245 func karatsuba(z, x, y nat) {
246 n := len(y)
247
248 // Switch to basic multiplication if numbers are odd or small.
249 // (n is always even if karatsubaThreshold is even, but be
250 // conservative)
251 if n&1 != 0 || n < karatsubaThreshold || n < 2 {
252 basicMul(z, x, y)
253 return
254 }
255 // n&1 == 0 && n >= karatsubaThreshold && n >= 2
256
257 // Karatsuba multiplication is based on the observation that
258 // for two numbers x and y with:
259 //
260 // x = x1*b + x0
261 // y = y1*b + y0
262 //
263 // the product x*y can be obtained with 3 products z2, z1, z0
264 // instead of 4:
265 //
266 // x*y = x1*y1*b*b + (x1*y0 + x0*y1)*b + x0*y0
267 // = z2*b*b + z1*b + z0
268 //
269 // with:
270 //
271 // xd = x1 - x0
272 // yd = y0 - y1
273 //
274 // z1 = xd*yd + z1 + z0
275 // = (x1-x0)*(y0 - y1) + z1 + z0
276 // = x1*y0 - x1*y1 - x0*y0 + x0*y1 + z1 + z0
277 // = x1*y0 - z1 - z0 + x0*y1 + z1 + z0
278 // = x1*y0 + x0*y1
279
280 // split x, y into "digits"
281 n2 := n >> 1 // n2 >= 1
282 x1, x0 := x[n2:], x[0:n2] // x = x1*b + y0
283 y1, y0 := y[n2:], y[0:n2] // y = y1*b + y0
284
285 // z is used for the result and temporary storage:
286 //
287 // 6*n 5*n 4*n 3*n 2*n 1*n 0*n
288 // z = [z2 copy|z0 copy| xd*yd | yd:xd | x1*y1 | x0*y0 ]
289 //
290 // For each recursive call of karatsuba, an unused slice of
291 // z is passed in that has (at least) half the length of the
292 // caller's z.
293
294 // compute z0 and z2 with the result "in place" in z
295 karatsuba(z, x0, y0) // z0 = x0*y0
296 karatsuba(z[n:], x1, y1) // z2 = x1*y1
297
298 // compute xd (or the negative value if underflow occurs)
299 s := 1 // sign of product xd*yd
300 xd := z[2*n : 2*n+n2]
301 if subVV(xd, x1, x0) != 0 { // x1-x0
302 s = -s
303 subVV(xd, x0, x1) // x0-x1
304 }
305
306 // compute yd (or the negative value if underflow occurs)
307 yd := z[2*n+n2 : 3*n]
308 if subVV(yd, y0, y1) != 0 { // y0-y1
309 s = -s
310 subVV(yd, y1, y0) // y1-y0
311 }
312
313 // p = (x1-x0)*(y0-y1) == x1*y0 - x1*y1 - x0*y0 + x0*y1 for s > 0
314 // p = (x0-x1)*(y0-y1) == x0*y0 - x0*y1 - x1*y0 + x1*y1 for s < 0
315 p := z[n*3:]
316 karatsuba(p, xd, yd)
317
318 // save original z2:z0
319 // (ok to use upper half of z since we're done recursing)
320 r := z[n*4:]
321 copy(r, z)
322
323 // add up all partial products
324 //
325 // 2*n n 0
326 // z = [ z2 | z0 ]
327 // + [ z0 ]
328 // + [ z2 ]
329 // + [ p ]
330 //
331 karatsubaAdd(z[n2:], r, n)
332 karatsubaAdd(z[n2:], r[n:], n)
333 if s > 0 {
334 karatsubaAdd(z[n2:], p, n)
335 } else {
336 karatsubaSub(z[n2:], p, n)
337 }
338 }
339
340 // alias returns true if x and y share the same base array.
341 func alias(x, y nat) bool {
342 return cap(x) > 0 && cap(y) > 0 && &x[0:cap(x)][cap(x)-1] == &y[0:cap(y)][cap(y)-1]
343 }
344
345 // addAt implements z += x*(1<<(_W*i)); z must be long enough.
346 // (we don't use nat.add because we need z to stay the same
347 // slice, and we don't need to normalize z after each addition)
348 func addAt(z, x nat, i int) {
349 if n := len(x); n > 0 {
350 if c := addVV(z[i:i+n], z[i:], x); c != 0 {
351 j := i + n
352 if j < len(z) {
353 addVW(z[j:], z[j:], c)
354 }
355 }
356 }
357 }
358
359 func max(x, y int) int {
360 if x > y {
361 return x
362 }
363 return y
364 }
365
366 // karatsubaLen computes an approximation to the maximum k <= n such that
367 // k = p<<i for a number p <= karatsubaThreshold and an i >= 0. Thus, the
368 // result is the largest number that can be divided repeatedly by 2 before
369 // becoming about the value of karatsubaThreshold.
370 func karatsubaLen(n int) int {
371 i := uint(0)
372 for n > karatsubaThreshold {
373 n >>= 1
374 i++
375 }
376 return n << i
377 }
378
379 func (z nat) mul(x, y nat) nat {
380 m := len(x)
381 n := len(y)
382
383 switch {
384 case m < n:
385 return z.mul(y, x)
386 case m == 0 || n == 0:
387 return z.make(0)
388 case n == 1:
389 return z.mulAddWW(x, y[0], 0)
390 }
391 // m >= n > 1
392
393 // determine if z can be reused
394 if alias(z, x) || alias(z, y) {
395 z = nil // z is an alias for x or y - cannot reuse
396 }
397
398 // use basic multiplication if the numbers are small
399 if n < karatsubaThreshold || n < 2 {
400 z = z.make(m + n)
401 basicMul(z, x, y)
402 return z.norm()
403 }
404 // m >= n && n >= karatsubaThreshold && n >= 2
405
406 // determine Karatsuba length k such that
407 //
408 // x = x1*b + x0
409 // y = y1*b + y0 (and k <= len(y), which implies k <= len(x))
410 // b = 1<<(_W*k) ("base" of digits xi, yi)
411 //
412 k := karatsubaLen(n)
413 // k <= n
414
415 // multiply x0 and y0 via Karatsuba
416 x0 := x[0:k] // x0 is not normalized
417 y0 := y[0:k] // y0 is not normalized
418 z = z.make(max(6*k, m+n)) // enough space for karatsuba of x0*y0 and full result of x*y
419 karatsuba(z, x0, y0)
420 z = z[0 : m+n] // z has final length but may be incomplete, upper portion is garbage
421
422 // If x1 and/or y1 are not 0, add missing terms to z explicitly:
423 //
424 // m+n 2*k 0
425 // z = [ ... | x0*y0 ]
426 // + [ x1*y1 ]
427 // + [ x1*y0 ]
428 // + [ x0*y1 ]
429 //
430 if k < n || m != n {
431 x1 := x[k:] // x1 is normalized because x is
432 y1 := y[k:] // y1 is normalized because y is
433 var t nat
434 t = t.mul(x1, y1)
435 copy(z[2*k:], t)
436 z[2*k+len(t):].clear() // upper portion of z is garbage
437 t = t.mul(x1, y0.norm())
438 addAt(z, t, k)
439 t = t.mul(x0.norm(), y1)
440 addAt(z, t, k)
441 }
442
443 return z.norm()
444 }
445
446 // mulRange computes the product of all the unsigned integers in the
447 // range [a, b] inclusively. If a > b (empty range), the result is 1.
448 func (z nat) mulRange(a, b uint64) nat {
449 switch {
450 case a == 0:
451 // cut long ranges short (optimization)
452 return z.setUint64(0)
453 case a > b:
454 return z.setUint64(1)
455 case a == b:
456 return z.setUint64(a)
457 case a+1 == b:
458 return z.mul(nat(nil).setUint64(a), nat(nil).setUint64(b))
459 }
460 m := (a + b) / 2
461 return z.mul(nat(nil).mulRange(a, m), nat(nil).mulRange(m+1, b))
462 }
463
464 // q = (x-r)/y, with 0 <= r < y
465 func (z nat) divW(x nat, y Word) (q nat, r Word) {
466 m := len(x)
467 switch {
468 case y == 0:
469 panic("division by zero")
470 case y == 1:
471 q = z.set(x) // result is x
472 return
473 case m == 0:
474 q = z.make(0) // result is 0
475 return
476 }
477 // m > 0
478 z = z.make(m)
479 r = divWVW(z, 0, x, y)
480 q = z.norm()
481 return
482 }
483
484 func (z nat) div(z2, u, v nat) (q, r nat) {
485 if len(v) == 0 {
486 panic("division by zero")
487 }
488
489 if u.cmp(v) < 0 {
490 q = z.make(0)
491 r = z2.set(u)
492 return
493 }
494
495 if len(v) == 1 {
496 var rprime Word
497 q, rprime = z.divW(u, v[0])
498 if rprime > 0 {
499 r = z2.make(1)
500 r[0] = rprime
501 } else {
502 r = z2.make(0)
503 }
504 return
505 }
506
507 q, r = z.divLarge(z2, u, v)
508 return
509 }
510
511 // q = (uIn-r)/v, with 0 <= r < y
512 // Uses z as storage for q, and u as storage for r if possible.
513 // See Knuth, Volume 2, section 4.3.1, Algorithm D.
514 // Preconditions:
515 // len(v) >= 2
516 // len(uIn) >= len(v)
517 func (z nat) divLarge(u, uIn, v nat) (q, r nat) {
518 n := len(v)
519 m := len(uIn) - n
520
521 // determine if z can be reused
522 // TODO(gri) should find a better solution - this if statement
523 // is very costly (see e.g. time pidigits -s -n 10000)
524 if alias(z, uIn) || alias(z, v) {
525 z = nil // z is an alias for uIn or v - cannot reuse
526 }
527 q = z.make(m + 1)
528
529 qhatv := make(nat, n+1)
530 if alias(u, uIn) || alias(u, v) {
531 u = nil // u is an alias for uIn or v - cannot reuse
532 }
533 u = u.make(len(uIn) + 1)
534 u.clear()
535
536 // D1.
537 shift := leadingZeros(v[n-1])
538 if shift > 0 {
539 // do not modify v, it may be used by another goroutine simultaneously
540 v1 := make(nat, n)
541 shlVU(v1, v, shift)
542 v = v1
543 }
544 u[len(uIn)] = shlVU(u[0:len(uIn)], uIn, shift)
545
546 // D2.
547 for j := m; j >= 0; j-- {
548 // D3.
549 qhat := Word(_M)
550 if u[j+n] != v[n-1] {
551 var rhat Word
552 qhat, rhat = divWW(u[j+n], u[j+n-1], v[n-1])
553
554 // x1 | x2 = q̂v_{n-2}
555 x1, x2 := mulWW(qhat, v[n-2])
556 // test if q̂v_{n-2} > br̂ + u_{j+n-2}
557 for greaterThan(x1, x2, rhat, u[j+n-2]) {
558 qhat--
559 prevRhat := rhat
560 rhat += v[n-1]
561 // v[n-1] >= 0, so this tests for overflow.
562 if rhat < prevRhat {
563 break
564 }
565 x1, x2 = mulWW(qhat, v[n-2])
566 }
567 }
568
569 // D4.
570 qhatv[n] = mulAddVWW(qhatv[0:n], v, qhat, 0)
571
572 c := subVV(u[j:j+len(qhatv)], u[j:], qhatv)
573 if c != 0 {
574 c := addVV(u[j:j+n], u[j:], v)
575 u[j+n] += c
576 qhat--
577 }
578
579 q[j] = qhat
580 }
581
582 q = q.norm()
583 shrVU(u, u, shift)
584 r = u.norm()
585
586 return q, r
587 }
588
589 // Length of x in bits. x must be normalized.
590 func (x nat) bitLen() int {
591 if i := len(x) - 1; i >= 0 {
592 return i*_W + bitLen(x[i])
593 }
594 return 0
595 }
596
597 // MaxBase is the largest number base accepted for string conversions.
598 const MaxBase = 'z' - 'a' + 10 + 1 // = hexValue('z') + 1
599
600 func hexValue(ch rune) Word {
601 d := int(MaxBase + 1) // illegal base
602 switch {
603 case '0' <= ch && ch <= '9':
604 d = int(ch - '0')
605 case 'a' <= ch && ch <= 'z':
606 d = int(ch - 'a' + 10)
607 case 'A' <= ch && ch <= 'Z':
608 d = int(ch - 'A' + 10)
609 }
610 return Word(d)
611 }
612
613 // scan sets z to the natural number corresponding to the longest possible prefix
614 // read from r representing an unsigned integer in a given conversion base.
615 // It returns z, the actual conversion base used, and an error, if any. In the
616 // error case, the value of z is undefined. The syntax follows the syntax of
617 // unsigned integer literals in Go.
618 //
619 // The base argument must be 0 or a value from 2 through MaxBase. If the base
620 // is 0, the string prefix determines the actual conversion base. A prefix of
621 // ``0x'' or ``0X'' selects base 16; the ``0'' prefix selects base 8, and a
622 // ``0b'' or ``0B'' prefix selects base 2. Otherwise the selected base is 10.
623 //
624 func (z nat) scan(r io.RuneScanner, base int) (nat, int, error) {
625 // reject illegal bases
626 if base < 0 || base == 1 || MaxBase < base {
627 return z, 0, errors.New("illegal number base")
628 }
629
630 // one char look-ahead
631 ch, _, err := r.ReadRune()
632 if err != nil {
633 return z, 0, err
634 }
635
636 // determine base if necessary
637 b := Word(base)
638 if base == 0 {
639 b = 10
640 if ch == '0' {
641 switch ch, _, err = r.ReadRune(); err {
642 case nil:
643 b = 8
644 switch ch {
645 case 'x', 'X':
646 b = 16
647 case 'b', 'B':
648 b = 2
649 }
650 if b == 2 || b == 16 {
651 if ch, _, err = r.ReadRune(); err != nil {
652 return z, 0, err
653 }
654 }
655 case io.EOF:
656 return z.make(0), 10, nil
657 default:
658 return z, 10, err
659 }
660 }
661 }
662
663 // convert string
664 // - group as many digits d as possible together into a "super-digit" dd with "super-base" bb
665 // - only when bb does not fit into a word anymore, do a full number mulAddWW using bb and dd
666 z = z.make(0)
667 bb := Word(1)
668 dd := Word(0)
669 for max := _M / b; ; {
670 d := hexValue(ch)
671 if d >= b {
672 r.UnreadRune() // ch does not belong to number anymore
673 break
674 }
675
676 if bb <= max {
677 bb *= b
678 dd = dd*b + d
679 } else {
680 // bb * b would overflow
681 z = z.mulAddWW(z, bb, dd)
682 bb = b
683 dd = d
684 }
685
686 if ch, _, err = r.ReadRune(); err != nil {
687 if err != io.EOF {
688 return z, int(b), err
689 }
690 break
691 }
692 }
693
694 switch {
695 case bb > 1:
696 // there was at least one mantissa digit
697 z = z.mulAddWW(z, bb, dd)
698 case base == 0 && b == 8:
699 // there was only the octal prefix 0 (possibly followed by digits > 7);
700 // return base 10, not 8
701 return z, 10, nil
702 case base != 0 || b != 8:
703 // there was neither a mantissa digit nor the octal prefix 0
704 return z, int(b), errors.New("syntax error scanning number")
705 }
706
707 return z.norm(), int(b), nil
708 }
709
710 // Character sets for string conversion.
711 const (
712 lowercaseDigits = "0123456789abcdefghijklmnopqrstuvwxyz"
713 uppercaseDigits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
714 )
715
716 // decimalString returns a decimal representation of x.
717 // It calls x.string with the charset "0123456789".
718 func (x nat) decimalString() string {
719 return x.string(lowercaseDigits[0:10])
720 }
721
722 // string converts x to a string using digits from a charset; a digit with
723 // value d is represented by charset[d]. The conversion base is determined
724 // by len(charset), which must be >= 2 and <= 256.
725 func (x nat) string(charset string) string {
726 b := Word(len(charset))
727
728 // special cases
729 switch {
730 case b < 2 || MaxBase > 256:
731 panic("illegal base")
732 case len(x) == 0:
733 return string(charset[0])
734 }
735
736 // allocate buffer for conversion
737 i := int(float64(x.bitLen())/math.Log2(float64(b))) + 1 // off by one at most
738 s := make([]byte, i)
739
740 // convert power of two and non power of two bases separately
741 if b == b&-b {
742 // shift is base-b digit size in bits
743 shift := uint(trailingZeroBits(b)) // shift > 0 because b >= 2
744 mask := Word(1)<<shift - 1
745 w := x[0]
746 nbits := uint(_W) // number of unprocessed bits in w
747
748 // convert less-significant words
749 for k := 1; k < len(x); k++ {
750 // convert full digits
751 for nbits >= shift {
752 i--
753 s[i] = charset[w&mask]
754 w >>= shift
755 nbits -= shift
756 }
757
758 // convert any partial leading digit and advance to next word
759 if nbits == 0 {
760 // no partial digit remaining, just advance
761 w = x[k]
762 nbits = _W
763 } else {
764 // partial digit in current (k-1) and next (k) word
765 w |= x[k] << nbits
766 i--
767 s[i] = charset[w&mask]
768
769 // advance
770 w = x[k] >> (shift - nbits)
771 nbits = _W - (shift - nbits)
772 }
773 }
774
775 // convert digits of most-significant word (omit leading zeros)
776 for nbits >= 0 && w != 0 {
777 i--
778 s[i] = charset[w&mask]
779 w >>= shift
780 nbits -= shift
781 }
782
783 } else {
784 // determine "big base"; i.e., the largest possible value bb
785 // that is a power of base b and still fits into a Word
786 // (as in 10^19 for 19 decimal digits in a 64bit Word)
787 bb := b // big base is b**ndigits
788 ndigits := 1 // number of base b digits
789 for max := Word(_M / b); bb <= max; bb *= b {
790 ndigits++ // maximize ndigits where bb = b**ndigits, bb <= _M
791 }
792
793 // construct table of successive squares of bb*leafSize to use in subdivisions
794 // result (table != nil) <=> (len(x) > leafSize > 0)
795 table := divisors(len(x), b, ndigits, bb)
796
797 // preserve x, create local copy for use by convertWords
798 q := nat(nil).set(x)
799
800 // convert q to string s in base b
801 q.convertWords(s, charset, b, ndigits, bb, table)
802
803 // strip leading zeros
804 // (x != 0; thus s must contain at least one non-zero digit
805 // and the loop will terminate)
806 i = 0
807 for zero := charset[0]; s[i] == zero; {
808 i++
809 }
810 }
811
812 return string(s[i:])
813 }
814
815 // Convert words of q to base b digits in s. If q is large, it is recursively "split in half"
816 // by nat/nat division using tabulated divisors. Otherwise, it is converted iteratively using
817 // repeated nat/Word divison.
818 //
819 // The iterative method processes n Words by n divW() calls, each of which visits every Word in the
820 // incrementally shortened q for a total of n + (n-1) + (n-2) ... + 2 + 1, or n(n+1)/2 divW()'s.
821 // Recursive conversion divides q by its approximate square root, yielding two parts, each half
822 // the size of q. Using the iterative method on both halves means 2 * (n/2)(n/2 + 1)/2 divW()'s
823 // plus the expensive long div(). Asymptotically, the ratio is favorable at 1/2 the divW()'s, and
824 // is made better by splitting the subblocks recursively. Best is to split blocks until one more
825 // split would take longer (because of the nat/nat div()) than the twice as many divW()'s of the
826 // iterative approach. This threshold is represented by leafSize. Benchmarking of leafSize in the
827 // range 2..64 shows that values of 8 and 16 work well, with a 4x speedup at medium lengths and
828 // ~30x for 20000 digits. Use nat_test.go's BenchmarkLeafSize tests to optimize leafSize for
829 // specific hardware.
830 //
831 func (q nat) convertWords(s []byte, charset string, b Word, ndigits int, bb Word, table []divisor) {
832 // split larger blocks recursively
833 if table != nil {
834 // len(q) > leafSize > 0
835 var r nat
836 index := len(table) - 1
837 for len(q) > leafSize {
838 // find divisor close to sqrt(q) if possible, but in any case < q
839 maxLength := q.bitLen() // ~= log2 q, or at of least largest possible q of this bit length
840 minLength := maxLength >> 1 // ~= log2 sqrt(q)
841 for index > 0 && table[index-1].nbits > minLength {
842 index-- // desired
843 }
844 if table[index].nbits >= maxLength && table[index].bbb.cmp(q) >= 0 {
845 index--
846 if index < 0 {
847 panic("internal inconsistency")
848 }
849 }
850
851 // split q into the two digit number (q'*bbb + r) to form independent subblocks
852 q, r = q.div(r, q, table[index].bbb)
853
854 // convert subblocks and collect results in s[:h] and s[h:]
855 h := len(s) - table[index].ndigits
856 r.convertWords(s[h:], charset, b, ndigits, bb, table[0:index])
857 s = s[:h] // == q.convertWords(s, charset, b, ndigits, bb, table[0:index+1])
858 }
859 }
860
861 // having split any large blocks now process the remaining (small) block iteratively
862 i := len(s)
863 var r Word
864 if b == 10 {
865 // hard-coding for 10 here speeds this up by 1.25x (allows for / and % by constants)
866 for len(q) > 0 {
867 // extract least significant, base bb "digit"
868 q, r = q.divW(q, bb)
869 for j := 0; j < ndigits && i > 0; j++ {
870 i--
871 // avoid % computation since r%10 == r - int(r/10)*10;
872 // this appears to be faster for BenchmarkString10000Base10
873 // and smaller strings (but a bit slower for larger ones)
874 t := r / 10
875 s[i] = charset[r-t<<3-t-t] // TODO(gri) replace w/ t*10 once compiler produces better code
876 r = t
877 }
878 }
879 } else {
880 for len(q) > 0 {
881 // extract least significant, base bb "digit"
882 q, r = q.divW(q, bb)
883 for j := 0; j < ndigits && i > 0; j++ {
884 i--
885 s[i] = charset[r%b]
886 r /= b
887 }
888 }
889 }
890
891 // prepend high-order zeroes
892 zero := charset[0]
893 for i > 0 { // while need more leading zeroes
894 i--
895 s[i] = zero
896 }
897 }
898
899 // Split blocks greater than leafSize Words (or set to 0 to disable recursive conversion)
900 // Benchmark and configure leafSize using: go test -bench="Leaf"
901 // 8 and 16 effective on 3.0 GHz Xeon "Clovertown" CPU (128 byte cache lines)
902 // 8 and 16 effective on 2.66 GHz Core 2 Duo "Penryn" CPU
903 var leafSize int = 8 // number of Word-size binary values treat as a monolithic block
904
905 type divisor struct {
906 bbb nat // divisor
907 nbits int // bit length of divisor (discounting leading zeroes) ~= log2(bbb)
908 ndigits int // digit length of divisor in terms of output base digits
909 }
910
911 var cacheBase10 [64]divisor // cached divisors for base 10
912 var cacheLock sync.Mutex // protects cacheBase10
913
914 // expWW computes x**y
915 func (z nat) expWW(x, y Word) nat {
916 return z.expNN(nat(nil).setWord(x), nat(nil).setWord(y), nil)
917 }
918
919 // construct table of powers of bb*leafSize to use in subdivisions
920 func divisors(m int, b Word, ndigits int, bb Word) []divisor {
921 // only compute table when recursive conversion is enabled and x is large
922 if leafSize == 0 || m <= leafSize {
923 return nil
924 }
925
926 // determine k where (bb**leafSize)**(2**k) >= sqrt(x)
927 k := 1
928 for words := leafSize; words < m>>1 && k < len(cacheBase10); words <<= 1 {
929 k++
930 }
931
932 // create new table of divisors or extend and reuse existing table as appropriate
933 var table []divisor
934 var cached bool
935 switch b {
936 case 10:
937 table = cacheBase10[0:k] // reuse old table for this conversion
938 cached = true
939 default:
940 table = make([]divisor, k) // new table for this conversion
941 }
942
943 // extend table
944 if table[k-1].ndigits == 0 {
945 if cached {
946 cacheLock.Lock() // begin critical section
947 }
948
949 // add new entries as needed
950 var larger nat
951 for i := 0; i < k; i++ {
952 if table[i].ndigits == 0 {
953 if i == 0 {
954 table[i].bbb = nat(nil).expWW(bb, Word(leafSize))
955 table[i].ndigits = ndigits * leafSize
956 } else {
957 table[i].bbb = nat(nil).mul(table[i-1].bbb, table[i-1].bbb)
958 table[i].ndigits = 2 * table[i-1].ndigits
959 }
960
961 // optimization: exploit aggregated extra bits in macro blocks
962 larger = nat(nil).set(table[i].bbb)
963 for mulAddVWW(larger, larger, b, 0) == 0 {
964 table[i].bbb = table[i].bbb.set(larger)
965 table[i].ndigits++
966 }
967
968 table[i].nbits = table[i].bbb.bitLen()
969 }
970 }
971
972 if cached {
973 cacheLock.Unlock() // end critical section
974 }
975 }
976
977 return table
978 }
979
980 const deBruijn32 = 0x077CB531
981
982 var deBruijn32Lookup = []byte{
983 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
984 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9,
985 }
986
987 const deBruijn64 = 0x03f79d71b4ca8b09
988
989 var deBruijn64Lookup = []byte{
990 0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4,
991 62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5,
992 63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11,
993 54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6,
994 }
995
996 // trailingZeroBits returns the number of consecutive zero bits on the right
997 // side of the given Word.
998 // See Knuth, volume 4, section 7.3.1
999 func trailingZeroBits(x Word) int {
1000 // x & -x leaves only the right-most bit set in the word. Let k be the
1001 // index of that bit. Since only a single bit is set, the value is two
1002 // to the power of k. Multiplying by a power of two is equivalent to
1003 // left shifting, in this case by k bits. The de Bruijn constant is
1004 // such that all six bit, consecutive substrings are distinct.
1005 // Therefore, if we have a left shifted version of this constant we can
1006 // find by how many bits it was shifted by looking at which six bit
1007 // substring ended up at the top of the word.
1008 switch _W {
1009 case 32:
1010 return int(deBruijn32Lookup[((x&-x)*deBruijn32)>>27])
1011 case 64:
1012 return int(deBruijn64Lookup[((x&-x)*(deBruijn64&_M))>>58])
1013 default:
1014 panic("Unknown word size")
1015 }
1016
1017 return 0
1018 }
1019
1020 // z = x << s
1021 func (z nat) shl(x nat, s uint) nat {
1022 m := len(x)
1023 if m == 0 {
1024 return z.make(0)
1025 }
1026 // m > 0
1027
1028 n := m + int(s/_W)
1029 z = z.make(n + 1)
1030 z[n] = shlVU(z[n-m:n], x, s%_W)
1031 z[0 : n-m].clear()
1032
1033 return z.norm()
1034 }
1035
1036 // z = x >> s
1037 func (z nat) shr(x nat, s uint) nat {
1038 m := len(x)
1039 n := m - int(s/_W)
1040 if n <= 0 {
1041 return z.make(0)
1042 }
1043 // n > 0
1044
1045 z = z.make(n)
1046 shrVU(z, x[m-n:], s%_W)
1047
1048 return z.norm()
1049 }
1050
1051 func (z nat) setBit(x nat, i uint, b uint) nat {
1052 j := int(i / _W)
1053 m := Word(1) << (i % _W)
1054 n := len(x)
1055 switch b {
1056 case 0:
1057 z = z.make(n)
1058 copy(z, x)
1059 if j >= n {
1060 // no need to grow
1061 return z
1062 }
1063 z[j] &^= m
1064 return z.norm()
1065 case 1:
1066 if j >= n {
1067 z = z.make(j + 1)
1068 z[n:].clear()
1069 } else {
1070 z = z.make(n)
1071 }
1072 copy(z, x)
1073 z[j] |= m
1074 // no need to normalize
1075 return z
1076 }
1077 panic("set bit is not 0 or 1")
1078 }
1079
1080 func (z nat) bit(i uint) uint {
1081 j := int(i / _W)
1082 if j >= len(z) {
1083 return 0
1084 }
1085 return uint(z[j] >> (i % _W) & 1)
1086 }
1087
1088 func (z nat) and(x, y nat) nat {
1089 m := len(x)
1090 n := len(y)
1091 if m > n {
1092 m = n
1093 }
1094 // m <= n
1095
1096 z = z.make(m)
1097 for i := 0; i < m; i++ {
1098 z[i] = x[i] & y[i]
1099 }
1100
1101 return z.norm()
1102 }
1103
1104 func (z nat) andNot(x, y nat) nat {
1105 m := len(x)
1106 n := len(y)
1107 if n > m {
1108 n = m
1109 }
1110 // m >= n
1111
1112 z = z.make(m)
1113 for i := 0; i < n; i++ {
1114 z[i] = x[i] &^ y[i]
1115 }
1116 copy(z[n:m], x[n:m])
1117
1118 return z.norm()
1119 }
1120
1121 func (z nat) or(x, y nat) nat {
1122 m := len(x)
1123 n := len(y)
1124 s := x
1125 if m < n {
1126 n, m = m, n
1127 s = y
1128 }
1129 // m >= n
1130
1131 z = z.make(m)
1132 for i := 0; i < n; i++ {
1133 z[i] = x[i] | y[i]
1134 }
1135 copy(z[n:m], s[n:m])
1136
1137 return z.norm()
1138 }
1139
1140 func (z nat) xor(x, y nat) nat {
1141 m := len(x)
1142 n := len(y)
1143 s := x
1144 if m < n {
1145 n, m = m, n
1146 s = y
1147 }
1148 // m >= n
1149
1150 z = z.make(m)
1151 for i := 0; i < n; i++ {
1152 z[i] = x[i] ^ y[i]
1153 }
1154 copy(z[n:m], s[n:m])
1155
1156 return z.norm()
1157 }
1158
1159 // greaterThan returns true iff (x1<<_W + x2) > (y1<<_W + y2)
1160 func greaterThan(x1, x2, y1, y2 Word) bool {
1161 return x1 > y1 || x1 == y1 && x2 > y2
1162 }
1163
1164 // modW returns x % d.
1165 func (x nat) modW(d Word) (r Word) {
1166 // TODO(agl): we don't actually need to store the q value.
1167 var q nat
1168 q = q.make(len(x))
1169 return divWVW(q, 0, x, d)
1170 }
1171
1172 // powersOfTwoDecompose finds q and k with x = q * 1<<k and q is odd, or q and k are 0.
1173 func (x nat) powersOfTwoDecompose() (q nat, k int) {
1174 if len(x) == 0 {
1175 return x, 0
1176 }
1177
1178 // One of the words must be non-zero by definition,
1179 // so this loop will terminate with i < len(x), and
1180 // i is the number of 0 words.
1181 i := 0
1182 for x[i] == 0 {
1183 i++
1184 }
1185 n := trailingZeroBits(x[i]) // x[i] != 0
1186
1187 q = make(nat, len(x)-i)
1188 shrVU(q, x[i:], uint(n))
1189
1190 q = q.norm()
1191 k = i*_W + n
1192 return
1193 }
1194
1195 // random creates a random integer in [0..limit), using the space in z if
1196 // possible. n is the bit length of limit.
1197 func (z nat) random(rand *rand.Rand, limit nat, n int) nat {
1198 if alias(z, limit) {
1199 z = nil // z is an alias for limit - cannot reuse
1200 }
1201 z = z.make(len(limit))
1202
1203 bitLengthOfMSW := uint(n % _W)
1204 if bitLengthOfMSW == 0 {
1205 bitLengthOfMSW = _W
1206 }
1207 mask := Word((1 << bitLengthOfMSW) - 1)
1208
1209 for {
1210 for i := range z {
1211 switch _W {
1212 case 32:
1213 z[i] = Word(rand.Uint32())
1214 case 64:
1215 z[i] = Word(rand.Uint32()) | Word(rand.Uint32())<<32
1216 }
1217 }
1218
1219 z[len(limit)-1] &= mask
1220
1221 if z.cmp(limit) < 0 {
1222 break
1223 }
1224 }
1225
1226 return z.norm()
1227 }
1228
1229 // If m != nil, expNN calculates x**y mod m. Otherwise it calculates x**y. It
1230 // reuses the storage of z if possible.
1231 func (z nat) expNN(x, y, m nat) nat {
1232 if alias(z, x) || alias(z, y) {
1233 // We cannot allow in place modification of x or y.
1234 z = nil
1235 }
1236
1237 if len(y) == 0 {
1238 z = z.make(1)
1239 z[0] = 1
1240 return z
1241 }
1242
1243 if m != nil {
1244 // We likely end up being as long as the modulus.
1245 z = z.make(len(m))
1246 }
1247 z = z.set(x)
1248 v := y[len(y)-1]
1249 // It's invalid for the most significant word to be zero, therefore we
1250 // will find a one bit.
1251 shift := leadingZeros(v) + 1
1252 v <<= shift
1253 var q nat
1254
1255 const mask = 1 << (_W - 1)
1256
1257 // We walk through the bits of the exponent one by one. Each time we
1258 // see a bit, we square, thus doubling the power. If the bit is a one,
1259 // we also multiply by x, thus adding one to the power.
1260
1261 w := _W - int(shift)
1262 for j := 0; j < w; j++ {
1263 z = z.mul(z, z)
1264
1265 if v&mask != 0 {
1266 z = z.mul(z, x)
1267 }
1268
1269 if m != nil {
1270 q, z = q.div(z, z, m)
1271 }
1272
1273 v <<= 1
1274 }
1275
1276 for i := len(y) - 2; i >= 0; i-- {
1277 v = y[i]
1278
1279 for j := 0; j < _W; j++ {
1280 z = z.mul(z, z)
1281
1282 if v&mask != 0 {
1283 z = z.mul(z, x)
1284 }
1285
1286 if m != nil {
1287 q, z = q.div(z, z, m)
1288 }
1289
1290 v <<= 1
1291 }
1292 }
1293
1294 return z.norm()
1295 }
1296
1297 // probablyPrime performs reps Miller-Rabin tests to check whether n is prime.
1298 // If it returns true, n is prime with probability 1 - 1/4^reps.
1299 // If it returns false, n is not prime.
1300 func (n nat) probablyPrime(reps int) bool {
1301 if len(n) == 0 {
1302 return false
1303 }
1304
1305 if len(n) == 1 {
1306 if n[0] < 2 {
1307 return false
1308 }
1309
1310 if n[0]%2 == 0 {
1311 return n[0] == 2
1312 }
1313
1314 // We have to exclude these cases because we reject all
1315 // multiples of these numbers below.
1316 switch n[0] {
1317 case 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53:
1318 return true
1319 }
1320 }
1321
1322 const primesProduct32 = 0xC0CFD797 // Π {p ∈ primes, 2 < p <= 29}
1323 const primesProduct64 = 0xE221F97C30E94E1D // Π {p ∈ primes, 2 < p <= 53}
1324
1325 var r Word
1326 switch _W {
1327 case 32:
1328 r = n.modW(primesProduct32)
1329 case 64:
1330 r = n.modW(primesProduct64 & _M)
1331 default:
1332 panic("Unknown word size")
1333 }
1334
1335 if r%3 == 0 || r%5 == 0 || r%7 == 0 || r%11 == 0 ||
1336 r%13 == 0 || r%17 == 0 || r%19 == 0 || r%23 == 0 || r%29 == 0 {
1337 return false
1338 }
1339
1340 if _W == 64 && (r%31 == 0 || r%37 == 0 || r%41 == 0 ||
1341 r%43 == 0 || r%47 == 0 || r%53 == 0) {
1342 return false
1343 }
1344
1345 nm1 := nat(nil).sub(n, natOne)
1346 // 1<<k * q = nm1;
1347 q, k := nm1.powersOfTwoDecompose()
1348
1349 nm3 := nat(nil).sub(nm1, natTwo)
1350 rand := rand.New(rand.NewSource(int64(n[0])))
1351
1352 var x, y, quotient nat
1353 nm3Len := nm3.bitLen()
1354
1355 NextRandom:
1356 for i := 0; i < reps; i++ {
1357 x = x.random(rand, nm3, nm3Len)
1358 x = x.add(x, natTwo)
1359 y = y.expNN(x, q, n)
1360 if y.cmp(natOne) == 0 || y.cmp(nm1) == 0 {
1361 continue
1362 }
1363 for j := 1; j < k; j++ {
1364 y = y.mul(y, y)
1365 quotient, y = quotient.div(y, y, n)
1366 if y.cmp(nm1) == 0 {
1367 continue NextRandom
1368 }
1369 if y.cmp(natOne) == 0 {
1370 return false
1371 }
1372 }
1373 return false
1374 }
1375
1376 return true
1377 }
1378
1379 // bytes writes the value of z into buf using big-endian encoding.
1380 // len(buf) must be >= len(z)*_S. The value of z is encoded in the
1381 // slice buf[i:]. The number i of unused bytes at the beginning of
1382 // buf is returned as result.
1383 func (z nat) bytes(buf []byte) (i int) {
1384 i = len(buf)
1385 for _, d := range z {
1386 for j := 0; j < _S; j++ {
1387 i--
1388 buf[i] = byte(d)
1389 d >>= 8
1390 }
1391 }
1392
1393 for i < len(buf) && buf[i] == 0 {
1394 i++
1395 }
1396
1397 return
1398 }
1399
1400 // setBytes interprets buf as the bytes of a big-endian unsigned
1401 // integer, sets z to that value, and returns z.
1402 func (z nat) setBytes(buf []byte) nat {
1403 z = z.make((len(buf) + _S - 1) / _S)
1404
1405 k := 0
1406 s := uint(0)
1407 var d Word
1408 for i := len(buf); i > 0; i-- {
1409 d |= Word(buf[i-1]) << s
1410 if s += 8; s == _S*8 {
1411 z[k] = d
1412 k++
1413 s = 0
1414 d = 0
1415 }
1416 }
1417 if k < len(z) {
1418 z[k] = d
1419 }
1420
1421 return z.norm()
1422 }