Source file src/pkg/math/big/nat.go
1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package big implements multi-precision arithmetic (big numbers). 6 // The following numeric types are supported: 7 // 8 // - Int signed integers 9 // - Rat rational numbers 10 // 11 // Methods are typically of the form: 12 // 13 // func (z *Int) Op(x, y *Int) *Int (similar for *Rat) 14 // 15 // and implement operations z = x Op y with the result as receiver; if it 16 // is one of the operands it may be overwritten (and its memory reused). 17 // To enable chaining of operations, the result is also returned. Methods 18 // returning a result other than *Int or *Rat take one of the operands as 19 // the receiver. 20 // 21 package big 22 23 // This file contains operations on unsigned multi-precision integers. 24 // These are the building blocks for the operations on signed integers 25 // and rationals. 26 27 import ( 28 "errors" 29 "io" 30 "math" 31 "math/rand" 32 "sync" 33 ) 34 35 // An unsigned integer x of the form 36 // 37 // x = x[n-1]*_B^(n-1) + x[n-2]*_B^(n-2) + ... + x[1]*_B + x[0] 38 // 39 // with 0 <= x[i] < _B and 0 <= i < n is stored in a slice of length n, 40 // with the digits x[i] as the slice elements. 41 // 42 // A number is normalized if the slice contains no leading 0 digits. 43 // During arithmetic operations, denormalized values may occur but are 44 // always normalized before returning the final result. The normalized 45 // representation of 0 is the empty or nil slice (length = 0). 46 // 47 type nat []Word 48 49 var ( 50 natOne = nat{1} 51 natTwo = nat{2} 52 natTen = nat{10} 53 ) 54 55 func (z nat) clear() { 56 for i := range z { 57 z[i] = 0 58 } 59 } 60 61 func (z nat) norm() nat { 62 i := len(z) 63 for i > 0 && z[i-1] == 0 { 64 i-- 65 } 66 return z[0:i] 67 } 68 69 func (z nat) make(n int) nat { 70 if n <= cap(z) { 71 return z[0:n] // reuse z 72 } 73 // Choosing a good value for e has significant performance impact 74 // because it increases the chance that a value can be reused. 75 const e = 4 // extra capacity 76 return make(nat, n, n+e) 77 } 78 79 func (z nat) setWord(x Word) nat { 80 if x == 0 { 81 return z.make(0) 82 } 83 z = z.make(1) 84 z[0] = x 85 return z 86 } 87 88 func (z nat) setUint64(x uint64) nat { 89 // single-digit values 90 if w := Word(x); uint64(w) == x { 91 return z.setWord(w) 92 } 93 94 // compute number of words n required to represent x 95 n := 0 96 for t := x; t > 0; t >>= _W { 97 n++ 98 } 99 100 // split x into n words 101 z = z.make(n) 102 for i := range z { 103 z[i] = Word(x & _M) 104 x >>= _W 105 } 106 107 return z 108 } 109 110 func (z nat) set(x nat) nat { 111 z = z.make(len(x)) 112 copy(z, x) 113 return z 114 } 115 116 func (z nat) add(x, y nat) nat { 117 m := len(x) 118 n := len(y) 119 120 switch { 121 case m < n: 122 return z.add(y, x) 123 case m == 0: 124 // n == 0 because m >= n; result is 0 125 return z.make(0) 126 case n == 0: 127 // result is x 128 return z.set(x) 129 } 130 // m > 0 131 132 z = z.make(m + 1) 133 c := addVV(z[0:n], x, y) 134 if m > n { 135 c = addVW(z[n:m], x[n:], c) 136 } 137 z[m] = c 138 139 return z.norm() 140 } 141 142 func (z nat) sub(x, y nat) nat { 143 m := len(x) 144 n := len(y) 145 146 switch { 147 case m < n: 148 panic("underflow") 149 case m == 0: 150 // n == 0 because m >= n; result is 0 151 return z.make(0) 152 case n == 0: 153 // result is x 154 return z.set(x) 155 } 156 // m > 0 157 158 z = z.make(m) 159 c := subVV(z[0:n], x, y) 160 if m > n { 161 c = subVW(z[n:], x[n:], c) 162 } 163 if c != 0 { 164 panic("underflow") 165 } 166 167 return z.norm() 168 } 169 170 func (x nat) cmp(y nat) (r int) { 171 m := len(x) 172 n := len(y) 173 if m != n || m == 0 { 174 switch { 175 case m < n: 176 r = -1 177 case m > n: 178 r = 1 179 } 180 return 181 } 182 183 i := m - 1 184 for i > 0 && x[i] == y[i] { 185 i-- 186 } 187 188 switch { 189 case x[i] < y[i]: 190 r = -1 191 case x[i] > y[i]: 192 r = 1 193 } 194 return 195 } 196 197 func (z nat) mulAddWW(x nat, y, r Word) nat { 198 m := len(x) 199 if m == 0 || y == 0 { 200 return z.setWord(r) // result is r 201 } 202 // m > 0 203 204 z = z.make(m + 1) 205 z[m] = mulAddVWW(z[0:m], x, y, r) 206 207 return z.norm() 208 } 209 210 // basicMul multiplies x and y and leaves the result in z. 211 // The (non-normalized) result is placed in z[0 : len(x) + len(y)]. 212 func basicMul(z, x, y nat) { 213 z[0 : len(x)+len(y)].clear() // initialize z 214 for i, d := range y { 215 if d != 0 { 216 z[len(x)+i] = addMulVVW(z[i:i+len(x)], x, d) 217 } 218 } 219 } 220 221 // Fast version of z[0:n+n>>1].add(z[0:n+n>>1], x[0:n]) w/o bounds checks. 222 // Factored out for readability - do not use outside karatsuba. 223 func karatsubaAdd(z, x nat, n int) { 224 if c := addVV(z[0:n], z, x); c != 0 { 225 addVW(z[n:n+n>>1], z[n:], c) 226 } 227 } 228 229 // Like karatsubaAdd, but does subtract. 230 func karatsubaSub(z, x nat, n int) { 231 if c := subVV(z[0:n], z, x); c != 0 { 232 subVW(z[n:n+n>>1], z[n:], c) 233 } 234 } 235 236 // Operands that are shorter than karatsubaThreshold are multiplied using 237 // "grade school" multiplication; for longer operands the Karatsuba algorithm 238 // is used. 239 var karatsubaThreshold int = 32 // computed by calibrate.go 240 241 // karatsuba multiplies x and y and leaves the result in z. 242 // Both x and y must have the same length n and n must be a 243 // power of 2. The result vector z must have len(z) >= 6*n. 244 // The (non-normalized) result is placed in z[0 : 2*n]. 245 func karatsuba(z, x, y nat) { 246 n := len(y) 247 248 // Switch to basic multiplication if numbers are odd or small. 249 // (n is always even if karatsubaThreshold is even, but be 250 // conservative) 251 if n&1 != 0 || n < karatsubaThreshold || n < 2 { 252 basicMul(z, x, y) 253 return 254 } 255 // n&1 == 0 && n >= karatsubaThreshold && n >= 2 256 257 // Karatsuba multiplication is based on the observation that 258 // for two numbers x and y with: 259 // 260 // x = x1*b + x0 261 // y = y1*b + y0 262 // 263 // the product x*y can be obtained with 3 products z2, z1, z0 264 // instead of 4: 265 // 266 // x*y = x1*y1*b*b + (x1*y0 + x0*y1)*b + x0*y0 267 // = z2*b*b + z1*b + z0 268 // 269 // with: 270 // 271 // xd = x1 - x0 272 // yd = y0 - y1 273 // 274 // z1 = xd*yd + z1 + z0 275 // = (x1-x0)*(y0 - y1) + z1 + z0 276 // = x1*y0 - x1*y1 - x0*y0 + x0*y1 + z1 + z0 277 // = x1*y0 - z1 - z0 + x0*y1 + z1 + z0 278 // = x1*y0 + x0*y1 279 280 // split x, y into "digits" 281 n2 := n >> 1 // n2 >= 1 282 x1, x0 := x[n2:], x[0:n2] // x = x1*b + y0 283 y1, y0 := y[n2:], y[0:n2] // y = y1*b + y0 284 285 // z is used for the result and temporary storage: 286 // 287 // 6*n 5*n 4*n 3*n 2*n 1*n 0*n 288 // z = [z2 copy|z0 copy| xd*yd | yd:xd | x1*y1 | x0*y0 ] 289 // 290 // For each recursive call of karatsuba, an unused slice of 291 // z is passed in that has (at least) half the length of the 292 // caller's z. 293 294 // compute z0 and z2 with the result "in place" in z 295 karatsuba(z, x0, y0) // z0 = x0*y0 296 karatsuba(z[n:], x1, y1) // z2 = x1*y1 297 298 // compute xd (or the negative value if underflow occurs) 299 s := 1 // sign of product xd*yd 300 xd := z[2*n : 2*n+n2] 301 if subVV(xd, x1, x0) != 0 { // x1-x0 302 s = -s 303 subVV(xd, x0, x1) // x0-x1 304 } 305 306 // compute yd (or the negative value if underflow occurs) 307 yd := z[2*n+n2 : 3*n] 308 if subVV(yd, y0, y1) != 0 { // y0-y1 309 s = -s 310 subVV(yd, y1, y0) // y1-y0 311 } 312 313 // p = (x1-x0)*(y0-y1) == x1*y0 - x1*y1 - x0*y0 + x0*y1 for s > 0 314 // p = (x0-x1)*(y0-y1) == x0*y0 - x0*y1 - x1*y0 + x1*y1 for s < 0 315 p := z[n*3:] 316 karatsuba(p, xd, yd) 317 318 // save original z2:z0 319 // (ok to use upper half of z since we're done recursing) 320 r := z[n*4:] 321 copy(r, z) 322 323 // add up all partial products 324 // 325 // 2*n n 0 326 // z = [ z2 | z0 ] 327 // + [ z0 ] 328 // + [ z2 ] 329 // + [ p ] 330 // 331 karatsubaAdd(z[n2:], r, n) 332 karatsubaAdd(z[n2:], r[n:], n) 333 if s > 0 { 334 karatsubaAdd(z[n2:], p, n) 335 } else { 336 karatsubaSub(z[n2:], p, n) 337 } 338 } 339 340 // alias returns true if x and y share the same base array. 341 func alias(x, y nat) bool { 342 return cap(x) > 0 && cap(y) > 0 && &x[0:cap(x)][cap(x)-1] == &y[0:cap(y)][cap(y)-1] 343 } 344 345 // addAt implements z += x*(1<<(_W*i)); z must be long enough. 346 // (we don't use nat.add because we need z to stay the same 347 // slice, and we don't need to normalize z after each addition) 348 func addAt(z, x nat, i int) { 349 if n := len(x); n > 0 { 350 if c := addVV(z[i:i+n], z[i:], x); c != 0 { 351 j := i + n 352 if j < len(z) { 353 addVW(z[j:], z[j:], c) 354 } 355 } 356 } 357 } 358 359 func max(x, y int) int { 360 if x > y { 361 return x 362 } 363 return y 364 } 365 366 // karatsubaLen computes an approximation to the maximum k <= n such that 367 // k = p<<i for a number p <= karatsubaThreshold and an i >= 0. Thus, the 368 // result is the largest number that can be divided repeatedly by 2 before 369 // becoming about the value of karatsubaThreshold. 370 func karatsubaLen(n int) int { 371 i := uint(0) 372 for n > karatsubaThreshold { 373 n >>= 1 374 i++ 375 } 376 return n << i 377 } 378 379 func (z nat) mul(x, y nat) nat { 380 m := len(x) 381 n := len(y) 382 383 switch { 384 case m < n: 385 return z.mul(y, x) 386 case m == 0 || n == 0: 387 return z.make(0) 388 case n == 1: 389 return z.mulAddWW(x, y[0], 0) 390 } 391 // m >= n > 1 392 393 // determine if z can be reused 394 if alias(z, x) || alias(z, y) { 395 z = nil // z is an alias for x or y - cannot reuse 396 } 397 398 // use basic multiplication if the numbers are small 399 if n < karatsubaThreshold || n < 2 { 400 z = z.make(m + n) 401 basicMul(z, x, y) 402 return z.norm() 403 } 404 // m >= n && n >= karatsubaThreshold && n >= 2 405 406 // determine Karatsuba length k such that 407 // 408 // x = x1*b + x0 409 // y = y1*b + y0 (and k <= len(y), which implies k <= len(x)) 410 // b = 1<<(_W*k) ("base" of digits xi, yi) 411 // 412 k := karatsubaLen(n) 413 // k <= n 414 415 // multiply x0 and y0 via Karatsuba 416 x0 := x[0:k] // x0 is not normalized 417 y0 := y[0:k] // y0 is not normalized 418 z = z.make(max(6*k, m+n)) // enough space for karatsuba of x0*y0 and full result of x*y 419 karatsuba(z, x0, y0) 420 z = z[0 : m+n] // z has final length but may be incomplete, upper portion is garbage 421 422 // If x1 and/or y1 are not 0, add missing terms to z explicitly: 423 // 424 // m+n 2*k 0 425 // z = [ ... | x0*y0 ] 426 // + [ x1*y1 ] 427 // + [ x1*y0 ] 428 // + [ x0*y1 ] 429 // 430 if k < n || m != n { 431 x1 := x[k:] // x1 is normalized because x is 432 y1 := y[k:] // y1 is normalized because y is 433 var t nat 434 t = t.mul(x1, y1) 435 copy(z[2*k:], t) 436 z[2*k+len(t):].clear() // upper portion of z is garbage 437 t = t.mul(x1, y0.norm()) 438 addAt(z, t, k) 439 t = t.mul(x0.norm(), y1) 440 addAt(z, t, k) 441 } 442 443 return z.norm() 444 } 445 446 // mulRange computes the product of all the unsigned integers in the 447 // range [a, b] inclusively. If a > b (empty range), the result is 1. 448 func (z nat) mulRange(a, b uint64) nat { 449 switch { 450 case a == 0: 451 // cut long ranges short (optimization) 452 return z.setUint64(0) 453 case a > b: 454 return z.setUint64(1) 455 case a == b: 456 return z.setUint64(a) 457 case a+1 == b: 458 return z.mul(nat(nil).setUint64(a), nat(nil).setUint64(b)) 459 } 460 m := (a + b) / 2 461 return z.mul(nat(nil).mulRange(a, m), nat(nil).mulRange(m+1, b)) 462 } 463 464 // q = (x-r)/y, with 0 <= r < y 465 func (z nat) divW(x nat, y Word) (q nat, r Word) { 466 m := len(x) 467 switch { 468 case y == 0: 469 panic("division by zero") 470 case y == 1: 471 q = z.set(x) // result is x 472 return 473 case m == 0: 474 q = z.make(0) // result is 0 475 return 476 } 477 // m > 0 478 z = z.make(m) 479 r = divWVW(z, 0, x, y) 480 q = z.norm() 481 return 482 } 483 484 func (z nat) div(z2, u, v nat) (q, r nat) { 485 if len(v) == 0 { 486 panic("division by zero") 487 } 488 489 if u.cmp(v) < 0 { 490 q = z.make(0) 491 r = z2.set(u) 492 return 493 } 494 495 if len(v) == 1 { 496 var rprime Word 497 q, rprime = z.divW(u, v[0]) 498 if rprime > 0 { 499 r = z2.make(1) 500 r[0] = rprime 501 } else { 502 r = z2.make(0) 503 } 504 return 505 } 506 507 q, r = z.divLarge(z2, u, v) 508 return 509 } 510 511 // q = (uIn-r)/v, with 0 <= r < y 512 // Uses z as storage for q, and u as storage for r if possible. 513 // See Knuth, Volume 2, section 4.3.1, Algorithm D. 514 // Preconditions: 515 // len(v) >= 2 516 // len(uIn) >= len(v) 517 func (z nat) divLarge(u, uIn, v nat) (q, r nat) { 518 n := len(v) 519 m := len(uIn) - n 520 521 // determine if z can be reused 522 // TODO(gri) should find a better solution - this if statement 523 // is very costly (see e.g. time pidigits -s -n 10000) 524 if alias(z, uIn) || alias(z, v) { 525 z = nil // z is an alias for uIn or v - cannot reuse 526 } 527 q = z.make(m + 1) 528 529 qhatv := make(nat, n+1) 530 if alias(u, uIn) || alias(u, v) { 531 u = nil // u is an alias for uIn or v - cannot reuse 532 } 533 u = u.make(len(uIn) + 1) 534 u.clear() 535 536 // D1. 537 shift := leadingZeros(v[n-1]) 538 if shift > 0 { 539 // do not modify v, it may be used by another goroutine simultaneously 540 v1 := make(nat, n) 541 shlVU(v1, v, shift) 542 v = v1 543 } 544 u[len(uIn)] = shlVU(u[0:len(uIn)], uIn, shift) 545 546 // D2. 547 for j := m; j >= 0; j-- { 548 // D3. 549 qhat := Word(_M) 550 if u[j+n] != v[n-1] { 551 var rhat Word 552 qhat, rhat = divWW(u[j+n], u[j+n-1], v[n-1]) 553 554 // x1 | x2 = q̂v_{n-2} 555 x1, x2 := mulWW(qhat, v[n-2]) 556 // test if q̂v_{n-2} > br̂ + u_{j+n-2} 557 for greaterThan(x1, x2, rhat, u[j+n-2]) { 558 qhat-- 559 prevRhat := rhat 560 rhat += v[n-1] 561 // v[n-1] >= 0, so this tests for overflow. 562 if rhat < prevRhat { 563 break 564 } 565 x1, x2 = mulWW(qhat, v[n-2]) 566 } 567 } 568 569 // D4. 570 qhatv[n] = mulAddVWW(qhatv[0:n], v, qhat, 0) 571 572 c := subVV(u[j:j+len(qhatv)], u[j:], qhatv) 573 if c != 0 { 574 c := addVV(u[j:j+n], u[j:], v) 575 u[j+n] += c 576 qhat-- 577 } 578 579 q[j] = qhat 580 } 581 582 q = q.norm() 583 shrVU(u, u, shift) 584 r = u.norm() 585 586 return q, r 587 } 588 589 // Length of x in bits. x must be normalized. 590 func (x nat) bitLen() int { 591 if i := len(x) - 1; i >= 0 { 592 return i*_W + bitLen(x[i]) 593 } 594 return 0 595 } 596 597 // MaxBase is the largest number base accepted for string conversions. 598 const MaxBase = 'z' - 'a' + 10 + 1 // = hexValue('z') + 1 599 600 func hexValue(ch rune) Word { 601 d := int(MaxBase + 1) // illegal base 602 switch { 603 case '0' <= ch && ch <= '9': 604 d = int(ch - '0') 605 case 'a' <= ch && ch <= 'z': 606 d = int(ch - 'a' + 10) 607 case 'A' <= ch && ch <= 'Z': 608 d = int(ch - 'A' + 10) 609 } 610 return Word(d) 611 } 612 613 // scan sets z to the natural number corresponding to the longest possible prefix 614 // read from r representing an unsigned integer in a given conversion base. 615 // It returns z, the actual conversion base used, and an error, if any. In the 616 // error case, the value of z is undefined. The syntax follows the syntax of 617 // unsigned integer literals in Go. 618 // 619 // The base argument must be 0 or a value from 2 through MaxBase. If the base 620 // is 0, the string prefix determines the actual conversion base. A prefix of 621 // ``0x'' or ``0X'' selects base 16; the ``0'' prefix selects base 8, and a 622 // ``0b'' or ``0B'' prefix selects base 2. Otherwise the selected base is 10. 623 // 624 func (z nat) scan(r io.RuneScanner, base int) (nat, int, error) { 625 // reject illegal bases 626 if base < 0 || base == 1 || MaxBase < base { 627 return z, 0, errors.New("illegal number base") 628 } 629 630 // one char look-ahead 631 ch, _, err := r.ReadRune() 632 if err != nil { 633 return z, 0, err 634 } 635 636 // determine base if necessary 637 b := Word(base) 638 if base == 0 { 639 b = 10 640 if ch == '0' { 641 switch ch, _, err = r.ReadRune(); err { 642 case nil: 643 b = 8 644 switch ch { 645 case 'x', 'X': 646 b = 16 647 case 'b', 'B': 648 b = 2 649 } 650 if b == 2 || b == 16 { 651 if ch, _, err = r.ReadRune(); err != nil { 652 return z, 0, err 653 } 654 } 655 case io.EOF: 656 return z.make(0), 10, nil 657 default: 658 return z, 10, err 659 } 660 } 661 } 662 663 // convert string 664 // - group as many digits d as possible together into a "super-digit" dd with "super-base" bb 665 // - only when bb does not fit into a word anymore, do a full number mulAddWW using bb and dd 666 z = z.make(0) 667 bb := Word(1) 668 dd := Word(0) 669 for max := _M / b; ; { 670 d := hexValue(ch) 671 if d >= b { 672 r.UnreadRune() // ch does not belong to number anymore 673 break 674 } 675 676 if bb <= max { 677 bb *= b 678 dd = dd*b + d 679 } else { 680 // bb * b would overflow 681 z = z.mulAddWW(z, bb, dd) 682 bb = b 683 dd = d 684 } 685 686 if ch, _, err = r.ReadRune(); err != nil { 687 if err != io.EOF { 688 return z, int(b), err 689 } 690 break 691 } 692 } 693 694 switch { 695 case bb > 1: 696 // there was at least one mantissa digit 697 z = z.mulAddWW(z, bb, dd) 698 case base == 0 && b == 8: 699 // there was only the octal prefix 0 (possibly followed by digits > 7); 700 // return base 10, not 8 701 return z, 10, nil 702 case base != 0 || b != 8: 703 // there was neither a mantissa digit nor the octal prefix 0 704 return z, int(b), errors.New("syntax error scanning number") 705 } 706 707 return z.norm(), int(b), nil 708 } 709 710 // Character sets for string conversion. 711 const ( 712 lowercaseDigits = "0123456789abcdefghijklmnopqrstuvwxyz" 713 uppercaseDigits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" 714 ) 715 716 // decimalString returns a decimal representation of x. 717 // It calls x.string with the charset "0123456789". 718 func (x nat) decimalString() string { 719 return x.string(lowercaseDigits[0:10]) 720 } 721 722 // string converts x to a string using digits from a charset; a digit with 723 // value d is represented by charset[d]. The conversion base is determined 724 // by len(charset), which must be >= 2 and <= 256. 725 func (x nat) string(charset string) string { 726 b := Word(len(charset)) 727 728 // special cases 729 switch { 730 case b < 2 || MaxBase > 256: 731 panic("illegal base") 732 case len(x) == 0: 733 return string(charset[0]) 734 } 735 736 // allocate buffer for conversion 737 i := int(float64(x.bitLen())/math.Log2(float64(b))) + 1 // off by one at most 738 s := make([]byte, i) 739 740 // convert power of two and non power of two bases separately 741 if b == b&-b { 742 // shift is base-b digit size in bits 743 shift := uint(trailingZeroBits(b)) // shift > 0 because b >= 2 744 mask := Word(1)<<shift - 1 745 w := x[0] 746 nbits := uint(_W) // number of unprocessed bits in w 747 748 // convert less-significant words 749 for k := 1; k < len(x); k++ { 750 // convert full digits 751 for nbits >= shift { 752 i-- 753 s[i] = charset[w&mask] 754 w >>= shift 755 nbits -= shift 756 } 757 758 // convert any partial leading digit and advance to next word 759 if nbits == 0 { 760 // no partial digit remaining, just advance 761 w = x[k] 762 nbits = _W 763 } else { 764 // partial digit in current (k-1) and next (k) word 765 w |= x[k] << nbits 766 i-- 767 s[i] = charset[w&mask] 768 769 // advance 770 w = x[k] >> (shift - nbits) 771 nbits = _W - (shift - nbits) 772 } 773 } 774 775 // convert digits of most-significant word (omit leading zeros) 776 for nbits >= 0 && w != 0 { 777 i-- 778 s[i] = charset[w&mask] 779 w >>= shift 780 nbits -= shift 781 } 782 783 } else { 784 // determine "big base"; i.e., the largest possible value bb 785 // that is a power of base b and still fits into a Word 786 // (as in 10^19 for 19 decimal digits in a 64bit Word) 787 bb := b // big base is b**ndigits 788 ndigits := 1 // number of base b digits 789 for max := Word(_M / b); bb <= max; bb *= b { 790 ndigits++ // maximize ndigits where bb = b**ndigits, bb <= _M 791 } 792 793 // construct table of successive squares of bb*leafSize to use in subdivisions 794 // result (table != nil) <=> (len(x) > leafSize > 0) 795 table := divisors(len(x), b, ndigits, bb) 796 797 // preserve x, create local copy for use by convertWords 798 q := nat(nil).set(x) 799 800 // convert q to string s in base b 801 q.convertWords(s, charset, b, ndigits, bb, table) 802 803 // strip leading zeros 804 // (x != 0; thus s must contain at least one non-zero digit 805 // and the loop will terminate) 806 i = 0 807 for zero := charset[0]; s[i] == zero; { 808 i++ 809 } 810 } 811 812 return string(s[i:]) 813 } 814 815 // Convert words of q to base b digits in s. If q is large, it is recursively "split in half" 816 // by nat/nat division using tabulated divisors. Otherwise, it is converted iteratively using 817 // repeated nat/Word divison. 818 // 819 // The iterative method processes n Words by n divW() calls, each of which visits every Word in the 820 // incrementally shortened q for a total of n + (n-1) + (n-2) ... + 2 + 1, or n(n+1)/2 divW()'s. 821 // Recursive conversion divides q by its approximate square root, yielding two parts, each half 822 // the size of q. Using the iterative method on both halves means 2 * (n/2)(n/2 + 1)/2 divW()'s 823 // plus the expensive long div(). Asymptotically, the ratio is favorable at 1/2 the divW()'s, and 824 // is made better by splitting the subblocks recursively. Best is to split blocks until one more 825 // split would take longer (because of the nat/nat div()) than the twice as many divW()'s of the 826 // iterative approach. This threshold is represented by leafSize. Benchmarking of leafSize in the 827 // range 2..64 shows that values of 8 and 16 work well, with a 4x speedup at medium lengths and 828 // ~30x for 20000 digits. Use nat_test.go's BenchmarkLeafSize tests to optimize leafSize for 829 // specific hardware. 830 // 831 func (q nat) convertWords(s []byte, charset string, b Word, ndigits int, bb Word, table []divisor) { 832 // split larger blocks recursively 833 if table != nil { 834 // len(q) > leafSize > 0 835 var r nat 836 index := len(table) - 1 837 for len(q) > leafSize { 838 // find divisor close to sqrt(q) if possible, but in any case < q 839 maxLength := q.bitLen() // ~= log2 q, or at of least largest possible q of this bit length 840 minLength := maxLength >> 1 // ~= log2 sqrt(q) 841 for index > 0 && table[index-1].nbits > minLength { 842 index-- // desired 843 } 844 if table[index].nbits >= maxLength && table[index].bbb.cmp(q) >= 0 { 845 index-- 846 if index < 0 { 847 panic("internal inconsistency") 848 } 849 } 850 851 // split q into the two digit number (q'*bbb + r) to form independent subblocks 852 q, r = q.div(r, q, table[index].bbb) 853 854 // convert subblocks and collect results in s[:h] and s[h:] 855 h := len(s) - table[index].ndigits 856 r.convertWords(s[h:], charset, b, ndigits, bb, table[0:index]) 857 s = s[:h] // == q.convertWords(s, charset, b, ndigits, bb, table[0:index+1]) 858 } 859 } 860 861 // having split any large blocks now process the remaining (small) block iteratively 862 i := len(s) 863 var r Word 864 if b == 10 { 865 // hard-coding for 10 here speeds this up by 1.25x (allows for / and % by constants) 866 for len(q) > 0 { 867 // extract least significant, base bb "digit" 868 q, r = q.divW(q, bb) 869 for j := 0; j < ndigits && i > 0; j++ { 870 i-- 871 // avoid % computation since r%10 == r - int(r/10)*10; 872 // this appears to be faster for BenchmarkString10000Base10 873 // and smaller strings (but a bit slower for larger ones) 874 t := r / 10 875 s[i] = charset[r-t<<3-t-t] // TODO(gri) replace w/ t*10 once compiler produces better code 876 r = t 877 } 878 } 879 } else { 880 for len(q) > 0 { 881 // extract least significant, base bb "digit" 882 q, r = q.divW(q, bb) 883 for j := 0; j < ndigits && i > 0; j++ { 884 i-- 885 s[i] = charset[r%b] 886 r /= b 887 } 888 } 889 } 890 891 // prepend high-order zeroes 892 zero := charset[0] 893 for i > 0 { // while need more leading zeroes 894 i-- 895 s[i] = zero 896 } 897 } 898 899 // Split blocks greater than leafSize Words (or set to 0 to disable recursive conversion) 900 // Benchmark and configure leafSize using: go test -bench="Leaf" 901 // 8 and 16 effective on 3.0 GHz Xeon "Clovertown" CPU (128 byte cache lines) 902 // 8 and 16 effective on 2.66 GHz Core 2 Duo "Penryn" CPU 903 var leafSize int = 8 // number of Word-size binary values treat as a monolithic block 904 905 type divisor struct { 906 bbb nat // divisor 907 nbits int // bit length of divisor (discounting leading zeroes) ~= log2(bbb) 908 ndigits int // digit length of divisor in terms of output base digits 909 } 910 911 var cacheBase10 [64]divisor // cached divisors for base 10 912 var cacheLock sync.Mutex // protects cacheBase10 913 914 // expWW computes x**y 915 func (z nat) expWW(x, y Word) nat { 916 return z.expNN(nat(nil).setWord(x), nat(nil).setWord(y), nil) 917 } 918 919 // construct table of powers of bb*leafSize to use in subdivisions 920 func divisors(m int, b Word, ndigits int, bb Word) []divisor { 921 // only compute table when recursive conversion is enabled and x is large 922 if leafSize == 0 || m <= leafSize { 923 return nil 924 } 925 926 // determine k where (bb**leafSize)**(2**k) >= sqrt(x) 927 k := 1 928 for words := leafSize; words < m>>1 && k < len(cacheBase10); words <<= 1 { 929 k++ 930 } 931 932 // create new table of divisors or extend and reuse existing table as appropriate 933 var table []divisor 934 var cached bool 935 switch b { 936 case 10: 937 table = cacheBase10[0:k] // reuse old table for this conversion 938 cached = true 939 default: 940 table = make([]divisor, k) // new table for this conversion 941 } 942 943 // extend table 944 if table[k-1].ndigits == 0 { 945 if cached { 946 cacheLock.Lock() // begin critical section 947 } 948 949 // add new entries as needed 950 var larger nat 951 for i := 0; i < k; i++ { 952 if table[i].ndigits == 0 { 953 if i == 0 { 954 table[i].bbb = nat(nil).expWW(bb, Word(leafSize)) 955 table[i].ndigits = ndigits * leafSize 956 } else { 957 table[i].bbb = nat(nil).mul(table[i-1].bbb, table[i-1].bbb) 958 table[i].ndigits = 2 * table[i-1].ndigits 959 } 960 961 // optimization: exploit aggregated extra bits in macro blocks 962 larger = nat(nil).set(table[i].bbb) 963 for mulAddVWW(larger, larger, b, 0) == 0 { 964 table[i].bbb = table[i].bbb.set(larger) 965 table[i].ndigits++ 966 } 967 968 table[i].nbits = table[i].bbb.bitLen() 969 } 970 } 971 972 if cached { 973 cacheLock.Unlock() // end critical section 974 } 975 } 976 977 return table 978 } 979 980 const deBruijn32 = 0x077CB531 981 982 var deBruijn32Lookup = []byte{ 983 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, 984 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9, 985 } 986 987 const deBruijn64 = 0x03f79d71b4ca8b09 988 989 var deBruijn64Lookup = []byte{ 990 0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4, 991 62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5, 992 63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11, 993 54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6, 994 } 995 996 // trailingZeroBits returns the number of consecutive zero bits on the right 997 // side of the given Word. 998 // See Knuth, volume 4, section 7.3.1 999 func trailingZeroBits(x Word) int { 1000 // x & -x leaves only the right-most bit set in the word. Let k be the 1001 // index of that bit. Since only a single bit is set, the value is two 1002 // to the power of k. Multiplying by a power of two is equivalent to 1003 // left shifting, in this case by k bits. The de Bruijn constant is 1004 // such that all six bit, consecutive substrings are distinct. 1005 // Therefore, if we have a left shifted version of this constant we can 1006 // find by how many bits it was shifted by looking at which six bit 1007 // substring ended up at the top of the word. 1008 switch _W { 1009 case 32: 1010 return int(deBruijn32Lookup[((x&-x)*deBruijn32)>>27]) 1011 case 64: 1012 return int(deBruijn64Lookup[((x&-x)*(deBruijn64&_M))>>58]) 1013 default: 1014 panic("Unknown word size") 1015 } 1016 1017 return 0 1018 } 1019 1020 // z = x << s 1021 func (z nat) shl(x nat, s uint) nat { 1022 m := len(x) 1023 if m == 0 { 1024 return z.make(0) 1025 } 1026 // m > 0 1027 1028 n := m + int(s/_W) 1029 z = z.make(n + 1) 1030 z[n] = shlVU(z[n-m:n], x, s%_W) 1031 z[0 : n-m].clear() 1032 1033 return z.norm() 1034 } 1035 1036 // z = x >> s 1037 func (z nat) shr(x nat, s uint) nat { 1038 m := len(x) 1039 n := m - int(s/_W) 1040 if n <= 0 { 1041 return z.make(0) 1042 } 1043 // n > 0 1044 1045 z = z.make(n) 1046 shrVU(z, x[m-n:], s%_W) 1047 1048 return z.norm() 1049 } 1050 1051 func (z nat) setBit(x nat, i uint, b uint) nat { 1052 j := int(i / _W) 1053 m := Word(1) << (i % _W) 1054 n := len(x) 1055 switch b { 1056 case 0: 1057 z = z.make(n) 1058 copy(z, x) 1059 if j >= n { 1060 // no need to grow 1061 return z 1062 } 1063 z[j] &^= m 1064 return z.norm() 1065 case 1: 1066 if j >= n { 1067 z = z.make(j + 1) 1068 z[n:].clear() 1069 } else { 1070 z = z.make(n) 1071 } 1072 copy(z, x) 1073 z[j] |= m 1074 // no need to normalize 1075 return z 1076 } 1077 panic("set bit is not 0 or 1") 1078 } 1079 1080 func (z nat) bit(i uint) uint { 1081 j := int(i / _W) 1082 if j >= len(z) { 1083 return 0 1084 } 1085 return uint(z[j] >> (i % _W) & 1) 1086 } 1087 1088 func (z nat) and(x, y nat) nat { 1089 m := len(x) 1090 n := len(y) 1091 if m > n { 1092 m = n 1093 } 1094 // m <= n 1095 1096 z = z.make(m) 1097 for i := 0; i < m; i++ { 1098 z[i] = x[i] & y[i] 1099 } 1100 1101 return z.norm() 1102 } 1103 1104 func (z nat) andNot(x, y nat) nat { 1105 m := len(x) 1106 n := len(y) 1107 if n > m { 1108 n = m 1109 } 1110 // m >= n 1111 1112 z = z.make(m) 1113 for i := 0; i < n; i++ { 1114 z[i] = x[i] &^ y[i] 1115 } 1116 copy(z[n:m], x[n:m]) 1117 1118 return z.norm() 1119 } 1120 1121 func (z nat) or(x, y nat) nat { 1122 m := len(x) 1123 n := len(y) 1124 s := x 1125 if m < n { 1126 n, m = m, n 1127 s = y 1128 } 1129 // m >= n 1130 1131 z = z.make(m) 1132 for i := 0; i < n; i++ { 1133 z[i] = x[i] | y[i] 1134 } 1135 copy(z[n:m], s[n:m]) 1136 1137 return z.norm() 1138 } 1139 1140 func (z nat) xor(x, y nat) nat { 1141 m := len(x) 1142 n := len(y) 1143 s := x 1144 if m < n { 1145 n, m = m, n 1146 s = y 1147 } 1148 // m >= n 1149 1150 z = z.make(m) 1151 for i := 0; i < n; i++ { 1152 z[i] = x[i] ^ y[i] 1153 } 1154 copy(z[n:m], s[n:m]) 1155 1156 return z.norm() 1157 } 1158 1159 // greaterThan returns true iff (x1<<_W + x2) > (y1<<_W + y2) 1160 func greaterThan(x1, x2, y1, y2 Word) bool { 1161 return x1 > y1 || x1 == y1 && x2 > y2 1162 } 1163 1164 // modW returns x % d. 1165 func (x nat) modW(d Word) (r Word) { 1166 // TODO(agl): we don't actually need to store the q value. 1167 var q nat 1168 q = q.make(len(x)) 1169 return divWVW(q, 0, x, d) 1170 } 1171 1172 // powersOfTwoDecompose finds q and k with x = q * 1<<k and q is odd, or q and k are 0. 1173 func (x nat) powersOfTwoDecompose() (q nat, k int) { 1174 if len(x) == 0 { 1175 return x, 0 1176 } 1177 1178 // One of the words must be non-zero by definition, 1179 // so this loop will terminate with i < len(x), and 1180 // i is the number of 0 words. 1181 i := 0 1182 for x[i] == 0 { 1183 i++ 1184 } 1185 n := trailingZeroBits(x[i]) // x[i] != 0 1186 1187 q = make(nat, len(x)-i) 1188 shrVU(q, x[i:], uint(n)) 1189 1190 q = q.norm() 1191 k = i*_W + n 1192 return 1193 } 1194 1195 // random creates a random integer in [0..limit), using the space in z if 1196 // possible. n is the bit length of limit. 1197 func (z nat) random(rand *rand.Rand, limit nat, n int) nat { 1198 if alias(z, limit) { 1199 z = nil // z is an alias for limit - cannot reuse 1200 } 1201 z = z.make(len(limit)) 1202 1203 bitLengthOfMSW := uint(n % _W) 1204 if bitLengthOfMSW == 0 { 1205 bitLengthOfMSW = _W 1206 } 1207 mask := Word((1 << bitLengthOfMSW) - 1) 1208 1209 for { 1210 for i := range z { 1211 switch _W { 1212 case 32: 1213 z[i] = Word(rand.Uint32()) 1214 case 64: 1215 z[i] = Word(rand.Uint32()) | Word(rand.Uint32())<<32 1216 } 1217 } 1218 1219 z[len(limit)-1] &= mask 1220 1221 if z.cmp(limit) < 0 { 1222 break 1223 } 1224 } 1225 1226 return z.norm() 1227 } 1228 1229 // If m != nil, expNN calculates x**y mod m. Otherwise it calculates x**y. It 1230 // reuses the storage of z if possible. 1231 func (z nat) expNN(x, y, m nat) nat { 1232 if alias(z, x) || alias(z, y) { 1233 // We cannot allow in place modification of x or y. 1234 z = nil 1235 } 1236 1237 if len(y) == 0 { 1238 z = z.make(1) 1239 z[0] = 1 1240 return z 1241 } 1242 1243 if m != nil { 1244 // We likely end up being as long as the modulus. 1245 z = z.make(len(m)) 1246 } 1247 z = z.set(x) 1248 v := y[len(y)-1] 1249 // It's invalid for the most significant word to be zero, therefore we 1250 // will find a one bit. 1251 shift := leadingZeros(v) + 1 1252 v <<= shift 1253 var q nat 1254 1255 const mask = 1 << (_W - 1) 1256 1257 // We walk through the bits of the exponent one by one. Each time we 1258 // see a bit, we square, thus doubling the power. If the bit is a one, 1259 // we also multiply by x, thus adding one to the power. 1260 1261 w := _W - int(shift) 1262 for j := 0; j < w; j++ { 1263 z = z.mul(z, z) 1264 1265 if v&mask != 0 { 1266 z = z.mul(z, x) 1267 } 1268 1269 if m != nil { 1270 q, z = q.div(z, z, m) 1271 } 1272 1273 v <<= 1 1274 } 1275 1276 for i := len(y) - 2; i >= 0; i-- { 1277 v = y[i] 1278 1279 for j := 0; j < _W; j++ { 1280 z = z.mul(z, z) 1281 1282 if v&mask != 0 { 1283 z = z.mul(z, x) 1284 } 1285 1286 if m != nil { 1287 q, z = q.div(z, z, m) 1288 } 1289 1290 v <<= 1 1291 } 1292 } 1293 1294 return z.norm() 1295 } 1296 1297 // probablyPrime performs reps Miller-Rabin tests to check whether n is prime. 1298 // If it returns true, n is prime with probability 1 - 1/4^reps. 1299 // If it returns false, n is not prime. 1300 func (n nat) probablyPrime(reps int) bool { 1301 if len(n) == 0 { 1302 return false 1303 } 1304 1305 if len(n) == 1 { 1306 if n[0] < 2 { 1307 return false 1308 } 1309 1310 if n[0]%2 == 0 { 1311 return n[0] == 2 1312 } 1313 1314 // We have to exclude these cases because we reject all 1315 // multiples of these numbers below. 1316 switch n[0] { 1317 case 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53: 1318 return true 1319 } 1320 } 1321 1322 const primesProduct32 = 0xC0CFD797 // Π {p ∈ primes, 2 < p <= 29} 1323 const primesProduct64 = 0xE221F97C30E94E1D // Π {p ∈ primes, 2 < p <= 53} 1324 1325 var r Word 1326 switch _W { 1327 case 32: 1328 r = n.modW(primesProduct32) 1329 case 64: 1330 r = n.modW(primesProduct64 & _M) 1331 default: 1332 panic("Unknown word size") 1333 } 1334 1335 if r%3 == 0 || r%5 == 0 || r%7 == 0 || r%11 == 0 || 1336 r%13 == 0 || r%17 == 0 || r%19 == 0 || r%23 == 0 || r%29 == 0 { 1337 return false 1338 } 1339 1340 if _W == 64 && (r%31 == 0 || r%37 == 0 || r%41 == 0 || 1341 r%43 == 0 || r%47 == 0 || r%53 == 0) { 1342 return false 1343 } 1344 1345 nm1 := nat(nil).sub(n, natOne) 1346 // 1<<k * q = nm1; 1347 q, k := nm1.powersOfTwoDecompose() 1348 1349 nm3 := nat(nil).sub(nm1, natTwo) 1350 rand := rand.New(rand.NewSource(int64(n[0]))) 1351 1352 var x, y, quotient nat 1353 nm3Len := nm3.bitLen() 1354 1355 NextRandom: 1356 for i := 0; i < reps; i++ { 1357 x = x.random(rand, nm3, nm3Len) 1358 x = x.add(x, natTwo) 1359 y = y.expNN(x, q, n) 1360 if y.cmp(natOne) == 0 || y.cmp(nm1) == 0 { 1361 continue 1362 } 1363 for j := 1; j < k; j++ { 1364 y = y.mul(y, y) 1365 quotient, y = quotient.div(y, y, n) 1366 if y.cmp(nm1) == 0 { 1367 continue NextRandom 1368 } 1369 if y.cmp(natOne) == 0 { 1370 return false 1371 } 1372 } 1373 return false 1374 } 1375 1376 return true 1377 } 1378 1379 // bytes writes the value of z into buf using big-endian encoding. 1380 // len(buf) must be >= len(z)*_S. The value of z is encoded in the 1381 // slice buf[i:]. The number i of unused bytes at the beginning of 1382 // buf is returned as result. 1383 func (z nat) bytes(buf []byte) (i int) { 1384 i = len(buf) 1385 for _, d := range z { 1386 for j := 0; j < _S; j++ { 1387 i-- 1388 buf[i] = byte(d) 1389 d >>= 8 1390 } 1391 } 1392 1393 for i < len(buf) && buf[i] == 0 { 1394 i++ 1395 } 1396 1397 return 1398 } 1399 1400 // setBytes interprets buf as the bytes of a big-endian unsigned 1401 // integer, sets z to that value, and returns z. 1402 func (z nat) setBytes(buf []byte) nat { 1403 z = z.make((len(buf) + _S - 1) / _S) 1404 1405 k := 0 1406 s := uint(0) 1407 var d Word 1408 for i := len(buf); i > 0; i-- { 1409 d |= Word(buf[i-1]) << s 1410 if s += 8; s == _S*8 { 1411 z[k] = d 1412 k++ 1413 s = 0 1414 d = 0 1415 } 1416 } 1417 if k < len(z) { 1418 z[k] = d 1419 } 1420 1421 return z.norm() 1422 }