Source file src/pkg/compress/flate/inflate.go
1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package flate implements the DEFLATE compressed data format, described in 6 // RFC 1951. The gzip and zlib packages implement access to DEFLATE-based file 7 // formats. 8 package flate 9 10 import ( 11 "bufio" 12 "io" 13 "strconv" 14 ) 15 16 const ( 17 maxCodeLen = 16 // max length of Huffman code 18 maxHist = 32768 // max history required 19 maxLit = 286 20 maxDist = 32 21 numCodes = 19 // number of codes in Huffman meta-code 22 ) 23 24 // A CorruptInputError reports the presence of corrupt input at a given offset. 25 type CorruptInputError int64 26 27 func (e CorruptInputError) Error() string { 28 return "flate: corrupt input before offset " + strconv.FormatInt(int64(e), 10) 29 } 30 31 // An InternalError reports an error in the flate code itself. 32 type InternalError string 33 34 func (e InternalError) Error() string { return "flate: internal error: " + string(e) } 35 36 // A ReadError reports an error encountered while reading input. 37 type ReadError struct { 38 Offset int64 // byte offset where error occurred 39 Err error // error returned by underlying Read 40 } 41 42 func (e *ReadError) Error() string { 43 return "flate: read error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error() 44 } 45 46 // A WriteError reports an error encountered while writing output. 47 type WriteError struct { 48 Offset int64 // byte offset where error occurred 49 Err error // error returned by underlying Write 50 } 51 52 func (e *WriteError) Error() string { 53 return "flate: write error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error() 54 } 55 56 // Huffman decoder is based on 57 // J. Brian Connell, ``A Huffman-Shannon-Fano Code,'' 58 // Proceedings of the IEEE, 61(7) (July 1973), pp 1046-1047. 59 type huffmanDecoder struct { 60 // min, max code length 61 min, max int 62 63 // limit[i] = largest code word of length i 64 // Given code v of length n, 65 // need more bits if v > limit[n]. 66 limit [maxCodeLen + 1]int 67 68 // base[i] = smallest code word of length i - seq number 69 base [maxCodeLen + 1]int 70 71 // codes[seq number] = output code. 72 // Given code v of length n, value is 73 // codes[v - base[n]]. 74 codes []int 75 } 76 77 // Initialize Huffman decoding tables from array of code lengths. 78 func (h *huffmanDecoder) init(bits []int) bool { 79 // Count number of codes of each length, 80 // compute min and max length. 81 var count [maxCodeLen + 1]int 82 var min, max int 83 for _, n := range bits { 84 if n == 0 { 85 continue 86 } 87 if min == 0 || n < min { 88 min = n 89 } 90 if n > max { 91 max = n 92 } 93 count[n]++ 94 } 95 if max == 0 { 96 return false 97 } 98 99 h.min = min 100 h.max = max 101 102 // For each code range, compute 103 // nextcode (first code of that length), 104 // limit (last code of that length), and 105 // base (offset from first code to sequence number). 106 code := 0 107 seq := 0 108 var nextcode [maxCodeLen]int 109 for i := min; i <= max; i++ { 110 n := count[i] 111 nextcode[i] = code 112 h.base[i] = code - seq 113 code += n 114 seq += n 115 h.limit[i] = code - 1 116 code <<= 1 117 } 118 119 // Make array mapping sequence numbers to codes. 120 if len(h.codes) < len(bits) { 121 h.codes = make([]int, len(bits)) 122 } 123 for i, n := range bits { 124 if n == 0 { 125 continue 126 } 127 code := nextcode[n] 128 nextcode[n]++ 129 seq := code - h.base[n] 130 h.codes[seq] = i 131 } 132 return true 133 } 134 135 // Hard-coded Huffman tables for DEFLATE algorithm. 136 // See RFC 1951, section 3.2.6. 137 var fixedHuffmanDecoder = huffmanDecoder{ 138 7, 9, 139 [maxCodeLen + 1]int{7: 23, 199, 511}, 140 [maxCodeLen + 1]int{7: 0, 24, 224}, 141 []int{ 142 // length 7: 256-279 143 256, 257, 258, 259, 260, 261, 262, 144 263, 264, 265, 266, 267, 268, 269, 145 270, 271, 272, 273, 274, 275, 276, 146 277, 278, 279, 147 148 // length 8: 0-143 149 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 150 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 151 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 152 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 153 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 154 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 155 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 156 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 157 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 158 92, 93, 94, 95, 96, 97, 98, 99, 100, 159 101, 102, 103, 104, 105, 106, 107, 108, 160 109, 110, 111, 112, 113, 114, 115, 116, 161 117, 118, 119, 120, 121, 122, 123, 124, 162 125, 126, 127, 128, 129, 130, 131, 132, 163 133, 134, 135, 136, 137, 138, 139, 140, 164 141, 142, 143, 165 166 // length 8: 280-287 167 280, 281, 282, 283, 284, 285, 286, 287, 168 169 // length 9: 144-255 170 144, 145, 146, 147, 148, 149, 150, 151, 171 152, 153, 154, 155, 156, 157, 158, 159, 172 160, 161, 162, 163, 164, 165, 166, 167, 173 168, 169, 170, 171, 172, 173, 174, 175, 174 176, 177, 178, 179, 180, 181, 182, 183, 175 184, 185, 186, 187, 188, 189, 190, 191, 176 192, 193, 194, 195, 196, 197, 198, 199, 177 200, 201, 202, 203, 204, 205, 206, 207, 178 208, 209, 210, 211, 212, 213, 214, 215, 179 216, 217, 218, 219, 220, 221, 222, 223, 180 224, 225, 226, 227, 228, 229, 230, 231, 181 232, 233, 234, 235, 236, 237, 238, 239, 182 240, 241, 242, 243, 244, 245, 246, 247, 183 248, 249, 250, 251, 252, 253, 254, 255, 184 }, 185 } 186 187 // The actual read interface needed by NewReader. 188 // If the passed in io.Reader does not also have ReadByte, 189 // the NewReader will introduce its own buffering. 190 type Reader interface { 191 io.Reader 192 ReadByte() (c byte, err error) 193 } 194 195 // Decompress state. 196 type decompressor struct { 197 // Input source. 198 r Reader 199 roffset int64 200 woffset int64 201 202 // Input bits, in top of b. 203 b uint32 204 nb uint 205 206 // Huffman decoders for literal/length, distance. 207 h1, h2 huffmanDecoder 208 209 // Length arrays used to define Huffman codes. 210 bits [maxLit + maxDist]int 211 codebits [numCodes]int 212 213 // Output history, buffer. 214 hist [maxHist]byte 215 hp int // current output position in buffer 216 hw int // have written hist[0:hw] already 217 hfull bool // buffer has filled at least once 218 219 // Temporary buffer (avoids repeated allocation). 220 buf [4]byte 221 222 // Next step in the decompression, 223 // and decompression state. 224 step func(*decompressor) 225 final bool 226 err error 227 toRead []byte 228 hl, hd *huffmanDecoder 229 copyLen int 230 copyDist int 231 } 232 233 func (f *decompressor) nextBlock() { 234 if f.final { 235 if f.hw != f.hp { 236 f.flush((*decompressor).nextBlock) 237 return 238 } 239 f.err = io.EOF 240 return 241 } 242 for f.nb < 1+2 { 243 if f.err = f.moreBits(); f.err != nil { 244 return 245 } 246 } 247 f.final = f.b&1 == 1 248 f.b >>= 1 249 typ := f.b & 3 250 f.b >>= 2 251 f.nb -= 1 + 2 252 switch typ { 253 case 0: 254 f.dataBlock() 255 case 1: 256 // compressed, fixed Huffman tables 257 f.hl = &fixedHuffmanDecoder 258 f.hd = nil 259 f.huffmanBlock() 260 case 2: 261 // compressed, dynamic Huffman tables 262 if f.err = f.readHuffman(); f.err != nil { 263 break 264 } 265 f.hl = &f.h1 266 f.hd = &f.h2 267 f.huffmanBlock() 268 default: 269 // 3 is reserved. 270 f.err = CorruptInputError(f.roffset) 271 } 272 } 273 274 func (f *decompressor) Read(b []byte) (int, error) { 275 for { 276 if len(f.toRead) > 0 { 277 n := copy(b, f.toRead) 278 f.toRead = f.toRead[n:] 279 return n, nil 280 } 281 if f.err != nil { 282 return 0, f.err 283 } 284 f.step(f) 285 } 286 panic("unreachable") 287 } 288 289 func (f *decompressor) Close() error { 290 if f.err == io.EOF { 291 return nil 292 } 293 return f.err 294 } 295 296 // RFC 1951 section 3.2.7. 297 // Compression with dynamic Huffman codes 298 299 var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15} 300 301 func (f *decompressor) readHuffman() error { 302 // HLIT[5], HDIST[5], HCLEN[4]. 303 for f.nb < 5+5+4 { 304 if err := f.moreBits(); err != nil { 305 return err 306 } 307 } 308 nlit := int(f.b&0x1F) + 257 309 f.b >>= 5 310 ndist := int(f.b&0x1F) + 1 311 f.b >>= 5 312 nclen := int(f.b&0xF) + 4 313 f.b >>= 4 314 f.nb -= 5 + 5 + 4 315 316 // (HCLEN+4)*3 bits: code lengths in the magic codeOrder order. 317 for i := 0; i < nclen; i++ { 318 for f.nb < 3 { 319 if err := f.moreBits(); err != nil { 320 return err 321 } 322 } 323 f.codebits[codeOrder[i]] = int(f.b & 0x7) 324 f.b >>= 3 325 f.nb -= 3 326 } 327 for i := nclen; i < len(codeOrder); i++ { 328 f.codebits[codeOrder[i]] = 0 329 } 330 if !f.h1.init(f.codebits[0:]) { 331 return CorruptInputError(f.roffset) 332 } 333 334 // HLIT + 257 code lengths, HDIST + 1 code lengths, 335 // using the code length Huffman code. 336 for i, n := 0, nlit+ndist; i < n; { 337 x, err := f.huffSym(&f.h1) 338 if err != nil { 339 return err 340 } 341 if x < 16 { 342 // Actual length. 343 f.bits[i] = x 344 i++ 345 continue 346 } 347 // Repeat previous length or zero. 348 var rep int 349 var nb uint 350 var b int 351 switch x { 352 default: 353 return InternalError("unexpected length code") 354 case 16: 355 rep = 3 356 nb = 2 357 if i == 0 { 358 return CorruptInputError(f.roffset) 359 } 360 b = f.bits[i-1] 361 case 17: 362 rep = 3 363 nb = 3 364 b = 0 365 case 18: 366 rep = 11 367 nb = 7 368 b = 0 369 } 370 for f.nb < nb { 371 if err := f.moreBits(); err != nil { 372 return err 373 } 374 } 375 rep += int(f.b & uint32(1<<nb-1)) 376 f.b >>= nb 377 f.nb -= nb 378 if i+rep > n { 379 return CorruptInputError(f.roffset) 380 } 381 for j := 0; j < rep; j++ { 382 f.bits[i] = b 383 i++ 384 } 385 } 386 387 if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) { 388 return CorruptInputError(f.roffset) 389 } 390 391 return nil 392 } 393 394 // Decode a single Huffman block from f. 395 // hl and hd are the Huffman states for the lit/length values 396 // and the distance values, respectively. If hd == nil, using the 397 // fixed distance encoding associated with fixed Huffman blocks. 398 func (f *decompressor) huffmanBlock() { 399 for { 400 v, err := f.huffSym(f.hl) 401 if err != nil { 402 f.err = err 403 return 404 } 405 var n uint // number of bits extra 406 var length int 407 switch { 408 case v < 256: 409 f.hist[f.hp] = byte(v) 410 f.hp++ 411 if f.hp == len(f.hist) { 412 // After the flush, continue this loop. 413 f.flush((*decompressor).huffmanBlock) 414 return 415 } 416 continue 417 case v == 256: 418 // Done with huffman block; read next block. 419 f.step = (*decompressor).nextBlock 420 return 421 // otherwise, reference to older data 422 case v < 265: 423 length = v - (257 - 3) 424 n = 0 425 case v < 269: 426 length = v*2 - (265*2 - 11) 427 n = 1 428 case v < 273: 429 length = v*4 - (269*4 - 19) 430 n = 2 431 case v < 277: 432 length = v*8 - (273*8 - 35) 433 n = 3 434 case v < 281: 435 length = v*16 - (277*16 - 67) 436 n = 4 437 case v < 285: 438 length = v*32 - (281*32 - 131) 439 n = 5 440 default: 441 length = 258 442 n = 0 443 } 444 if n > 0 { 445 for f.nb < n { 446 if err = f.moreBits(); err != nil { 447 f.err = err 448 return 449 } 450 } 451 length += int(f.b & uint32(1<<n-1)) 452 f.b >>= n 453 f.nb -= n 454 } 455 456 var dist int 457 if f.hd == nil { 458 for f.nb < 5 { 459 if err = f.moreBits(); err != nil { 460 f.err = err 461 return 462 } 463 } 464 dist = int(reverseByte[(f.b&0x1F)<<3]) 465 f.b >>= 5 466 f.nb -= 5 467 } else { 468 if dist, err = f.huffSym(f.hd); err != nil { 469 f.err = err 470 return 471 } 472 } 473 474 switch { 475 case dist < 4: 476 dist++ 477 case dist >= 30: 478 f.err = CorruptInputError(f.roffset) 479 return 480 default: 481 nb := uint(dist-2) >> 1 482 // have 1 bit in bottom of dist, need nb more. 483 extra := (dist & 1) << nb 484 for f.nb < nb { 485 if err = f.moreBits(); err != nil { 486 f.err = err 487 return 488 } 489 } 490 extra |= int(f.b & uint32(1<<nb-1)) 491 f.b >>= nb 492 f.nb -= nb 493 dist = 1<<(nb+1) + 1 + extra 494 } 495 496 // Copy history[-dist:-dist+length] into output. 497 if dist > len(f.hist) { 498 f.err = InternalError("bad history distance") 499 return 500 } 501 502 // No check on length; encoding can be prescient. 503 if !f.hfull && dist > f.hp { 504 f.err = CorruptInputError(f.roffset) 505 return 506 } 507 508 p := f.hp - dist 509 if p < 0 { 510 p += len(f.hist) 511 } 512 for i := 0; i < length; i++ { 513 f.hist[f.hp] = f.hist[p] 514 f.hp++ 515 p++ 516 if f.hp == len(f.hist) { 517 // After flush continue copying out of history. 518 f.copyLen = length - (i + 1) 519 f.copyDist = dist 520 f.flush((*decompressor).copyHuff) 521 return 522 } 523 if p == len(f.hist) { 524 p = 0 525 } 526 } 527 } 528 panic("unreached") 529 } 530 531 func (f *decompressor) copyHuff() { 532 length := f.copyLen 533 dist := f.copyDist 534 p := f.hp - dist 535 if p < 0 { 536 p += len(f.hist) 537 } 538 for i := 0; i < length; i++ { 539 f.hist[f.hp] = f.hist[p] 540 f.hp++ 541 p++ 542 if f.hp == len(f.hist) { 543 f.copyLen = length - (i + 1) 544 f.flush((*decompressor).copyHuff) 545 return 546 } 547 if p == len(f.hist) { 548 p = 0 549 } 550 } 551 552 // Continue processing Huffman block. 553 f.huffmanBlock() 554 } 555 556 // Copy a single uncompressed data block from input to output. 557 func (f *decompressor) dataBlock() { 558 // Uncompressed. 559 // Discard current half-byte. 560 f.nb = 0 561 f.b = 0 562 563 // Length then ones-complement of length. 564 nr, err := io.ReadFull(f.r, f.buf[0:4]) 565 f.roffset += int64(nr) 566 if err != nil { 567 f.err = &ReadError{f.roffset, err} 568 return 569 } 570 n := int(f.buf[0]) | int(f.buf[1])<<8 571 nn := int(f.buf[2]) | int(f.buf[3])<<8 572 if uint16(nn) != uint16(^n) { 573 f.err = CorruptInputError(f.roffset) 574 return 575 } 576 577 if n == 0 { 578 // 0-length block means sync 579 f.flush((*decompressor).nextBlock) 580 return 581 } 582 583 f.copyLen = n 584 f.copyData() 585 } 586 587 func (f *decompressor) copyData() { 588 // Read f.dataLen bytes into history, 589 // pausing for reads as history fills. 590 n := f.copyLen 591 for n > 0 { 592 m := len(f.hist) - f.hp 593 if m > n { 594 m = n 595 } 596 m, err := io.ReadFull(f.r, f.hist[f.hp:f.hp+m]) 597 f.roffset += int64(m) 598 if err != nil { 599 f.err = &ReadError{f.roffset, err} 600 return 601 } 602 n -= m 603 f.hp += m 604 if f.hp == len(f.hist) { 605 f.copyLen = n 606 f.flush((*decompressor).copyData) 607 return 608 } 609 } 610 f.step = (*decompressor).nextBlock 611 } 612 613 func (f *decompressor) setDict(dict []byte) { 614 if len(dict) > len(f.hist) { 615 // Will only remember the tail. 616 dict = dict[len(dict)-len(f.hist):] 617 } 618 619 f.hp = copy(f.hist[:], dict) 620 if f.hp == len(f.hist) { 621 f.hp = 0 622 f.hfull = true 623 } 624 f.hw = f.hp 625 } 626 627 func (f *decompressor) moreBits() error { 628 c, err := f.r.ReadByte() 629 if err != nil { 630 if err == io.EOF { 631 err = io.ErrUnexpectedEOF 632 } 633 return err 634 } 635 f.roffset++ 636 f.b |= uint32(c) << f.nb 637 f.nb += 8 638 return nil 639 } 640 641 // Read the next Huffman-encoded symbol from f according to h. 642 func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) { 643 for n := uint(h.min); n <= uint(h.max); n++ { 644 lim := h.limit[n] 645 if lim == -1 { 646 continue 647 } 648 for f.nb < n { 649 if err := f.moreBits(); err != nil { 650 return 0, err 651 } 652 } 653 v := int(f.b & uint32(1<<n-1)) 654 v <<= 16 - n 655 v = int(reverseByte[v>>8]) | int(reverseByte[v&0xFF])<<8 // reverse bits 656 if v <= lim { 657 f.b >>= n 658 f.nb -= n 659 return h.codes[v-h.base[n]], nil 660 } 661 } 662 return 0, CorruptInputError(f.roffset) 663 } 664 665 // Flush any buffered output to the underlying writer. 666 func (f *decompressor) flush(step func(*decompressor)) { 667 f.toRead = f.hist[f.hw:f.hp] 668 f.woffset += int64(f.hp - f.hw) 669 f.hw = f.hp 670 if f.hp == len(f.hist) { 671 f.hp = 0 672 f.hw = 0 673 f.hfull = true 674 } 675 f.step = step 676 } 677 678 func makeReader(r io.Reader) Reader { 679 if rr, ok := r.(Reader); ok { 680 return rr 681 } 682 return bufio.NewReader(r) 683 } 684 685 // NewReader returns a new ReadCloser that can be used 686 // to read the uncompressed version of r. It is the caller's 687 // responsibility to call Close on the ReadCloser when 688 // finished reading. 689 func NewReader(r io.Reader) io.ReadCloser { 690 var f decompressor 691 f.r = makeReader(r) 692 f.step = (*decompressor).nextBlock 693 return &f 694 } 695 696 // NewReaderDict is like NewReader but initializes the reader 697 // with a preset dictionary. The returned Reader behaves as if 698 // the uncompressed data stream started with the given dictionary, 699 // which has already been read. NewReaderDict is typically used 700 // to read data compressed by NewWriterDict. 701 func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser { 702 var f decompressor 703 f.setDict(dict) 704 f.r = makeReader(r) 705 f.step = (*decompressor).nextBlock 706 return &f 707 }