src/pkg/compress/flate/inflate.go - The Go Programming Language

Golang

Source file src/pkg/compress/flate/inflate.go

     1	// Copyright 2009 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// Package flate implements the DEFLATE compressed data format, described in
     6	// RFC 1951.  The gzip and zlib packages implement access to DEFLATE-based file
     7	// formats.
     8	package flate
     9	
    10	import (
    11		"bufio"
    12		"io"
    13		"strconv"
    14	)
    15	
    16	const (
    17		maxCodeLen = 16    // max length of Huffman code
    18		maxHist    = 32768 // max history required
    19		maxLit     = 286
    20		maxDist    = 32
    21		numCodes   = 19 // number of codes in Huffman meta-code
    22	)
    23	
    24	// A CorruptInputError reports the presence of corrupt input at a given offset.
    25	type CorruptInputError int64
    26	
    27	func (e CorruptInputError) Error() string {
    28		return "flate: corrupt input before offset " + strconv.FormatInt(int64(e), 10)
    29	}
    30	
    31	// An InternalError reports an error in the flate code itself.
    32	type InternalError string
    33	
    34	func (e InternalError) Error() string { return "flate: internal error: " + string(e) }
    35	
    36	// A ReadError reports an error encountered while reading input.
    37	type ReadError struct {
    38		Offset int64 // byte offset where error occurred
    39		Err    error // error returned by underlying Read
    40	}
    41	
    42	func (e *ReadError) Error() string {
    43		return "flate: read error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error()
    44	}
    45	
    46	// A WriteError reports an error encountered while writing output.
    47	type WriteError struct {
    48		Offset int64 // byte offset where error occurred
    49		Err    error // error returned by underlying Write
    50	}
    51	
    52	func (e *WriteError) Error() string {
    53		return "flate: write error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error()
    54	}
    55	
    56	// Huffman decoder is based on
    57	// J. Brian Connell, ``A Huffman-Shannon-Fano Code,''
    58	// Proceedings of the IEEE, 61(7) (July 1973), pp 1046-1047.
    59	type huffmanDecoder struct {
    60		// min, max code length
    61		min, max int
    62	
    63		// limit[i] = largest code word of length i
    64		// Given code v of length n,
    65		// need more bits if v > limit[n].
    66		limit [maxCodeLen + 1]int
    67	
    68		// base[i] = smallest code word of length i - seq number
    69		base [maxCodeLen + 1]int
    70	
    71		// codes[seq number] = output code.
    72		// Given code v of length n, value is
    73		// codes[v - base[n]].
    74		codes []int
    75	}
    76	
    77	// Initialize Huffman decoding tables from array of code lengths.
    78	func (h *huffmanDecoder) init(bits []int) bool {
    79		// Count number of codes of each length,
    80		// compute min and max length.
    81		var count [maxCodeLen + 1]int
    82		var min, max int
    83		for _, n := range bits {
    84			if n == 0 {
    85				continue
    86			}
    87			if min == 0 || n < min {
    88				min = n
    89			}
    90			if n > max {
    91				max = n
    92			}
    93			count[n]++
    94		}
    95		if max == 0 {
    96			return false
    97		}
    98	
    99		h.min = min
   100		h.max = max
   101	
   102		// For each code range, compute
   103		// nextcode (first code of that length),
   104		// limit (last code of that length), and
   105		// base (offset from first code to sequence number).
   106		code := 0
   107		seq := 0
   108		var nextcode [maxCodeLen]int
   109		for i := min; i <= max; i++ {
   110			n := count[i]
   111			nextcode[i] = code
   112			h.base[i] = code - seq
   113			code += n
   114			seq += n
   115			h.limit[i] = code - 1
   116			code <<= 1
   117		}
   118	
   119		// Make array mapping sequence numbers to codes.
   120		if len(h.codes) < len(bits) {
   121			h.codes = make([]int, len(bits))
   122		}
   123		for i, n := range bits {
   124			if n == 0 {
   125				continue
   126			}
   127			code := nextcode[n]
   128			nextcode[n]++
   129			seq := code - h.base[n]
   130			h.codes[seq] = i
   131		}
   132		return true
   133	}
   134	
   135	// Hard-coded Huffman tables for DEFLATE algorithm.
   136	// See RFC 1951, section 3.2.6.
   137	var fixedHuffmanDecoder = huffmanDecoder{
   138		7, 9,
   139		[maxCodeLen + 1]int{7: 23, 199, 511},
   140		[maxCodeLen + 1]int{7: 0, 24, 224},
   141		[]int{
   142			// length 7: 256-279
   143			256, 257, 258, 259, 260, 261, 262,
   144			263, 264, 265, 266, 267, 268, 269,
   145			270, 271, 272, 273, 274, 275, 276,
   146			277, 278, 279,
   147	
   148			// length 8: 0-143
   149			0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
   150			12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
   151			22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
   152			32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
   153			42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
   154			52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
   155			62, 63, 64, 65, 66, 67, 68, 69, 70, 71,
   156			72, 73, 74, 75, 76, 77, 78, 79, 80, 81,
   157			82, 83, 84, 85, 86, 87, 88, 89, 90, 91,
   158			92, 93, 94, 95, 96, 97, 98, 99, 100,
   159			101, 102, 103, 104, 105, 106, 107, 108,
   160			109, 110, 111, 112, 113, 114, 115, 116,
   161			117, 118, 119, 120, 121, 122, 123, 124,
   162			125, 126, 127, 128, 129, 130, 131, 132,
   163			133, 134, 135, 136, 137, 138, 139, 140,
   164			141, 142, 143,
   165	
   166			// length 8: 280-287
   167			280, 281, 282, 283, 284, 285, 286, 287,
   168	
   169			// length 9: 144-255
   170			144, 145, 146, 147, 148, 149, 150, 151,
   171			152, 153, 154, 155, 156, 157, 158, 159,
   172			160, 161, 162, 163, 164, 165, 166, 167,
   173			168, 169, 170, 171, 172, 173, 174, 175,
   174			176, 177, 178, 179, 180, 181, 182, 183,
   175			184, 185, 186, 187, 188, 189, 190, 191,
   176			192, 193, 194, 195, 196, 197, 198, 199,
   177			200, 201, 202, 203, 204, 205, 206, 207,
   178			208, 209, 210, 211, 212, 213, 214, 215,
   179			216, 217, 218, 219, 220, 221, 222, 223,
   180			224, 225, 226, 227, 228, 229, 230, 231,
   181			232, 233, 234, 235, 236, 237, 238, 239,
   182			240, 241, 242, 243, 244, 245, 246, 247,
   183			248, 249, 250, 251, 252, 253, 254, 255,
   184		},
   185	}
   186	
   187	// The actual read interface needed by NewReader.
   188	// If the passed in io.Reader does not also have ReadByte,
   189	// the NewReader will introduce its own buffering.
   190	type Reader interface {
   191		io.Reader
   192		ReadByte() (c byte, err error)
   193	}
   194	
   195	// Decompress state.
   196	type decompressor struct {
   197		// Input source.
   198		r       Reader
   199		roffset int64
   200		woffset int64
   201	
   202		// Input bits, in top of b.
   203		b  uint32
   204		nb uint
   205	
   206		// Huffman decoders for literal/length, distance.
   207		h1, h2 huffmanDecoder
   208	
   209		// Length arrays used to define Huffman codes.
   210		bits     [maxLit + maxDist]int
   211		codebits [numCodes]int
   212	
   213		// Output history, buffer.
   214		hist  [maxHist]byte
   215		hp    int  // current output position in buffer
   216		hw    int  // have written hist[0:hw] already
   217		hfull bool // buffer has filled at least once
   218	
   219		// Temporary buffer (avoids repeated allocation).
   220		buf [4]byte
   221	
   222		// Next step in the decompression,
   223		// and decompression state.
   224		step     func(*decompressor)
   225		final    bool
   226		err      error
   227		toRead   []byte
   228		hl, hd   *huffmanDecoder
   229		copyLen  int
   230		copyDist int
   231	}
   232	
   233	func (f *decompressor) nextBlock() {
   234		if f.final {
   235			if f.hw != f.hp {
   236				f.flush((*decompressor).nextBlock)
   237				return
   238			}
   239			f.err = io.EOF
   240			return
   241		}
   242		for f.nb < 1+2 {
   243			if f.err = f.moreBits(); f.err != nil {
   244				return
   245			}
   246		}
   247		f.final = f.b&1 == 1
   248		f.b >>= 1
   249		typ := f.b & 3
   250		f.b >>= 2
   251		f.nb -= 1 + 2
   252		switch typ {
   253		case 0:
   254			f.dataBlock()
   255		case 1:
   256			// compressed, fixed Huffman tables
   257			f.hl = &fixedHuffmanDecoder
   258			f.hd = nil
   259			f.huffmanBlock()
   260		case 2:
   261			// compressed, dynamic Huffman tables
   262			if f.err = f.readHuffman(); f.err != nil {
   263				break
   264			}
   265			f.hl = &f.h1
   266			f.hd = &f.h2
   267			f.huffmanBlock()
   268		default:
   269			// 3 is reserved.
   270			f.err = CorruptInputError(f.roffset)
   271		}
   272	}
   273	
   274	func (f *decompressor) Read(b []byte) (int, error) {
   275		for {
   276			if len(f.toRead) > 0 {
   277				n := copy(b, f.toRead)
   278				f.toRead = f.toRead[n:]
   279				return n, nil
   280			}
   281			if f.err != nil {
   282				return 0, f.err
   283			}
   284			f.step(f)
   285		}
   286		panic("unreachable")
   287	}
   288	
   289	func (f *decompressor) Close() error {
   290		if f.err == io.EOF {
   291			return nil
   292		}
   293		return f.err
   294	}
   295	
   296	// RFC 1951 section 3.2.7.
   297	// Compression with dynamic Huffman codes
   298	
   299	var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
   300	
   301	func (f *decompressor) readHuffman() error {
   302		// HLIT[5], HDIST[5], HCLEN[4].
   303		for f.nb < 5+5+4 {
   304			if err := f.moreBits(); err != nil {
   305				return err
   306			}
   307		}
   308		nlit := int(f.b&0x1F) + 257
   309		f.b >>= 5
   310		ndist := int(f.b&0x1F) + 1
   311		f.b >>= 5
   312		nclen := int(f.b&0xF) + 4
   313		f.b >>= 4
   314		f.nb -= 5 + 5 + 4
   315	
   316		// (HCLEN+4)*3 bits: code lengths in the magic codeOrder order.
   317		for i := 0; i < nclen; i++ {
   318			for f.nb < 3 {
   319				if err := f.moreBits(); err != nil {
   320					return err
   321				}
   322			}
   323			f.codebits[codeOrder[i]] = int(f.b & 0x7)
   324			f.b >>= 3
   325			f.nb -= 3
   326		}
   327		for i := nclen; i < len(codeOrder); i++ {
   328			f.codebits[codeOrder[i]] = 0
   329		}
   330		if !f.h1.init(f.codebits[0:]) {
   331			return CorruptInputError(f.roffset)
   332		}
   333	
   334		// HLIT + 257 code lengths, HDIST + 1 code lengths,
   335		// using the code length Huffman code.
   336		for i, n := 0, nlit+ndist; i < n; {
   337			x, err := f.huffSym(&f.h1)
   338			if err != nil {
   339				return err
   340			}
   341			if x < 16 {
   342				// Actual length.
   343				f.bits[i] = x
   344				i++
   345				continue
   346			}
   347			// Repeat previous length or zero.
   348			var rep int
   349			var nb uint
   350			var b int
   351			switch x {
   352			default:
   353				return InternalError("unexpected length code")
   354			case 16:
   355				rep = 3
   356				nb = 2
   357				if i == 0 {
   358					return CorruptInputError(f.roffset)
   359				}
   360				b = f.bits[i-1]
   361			case 17:
   362				rep = 3
   363				nb = 3
   364				b = 0
   365			case 18:
   366				rep = 11
   367				nb = 7
   368				b = 0
   369			}
   370			for f.nb < nb {
   371				if err := f.moreBits(); err != nil {
   372					return err
   373				}
   374			}
   375			rep += int(f.b & uint32(1<<nb-1))
   376			f.b >>= nb
   377			f.nb -= nb
   378			if i+rep > n {
   379				return CorruptInputError(f.roffset)
   380			}
   381			for j := 0; j < rep; j++ {
   382				f.bits[i] = b
   383				i++
   384			}
   385		}
   386	
   387		if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) {
   388			return CorruptInputError(f.roffset)
   389		}
   390	
   391		return nil
   392	}
   393	
   394	// Decode a single Huffman block from f.
   395	// hl and hd are the Huffman states for the lit/length values
   396	// and the distance values, respectively.  If hd == nil, using the
   397	// fixed distance encoding associated with fixed Huffman blocks.
   398	func (f *decompressor) huffmanBlock() {
   399		for {
   400			v, err := f.huffSym(f.hl)
   401			if err != nil {
   402				f.err = err
   403				return
   404			}
   405			var n uint // number of bits extra
   406			var length int
   407			switch {
   408			case v < 256:
   409				f.hist[f.hp] = byte(v)
   410				f.hp++
   411				if f.hp == len(f.hist) {
   412					// After the flush, continue this loop.
   413					f.flush((*decompressor).huffmanBlock)
   414					return
   415				}
   416				continue
   417			case v == 256:
   418				// Done with huffman block; read next block.
   419				f.step = (*decompressor).nextBlock
   420				return
   421			// otherwise, reference to older data
   422			case v < 265:
   423				length = v - (257 - 3)
   424				n = 0
   425			case v < 269:
   426				length = v*2 - (265*2 - 11)
   427				n = 1
   428			case v < 273:
   429				length = v*4 - (269*4 - 19)
   430				n = 2
   431			case v < 277:
   432				length = v*8 - (273*8 - 35)
   433				n = 3
   434			case v < 281:
   435				length = v*16 - (277*16 - 67)
   436				n = 4
   437			case v < 285:
   438				length = v*32 - (281*32 - 131)
   439				n = 5
   440			default:
   441				length = 258
   442				n = 0
   443			}
   444			if n > 0 {
   445				for f.nb < n {
   446					if err = f.moreBits(); err != nil {
   447						f.err = err
   448						return
   449					}
   450				}
   451				length += int(f.b & uint32(1<<n-1))
   452				f.b >>= n
   453				f.nb -= n
   454			}
   455	
   456			var dist int
   457			if f.hd == nil {
   458				for f.nb < 5 {
   459					if err = f.moreBits(); err != nil {
   460						f.err = err
   461						return
   462					}
   463				}
   464				dist = int(reverseByte[(f.b&0x1F)<<3])
   465				f.b >>= 5
   466				f.nb -= 5
   467			} else {
   468				if dist, err = f.huffSym(f.hd); err != nil {
   469					f.err = err
   470					return
   471				}
   472			}
   473	
   474			switch {
   475			case dist < 4:
   476				dist++
   477			case dist >= 30:
   478				f.err = CorruptInputError(f.roffset)
   479				return
   480			default:
   481				nb := uint(dist-2) >> 1
   482				// have 1 bit in bottom of dist, need nb more.
   483				extra := (dist & 1) << nb
   484				for f.nb < nb {
   485					if err = f.moreBits(); err != nil {
   486						f.err = err
   487						return
   488					}
   489				}
   490				extra |= int(f.b & uint32(1<<nb-1))
   491				f.b >>= nb
   492				f.nb -= nb
   493				dist = 1<<(nb+1) + 1 + extra
   494			}
   495	
   496			// Copy history[-dist:-dist+length] into output.
   497			if dist > len(f.hist) {
   498				f.err = InternalError("bad history distance")
   499				return
   500			}
   501	
   502			// No check on length; encoding can be prescient.
   503			if !f.hfull && dist > f.hp {
   504				f.err = CorruptInputError(f.roffset)
   505				return
   506			}
   507	
   508			p := f.hp - dist
   509			if p < 0 {
   510				p += len(f.hist)
   511			}
   512			for i := 0; i < length; i++ {
   513				f.hist[f.hp] = f.hist[p]
   514				f.hp++
   515				p++
   516				if f.hp == len(f.hist) {
   517					// After flush continue copying out of history.
   518					f.copyLen = length - (i + 1)
   519					f.copyDist = dist
   520					f.flush((*decompressor).copyHuff)
   521					return
   522				}
   523				if p == len(f.hist) {
   524					p = 0
   525				}
   526			}
   527		}
   528		panic("unreached")
   529	}
   530	
   531	func (f *decompressor) copyHuff() {
   532		length := f.copyLen
   533		dist := f.copyDist
   534		p := f.hp - dist
   535		if p < 0 {
   536			p += len(f.hist)
   537		}
   538		for i := 0; i < length; i++ {
   539			f.hist[f.hp] = f.hist[p]
   540			f.hp++
   541			p++
   542			if f.hp == len(f.hist) {
   543				f.copyLen = length - (i + 1)
   544				f.flush((*decompressor).copyHuff)
   545				return
   546			}
   547			if p == len(f.hist) {
   548				p = 0
   549			}
   550		}
   551	
   552		// Continue processing Huffman block.
   553		f.huffmanBlock()
   554	}
   555	
   556	// Copy a single uncompressed data block from input to output.
   557	func (f *decompressor) dataBlock() {
   558		// Uncompressed.
   559		// Discard current half-byte.
   560		f.nb = 0
   561		f.b = 0
   562	
   563		// Length then ones-complement of length.
   564		nr, err := io.ReadFull(f.r, f.buf[0:4])
   565		f.roffset += int64(nr)
   566		if err != nil {
   567			f.err = &ReadError{f.roffset, err}
   568			return
   569		}
   570		n := int(f.buf[0]) | int(f.buf[1])<<8
   571		nn := int(f.buf[2]) | int(f.buf[3])<<8
   572		if uint16(nn) != uint16(^n) {
   573			f.err = CorruptInputError(f.roffset)
   574			return
   575		}
   576	
   577		if n == 0 {
   578			// 0-length block means sync
   579			f.flush((*decompressor).nextBlock)
   580			return
   581		}
   582	
   583		f.copyLen = n
   584		f.copyData()
   585	}
   586	
   587	func (f *decompressor) copyData() {
   588		// Read f.dataLen bytes into history,
   589		// pausing for reads as history fills.
   590		n := f.copyLen
   591		for n > 0 {
   592			m := len(f.hist) - f.hp
   593			if m > n {
   594				m = n
   595			}
   596			m, err := io.ReadFull(f.r, f.hist[f.hp:f.hp+m])
   597			f.roffset += int64(m)
   598			if err != nil {
   599				f.err = &ReadError{f.roffset, err}
   600				return
   601			}
   602			n -= m
   603			f.hp += m
   604			if f.hp == len(f.hist) {
   605				f.copyLen = n
   606				f.flush((*decompressor).copyData)
   607				return
   608			}
   609		}
   610		f.step = (*decompressor).nextBlock
   611	}
   612	
   613	func (f *decompressor) setDict(dict []byte) {
   614		if len(dict) > len(f.hist) {
   615			// Will only remember the tail.
   616			dict = dict[len(dict)-len(f.hist):]
   617		}
   618	
   619		f.hp = copy(f.hist[:], dict)
   620		if f.hp == len(f.hist) {
   621			f.hp = 0
   622			f.hfull = true
   623		}
   624		f.hw = f.hp
   625	}
   626	
   627	func (f *decompressor) moreBits() error {
   628		c, err := f.r.ReadByte()
   629		if err != nil {
   630			if err == io.EOF {
   631				err = io.ErrUnexpectedEOF
   632			}
   633			return err
   634		}
   635		f.roffset++
   636		f.b |= uint32(c) << f.nb
   637		f.nb += 8
   638		return nil
   639	}
   640	
   641	// Read the next Huffman-encoded symbol from f according to h.
   642	func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) {
   643		for n := uint(h.min); n <= uint(h.max); n++ {
   644			lim := h.limit[n]
   645			if lim == -1 {
   646				continue
   647			}
   648			for f.nb < n {
   649				if err := f.moreBits(); err != nil {
   650					return 0, err
   651				}
   652			}
   653			v := int(f.b & uint32(1<<n-1))
   654			v <<= 16 - n
   655			v = int(reverseByte[v>>8]) | int(reverseByte[v&0xFF])<<8 // reverse bits
   656			if v <= lim {
   657				f.b >>= n
   658				f.nb -= n
   659				return h.codes[v-h.base[n]], nil
   660			}
   661		}
   662		return 0, CorruptInputError(f.roffset)
   663	}
   664	
   665	// Flush any buffered output to the underlying writer.
   666	func (f *decompressor) flush(step func(*decompressor)) {
   667		f.toRead = f.hist[f.hw:f.hp]
   668		f.woffset += int64(f.hp - f.hw)
   669		f.hw = f.hp
   670		if f.hp == len(f.hist) {
   671			f.hp = 0
   672			f.hw = 0
   673			f.hfull = true
   674		}
   675		f.step = step
   676	}
   677	
   678	func makeReader(r io.Reader) Reader {
   679		if rr, ok := r.(Reader); ok {
   680			return rr
   681		}
   682		return bufio.NewReader(r)
   683	}
   684	
   685	// NewReader returns a new ReadCloser that can be used
   686	// to read the uncompressed version of r.  It is the caller's
   687	// responsibility to call Close on the ReadCloser when
   688	// finished reading.
   689	func NewReader(r io.Reader) io.ReadCloser {
   690		var f decompressor
   691		f.r = makeReader(r)
   692		f.step = (*decompressor).nextBlock
   693		return &f
   694	}
   695	
   696	// NewReaderDict is like NewReader but initializes the reader
   697	// with a preset dictionary.  The returned Reader behaves as if
   698	// the uncompressed data stream started with the given dictionary,
   699	// which has already been read.  NewReaderDict is typically used
   700	// to read data compressed by NewWriterDict.
   701	func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser {
   702		var f decompressor
   703		f.setDict(dict)
   704		f.r = makeReader(r)
   705		f.step = (*decompressor).nextBlock
   706		return &f
   707	}