src/pkg/compress/lzw/reader.go - The Go Programming Language

Golang

Source file src/pkg/compress/lzw/reader.go

     1	// Copyright 2011 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// Package lzw implements the Lempel-Ziv-Welch compressed data format,
     6	// described in T. A. Welch, ``A Technique for High-Performance Data
     7	// Compression'', Computer, 17(6) (June 1984), pp 8-19.
     8	//
     9	// In particular, it implements LZW as used by the GIF, TIFF and PDF file
    10	// formats, which means variable-width codes up to 12 bits and the first
    11	// two non-literal codes are a clear code and an EOF code.
    12	package lzw
    13	
    14	// TODO(nigeltao): check that TIFF and PDF use LZW in the same way as GIF,
    15	// modulo LSB/MSB packing order.
    16	
    17	import (
    18		"bufio"
    19		"errors"
    20		"fmt"
    21		"io"
    22	)
    23	
    24	// Order specifies the bit ordering in an LZW data stream.
    25	type Order int
    26	
    27	const (
    28		// LSB means Least Significant Bits first, as used in the GIF file format.
    29		LSB Order = iota
    30		// MSB means Most Significant Bits first, as used in the TIFF and PDF
    31		// file formats.
    32		MSB
    33	)
    34	
    35	const (
    36		maxWidth           = 12
    37		decoderInvalidCode = 0xffff
    38		flushBuffer        = 1 << maxWidth
    39	)
    40	
    41	// decoder is the state from which the readXxx method converts a byte
    42	// stream into a code stream.
    43	type decoder struct {
    44		r        io.ByteReader
    45		bits     uint32
    46		nBits    uint
    47		width    uint
    48		read     func(*decoder) (uint16, error) // readLSB or readMSB
    49		litWidth int                            // width in bits of literal codes
    50		err      error
    51	
    52		// The first 1<<litWidth codes are literal codes.
    53		// The next two codes mean clear and EOF.
    54		// Other valid codes are in the range [lo, hi] where lo := clear + 2,
    55		// with the upper bound incrementing on each code seen.
    56		// overflow is the code at which hi overflows the code width.
    57		// last is the most recently seen code, or decoderInvalidCode.
    58		clear, eof, hi, overflow, last uint16
    59	
    60		// Each code c in [lo, hi] expands to two or more bytes. For c != hi:
    61		//   suffix[c] is the last of these bytes.
    62		//   prefix[c] is the code for all but the last byte.
    63		//   This code can either be a literal code or another code in [lo, c).
    64		// The c == hi case is a special case.
    65		suffix [1 << maxWidth]uint8
    66		prefix [1 << maxWidth]uint16
    67	
    68		// output is the temporary output buffer.
    69		// Literal codes are accumulated from the start of the buffer.
    70		// Non-literal codes decode to a sequence of suffixes that are first
    71		// written right-to-left from the end of the buffer before being copied
    72		// to the start of the buffer.
    73		// It is flushed when it contains >= 1<<maxWidth bytes,
    74		// so that there is always room to decode an entire code.
    75		output [2 * 1 << maxWidth]byte
    76		o      int    // write index into output
    77		toRead []byte // bytes to return from Read
    78	}
    79	
    80	// readLSB returns the next code for "Least Significant Bits first" data.
    81	func (d *decoder) readLSB() (uint16, error) {
    82		for d.nBits < d.width {
    83			x, err := d.r.ReadByte()
    84			if err != nil {
    85				return 0, err
    86			}
    87			d.bits |= uint32(x) << d.nBits
    88			d.nBits += 8
    89		}
    90		code := uint16(d.bits & (1<<d.width - 1))
    91		d.bits >>= d.width
    92		d.nBits -= d.width
    93		return code, nil
    94	}
    95	
    96	// readMSB returns the next code for "Most Significant Bits first" data.
    97	func (d *decoder) readMSB() (uint16, error) {
    98		for d.nBits < d.width {
    99			x, err := d.r.ReadByte()
   100			if err != nil {
   101				return 0, err
   102			}
   103			d.bits |= uint32(x) << (24 - d.nBits)
   104			d.nBits += 8
   105		}
   106		code := uint16(d.bits >> (32 - d.width))
   107		d.bits <<= d.width
   108		d.nBits -= d.width
   109		return code, nil
   110	}
   111	
   112	func (d *decoder) Read(b []byte) (int, error) {
   113		for {
   114			if len(d.toRead) > 0 {
   115				n := copy(b, d.toRead)
   116				d.toRead = d.toRead[n:]
   117				return n, nil
   118			}
   119			if d.err != nil {
   120				return 0, d.err
   121			}
   122			d.decode()
   123		}
   124		panic("unreachable")
   125	}
   126	
   127	// decode decompresses bytes from r and leaves them in d.toRead.
   128	// read specifies how to decode bytes into codes.
   129	// litWidth is the width in bits of literal codes.
   130	func (d *decoder) decode() {
   131		// Loop over the code stream, converting codes into decompressed bytes.
   132		for {
   133			code, err := d.read(d)
   134			if err != nil {
   135				if err == io.EOF {
   136					err = io.ErrUnexpectedEOF
   137				}
   138				d.err = err
   139				return
   140			}
   141			switch {
   142			case code < d.clear:
   143				// We have a literal code.
   144				d.output[d.o] = uint8(code)
   145				d.o++
   146				if d.last != decoderInvalidCode {
   147					// Save what the hi code expands to.
   148					d.suffix[d.hi] = uint8(code)
   149					d.prefix[d.hi] = d.last
   150				}
   151			case code == d.clear:
   152				d.width = 1 + uint(d.litWidth)
   153				d.hi = d.eof
   154				d.overflow = 1 << d.width
   155				d.last = decoderInvalidCode
   156				continue
   157			case code == d.eof:
   158				d.flush()
   159				d.err = io.EOF
   160				return
   161			case code <= d.hi:
   162				c, i := code, len(d.output)-1
   163				if code == d.hi {
   164					// code == hi is a special case which expands to the last expansion
   165					// followed by the head of the last expansion. To find the head, we walk
   166					// the prefix chain until we find a literal code.
   167					c = d.last
   168					for c >= d.clear {
   169						c = d.prefix[c]
   170					}
   171					d.output[i] = uint8(c)
   172					i--
   173					c = d.last
   174				}
   175				// Copy the suffix chain into output and then write that to w.
   176				for c >= d.clear {
   177					d.output[i] = d.suffix[c]
   178					i--
   179					c = d.prefix[c]
   180				}
   181				d.output[i] = uint8(c)
   182				d.o += copy(d.output[d.o:], d.output[i:])
   183				if d.last != decoderInvalidCode {
   184					// Save what the hi code expands to.
   185					d.suffix[d.hi] = uint8(c)
   186					d.prefix[d.hi] = d.last
   187				}
   188			default:
   189				d.err = errors.New("lzw: invalid code")
   190				return
   191			}
   192			d.last, d.hi = code, d.hi+1
   193			if d.hi >= d.overflow {
   194				if d.width == maxWidth {
   195					d.last = decoderInvalidCode
   196				} else {
   197					d.width++
   198					d.overflow <<= 1
   199				}
   200			}
   201			if d.o >= flushBuffer {
   202				d.flush()
   203				return
   204			}
   205		}
   206		panic("unreachable")
   207	}
   208	
   209	func (d *decoder) flush() {
   210		d.toRead = d.output[:d.o]
   211		d.o = 0
   212	}
   213	
   214	var errClosed = errors.New("compress/lzw: reader/writer is closed")
   215	
   216	func (d *decoder) Close() error {
   217		d.err = errClosed // in case any Reads come along
   218		return nil
   219	}
   220	
   221	// NewReader creates a new io.ReadCloser that satisfies reads by decompressing
   222	// the data read from r.
   223	// It is the caller's responsibility to call Close on the ReadCloser when
   224	// finished reading.
   225	// The number of bits to use for literal codes, litWidth, must be in the
   226	// range [2,8] and is typically 8.
   227	func NewReader(r io.Reader, order Order, litWidth int) io.ReadCloser {
   228		d := new(decoder)
   229		switch order {
   230		case LSB:
   231			d.read = (*decoder).readLSB
   232		case MSB:
   233			d.read = (*decoder).readMSB
   234		default:
   235			d.err = errors.New("lzw: unknown order")
   236			return d
   237		}
   238		if litWidth < 2 || 8 < litWidth {
   239			d.err = fmt.Errorf("lzw: litWidth %d out of range", litWidth)
   240			return d
   241		}
   242		if br, ok := r.(io.ByteReader); ok {
   243			d.r = br
   244		} else {
   245			d.r = bufio.NewReader(r)
   246		}
   247		d.litWidth = litWidth
   248		d.width = 1 + uint(litWidth)
   249		d.clear = uint16(1) << uint(litWidth)
   250		d.eof, d.hi = d.clear+1, d.clear+1
   251		d.overflow = uint16(1) << d.width
   252		d.last = decoderInvalidCode
   253	
   254		return d
   255	}