Source file src/pkg/compress/gzip/gunzip.go
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 // Package gzip implements reading and writing of gzip format compressed files,
6 // as specified in RFC 1952.
7 package gzip
8
9 import (
10 "bufio"
11 "compress/flate"
12 "errors"
13 "hash"
14 "hash/crc32"
15 "io"
16 "time"
17 )
18
// Fixed values expected in the first three bytes of a gzip header.
const (
	gzipID1     = 0x1f // first identification byte
	gzipID2     = 0x8b // second identification byte
	gzipDeflate = 8    // compression method byte accepted by readHeader
)

// Bit masks tested against the header flag byte.
const (
	flagText    = 1 << iota // bit 0
	flagHdrCrc              // bit 1: a 16-bit header checksum follows the optional fields
	flagExtra               // bit 2: a length-prefixed "extra" field is present
	flagName                // bit 3: a NUL-terminated file name is present
	flagComment             // bit 4: a NUL-terminated comment is present
)
29
// makeReader returns r itself when it already satisfies flate.Reader;
// otherwise it wraps r in a bufio.Reader, which does.
func makeReader(r io.Reader) flate.Reader {
	fr, ok := r.(flate.Reader)
	if !ok {
		fr = bufio.NewReader(r)
	}
	return fr
}
36
// ErrChecksum is returned when reading GZIP data that has an invalid checksum.
var ErrChecksum = errors.New("gzip: invalid checksum")

// ErrHeader is returned when reading GZIP data that has an invalid header.
var ErrHeader = errors.New("gzip: invalid header")
43
// Header holds the metadata that a gzip file stores about the compressed
// file. It is exposed as the fields of the Writer and Reader structs.
type Header struct {
	Comment string    // comment
	Extra   []byte    // "extra data"
	ModTime time.Time // modification time
	Name    string    // file name
	OS      byte      // operating system type
}
53
54 // A Reader is an io.Reader that can be read to retrieve
55 // uncompressed data from a gzip-format compressed file.
56 //
57 // In general, a gzip file can be a concatenation of gzip files,
58 // each with its own header. Reads from the Reader
59 // return the concatenation of the uncompressed data of each.
60 // Only the first header is recorded in the Reader fields.
61 //
62 // Gzip files store a length and checksum of the uncompressed data.
63 // The Reader will return a ErrChecksum when Read
64 // reaches the end of the uncompressed data if it does not
65 // have the expected length or checksum. Clients should treat data
66 // returned by Read as tentative until they receive the io.EOF
67 // marking the end of the data.
68 type Reader struct {
69 Header
70 r flate.Reader
71 decompressor io.ReadCloser
72 digest hash.Hash32
73 size uint32
74 flg byte
75 buf [512]byte
76 err error
77 }
78
79 // NewReader creates a new Reader reading the given reader.
80 // The implementation buffers input and may read more data than necessary from r.
81 // It is the caller's responsibility to call Close on the Reader when done.
82 func NewReader(r io.Reader) (*Reader, error) {
83 z := new(Reader)
84 z.r = makeReader(r)
85 z.digest = crc32.NewIEEE()
86 if err := z.readHeader(true); err != nil {
87 return nil, err
88 }
89 return z, nil
90 }
91
// get4 decodes the first four bytes of p as a little-endian uint32.
// GZIP (RFC 1952) is little-endian, unlike ZLIB (RFC 1950).
func get4(p []byte) uint32 {
	var v uint32
	// Fold from the most significant byte (p[3]) down to the least (p[0]).
	for i := 3; i >= 0; i-- {
		v = v<<8 | uint32(p[i])
	}
	return v
}
96
97 func (z *Reader) readString() (string, error) {
98 var err error
99 needconv := false
100 for i := 0; ; i++ {
101 if i >= len(z.buf) {
102 return "", ErrHeader
103 }
104 z.buf[i], err = z.r.ReadByte()
105 if err != nil {
106 return "", err
107 }
108 if z.buf[i] > 0x7f {
109 needconv = true
110 }
111 if z.buf[i] == 0 {
112 // GZIP (RFC 1952) specifies that strings are NUL-terminated ISO 8859-1 (Latin-1).
113 if needconv {
114 s := make([]rune, 0, i)
115 for _, v := range z.buf[0:i] {
116 s = append(s, rune(v))
117 }
118 return string(s), nil
119 }
120 return string(z.buf[0:i]), nil
121 }
122 }
123 panic("not reached")
124 }
125
126 func (z *Reader) read2() (uint32, error) {
127 _, err := io.ReadFull(z.r, z.buf[0:2])
128 if err != nil {
129 return 0, err
130 }
131 return uint32(z.buf[0]) | uint32(z.buf[1])<<8, nil
132 }
133
134 func (z *Reader) readHeader(save bool) error {
135 _, err := io.ReadFull(z.r, z.buf[0:10])
136 if err != nil {
137 return err
138 }
139 if z.buf[0] != gzipID1 || z.buf[1] != gzipID2 || z.buf[2] != gzipDeflate {
140 return ErrHeader
141 }
142 z.flg = z.buf[3]
143 if save {
144 z.ModTime = time.Unix(int64(get4(z.buf[4:8])), 0)
145 // z.buf[8] is xfl, ignored
146 z.OS = z.buf[9]
147 }
148 z.digest.Reset()
149 z.digest.Write(z.buf[0:10])
150
151 if z.flg&flagExtra != 0 {
152 n, err := z.read2()
153 if err != nil {
154 return err
155 }
156 data := make([]byte, n)
157 if _, err = io.ReadFull(z.r, data); err != nil {
158 return err
159 }
160 if save {
161 z.Extra = data
162 }
163 }
164
165 var s string
166 if z.flg&flagName != 0 {
167 if s, err = z.readString(); err != nil {
168 return err
169 }
170 if save {
171 z.Name = s
172 }
173 }
174
175 if z.flg&flagComment != 0 {
176 if s, err = z.readString(); err != nil {
177 return err
178 }
179 if save {
180 z.Comment = s
181 }
182 }
183
184 if z.flg&flagHdrCrc != 0 {
185 n, err := z.read2()
186 if err != nil {
187 return err
188 }
189 sum := z.digest.Sum32() & 0xFFFF
190 if n != sum {
191 return ErrHeader
192 }
193 }
194
195 z.digest.Reset()
196 z.decompressor = flate.NewReader(z.r)
197 return nil
198 }
199
200 func (z *Reader) Read(p []byte) (n int, err error) {
201 if z.err != nil {
202 return 0, z.err
203 }
204 if len(p) == 0 {
205 return 0, nil
206 }
207
208 n, err = z.decompressor.Read(p)
209 z.digest.Write(p[0:n])
210 z.size += uint32(n)
211 if n != 0 || err != io.EOF {
212 z.err = err
213 return
214 }
215
216 // Finished file; check checksum + size.
217 if _, err := io.ReadFull(z.r, z.buf[0:8]); err != nil {
218 z.err = err
219 return 0, err
220 }
221 crc32, isize := get4(z.buf[0:4]), get4(z.buf[4:8])
222 sum := z.digest.Sum32()
223 if sum != crc32 || isize != z.size {
224 z.err = ErrChecksum
225 return 0, z.err
226 }
227
228 // File is ok; is there another?
229 if err = z.readHeader(false); err != nil {
230 z.err = err
231 return
232 }
233
234 // Yes. Reset and read from it.
235 z.digest.Reset()
236 z.size = 0
237 return z.Read(p)
238 }
239
240 // Close closes the Reader. It does not close the underlying io.Reader.
241 func (z *Reader) Close() error { return z.decompressor.Close() }