Source file src/pkg/archive/zip/reader.go
1 // Copyright 2010 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package zip 6 7 import ( 8 "bufio" 9 "compress/flate" 10 "encoding/binary" 11 "errors" 12 "hash" 13 "hash/crc32" 14 "io" 15 "io/ioutil" 16 "os" 17 ) 18 19 var ( 20 ErrFormat = errors.New("zip: not a valid zip file") 21 ErrAlgorithm = errors.New("zip: unsupported compression algorithm") 22 ErrChecksum = errors.New("zip: checksum error") 23 ) 24 25 type Reader struct { 26 r io.ReaderAt 27 File []*File 28 Comment string 29 } 30 31 type ReadCloser struct { 32 f *os.File 33 Reader 34 } 35 36 type File struct { 37 FileHeader 38 zipr io.ReaderAt 39 zipsize int64 40 headerOffset int64 41 } 42 43 func (f *File) hasDataDescriptor() bool { 44 return f.Flags&0x8 != 0 45 } 46 47 // OpenReader will open the Zip file specified by name and return a ReadCloser. 48 func OpenReader(name string) (*ReadCloser, error) { 49 f, err := os.Open(name) 50 if err != nil { 51 return nil, err 52 } 53 fi, err := f.Stat() 54 if err != nil { 55 f.Close() 56 return nil, err 57 } 58 r := new(ReadCloser) 59 if err := r.init(f, fi.Size()); err != nil { 60 f.Close() 61 return nil, err 62 } 63 r.f = f 64 return r, nil 65 } 66 67 // NewReader returns a new Reader reading from r, which is assumed to 68 // have the given size in bytes. 69 func NewReader(r io.ReaderAt, size int64) (*Reader, error) { 70 zr := new(Reader) 71 if err := zr.init(r, size); err != nil { 72 return nil, err 73 } 74 return zr, nil 75 } 76 77 func (z *Reader) init(r io.ReaderAt, size int64) error { 78 end, err := readDirectoryEnd(r, size) 79 if err != nil { 80 return err 81 } 82 z.r = r 83 z.File = make([]*File, 0, end.directoryRecords) 84 z.Comment = end.comment 85 rs := io.NewSectionReader(r, 0, size) 86 if _, err = rs.Seek(int64(end.directoryOffset), os.SEEK_SET); err != nil { 87 return err 88 } 89 buf := bufio.NewReader(rs) 90 91 // The count of files inside a zip is truncated to fit in a uint16. 92 // Gloss over this by reading headers until we encounter 93 // a bad one, and then only report a ErrFormat or UnexpectedEOF if 94 // the file count modulo 65536 is incorrect. 95 for { 96 f := &File{zipr: r, zipsize: size} 97 err = readDirectoryHeader(f, buf) 98 if err == ErrFormat || err == io.ErrUnexpectedEOF { 99 break 100 } 101 if err != nil { 102 return err 103 } 104 z.File = append(z.File, f) 105 } 106 if uint16(len(z.File)) != end.directoryRecords { 107 // Return the readDirectoryHeader error if we read 108 // the wrong number of directory entries. 109 return err 110 } 111 return nil 112 } 113 114 // Close closes the Zip file, rendering it unusable for I/O. 115 func (rc *ReadCloser) Close() error { 116 return rc.f.Close() 117 } 118 119 // Open returns a ReadCloser that provides access to the File's contents. 120 // Multiple files may be read concurrently. 121 func (f *File) Open() (rc io.ReadCloser, err error) { 122 bodyOffset, err := f.findBodyOffset() 123 if err != nil { 124 return 125 } 126 size := int64(f.CompressedSize) 127 r := io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset, size) 128 switch f.Method { 129 case Store: // (no compression) 130 rc = ioutil.NopCloser(r) 131 case Deflate: 132 rc = flate.NewReader(r) 133 default: 134 err = ErrAlgorithm 135 return 136 } 137 var desr io.Reader 138 if f.hasDataDescriptor() { 139 desr = io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset+size, dataDescriptorLen) 140 } 141 rc = &checksumReader{rc, crc32.NewIEEE(), f, desr, nil} 142 return 143 } 144 145 type checksumReader struct { 146 rc io.ReadCloser 147 hash hash.Hash32 148 f *File 149 desr io.Reader // if non-nil, where to read the data descriptor 150 err error // sticky error 151 } 152 153 func (r *checksumReader) Read(b []byte) (n int, err error) { 154 if r.err != nil { 155 return 0, r.err 156 } 157 n, err = r.rc.Read(b) 158 r.hash.Write(b[:n]) 159 if err == nil { 160 return 161 } 162 if err == io.EOF { 163 if r.desr != nil { 164 if err1 := readDataDescriptor(r.desr, r.f); err1 != nil { 165 err = err1 166 } else if r.hash.Sum32() != r.f.CRC32 { 167 err = ErrChecksum 168 } 169 } else { 170 // If there's not a data descriptor, we still compare 171 // the CRC32 of what we've read against the file header 172 // or TOC's CRC32, if it seems like it was set. 173 if r.f.CRC32 != 0 && r.hash.Sum32() != r.f.CRC32 { 174 err = ErrChecksum 175 } 176 } 177 } 178 r.err = err 179 return 180 } 181 182 func (r *checksumReader) Close() error { return r.rc.Close() } 183 184 // findBodyOffset does the minimum work to verify the file has a header 185 // and returns the file body offset. 186 func (f *File) findBodyOffset() (int64, error) { 187 r := io.NewSectionReader(f.zipr, f.headerOffset, f.zipsize-f.headerOffset) 188 var buf [fileHeaderLen]byte 189 if _, err := io.ReadFull(r, buf[:]); err != nil { 190 return 0, err 191 } 192 b := readBuf(buf[:]) 193 if sig := b.uint32(); sig != fileHeaderSignature { 194 return 0, ErrFormat 195 } 196 b = b[22:] // skip over most of the header 197 filenameLen := int(b.uint16()) 198 extraLen := int(b.uint16()) 199 return int64(fileHeaderLen + filenameLen + extraLen), nil 200 } 201 202 // readDirectoryHeader attempts to read a directory header from r. 203 // It returns io.ErrUnexpectedEOF if it cannot read a complete header, 204 // and ErrFormat if it doesn't find a valid header signature. 205 func readDirectoryHeader(f *File, r io.Reader) error { 206 var buf [directoryHeaderLen]byte 207 if _, err := io.ReadFull(r, buf[:]); err != nil { 208 return err 209 } 210 b := readBuf(buf[:]) 211 if sig := b.uint32(); sig != directoryHeaderSignature { 212 return ErrFormat 213 } 214 f.CreatorVersion = b.uint16() 215 f.ReaderVersion = b.uint16() 216 f.Flags = b.uint16() 217 f.Method = b.uint16() 218 f.ModifiedTime = b.uint16() 219 f.ModifiedDate = b.uint16() 220 f.CRC32 = b.uint32() 221 f.CompressedSize = b.uint32() 222 f.UncompressedSize = b.uint32() 223 filenameLen := int(b.uint16()) 224 extraLen := int(b.uint16()) 225 commentLen := int(b.uint16()) 226 b = b[4:] // skipped start disk number and internal attributes (2x uint16) 227 f.ExternalAttrs = b.uint32() 228 f.headerOffset = int64(b.uint32()) 229 d := make([]byte, filenameLen+extraLen+commentLen) 230 if _, err := io.ReadFull(r, d); err != nil { 231 return err 232 } 233 f.Name = string(d[:filenameLen]) 234 f.Extra = d[filenameLen : filenameLen+extraLen] 235 f.Comment = string(d[filenameLen+extraLen:]) 236 return nil 237 } 238 239 func readDataDescriptor(r io.Reader, f *File) error { 240 var buf [dataDescriptorLen]byte 241 242 // The spec says: "Although not originally assigned a 243 // signature, the value 0x08074b50 has commonly been adopted 244 // as a signature value for the data descriptor record. 245 // Implementers should be aware that ZIP files may be 246 // encountered with or without this signature marking data 247 // descriptors and should account for either case when reading 248 // ZIP files to ensure compatibility." 249 // 250 // dataDescriptorLen includes the size of the signature but 251 // first read just those 4 bytes to see if it exists. 252 if _, err := io.ReadFull(r, buf[:4]); err != nil { 253 return err 254 } 255 off := 0 256 maybeSig := readBuf(buf[:4]) 257 if maybeSig.uint32() != dataDescriptorSignature { 258 // No data descriptor signature. Keep these four 259 // bytes. 260 off += 4 261 } 262 if _, err := io.ReadFull(r, buf[off:12]); err != nil { 263 return err 264 } 265 b := readBuf(buf[:12]) 266 f.CRC32 = b.uint32() 267 f.CompressedSize = b.uint32() 268 f.UncompressedSize = b.uint32() 269 return nil 270 } 271 272 func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, err error) { 273 // look for directoryEndSignature in the last 1k, then in the last 65k 274 var buf []byte 275 for i, bLen := range []int64{1024, 65 * 1024} { 276 if bLen > size { 277 bLen = size 278 } 279 buf = make([]byte, int(bLen)) 280 if _, err := r.ReadAt(buf, size-bLen); err != nil && err != io.EOF { 281 return nil, err 282 } 283 if p := findSignatureInBlock(buf); p >= 0 { 284 buf = buf[p:] 285 break 286 } 287 if i == 1 || bLen == size { 288 return nil, ErrFormat 289 } 290 } 291 292 // read header into struct 293 b := readBuf(buf[4:]) // skip signature 294 d := &directoryEnd{ 295 diskNbr: b.uint16(), 296 dirDiskNbr: b.uint16(), 297 dirRecordsThisDisk: b.uint16(), 298 directoryRecords: b.uint16(), 299 directorySize: b.uint32(), 300 directoryOffset: b.uint32(), 301 commentLen: b.uint16(), 302 } 303 l := int(d.commentLen) 304 if l > len(b) { 305 return nil, errors.New("zip: invalid comment length") 306 } 307 d.comment = string(b[:l]) 308 return d, nil 309 } 310 311 func findSignatureInBlock(b []byte) int { 312 for i := len(b) - directoryEndLen; i >= 0; i-- { 313 // defined from directoryEndSignature in struct.go 314 if b[i] == 'P' && b[i+1] == 'K' && b[i+2] == 0x05 && b[i+3] == 0x06 { 315 // n is length of comment 316 n := int(b[i+directoryEndLen-2]) | int(b[i+directoryEndLen-1])<<8 317 if n+directoryEndLen+i == len(b) { 318 return i 319 } 320 } 321 } 322 return -1 323 } 324 325 type readBuf []byte 326 327 func (b *readBuf) uint16() uint16 { 328 v := binary.LittleEndian.Uint16(*b) 329 *b = (*b)[2:] 330 return v 331 } 332 333 func (b *readBuf) uint32() uint32 { 334 v := binary.LittleEndian.Uint32(*b) 335 *b = (*b)[4:] 336 return v 337 }