Source file src/pkg/archive/tar/reader.go
1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package tar 6 7 // TODO(dsymonds): 8 // - pax extensions 9 10 import ( 11 "bytes" 12 "errors" 13 "io" 14 "io/ioutil" 15 "os" 16 "strconv" 17 "time" 18 ) 19 20 var ( 21 ErrHeader = errors.New("archive/tar: invalid tar header") 22 ) 23 24 // A Reader provides sequential access to the contents of a tar archive. 25 // A tar archive consists of a sequence of files. 26 // The Next method advances to the next file in the archive (including the first), 27 // and then it can be treated as an io.Reader to access the file's data. 28 // 29 // Example: 30 // tr := tar.NewReader(r) 31 // for { 32 // hdr, err := tr.Next() 33 // if err == io.EOF { 34 // // end of tar archive 35 // break 36 // } 37 // if err != nil { 38 // // handle error 39 // } 40 // io.Copy(data, tr) 41 // } 42 type Reader struct { 43 r io.Reader 44 err error 45 nb int64 // number of unread bytes for current file entry 46 pad int64 // amount of padding (ignored) after current file entry 47 } 48 49 // NewReader creates a new Reader reading from r. 50 func NewReader(r io.Reader) *Reader { return &Reader{r: r} } 51 52 // Next advances to the next entry in the tar archive. 53 func (tr *Reader) Next() (*Header, error) { 54 var hdr *Header 55 if tr.err == nil { 56 tr.skipUnread() 57 } 58 if tr.err == nil { 59 hdr = tr.readHeader() 60 } 61 return hdr, tr.err 62 } 63 64 // Parse bytes as a NUL-terminated C-style string. 65 // If a NUL byte is not found then the whole slice is returned as a string. 66 func cString(b []byte) string { 67 n := 0 68 for n < len(b) && b[n] != 0 { 69 n++ 70 } 71 return string(b[0:n]) 72 } 73 74 func (tr *Reader) octal(b []byte) int64 { 75 // Removing leading spaces. 76 for len(b) > 0 && b[0] == ' ' { 77 b = b[1:] 78 } 79 // Removing trailing NULs and spaces. 80 for len(b) > 0 && (b[len(b)-1] == ' ' || b[len(b)-1] == '\x00') { 81 b = b[0 : len(b)-1] 82 } 83 x, err := strconv.ParseUint(cString(b), 8, 64) 84 if err != nil { 85 tr.err = err 86 } 87 return int64(x) 88 } 89 90 // Skip any unread bytes in the existing file entry, as well as any alignment padding. 91 func (tr *Reader) skipUnread() { 92 nr := tr.nb + tr.pad // number of bytes to skip 93 tr.nb, tr.pad = 0, 0 94 if sr, ok := tr.r.(io.Seeker); ok { 95 if _, err := sr.Seek(nr, os.SEEK_CUR); err == nil { 96 return 97 } 98 } 99 _, tr.err = io.CopyN(ioutil.Discard, tr.r, nr) 100 } 101 102 func (tr *Reader) verifyChecksum(header []byte) bool { 103 if tr.err != nil { 104 return false 105 } 106 107 given := tr.octal(header[148:156]) 108 unsigned, signed := checksum(header) 109 return given == unsigned || given == signed 110 } 111 112 func (tr *Reader) readHeader() *Header { 113 header := make([]byte, blockSize) 114 if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil { 115 return nil 116 } 117 118 // Two blocks of zero bytes marks the end of the archive. 119 if bytes.Equal(header, zeroBlock[0:blockSize]) { 120 if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil { 121 return nil 122 } 123 if bytes.Equal(header, zeroBlock[0:blockSize]) { 124 tr.err = io.EOF 125 } else { 126 tr.err = ErrHeader // zero block and then non-zero block 127 } 128 return nil 129 } 130 131 if !tr.verifyChecksum(header) { 132 tr.err = ErrHeader 133 return nil 134 } 135 136 // Unpack 137 hdr := new(Header) 138 s := slicer(header) 139 140 hdr.Name = cString(s.next(100)) 141 hdr.Mode = tr.octal(s.next(8)) 142 hdr.Uid = int(tr.octal(s.next(8))) 143 hdr.Gid = int(tr.octal(s.next(8))) 144 hdr.Size = tr.octal(s.next(12)) 145 hdr.ModTime = time.Unix(tr.octal(s.next(12)), 0) 146 s.next(8) // chksum 147 hdr.Typeflag = s.next(1)[0] 148 hdr.Linkname = cString(s.next(100)) 149 150 // The remainder of the header depends on the value of magic. 151 // The original (v7) version of tar had no explicit magic field, 152 // so its magic bytes, like the rest of the block, are NULs. 153 magic := string(s.next(8)) // contains version field as well. 154 var format string 155 switch magic { 156 case "ustar\x0000": // POSIX tar (1003.1-1988) 157 if string(header[508:512]) == "tar\x00" { 158 format = "star" 159 } else { 160 format = "posix" 161 } 162 case "ustar \x00": // old GNU tar 163 format = "gnu" 164 } 165 166 switch format { 167 case "posix", "gnu", "star": 168 hdr.Uname = cString(s.next(32)) 169 hdr.Gname = cString(s.next(32)) 170 devmajor := s.next(8) 171 devminor := s.next(8) 172 if hdr.Typeflag == TypeChar || hdr.Typeflag == TypeBlock { 173 hdr.Devmajor = tr.octal(devmajor) 174 hdr.Devminor = tr.octal(devminor) 175 } 176 var prefix string 177 switch format { 178 case "posix", "gnu": 179 prefix = cString(s.next(155)) 180 case "star": 181 prefix = cString(s.next(131)) 182 hdr.AccessTime = time.Unix(tr.octal(s.next(12)), 0) 183 hdr.ChangeTime = time.Unix(tr.octal(s.next(12)), 0) 184 } 185 if len(prefix) > 0 { 186 hdr.Name = prefix + "/" + hdr.Name 187 } 188 } 189 190 if tr.err != nil { 191 tr.err = ErrHeader 192 return nil 193 } 194 195 // Maximum value of hdr.Size is 64 GB (12 octal digits), 196 // so there's no risk of int64 overflowing. 197 tr.nb = int64(hdr.Size) 198 tr.pad = -tr.nb & (blockSize - 1) // blockSize is a power of two 199 200 return hdr 201 } 202 203 // Read reads from the current entry in the tar archive. 204 // It returns 0, io.EOF when it reaches the end of that entry, 205 // until Next is called to advance to the next entry. 206 func (tr *Reader) Read(b []byte) (n int, err error) { 207 if tr.nb == 0 { 208 // file consumed 209 return 0, io.EOF 210 } 211 212 if int64(len(b)) > tr.nb { 213 b = b[0:tr.nb] 214 } 215 n, err = tr.r.Read(b) 216 tr.nb -= int64(n) 217 218 if err == io.EOF && tr.nb > 0 { 219 err = io.ErrUnexpectedEOF 220 } 221 tr.err = err 222 return 223 }