Source file src/pkg/archive/tar/reader.go
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 package tar
6
7 // TODO(dsymonds):
8 // - pax extensions
9
10 import (
11 "bytes"
12 "errors"
13 "io"
14 "io/ioutil"
15 "os"
16 "strconv"
17 "time"
18 )
19
// ErrHeader is the error recorded by the Reader when a header block is not
// valid: its checksum does not match, one of its numeric fields cannot be
// parsed, or a single zero block is followed by a non-zero block.
var ErrHeader = errors.New("archive/tar: invalid tar header")
23
24 // A Reader provides sequential access to the contents of a tar archive.
25 // A tar archive consists of a sequence of files.
26 // The Next method advances to the next file in the archive (including the first),
27 // and then it can be treated as an io.Reader to access the file's data.
28 //
29 // Example:
30 // tr := tar.NewReader(r)
31 // for {
32 // hdr, err := tr.Next()
33 // if err == io.EOF {
34 // // end of tar archive
35 // break
36 // }
37 // if err != nil {
38 // // handle error
39 // }
40 // io.Copy(data, tr)
41 // }
42 type Reader struct {
43 r io.Reader
44 err error
45 nb int64 // number of unread bytes for current file entry
46 pad int64 // amount of padding (ignored) after current file entry
47 }
48
49 // NewReader creates a new Reader reading from r.
50 func NewReader(r io.Reader) *Reader { return &Reader{r: r} }
51
52 // Next advances to the next entry in the tar archive.
53 func (tr *Reader) Next() (*Header, error) {
54 var hdr *Header
55 if tr.err == nil {
56 tr.skipUnread()
57 }
58 if tr.err == nil {
59 hdr = tr.readHeader()
60 }
61 return hdr, tr.err
62 }
63
// cString interprets b as a NUL-terminated C-style string and returns the
// bytes before the first NUL. If b contains no NUL byte, the whole slice is
// returned as a string.
func cString(b []byte) string {
	if end := bytes.IndexByte(b, 0); end >= 0 {
		return string(b[:end])
	}
	return string(b)
}
73
74 func (tr *Reader) octal(b []byte) int64 {
75 // Removing leading spaces.
76 for len(b) > 0 && b[0] == ' ' {
77 b = b[1:]
78 }
79 // Removing trailing NULs and spaces.
80 for len(b) > 0 && (b[len(b)-1] == ' ' || b[len(b)-1] == '\x00') {
81 b = b[0 : len(b)-1]
82 }
83 x, err := strconv.ParseUint(cString(b), 8, 64)
84 if err != nil {
85 tr.err = err
86 }
87 return int64(x)
88 }
89
90 // Skip any unread bytes in the existing file entry, as well as any alignment padding.
91 func (tr *Reader) skipUnread() {
92 nr := tr.nb + tr.pad // number of bytes to skip
93 tr.nb, tr.pad = 0, 0
94 if sr, ok := tr.r.(io.Seeker); ok {
95 if _, err := sr.Seek(nr, os.SEEK_CUR); err == nil {
96 return
97 }
98 }
99 _, tr.err = io.CopyN(ioutil.Discard, tr.r, nr)
100 }
101
102 func (tr *Reader) verifyChecksum(header []byte) bool {
103 if tr.err != nil {
104 return false
105 }
106
107 given := tr.octal(header[148:156])
108 unsigned, signed := checksum(header)
109 return given == unsigned || given == signed
110 }
111
112 func (tr *Reader) readHeader() *Header {
113 header := make([]byte, blockSize)
114 if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil {
115 return nil
116 }
117
118 // Two blocks of zero bytes marks the end of the archive.
119 if bytes.Equal(header, zeroBlock[0:blockSize]) {
120 if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil {
121 return nil
122 }
123 if bytes.Equal(header, zeroBlock[0:blockSize]) {
124 tr.err = io.EOF
125 } else {
126 tr.err = ErrHeader // zero block and then non-zero block
127 }
128 return nil
129 }
130
131 if !tr.verifyChecksum(header) {
132 tr.err = ErrHeader
133 return nil
134 }
135
136 // Unpack
137 hdr := new(Header)
138 s := slicer(header)
139
140 hdr.Name = cString(s.next(100))
141 hdr.Mode = tr.octal(s.next(8))
142 hdr.Uid = int(tr.octal(s.next(8)))
143 hdr.Gid = int(tr.octal(s.next(8)))
144 hdr.Size = tr.octal(s.next(12))
145 hdr.ModTime = time.Unix(tr.octal(s.next(12)), 0)
146 s.next(8) // chksum
147 hdr.Typeflag = s.next(1)[0]
148 hdr.Linkname = cString(s.next(100))
149
150 // The remainder of the header depends on the value of magic.
151 // The original (v7) version of tar had no explicit magic field,
152 // so its magic bytes, like the rest of the block, are NULs.
153 magic := string(s.next(8)) // contains version field as well.
154 var format string
155 switch magic {
156 case "ustar\x0000": // POSIX tar (1003.1-1988)
157 if string(header[508:512]) == "tar\x00" {
158 format = "star"
159 } else {
160 format = "posix"
161 }
162 case "ustar \x00": // old GNU tar
163 format = "gnu"
164 }
165
166 switch format {
167 case "posix", "gnu", "star":
168 hdr.Uname = cString(s.next(32))
169 hdr.Gname = cString(s.next(32))
170 devmajor := s.next(8)
171 devminor := s.next(8)
172 if hdr.Typeflag == TypeChar || hdr.Typeflag == TypeBlock {
173 hdr.Devmajor = tr.octal(devmajor)
174 hdr.Devminor = tr.octal(devminor)
175 }
176 var prefix string
177 switch format {
178 case "posix", "gnu":
179 prefix = cString(s.next(155))
180 case "star":
181 prefix = cString(s.next(131))
182 hdr.AccessTime = time.Unix(tr.octal(s.next(12)), 0)
183 hdr.ChangeTime = time.Unix(tr.octal(s.next(12)), 0)
184 }
185 if len(prefix) > 0 {
186 hdr.Name = prefix + "/" + hdr.Name
187 }
188 }
189
190 if tr.err != nil {
191 tr.err = ErrHeader
192 return nil
193 }
194
195 // Maximum value of hdr.Size is 64 GB (12 octal digits),
196 // so there's no risk of int64 overflowing.
197 tr.nb = int64(hdr.Size)
198 tr.pad = -tr.nb & (blockSize - 1) // blockSize is a power of two
199
200 return hdr
201 }
202
203 // Read reads from the current entry in the tar archive.
204 // It returns 0, io.EOF when it reaches the end of that entry,
205 // until Next is called to advance to the next entry.
206 func (tr *Reader) Read(b []byte) (n int, err error) {
207 if tr.nb == 0 {
208 // file consumed
209 return 0, io.EOF
210 }
211
212 if int64(len(b)) > tr.nb {
213 b = b[0:tr.nb]
214 }
215 n, err = tr.r.Read(b)
216 tr.nb -= int64(n)
217
218 if err == io.EOF && tr.nb > 0 {
219 err = io.ErrUnexpectedEOF
220 }
221 tr.err = err
222 return
223 }