src/pkg/archive/tar/reader.go - The Go Programming Language

Golang

Source file src/pkg/archive/tar/reader.go

     1	// Copyright 2009 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package tar
     6	
     7	// TODO(dsymonds):
     8	//   - pax extensions
     9	
    10	import (
    11		"bytes"
    12		"errors"
    13		"io"
    14		"io/ioutil"
    15		"os"
    16		"strconv"
    17		"time"
    18	)
    19	
    20	var (
    21		ErrHeader = errors.New("archive/tar: invalid tar header")
    22	)
    23	
    24	// A Reader provides sequential access to the contents of a tar archive.
    25	// A tar archive consists of a sequence of files.
    26	// The Next method advances to the next file in the archive (including the first),
    27	// and then it can be treated as an io.Reader to access the file's data.
    28	//
    29	// Example:
    30	//	tr := tar.NewReader(r)
    31	//	for {
    32	//		hdr, err := tr.Next()
    33	//		if err == io.EOF {
    34	//			// end of tar archive
    35	//			break
    36	//		}
    37	//		if err != nil {
    38	//			// handle error
    39	//		}
    40	//		io.Copy(data, tr)
    41	//	}
    42	type Reader struct {
    43		r   io.Reader
    44		err error
    45		nb  int64 // number of unread bytes for current file entry
    46		pad int64 // amount of padding (ignored) after current file entry
    47	}
    48	
    49	// NewReader creates a new Reader reading from r.
    50	func NewReader(r io.Reader) *Reader { return &Reader{r: r} }
    51	
    52	// Next advances to the next entry in the tar archive.
    53	func (tr *Reader) Next() (*Header, error) {
    54		var hdr *Header
    55		if tr.err == nil {
    56			tr.skipUnread()
    57		}
    58		if tr.err == nil {
    59			hdr = tr.readHeader()
    60		}
    61		return hdr, tr.err
    62	}
    63	
    64	// Parse bytes as a NUL-terminated C-style string.
    65	// If a NUL byte is not found then the whole slice is returned as a string.
    66	func cString(b []byte) string {
    67		n := 0
    68		for n < len(b) && b[n] != 0 {
    69			n++
    70		}
    71		return string(b[0:n])
    72	}
    73	
    74	func (tr *Reader) octal(b []byte) int64 {
    75		// Removing leading spaces.
    76		for len(b) > 0 && b[0] == ' ' {
    77			b = b[1:]
    78		}
    79		// Removing trailing NULs and spaces.
    80		for len(b) > 0 && (b[len(b)-1] == ' ' || b[len(b)-1] == '\x00') {
    81			b = b[0 : len(b)-1]
    82		}
    83		x, err := strconv.ParseUint(cString(b), 8, 64)
    84		if err != nil {
    85			tr.err = err
    86		}
    87		return int64(x)
    88	}
    89	
    90	// Skip any unread bytes in the existing file entry, as well as any alignment padding.
    91	func (tr *Reader) skipUnread() {
    92		nr := tr.nb + tr.pad // number of bytes to skip
    93		tr.nb, tr.pad = 0, 0
    94		if sr, ok := tr.r.(io.Seeker); ok {
    95			if _, err := sr.Seek(nr, os.SEEK_CUR); err == nil {
    96				return
    97			}
    98		}
    99		_, tr.err = io.CopyN(ioutil.Discard, tr.r, nr)
   100	}
   101	
   102	func (tr *Reader) verifyChecksum(header []byte) bool {
   103		if tr.err != nil {
   104			return false
   105		}
   106	
   107		given := tr.octal(header[148:156])
   108		unsigned, signed := checksum(header)
   109		return given == unsigned || given == signed
   110	}
   111	
   112	func (tr *Reader) readHeader() *Header {
   113		header := make([]byte, blockSize)
   114		if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil {
   115			return nil
   116		}
   117	
   118		// Two blocks of zero bytes marks the end of the archive.
   119		if bytes.Equal(header, zeroBlock[0:blockSize]) {
   120			if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil {
   121				return nil
   122			}
   123			if bytes.Equal(header, zeroBlock[0:blockSize]) {
   124				tr.err = io.EOF
   125			} else {
   126				tr.err = ErrHeader // zero block and then non-zero block
   127			}
   128			return nil
   129		}
   130	
   131		if !tr.verifyChecksum(header) {
   132			tr.err = ErrHeader
   133			return nil
   134		}
   135	
   136		// Unpack
   137		hdr := new(Header)
   138		s := slicer(header)
   139	
   140		hdr.Name = cString(s.next(100))
   141		hdr.Mode = tr.octal(s.next(8))
   142		hdr.Uid = int(tr.octal(s.next(8)))
   143		hdr.Gid = int(tr.octal(s.next(8)))
   144		hdr.Size = tr.octal(s.next(12))
   145		hdr.ModTime = time.Unix(tr.octal(s.next(12)), 0)
   146		s.next(8) // chksum
   147		hdr.Typeflag = s.next(1)[0]
   148		hdr.Linkname = cString(s.next(100))
   149	
   150		// The remainder of the header depends on the value of magic.
   151		// The original (v7) version of tar had no explicit magic field,
   152		// so its magic bytes, like the rest of the block, are NULs.
   153		magic := string(s.next(8)) // contains version field as well.
   154		var format string
   155		switch magic {
   156		case "ustar\x0000": // POSIX tar (1003.1-1988)
   157			if string(header[508:512]) == "tar\x00" {
   158				format = "star"
   159			} else {
   160				format = "posix"
   161			}
   162		case "ustar  \x00": // old GNU tar
   163			format = "gnu"
   164		}
   165	
   166		switch format {
   167		case "posix", "gnu", "star":
   168			hdr.Uname = cString(s.next(32))
   169			hdr.Gname = cString(s.next(32))
   170			devmajor := s.next(8)
   171			devminor := s.next(8)
   172			if hdr.Typeflag == TypeChar || hdr.Typeflag == TypeBlock {
   173				hdr.Devmajor = tr.octal(devmajor)
   174				hdr.Devminor = tr.octal(devminor)
   175			}
   176			var prefix string
   177			switch format {
   178			case "posix", "gnu":
   179				prefix = cString(s.next(155))
   180			case "star":
   181				prefix = cString(s.next(131))
   182				hdr.AccessTime = time.Unix(tr.octal(s.next(12)), 0)
   183				hdr.ChangeTime = time.Unix(tr.octal(s.next(12)), 0)
   184			}
   185			if len(prefix) > 0 {
   186				hdr.Name = prefix + "/" + hdr.Name
   187			}
   188		}
   189	
   190		if tr.err != nil {
   191			tr.err = ErrHeader
   192			return nil
   193		}
   194	
   195		// Maximum value of hdr.Size is 64 GB (12 octal digits),
   196		// so there's no risk of int64 overflowing.
   197		tr.nb = int64(hdr.Size)
   198		tr.pad = -tr.nb & (blockSize - 1) // blockSize is a power of two
   199	
   200		return hdr
   201	}
   202	
   203	// Read reads from the current entry in the tar archive.
   204	// It returns 0, io.EOF when it reaches the end of that entry,
   205	// until Next is called to advance to the next entry.
   206	func (tr *Reader) Read(b []byte) (n int, err error) {
   207		if tr.nb == 0 {
   208			// file consumed
   209			return 0, io.EOF
   210		}
   211	
   212		if int64(len(b)) > tr.nb {
   213			b = b[0:tr.nb]
   214		}
   215		n, err = tr.r.Read(b)
   216		tr.nb -= int64(n)
   217	
   218		if err == io.EOF && tr.nb > 0 {
   219			err = io.ErrUnexpectedEOF
   220		}
   221		tr.err = err
   222		return
   223	}