src/pkg/net/textproto/reader.go - The Go Programming Language

Golang

Source file src/pkg/net/textproto/reader.go

     1	// Copyright 2010 The Go Authors.  All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package textproto
     6	
     7	import (
     8		"bufio"
     9		"bytes"
    10		"io"
    11		"io/ioutil"
    12		"strconv"
    13		"strings"
    14	)
    15	
    16	// BUG(rsc): To let callers manage exposure to denial of service
    17	// attacks, Reader should allow them to set and reset a limit on
    18	// the number of bytes read from the connection.
    19	
// A Reader implements convenience methods for reading requests
// or responses from a text protocol network connection.
//
// All methods read from R. The unexported fields hold state shared
// between calls: the DotReader currently handed out (if any) and a
// scratch buffer used when assembling continued lines.
type Reader struct {
	// R is the buffered source that every method reads from.
	R   *bufio.Reader
	// dot is the dotReader most recently returned by DotReader, or nil.
	// It is cleared by dotReader.Read once that reader hits EOF or an
	// error, and closeDot drains it before any other read proceeds.
	dot *dotReader
	buf []byte // a re-usable buffer for readContinuedLineSlice
}
    27	
    28	// NewReader returns a new Reader reading from r.
    29	func NewReader(r *bufio.Reader) *Reader {
    30		return &Reader{R: r}
    31	}
    32	
    33	// ReadLine reads a single line from r,
    34	// eliding the final \n or \r\n from the returned string.
    35	func (r *Reader) ReadLine() (string, error) {
    36		line, err := r.readLineSlice()
    37		return string(line), err
    38	}
    39	
    40	// ReadLineBytes is like ReadLine but returns a []byte instead of a string.
    41	func (r *Reader) ReadLineBytes() ([]byte, error) {
    42		line, err := r.readLineSlice()
    43		if line != nil {
    44			buf := make([]byte, len(line))
    45			copy(buf, line)
    46			line = buf
    47		}
    48		return line, err
    49	}
    50	
    51	func (r *Reader) readLineSlice() ([]byte, error) {
    52		r.closeDot()
    53		var line []byte
    54		for {
    55			l, more, err := r.R.ReadLine()
    56			if err != nil {
    57				return nil, err
    58			}
    59			// Avoid the copy if the first call produced a full line.
    60			if line == nil && !more {
    61				return l, nil
    62			}
    63			line = append(line, l...)
    64			if !more {
    65				break
    66			}
    67		}
    68		return line, nil
    69	}
    70	
    71	// ReadContinuedLine reads a possibly continued line from r,
    72	// eliding the final trailing ASCII white space.
    73	// Lines after the first are considered continuations if they
    74	// begin with a space or tab character.  In the returned data,
    75	// continuation lines are separated from the previous line
    76	// only by a single space: the newline and leading white space
    77	// are removed.
    78	//
    79	// For example, consider this input:
    80	//
    81	//	Line 1
    82	//	  continued...
    83	//	Line 2
    84	//
    85	// The first call to ReadContinuedLine will return "Line 1 continued..."
    86	// and the second will return "Line 2".
    87	//
    88	// A line consisting of only white space is never continued.
    89	//
    90	func (r *Reader) ReadContinuedLine() (string, error) {
    91		line, err := r.readContinuedLineSlice()
    92		return string(line), err
    93	}
    94	
    95	// trim returns s with leading and trailing spaces and tabs removed.
    96	// It does not assume Unicode or UTF-8.
    97	func trim(s []byte) []byte {
    98		i := 0
    99		for i < len(s) && (s[i] == ' ' || s[i] == '\t') {
   100			i++
   101		}
   102		n := len(s)
   103		for n > i && (s[n-1] == ' ' || s[n-1] == '\t') {
   104			n--
   105		}
   106		return s[i:n]
   107	}
   108	
   109	// ReadContinuedLineBytes is like ReadContinuedLine but
   110	// returns a []byte instead of a string.
   111	func (r *Reader) ReadContinuedLineBytes() ([]byte, error) {
   112		line, err := r.readContinuedLineSlice()
   113		if line != nil {
   114			buf := make([]byte, len(line))
   115			copy(buf, line)
   116			line = buf
   117		}
   118		return line, err
   119	}
   120	
   121	func (r *Reader) readContinuedLineSlice() ([]byte, error) {
   122		// Read the first line.
   123		line, err := r.readLineSlice()
   124		if err != nil {
   125			return nil, err
   126		}
   127		if len(line) == 0 { // blank line - no continuation
   128			return line, nil
   129		}
   130	
   131		// ReadByte or the next readLineSlice will flush the read buffer;
   132		// copy the slice into buf.
   133		r.buf = append(r.buf[:0], trim(line)...)
   134	
   135		// Read continuation lines.
   136		for r.skipSpace() > 0 {
   137			line, err := r.readLineSlice()
   138			if err != nil {
   139				break
   140			}
   141			r.buf = append(r.buf, ' ')
   142			r.buf = append(r.buf, line...)
   143		}
   144		return r.buf, nil
   145	}
   146	
   147	// skipSpace skips R over all spaces and returns the number of bytes skipped.
   148	func (r *Reader) skipSpace() int {
   149		n := 0
   150		for {
   151			c, err := r.R.ReadByte()
   152			if err != nil {
   153				// Bufio will keep err until next read.
   154				break
   155			}
   156			if c != ' ' && c != '\t' {
   157				r.R.UnreadByte()
   158				break
   159			}
   160			n++
   161		}
   162		return n
   163	}
   164	
   165	func (r *Reader) readCodeLine(expectCode int) (code int, continued bool, message string, err error) {
   166		line, err := r.ReadLine()
   167		if err != nil {
   168			return
   169		}
   170		return parseCodeLine(line, expectCode)
   171	}
   172	
   173	func parseCodeLine(line string, expectCode int) (code int, continued bool, message string, err error) {
   174		if len(line) < 4 || line[3] != ' ' && line[3] != '-' {
   175			err = ProtocolError("short response: " + line)
   176			return
   177		}
   178		continued = line[3] == '-'
   179		code, err = strconv.Atoi(line[0:3])
   180		if err != nil || code < 100 {
   181			err = ProtocolError("invalid response code: " + line)
   182			return
   183		}
   184		message = line[4:]
   185		if 1 <= expectCode && expectCode < 10 && code/100 != expectCode ||
   186			10 <= expectCode && expectCode < 100 && code/10 != expectCode ||
   187			100 <= expectCode && expectCode < 1000 && code != expectCode {
   188			err = &Error{code, message}
   189		}
   190		return
   191	}
   192	
   193	// ReadCodeLine reads a response code line of the form
   194	//	code message
   195	// where code is a 3-digit status code and the message
   196	// extends to the rest of the line.  An example of such a line is:
   197	//	220 plan9.bell-labs.com ESMTP
   198	//
   199	// If the prefix of the status does not match the digits in expectCode,
   200	// ReadCodeLine returns with err set to &Error{code, message}.
   201	// For example, if expectCode is 31, an error will be returned if
   202	// the status is not in the range [310,319].
   203	//
   204	// If the response is multi-line, ReadCodeLine returns an error.
   205	//
   206	// An expectCode <= 0 disables the check of the status code.
   207	//
   208	func (r *Reader) ReadCodeLine(expectCode int) (code int, message string, err error) {
   209		code, continued, message, err := r.readCodeLine(expectCode)
   210		if err == nil && continued {
   211			err = ProtocolError("unexpected multi-line response: " + message)
   212		}
   213		return
   214	}
   215	
   216	// ReadResponse reads a multi-line response of the form:
   217	//
   218	//	code-message line 1
   219	//	code-message line 2
   220	//	...
   221	//	code message line n
   222	//
   223	// where code is a 3-digit status code. The first line starts with the
   224	// code and a hyphen. The response is terminated by a line that starts
   225	// with the same code followed by a space. Each line in message is
   226	// separated by a newline (\n).
   227	//
   228	// See page 36 of RFC 959 (http://www.ietf.org/rfc/rfc959.txt) for
   229	// details.
   230	//
   231	// If the prefix of the status does not match the digits in expectCode,
   232	// ReadResponse returns with err set to &Error{code, message}.
   233	// For example, if expectCode is 31, an error will be returned if
   234	// the status is not in the range [310,319].
   235	//
   236	// An expectCode <= 0 disables the check of the status code.
   237	//
   238	func (r *Reader) ReadResponse(expectCode int) (code int, message string, err error) {
   239		code, continued, message, err := r.readCodeLine(expectCode)
   240		for err == nil && continued {
   241			line, err := r.ReadLine()
   242			if err != nil {
   243				return 0, "", err
   244			}
   245	
   246			var code2 int
   247			var moreMessage string
   248			code2, continued, moreMessage, err = parseCodeLine(line, expectCode)
   249			if err != nil || code2 != code {
   250				message += "\n" + strings.TrimRight(line, "\r\n")
   251				continued = true
   252				continue
   253			}
   254			message += "\n" + moreMessage
   255		}
   256		return
   257	}
   258	
   259	// DotReader returns a new Reader that satisfies Reads using the
   260	// decoded text of a dot-encoded block read from r.
   261	// The returned Reader is only valid until the next call
   262	// to a method on r.
   263	//
   264	// Dot encoding is a common framing used for data blocks
   265	// in text protocols such as SMTP.  The data consists of a sequence
   266	// of lines, each of which ends in "\r\n".  The sequence itself
   267	// ends at a line containing just a dot: ".\r\n".  Lines beginning
   268	// with a dot are escaped with an additional dot to avoid
   269	// looking like the end of the sequence.
   270	//
   271	// The decoded form returned by the Reader's Read method
   272	// rewrites the "\r\n" line endings into the simpler "\n",
   273	// removes leading dot escapes if present, and stops with error io.EOF
   274	// after consuming (and discarding) the end-of-sequence line.
   275	func (r *Reader) DotReader() io.Reader {
   276		r.closeDot()
   277		r.dot = &dotReader{r: r}
   278		return r.dot
   279	}
   280	
// dotReader is the concrete reader returned by Reader.DotReader.
// It decodes a dot-encoded block from its parent Reader and remembers
// its position in the decoding state machine between Read calls.
type dotReader struct {
	// r is the parent Reader; bytes are pulled from r.R.
	r     *Reader
	// state is the current decoder state, one of the state* constants
	// declared inside Read. The zero value means "beginning of line".
	state int
}
   285	
// Read satisfies reads by decoding dot-encoded data read from d.r.
//
// It fills b with decoded bytes, pulling encoded input one byte at a
// time from d.r.R, and returns the number of bytes stored. It returns
// io.EOF once the terminating dot line has been consumed, and
// io.ErrUnexpectedEOF if the underlying reader reports io.EOF before
// that. After any error (including io.EOF) the dotReader detaches
// itself from its parent Reader, provided it is still the active one.
func (d *dotReader) Read(b []byte) (n int, err error) {
	// Run data through a simple state machine to
	// elide leading dots, rewrite trailing \r\n into \n,
	// and detect ending .\r\n line.
	const (
		stateBeginLine = iota // beginning of line; initial state; must be zero
		stateDot              // read . at beginning of line
		stateDotCR            // read .\r at beginning of line
		stateCR               // read \r (possibly at end of line)
		stateData             // reading data in middle of line
		stateEOF              // reached .\r\n end marker line
	)
	br := d.r.R
	// Each iteration consumes one input byte. A "continue" inside the
	// switch skips the store at the bottom of the loop, so that byte is
	// swallowed; falling out of the switch emits c into b.
	for n < len(b) && d.state != stateEOF {
		var c byte
		c, err = br.ReadByte()
		if err != nil {
			// The input ended before the terminator line was seen.
			if err == io.EOF {
				err = io.ErrUnexpectedEOF
			}
			break
		}
		switch d.state {
		case stateBeginLine:
			if c == '.' {
				// Either an escape dot or the start of the terminator;
				// hold it back until the next byte decides.
				d.state = stateDot
				continue
			}
			if c == '\r' {
				d.state = stateCR
				continue
			}
			d.state = stateData

		case stateDot:
			if c == '\r' {
				d.state = stateDotCR
				continue
			}
			if c == '\n' {
				// ".\n" is accepted as the terminator as well.
				d.state = stateEOF
				continue
			}
			// Ordinary byte after a leading dot: the dot was an escape
			// and is dropped; c itself is emitted below.
			d.state = stateData

		case stateDotCR:
			if c == '\n' {
				d.state = stateEOF
				continue
			}
			// Not part of .\r\n.
			// Consume leading dot and emit saved \r.
			br.UnreadByte()
			c = '\r'
			d.state = stateData

		case stateCR:
			if c == '\n' {
				d.state = stateBeginLine
				// break leaves only the switch: the \n is emitted and
				// the withheld \r is dropped, turning \r\n into \n.
				break
			}
			// Not part of \r\n.  Emit saved \r
			br.UnreadByte()
			c = '\r'
			d.state = stateData

		case stateData:
			if c == '\r' {
				// Withhold the \r until we know whether \n follows.
				d.state = stateCR
				continue
			}
			if c == '\n' {
				d.state = stateBeginLine
			}
		}
		b[n] = c
		n++
	}
	// The terminator has been consumed and no read error occurred:
	// report end of stream.
	if err == nil && d.state == stateEOF {
		err = io.EOF
	}
	// Detach from the parent on any error so that closeDot's drain loop
	// terminates; skip this if a newer dotReader has replaced this one.
	if err != nil && d.r.dot == d {
		d.r.dot = nil
	}
	return
}
   373	
   374	// closeDot drains the current DotReader if any,
   375	// making sure that it reads until the ending dot line.
   376	func (r *Reader) closeDot() {
   377		if r.dot == nil {
   378			return
   379		}
   380		buf := make([]byte, 128)
   381		for r.dot != nil {
   382			// When Read reaches EOF or an error,
   383			// it will set r.dot == nil.
   384			r.dot.Read(buf)
   385		}
   386	}
   387	
   388	// ReadDotBytes reads a dot-encoding and returns the decoded data.
   389	//
   390	// See the documentation for the DotReader method for details about dot-encoding.
   391	func (r *Reader) ReadDotBytes() ([]byte, error) {
   392		return ioutil.ReadAll(r.DotReader())
   393	}
   394	
   395	// ReadDotLines reads a dot-encoding and returns a slice
   396	// containing the decoded lines, with the final \r\n or \n elided from each.
   397	//
   398	// See the documentation for the DotReader method for details about dot-encoding.
   399	func (r *Reader) ReadDotLines() ([]string, error) {
   400		// We could use ReadDotBytes and then Split it,
   401		// but reading a line at a time avoids needing a
   402		// large contiguous block of memory and is simpler.
   403		var v []string
   404		var err error
   405		for {
   406			var line string
   407			line, err = r.ReadLine()
   408			if err != nil {
   409				if err == io.EOF {
   410					err = io.ErrUnexpectedEOF
   411				}
   412				break
   413			}
   414	
   415			// Dot by itself marks end; otherwise cut one dot.
   416			if len(line) > 0 && line[0] == '.' {
   417				if len(line) == 1 {
   418					break
   419				}
   420				line = line[1:]
   421			}
   422			v = append(v, line)
   423		}
   424		return v, err
   425	}
   426	
   427	// ReadMIMEHeader reads a MIME-style header from r.
   428	// The header is a sequence of possibly continued Key: Value lines
   429	// ending in a blank line.
   430	// The returned map m maps CanonicalMIMEHeaderKey(key) to a
   431	// sequence of values in the same order encountered in the input.
   432	//
   433	// For example, consider this input:
   434	//
   435	//	My-Key: Value 1
   436	//	Long-Key: Even
   437	//	       Longer Value
   438	//	My-Key: Value 2
   439	//
   440	// Given that input, ReadMIMEHeader returns the map:
   441	//
   442	//	map[string][]string{
   443	//		"My-Key": {"Value 1", "Value 2"},
   444	//		"Long-Key": {"Even Longer Value"},
   445	//	}
   446	//
   447	func (r *Reader) ReadMIMEHeader() (MIMEHeader, error) {
   448		m := make(MIMEHeader)
   449		for {
   450			kv, err := r.readContinuedLineSlice()
   451			if len(kv) == 0 {
   452				return m, err
   453			}
   454	
   455			// Key ends at first colon; must not have spaces.
   456			i := bytes.IndexByte(kv, ':')
   457			if i < 0 {
   458				return m, ProtocolError("malformed MIME header line: " + string(kv))
   459			}
   460			key := string(kv[0:i])
   461			if strings.Index(key, " ") >= 0 {
   462				key = strings.TrimRight(key, " ")
   463			}
   464			key = CanonicalMIMEHeaderKey(key)
   465	
   466			// Skip initial spaces in value.
   467			i++ // skip colon
   468			for i < len(kv) && (kv[i] == ' ' || kv[i] == '\t') {
   469				i++
   470			}
   471			value := string(kv[i:])
   472	
   473			m[key] = append(m[key], value)
   474	
   475			if err != nil {
   476				return m, err
   477			}
   478		}
   479		panic("unreachable")
   480	}
   481	
   482	// CanonicalMIMEHeaderKey returns the canonical format of the
   483	// MIME header key s.  The canonicalization converts the first
   484	// letter and any letter following a hyphen to upper case;
   485	// the rest are converted to lowercase.  For example, the
   486	// canonical key for "accept-encoding" is "Accept-Encoding".
   487	func CanonicalMIMEHeaderKey(s string) string {
   488		// Quick check for canonical encoding.
   489		needUpper := true
   490		for i := 0; i < len(s); i++ {
   491			c := s[i]
   492			if needUpper && 'a' <= c && c <= 'z' {
   493				goto MustRewrite
   494			}
   495			if !needUpper && 'A' <= c && c <= 'Z' {
   496				goto MustRewrite
   497			}
   498			needUpper = c == '-'
   499		}
   500		return s
   501	
   502	MustRewrite:
   503		// Canonicalize: first letter upper case
   504		// and upper case after each dash.
   505		// (Host, User-Agent, If-Modified-Since).
   506		// MIME headers are ASCII only, so no Unicode issues.
   507		a := []byte(s)
   508		upper := true
   509		for i, v := range a {
   510			if v == ' ' {
   511				a[i] = '-'
   512				upper = true
   513				continue
   514			}
   515			if upper && 'a' <= v && v <= 'z' {
   516				a[i] = v + 'A' - 'a'
   517			}
   518			if !upper && 'A' <= v && v <= 'Z' {
   519				a[i] = v + 'a' - 'A'
   520			}
   521			upper = v == '-'
   522		}
   523		return string(a)
   524	}