src/pkg/net/mail/message.go - The Go Programming Language

Golang

Source file src/pkg/net/mail/message.go

     1	// Copyright 2011 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	/*
     6	Package mail implements parsing of mail messages.
     7	
     8	For the most part, this package follows the syntax as specified by RFC 5322.
     9	Notable divergences:
    10		* Obsolete address formats are not parsed, including addresses with
    11		  embedded route information.
    12		* Group addresses are not parsed.
    13		* The full range of spacing (the CFWS syntax element) is not supported,
    14		  such as breaking addresses across lines.
    15	*/
    16	package mail
    17	
    18	import (
    19		"bufio"
    20		"bytes"
    21		"encoding/base64"
    22		"errors"
    23		"fmt"
    24		"io"
    25		"io/ioutil"
    26		"log"
    27		"net/textproto"
    28		"strconv"
    29		"strings"
    30		"time"
    31	)
    32	
    33	var debug = debugT(false)
    34	
    35	type debugT bool
    36	
    37	func (d debugT) Printf(format string, args ...interface{}) {
    38		if d {
    39			log.Printf(format, args...)
    40		}
    41	}
    42	
    43	// A Message represents a parsed mail message.
    44	type Message struct {
    45		Header Header
    46		Body   io.Reader
    47	}
    48	
    49	// ReadMessage reads a message from r.
    50	// The headers are parsed, and the body of the message will be reading from r.
    51	func ReadMessage(r io.Reader) (msg *Message, err error) {
    52		tp := textproto.NewReader(bufio.NewReader(r))
    53	
    54		hdr, err := tp.ReadMIMEHeader()
    55		if err != nil {
    56			return nil, err
    57		}
    58	
    59		return &Message{
    60			Header: Header(hdr),
    61			Body:   tp.R,
    62		}, nil
    63	}
    64	
    65	// Layouts suitable for passing to time.Parse.
    66	// These are tried in order.
    67	var dateLayouts []string
    68	
    69	func init() {
    70		// Generate layouts based on RFC 5322, section 3.3.
    71	
    72		dows := [...]string{"", "Mon, "}     // day-of-week
    73		days := [...]string{"2", "02"}       // day = 1*2DIGIT
    74		years := [...]string{"2006", "06"}   // year = 4*DIGIT / 2*DIGIT
    75		seconds := [...]string{":05", ""}    // second
    76		zones := [...]string{"-0700", "MST"} // zone = (("+" / "-") 4DIGIT) / "GMT" / ...
    77	
    78		for _, dow := range dows {
    79			for _, day := range days {
    80				for _, year := range years {
    81					for _, second := range seconds {
    82						for _, zone := range zones {
    83							s := dow + day + " Jan " + year + " 15:04" + second + " " + zone
    84							dateLayouts = append(dateLayouts, s)
    85						}
    86					}
    87				}
    88			}
    89		}
    90	}
    91	
    92	func parseDate(date string) (time.Time, error) {
    93		for _, layout := range dateLayouts {
    94			t, err := time.Parse(layout, date)
    95			if err == nil {
    96				return t, nil
    97			}
    98		}
    99		return time.Time{}, errors.New("mail: header could not be parsed")
   100	}
   101	
   102	// A Header represents the key-value pairs in a mail message header.
   103	type Header map[string][]string
   104	
   105	// Get gets the first value associated with the given key.
   106	// If there are no values associated with the key, Get returns "".
   107	func (h Header) Get(key string) string {
   108		return textproto.MIMEHeader(h).Get(key)
   109	}
   110	
   111	var ErrHeaderNotPresent = errors.New("mail: header not in message")
   112	
   113	// Date parses the Date header field.
   114	func (h Header) Date() (time.Time, error) {
   115		hdr := h.Get("Date")
   116		if hdr == "" {
   117			return time.Time{}, ErrHeaderNotPresent
   118		}
   119		return parseDate(hdr)
   120	}
   121	
   122	// AddressList parses the named header field as a list of addresses.
   123	func (h Header) AddressList(key string) ([]*Address, error) {
   124		hdr := h.Get(key)
   125		if hdr == "" {
   126			return nil, ErrHeaderNotPresent
   127		}
   128		return newAddrParser(hdr).parseAddressList()
   129	}
   130	
   131	// Address represents a single mail address.
   132	// An address such as "Barry Gibbs <[email protected]>" is represented
   133	// as Address{Name: "Barry Gibbs", Address: "[email protected]"}.
   134	type Address struct {
   135		Name    string // Proper name; may be empty.
   136		Address string // user@domain
   137	}
   138	
   139	// String formats the address as a valid RFC 5322 address.
   140	// If the address's name contains non-ASCII characters
   141	// the name will be rendered according to RFC 2047.
   142	func (a *Address) String() string {
   143		s := "<" + a.Address + ">"
   144		if a.Name == "" {
   145			return s
   146		}
   147		// If every character is printable ASCII, quoting is simple.
   148		allPrintable := true
   149		for i := 0; i < len(a.Name); i++ {
   150			if !isVchar(a.Name[i]) {
   151				allPrintable = false
   152				break
   153			}
   154		}
   155		if allPrintable {
   156			b := bytes.NewBufferString(`"`)
   157			for i := 0; i < len(a.Name); i++ {
   158				if !isQtext(a.Name[i]) {
   159					b.WriteByte('\\')
   160				}
   161				b.WriteByte(a.Name[i])
   162			}
   163			b.WriteString(`" `)
   164			b.WriteString(s)
   165			return b.String()
   166		}
   167	
   168		// UTF-8 "Q" encoding
   169		b := bytes.NewBufferString("=?utf-8?q?")
   170		for i := 0; i < len(a.Name); i++ {
   171			switch c := a.Name[i]; {
   172			case c == ' ':
   173				b.WriteByte('_')
   174			case isVchar(c) && c != '=' && c != '?' && c != '_':
   175				b.WriteByte(c)
   176			default:
   177				fmt.Fprintf(b, "=%02X", c)
   178			}
   179		}
   180		b.WriteString("?= ")
   181		b.WriteString(s)
   182		return b.String()
   183	}
   184	
   185	type addrParser []byte
   186	
   187	func newAddrParser(s string) *addrParser {
   188		p := addrParser(s)
   189		return &p
   190	}
   191	
   192	func (p *addrParser) parseAddressList() ([]*Address, error) {
   193		var list []*Address
   194		for {
   195			p.skipSpace()
   196			addr, err := p.parseAddress()
   197			if err != nil {
   198				return nil, err
   199			}
   200			list = append(list, addr)
   201	
   202			p.skipSpace()
   203			if p.empty() {
   204				break
   205			}
   206			if !p.consume(',') {
   207				return nil, errors.New("mail: expected comma")
   208			}
   209		}
   210		return list, nil
   211	}
   212	
   213	// parseAddress parses a single RFC 5322 address at the start of p.
   214	func (p *addrParser) parseAddress() (addr *Address, err error) {
   215		debug.Printf("parseAddress: %q", *p)
   216		p.skipSpace()
   217		if p.empty() {
   218			return nil, errors.New("mail: no address")
   219		}
   220	
   221		// address = name-addr / addr-spec
   222		// TODO(dsymonds): Support parsing group address.
   223	
   224		// addr-spec has a more restricted grammar than name-addr,
   225		// so try parsing it first, and fallback to name-addr.
   226		// TODO(dsymonds): Is this really correct?
   227		spec, err := p.consumeAddrSpec()
   228		if err == nil {
   229			return &Address{
   230				Address: spec,
   231			}, err
   232		}
   233		debug.Printf("parseAddress: not an addr-spec: %v", err)
   234		debug.Printf("parseAddress: state is now %q", *p)
   235	
   236		// display-name
   237		var displayName string
   238		if p.peek() != '<' {
   239			displayName, err = p.consumePhrase()
   240			if err != nil {
   241				return nil, err
   242			}
   243		}
   244		debug.Printf("parseAddress: displayName=%q", displayName)
   245	
   246		// angle-addr = "<" addr-spec ">"
   247		p.skipSpace()
   248		if !p.consume('<') {
   249			return nil, errors.New("mail: no angle-addr")
   250		}
   251		spec, err = p.consumeAddrSpec()
   252		if err != nil {
   253			return nil, err
   254		}
   255		if !p.consume('>') {
   256			return nil, errors.New("mail: unclosed angle-addr")
   257		}
   258		debug.Printf("parseAddress: spec=%q", spec)
   259	
   260		return &Address{
   261			Name:    displayName,
   262			Address: spec,
   263		}, nil
   264	}
   265	
   266	// consumeAddrSpec parses a single RFC 5322 addr-spec at the start of p.
   267	func (p *addrParser) consumeAddrSpec() (spec string, err error) {
   268		debug.Printf("consumeAddrSpec: %q", *p)
   269	
   270		orig := *p
   271		defer func() {
   272			if err != nil {
   273				*p = orig
   274			}
   275		}()
   276	
   277		// local-part = dot-atom / quoted-string
   278		var localPart string
   279		p.skipSpace()
   280		if p.empty() {
   281			return "", errors.New("mail: no addr-spec")
   282		}
   283		if p.peek() == '"' {
   284			// quoted-string
   285			debug.Printf("consumeAddrSpec: parsing quoted-string")
   286			localPart, err = p.consumeQuotedString()
   287		} else {
   288			// dot-atom
   289			debug.Printf("consumeAddrSpec: parsing dot-atom")
   290			localPart, err = p.consumeAtom(true)
   291		}
   292		if err != nil {
   293			debug.Printf("consumeAddrSpec: failed: %v", err)
   294			return "", err
   295		}
   296	
   297		if !p.consume('@') {
   298			return "", errors.New("mail: missing @ in addr-spec")
   299		}
   300	
   301		// domain = dot-atom / domain-literal
   302		var domain string
   303		p.skipSpace()
   304		if p.empty() {
   305			return "", errors.New("mail: no domain in addr-spec")
   306		}
   307		// TODO(dsymonds): Handle domain-literal
   308		domain, err = p.consumeAtom(true)
   309		if err != nil {
   310			return "", err
   311		}
   312	
   313		return localPart + "@" + domain, nil
   314	}
   315	
   316	// consumePhrase parses the RFC 5322 phrase at the start of p.
   317	func (p *addrParser) consumePhrase() (phrase string, err error) {
   318		debug.Printf("consumePhrase: [%s]", *p)
   319		// phrase = 1*word
   320		var words []string
   321		for {
   322			// word = atom / quoted-string
   323			var word string
   324			p.skipSpace()
   325			if p.empty() {
   326				return "", errors.New("mail: missing phrase")
   327			}
   328			if p.peek() == '"' {
   329				// quoted-string
   330				word, err = p.consumeQuotedString()
   331			} else {
   332				// atom
   333				word, err = p.consumeAtom(false)
   334			}
   335	
   336			// RFC 2047 encoded-word starts with =?, ends with ?=, and has two other ?s.
   337			if err == nil && strings.HasPrefix(word, "=?") && strings.HasSuffix(word, "?=") && strings.Count(word, "?") == 4 {
   338				word, err = decodeRFC2047Word(word)
   339			}
   340	
   341			if err != nil {
   342				break
   343			}
   344			debug.Printf("consumePhrase: consumed %q", word)
   345			words = append(words, word)
   346		}
   347		// Ignore any error if we got at least one word.
   348		if err != nil && len(words) == 0 {
   349			debug.Printf("consumePhrase: hit err: %v", err)
   350			return "", errors.New("mail: missing word in phrase")
   351		}
   352		phrase = strings.Join(words, " ")
   353		return phrase, nil
   354	}
   355	
   356	// consumeQuotedString parses the quoted string at the start of p.
   357	func (p *addrParser) consumeQuotedString() (qs string, err error) {
   358		// Assume first byte is '"'.
   359		i := 1
   360		qsb := make([]byte, 0, 10)
   361	Loop:
   362		for {
   363			if i >= p.len() {
   364				return "", errors.New("mail: unclosed quoted-string")
   365			}
   366			switch c := (*p)[i]; {
   367			case c == '"':
   368				break Loop
   369			case c == '\\':
   370				if i+1 == p.len() {
   371					return "", errors.New("mail: unclosed quoted-string")
   372				}
   373				qsb = append(qsb, (*p)[i+1])
   374				i += 2
   375			case isQtext(c), c == ' ' || c == '\t':
   376				// qtext (printable US-ASCII excluding " and \), or
   377				// FWS (almost; we're ignoring CRLF)
   378				qsb = append(qsb, c)
   379				i++
   380			default:
   381				return "", fmt.Errorf("mail: bad character in quoted-string: %q", c)
   382			}
   383		}
   384		*p = (*p)[i+1:]
   385		return string(qsb), nil
   386	}
   387	
   388	// consumeAtom parses an RFC 5322 atom at the start of p.
   389	// If dot is true, consumeAtom parses an RFC 5322 dot-atom instead.
   390	func (p *addrParser) consumeAtom(dot bool) (atom string, err error) {
   391		if !isAtext(p.peek(), false) {
   392			return "", errors.New("mail: invalid string")
   393		}
   394		i := 1
   395		for ; i < p.len() && isAtext((*p)[i], dot); i++ {
   396		}
   397		atom, *p = string((*p)[:i]), (*p)[i:]
   398		return atom, nil
   399	}
   400	
   401	func (p *addrParser) consume(c byte) bool {
   402		if p.empty() || p.peek() != c {
   403			return false
   404		}
   405		*p = (*p)[1:]
   406		return true
   407	}
   408	
   409	// skipSpace skips the leading space and tab characters.
   410	func (p *addrParser) skipSpace() {
   411		*p = bytes.TrimLeft(*p, " \t")
   412	}
   413	
   414	func (p *addrParser) peek() byte {
   415		return (*p)[0]
   416	}
   417	
   418	func (p *addrParser) empty() bool {
   419		return p.len() == 0
   420	}
   421	
   422	func (p *addrParser) len() int {
   423		return len(*p)
   424	}
   425	
   426	func decodeRFC2047Word(s string) (string, error) {
   427		fields := strings.Split(s, "?")
   428		if len(fields) != 5 || fields[0] != "=" || fields[4] != "=" {
   429			return "", errors.New("mail: address not RFC 2047 encoded")
   430		}
   431		charset, enc := strings.ToLower(fields[1]), strings.ToLower(fields[2])
   432		if charset != "iso-8859-1" && charset != "utf-8" {
   433			return "", fmt.Errorf("mail: charset not supported: %q", charset)
   434		}
   435	
   436		in := bytes.NewBufferString(fields[3])
   437		var r io.Reader
   438		switch enc {
   439		case "b":
   440			r = base64.NewDecoder(base64.StdEncoding, in)
   441		case "q":
   442			r = qDecoder{r: in}
   443		default:
   444			return "", fmt.Errorf("mail: RFC 2047 encoding not supported: %q", enc)
   445		}
   446	
   447		dec, err := ioutil.ReadAll(r)
   448		if err != nil {
   449			return "", err
   450		}
   451	
   452		switch charset {
   453		case "iso-8859-1":
   454			b := new(bytes.Buffer)
   455			for _, c := range dec {
   456				b.WriteRune(rune(c))
   457			}
   458			return b.String(), nil
   459		case "utf-8":
   460			return string(dec), nil
   461		}
   462		panic("unreachable")
   463	}
   464	
   465	type qDecoder struct {
   466		r       io.Reader
   467		scratch [2]byte
   468	}
   469	
   470	func (qd qDecoder) Read(p []byte) (n int, err error) {
   471		// This method writes at most one byte into p.
   472		if len(p) == 0 {
   473			return 0, nil
   474		}
   475		if _, err := qd.r.Read(qd.scratch[:1]); err != nil {
   476			return 0, err
   477		}
   478		switch c := qd.scratch[0]; {
   479		case c == '=':
   480			if _, err := io.ReadFull(qd.r, qd.scratch[:2]); err != nil {
   481				return 0, err
   482			}
   483			x, err := strconv.ParseInt(string(qd.scratch[:2]), 16, 64)
   484			if err != nil {
   485				return 0, fmt.Errorf("mail: invalid RFC 2047 encoding: %q", qd.scratch[:2])
   486			}
   487			p[0] = byte(x)
   488		case c == '_':
   489			p[0] = ' '
   490		default:
   491			p[0] = c
   492		}
   493		return 1, nil
   494	}
   495	
   496	var atextChars = []byte("ABCDEFGHIJKLMNOPQRSTUVWXYZ" +
   497		"abcdefghijklmnopqrstuvwxyz" +
   498		"0123456789" +
   499		"!#$%&'*+-/=?^_`{|}~")
   500	
   501	// isAtext returns true if c is an RFC 5322 atext character.
   502	// If dot is true, period is included.
   503	func isAtext(c byte, dot bool) bool {
   504		if dot && c == '.' {
   505			return true
   506		}
   507		return bytes.IndexByte(atextChars, c) >= 0
   508	}
   509	
   510	// isQtext returns true if c is an RFC 5322 qtest character.
   511	func isQtext(c byte) bool {
   512		// Printable US-ASCII, excluding backslash or quote.
   513		if c == '\\' || c == '"' {
   514			return false
   515		}
   516		return '!' <= c && c <= '~'
   517	}
   518	
   519	// isVchar returns true if c is an RFC 5322 VCHAR character.
   520	func isVchar(c byte) bool {
   521		// Visible (printing) characters.
   522		return '!' <= c && c <= '~'
   523	}