Source file src/pkg/net/mail/message.go
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

/*
Package mail implements parsing of mail messages.

For the most part, this package follows the syntax as specified by RFC 5322.
Notable divergences:
	* Obsolete address formats are not parsed, including addresses with
	  embedded route information.
	* Group addresses are not parsed.
	* The full range of spacing (the CFWS syntax element) is not supported,
	  such as breaking addresses across lines.
*/
package mail

import (
	"bufio"
	"bytes"
	"encoding/base64"
	"errors"
	"fmt"
	"io"
	"io/ioutil"
	"log"
	"net/textproto"
	"strconv"
	"strings"
	"time"
)

// debug gates the parser's diagnostic logging. It is false here, so the
// debug.Printf calls in this file produce no output.
var debug = debugT(false)

// debugT is a boolean switch with a Printf method, so that call sites can
// log unconditionally and let the value decide whether anything is written.
type debugT bool

// Printf forwards to log.Printf when d is true and does nothing otherwise.
func (d debugT) Printf(format string, args ...interface{}) {
	if d {
		log.Printf(format, args...)
	}
}

// A Message represents a parsed mail message.
type Message struct {
	Header Header    // Parsed header fields.
	Body   io.Reader // Reader for whatever follows the header.
}

// ReadMessage reads a message from r.
// The header is parsed before ReadMessage returns; the body is not read.
// Body is the buffered reader that ReadMessage wraps around r, so reading
// from it yields the input that follows the header.
// If the header cannot be read, ReadMessage returns a nil message and the
// error reported by the textproto reader.
func ReadMessage(r io.Reader) (msg *Message, err error) {
	tp := textproto.NewReader(bufio.NewReader(r))

	hdr, err := tp.ReadMIMEHeader()
	if err != nil {
		return nil, err
	}

	return &Message{
		Header: Header(hdr),
		Body:   tp.R,
	}, nil
}

// Layouts suitable for passing to time.Parse.
// These are tried in order.
var dateLayouts []string

// init fills dateLayouts with every combination of the optional and
// alternative date-time components listed below (2*2*2*2*2 = 32 layouts).
// The nesting order of the loops fixes the order in which parseDate tries
// the layouts.
func init() {
	// Generate layouts based on RFC 5322, section 3.3.

	dows := [...]string{"", "Mon, "}     // day-of-week
	days := [...]string{"2", "02"}       // day = 1*2DIGIT
	years := [...]string{"2006", "06"}   // year = 4*DIGIT / 2*DIGIT
	seconds := [...]string{":05", ""}    // second
	zones := [...]string{"-0700", "MST"} // zone = (("+" / "-") 4DIGIT) / "GMT" / ...

	for _, dow := range dows {
		for _, day := range days {
			for _, year := range years {
				for _, second := range seconds {
					for _, zone := range zones {
						s := dow + day + " Jan " + year + " 15:04" + second + " " + zone
						dateLayouts = append(dateLayouts, s)
					}
				}
			}
		}
	}
}

// parseDate tries each layout in dateLayouts in turn and returns the result
// of the first one that time.Parse accepts. If none matches it returns the
// zero time and a generic error; the individual time.Parse errors are
// discarded.
func parseDate(date string) (time.Time, error) {
	for _, layout := range dateLayouts {
		t, err := time.Parse(layout, date)
		if err == nil {
			return t, nil
		}
	}
	return time.Time{}, errors.New("mail: header could not be parsed")
}

// A Header represents the key-value pairs in a mail message header.
type Header map[string][]string

// Get gets the first value associated with the given key.
// If there are no values associated with the key, Get returns "".
// The lookup is delegated to textproto.MIMEHeader.Get.
func (h Header) Get(key string) string {
	return textproto.MIMEHeader(h).Get(key)
}

// ErrHeaderNotPresent is returned by Date and AddressList when Get yields
// the empty string for the requested header field.
var ErrHeaderNotPresent = errors.New("mail: header not in message")

// Date parses the Date header field.
// It returns ErrHeaderNotPresent if the field is missing or empty.
func (h Header) Date() (time.Time, error) {
	hdr := h.Get("Date")
	if hdr == "" {
		return time.Time{}, ErrHeaderNotPresent
	}
	return parseDate(hdr)
}

// AddressList parses the named header field as a list of addresses.
// It returns ErrHeaderNotPresent if the field is missing or empty.
func (h Header) AddressList(key string) ([]*Address, error) {
	hdr := h.Get(key)
	if hdr == "" {
		return nil, ErrHeaderNotPresent
	}
	return newAddrParser(hdr).parseAddressList()
}

// Address represents a single mail address.
// An address such as "Barry Gibbs <bg@example.com>" is represented
// as Address{Name: "Barry Gibbs", Address: "bg@example.com"}.
type Address struct {
	Name    string // Proper name; may be empty.
	Address string // user@domain
}

// String formats the address as a valid RFC 5322 address.
// If the address's name contains non-ASCII characters
// the name will be rendered according to RFC 2047.
142 func (a *Address) String() string { 143 s := "<" + a.Address + ">" 144 if a.Name == "" { 145 return s 146 } 147 // If every character is printable ASCII, quoting is simple. 148 allPrintable := true 149 for i := 0; i < len(a.Name); i++ { 150 if !isVchar(a.Name[i]) { 151 allPrintable = false 152 break 153 } 154 } 155 if allPrintable { 156 b := bytes.NewBufferString(`"`) 157 for i := 0; i < len(a.Name); i++ { 158 if !isQtext(a.Name[i]) { 159 b.WriteByte('\\') 160 } 161 b.WriteByte(a.Name[i]) 162 } 163 b.WriteString(`" `) 164 b.WriteString(s) 165 return b.String() 166 } 167 168 // UTF-8 "Q" encoding 169 b := bytes.NewBufferString("=?utf-8?q?") 170 for i := 0; i < len(a.Name); i++ { 171 switch c := a.Name[i]; { 172 case c == ' ': 173 b.WriteByte('_') 174 case isVchar(c) && c != '=' && c != '?' && c != '_': 175 b.WriteByte(c) 176 default: 177 fmt.Fprintf(b, "=%02X", c) 178 } 179 } 180 b.WriteString("?= ") 181 b.WriteString(s) 182 return b.String() 183 } 184 185 type addrParser []byte 186 187 func newAddrParser(s string) *addrParser { 188 p := addrParser(s) 189 return &p 190 } 191 192 func (p *addrParser) parseAddressList() ([]*Address, error) { 193 var list []*Address 194 for { 195 p.skipSpace() 196 addr, err := p.parseAddress() 197 if err != nil { 198 return nil, err 199 } 200 list = append(list, addr) 201 202 p.skipSpace() 203 if p.empty() { 204 break 205 } 206 if !p.consume(',') { 207 return nil, errors.New("mail: expected comma") 208 } 209 } 210 return list, nil 211 } 212 213 // parseAddress parses a single RFC 5322 address at the start of p. 214 func (p *addrParser) parseAddress() (addr *Address, err error) { 215 debug.Printf("parseAddress: %q", *p) 216 p.skipSpace() 217 if p.empty() { 218 return nil, errors.New("mail: no address") 219 } 220 221 // address = name-addr / addr-spec 222 // TODO(dsymonds): Support parsing group address. 223 224 // addr-spec has a more restricted grammar than name-addr, 225 // so try parsing it first, and fallback to name-addr. 
226 // TODO(dsymonds): Is this really correct? 227 spec, err := p.consumeAddrSpec() 228 if err == nil { 229 return &Address{ 230 Address: spec, 231 }, err 232 } 233 debug.Printf("parseAddress: not an addr-spec: %v", err) 234 debug.Printf("parseAddress: state is now %q", *p) 235 236 // display-name 237 var displayName string 238 if p.peek() != '<' { 239 displayName, err = p.consumePhrase() 240 if err != nil { 241 return nil, err 242 } 243 } 244 debug.Printf("parseAddress: displayName=%q", displayName) 245 246 // angle-addr = "<" addr-spec ">" 247 p.skipSpace() 248 if !p.consume('<') { 249 return nil, errors.New("mail: no angle-addr") 250 } 251 spec, err = p.consumeAddrSpec() 252 if err != nil { 253 return nil, err 254 } 255 if !p.consume('>') { 256 return nil, errors.New("mail: unclosed angle-addr") 257 } 258 debug.Printf("parseAddress: spec=%q", spec) 259 260 return &Address{ 261 Name: displayName, 262 Address: spec, 263 }, nil 264 } 265 266 // consumeAddrSpec parses a single RFC 5322 addr-spec at the start of p. 
267 func (p *addrParser) consumeAddrSpec() (spec string, err error) { 268 debug.Printf("consumeAddrSpec: %q", *p) 269 270 orig := *p 271 defer func() { 272 if err != nil { 273 *p = orig 274 } 275 }() 276 277 // local-part = dot-atom / quoted-string 278 var localPart string 279 p.skipSpace() 280 if p.empty() { 281 return "", errors.New("mail: no addr-spec") 282 } 283 if p.peek() == '"' { 284 // quoted-string 285 debug.Printf("consumeAddrSpec: parsing quoted-string") 286 localPart, err = p.consumeQuotedString() 287 } else { 288 // dot-atom 289 debug.Printf("consumeAddrSpec: parsing dot-atom") 290 localPart, err = p.consumeAtom(true) 291 } 292 if err != nil { 293 debug.Printf("consumeAddrSpec: failed: %v", err) 294 return "", err 295 } 296 297 if !p.consume('@') { 298 return "", errors.New("mail: missing @ in addr-spec") 299 } 300 301 // domain = dot-atom / domain-literal 302 var domain string 303 p.skipSpace() 304 if p.empty() { 305 return "", errors.New("mail: no domain in addr-spec") 306 } 307 // TODO(dsymonds): Handle domain-literal 308 domain, err = p.consumeAtom(true) 309 if err != nil { 310 return "", err 311 } 312 313 return localPart + "@" + domain, nil 314 } 315 316 // consumePhrase parses the RFC 5322 phrase at the start of p. 317 func (p *addrParser) consumePhrase() (phrase string, err error) { 318 debug.Printf("consumePhrase: [%s]", *p) 319 // phrase = 1*word 320 var words []string 321 for { 322 // word = atom / quoted-string 323 var word string 324 p.skipSpace() 325 if p.empty() { 326 return "", errors.New("mail: missing phrase") 327 } 328 if p.peek() == '"' { 329 // quoted-string 330 word, err = p.consumeQuotedString() 331 } else { 332 // atom 333 word, err = p.consumeAtom(false) 334 } 335 336 // RFC 2047 encoded-word starts with =?, ends with ?=, and has two other ?s. 
337 if err == nil && strings.HasPrefix(word, "=?") && strings.HasSuffix(word, "?=") && strings.Count(word, "?") == 4 { 338 word, err = decodeRFC2047Word(word) 339 } 340 341 if err != nil { 342 break 343 } 344 debug.Printf("consumePhrase: consumed %q", word) 345 words = append(words, word) 346 } 347 // Ignore any error if we got at least one word. 348 if err != nil && len(words) == 0 { 349 debug.Printf("consumePhrase: hit err: %v", err) 350 return "", errors.New("mail: missing word in phrase") 351 } 352 phrase = strings.Join(words, " ") 353 return phrase, nil 354 } 355 356 // consumeQuotedString parses the quoted string at the start of p. 357 func (p *addrParser) consumeQuotedString() (qs string, err error) { 358 // Assume first byte is '"'. 359 i := 1 360 qsb := make([]byte, 0, 10) 361 Loop: 362 for { 363 if i >= p.len() { 364 return "", errors.New("mail: unclosed quoted-string") 365 } 366 switch c := (*p)[i]; { 367 case c == '"': 368 break Loop 369 case c == '\\': 370 if i+1 == p.len() { 371 return "", errors.New("mail: unclosed quoted-string") 372 } 373 qsb = append(qsb, (*p)[i+1]) 374 i += 2 375 case isQtext(c), c == ' ' || c == '\t': 376 // qtext (printable US-ASCII excluding " and \), or 377 // FWS (almost; we're ignoring CRLF) 378 qsb = append(qsb, c) 379 i++ 380 default: 381 return "", fmt.Errorf("mail: bad character in quoted-string: %q", c) 382 } 383 } 384 *p = (*p)[i+1:] 385 return string(qsb), nil 386 } 387 388 // consumeAtom parses an RFC 5322 atom at the start of p. 389 // If dot is true, consumeAtom parses an RFC 5322 dot-atom instead. 
390 func (p *addrParser) consumeAtom(dot bool) (atom string, err error) { 391 if !isAtext(p.peek(), false) { 392 return "", errors.New("mail: invalid string") 393 } 394 i := 1 395 for ; i < p.len() && isAtext((*p)[i], dot); i++ { 396 } 397 atom, *p = string((*p)[:i]), (*p)[i:] 398 return atom, nil 399 } 400 401 func (p *addrParser) consume(c byte) bool { 402 if p.empty() || p.peek() != c { 403 return false 404 } 405 *p = (*p)[1:] 406 return true 407 } 408 409 // skipSpace skips the leading space and tab characters. 410 func (p *addrParser) skipSpace() { 411 *p = bytes.TrimLeft(*p, " \t") 412 } 413 414 func (p *addrParser) peek() byte { 415 return (*p)[0] 416 } 417 418 func (p *addrParser) empty() bool { 419 return p.len() == 0 420 } 421 422 func (p *addrParser) len() int { 423 return len(*p) 424 } 425 426 func decodeRFC2047Word(s string) (string, error) { 427 fields := strings.Split(s, "?") 428 if len(fields) != 5 || fields[0] != "=" || fields[4] != "=" { 429 return "", errors.New("mail: address not RFC 2047 encoded") 430 } 431 charset, enc := strings.ToLower(fields[1]), strings.ToLower(fields[2]) 432 if charset != "iso-8859-1" && charset != "utf-8" { 433 return "", fmt.Errorf("mail: charset not supported: %q", charset) 434 } 435 436 in := bytes.NewBufferString(fields[3]) 437 var r io.Reader 438 switch enc { 439 case "b": 440 r = base64.NewDecoder(base64.StdEncoding, in) 441 case "q": 442 r = qDecoder{r: in} 443 default: 444 return "", fmt.Errorf("mail: RFC 2047 encoding not supported: %q", enc) 445 } 446 447 dec, err := ioutil.ReadAll(r) 448 if err != nil { 449 return "", err 450 } 451 452 switch charset { 453 case "iso-8859-1": 454 b := new(bytes.Buffer) 455 for _, c := range dec { 456 b.WriteRune(rune(c)) 457 } 458 return b.String(), nil 459 case "utf-8": 460 return string(dec), nil 461 } 462 panic("unreachable") 463 } 464 465 type qDecoder struct { 466 r io.Reader 467 scratch [2]byte 468 } 469 470 func (qd qDecoder) Read(p []byte) (n int, err error) { 471 // 
This method writes at most one byte into p. 472 if len(p) == 0 { 473 return 0, nil 474 } 475 if _, err := qd.r.Read(qd.scratch[:1]); err != nil { 476 return 0, err 477 } 478 switch c := qd.scratch[0]; { 479 case c == '=': 480 if _, err := io.ReadFull(qd.r, qd.scratch[:2]); err != nil { 481 return 0, err 482 } 483 x, err := strconv.ParseInt(string(qd.scratch[:2]), 16, 64) 484 if err != nil { 485 return 0, fmt.Errorf("mail: invalid RFC 2047 encoding: %q", qd.scratch[:2]) 486 } 487 p[0] = byte(x) 488 case c == '_': 489 p[0] = ' ' 490 default: 491 p[0] = c 492 } 493 return 1, nil 494 } 495 496 var atextChars = []byte("ABCDEFGHIJKLMNOPQRSTUVWXYZ" + 497 "abcdefghijklmnopqrstuvwxyz" + 498 "0123456789" + 499 "!#$%&'*+-/=?^_`{|}~") 500 501 // isAtext returns true if c is an RFC 5322 atext character. 502 // If dot is true, period is included. 503 func isAtext(c byte, dot bool) bool { 504 if dot && c == '.' { 505 return true 506 } 507 return bytes.IndexByte(atextChars, c) >= 0 508 } 509 510 // isQtext returns true if c is an RFC 5322 qtest character. 511 func isQtext(c byte) bool { 512 // Printable US-ASCII, excluding backslash or quote. 513 if c == '\\' || c == '"' { 514 return false 515 } 516 return '!' <= c && c <= '~' 517 } 518 519 // isVchar returns true if c is an RFC 5322 VCHAR character. 520 func isVchar(c byte) bool { 521 // Visible (printing) characters. 522 return '!' <= c && c <= '~' 523 }