Source file src/pkg/net/textproto/reader.go
1 // Copyright 2010 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package textproto 6 7 import ( 8 "bufio" 9 "bytes" 10 "io" 11 "io/ioutil" 12 "strconv" 13 "strings" 14 ) 15 16 // BUG(rsc): To let callers manage exposure to denial of service 17 // attacks, Reader should allow them to set and reset a limit on 18 // the number of bytes read from the connection. 19 20 // A Reader implements convenience methods for reading requests 21 // or responses from a text protocol network connection. 22 type Reader struct { 23 R *bufio.Reader 24 dot *dotReader 25 buf []byte // a re-usable buffer for readContinuedLineSlice 26 } 27 28 // NewReader returns a new Reader reading from r. 29 func NewReader(r *bufio.Reader) *Reader { 30 return &Reader{R: r} 31 } 32 33 // ReadLine reads a single line from r, 34 // eliding the final \n or \r\n from the returned string. 35 func (r *Reader) ReadLine() (string, error) { 36 line, err := r.readLineSlice() 37 return string(line), err 38 } 39 40 // ReadLineBytes is like ReadLine but returns a []byte instead of a string. 41 func (r *Reader) ReadLineBytes() ([]byte, error) { 42 line, err := r.readLineSlice() 43 if line != nil { 44 buf := make([]byte, len(line)) 45 copy(buf, line) 46 line = buf 47 } 48 return line, err 49 } 50 51 func (r *Reader) readLineSlice() ([]byte, error) { 52 r.closeDot() 53 var line []byte 54 for { 55 l, more, err := r.R.ReadLine() 56 if err != nil { 57 return nil, err 58 } 59 // Avoid the copy if the first call produced a full line. 60 if line == nil && !more { 61 return l, nil 62 } 63 line = append(line, l...) 64 if !more { 65 break 66 } 67 } 68 return line, nil 69 } 70 71 // ReadContinuedLine reads a possibly continued line from r, 72 // eliding the final trailing ASCII white space. 73 // Lines after the first are considered continuations if they 74 // begin with a space or tab character. In the returned data, 75 // continuation lines are separated from the previous line 76 // only by a single space: the newline and leading white space 77 // are removed. 78 // 79 // For example, consider this input: 80 // 81 // Line 1 82 // continued... 83 // Line 2 84 // 85 // The first call to ReadContinuedLine will return "Line 1 continued..." 86 // and the second will return "Line 2". 87 // 88 // A line consisting of only white space is never continued. 89 // 90 func (r *Reader) ReadContinuedLine() (string, error) { 91 line, err := r.readContinuedLineSlice() 92 return string(line), err 93 } 94 95 // trim returns s with leading and trailing spaces and tabs removed. 96 // It does not assume Unicode or UTF-8. 97 func trim(s []byte) []byte { 98 i := 0 99 for i < len(s) && (s[i] == ' ' || s[i] == '\t') { 100 i++ 101 } 102 n := len(s) 103 for n > i && (s[n-1] == ' ' || s[n-1] == '\t') { 104 n-- 105 } 106 return s[i:n] 107 } 108 109 // ReadContinuedLineBytes is like ReadContinuedLine but 110 // returns a []byte instead of a string. 111 func (r *Reader) ReadContinuedLineBytes() ([]byte, error) { 112 line, err := r.readContinuedLineSlice() 113 if line != nil { 114 buf := make([]byte, len(line)) 115 copy(buf, line) 116 line = buf 117 } 118 return line, err 119 } 120 121 func (r *Reader) readContinuedLineSlice() ([]byte, error) { 122 // Read the first line. 123 line, err := r.readLineSlice() 124 if err != nil { 125 return nil, err 126 } 127 if len(line) == 0 { // blank line - no continuation 128 return line, nil 129 } 130 131 // ReadByte or the next readLineSlice will flush the read buffer; 132 // copy the slice into buf. 133 r.buf = append(r.buf[:0], trim(line)...) 134 135 // Read continuation lines. 136 for r.skipSpace() > 0 { 137 line, err := r.readLineSlice() 138 if err != nil { 139 break 140 } 141 r.buf = append(r.buf, ' ') 142 r.buf = append(r.buf, line...) 143 } 144 return r.buf, nil 145 } 146 147 // skipSpace skips R over all spaces and returns the number of bytes skipped. 148 func (r *Reader) skipSpace() int { 149 n := 0 150 for { 151 c, err := r.R.ReadByte() 152 if err != nil { 153 // Bufio will keep err until next read. 154 break 155 } 156 if c != ' ' && c != '\t' { 157 r.R.UnreadByte() 158 break 159 } 160 n++ 161 } 162 return n 163 } 164 165 func (r *Reader) readCodeLine(expectCode int) (code int, continued bool, message string, err error) { 166 line, err := r.ReadLine() 167 if err != nil { 168 return 169 } 170 return parseCodeLine(line, expectCode) 171 } 172 173 func parseCodeLine(line string, expectCode int) (code int, continued bool, message string, err error) { 174 if len(line) < 4 || line[3] != ' ' && line[3] != '-' { 175 err = ProtocolError("short response: " + line) 176 return 177 } 178 continued = line[3] == '-' 179 code, err = strconv.Atoi(line[0:3]) 180 if err != nil || code < 100 { 181 err = ProtocolError("invalid response code: " + line) 182 return 183 } 184 message = line[4:] 185 if 1 <= expectCode && expectCode < 10 && code/100 != expectCode || 186 10 <= expectCode && expectCode < 100 && code/10 != expectCode || 187 100 <= expectCode && expectCode < 1000 && code != expectCode { 188 err = &Error{code, message} 189 } 190 return 191 } 192 193 // ReadCodeLine reads a response code line of the form 194 // code message 195 // where code is a 3-digit status code and the message 196 // extends to the rest of the line. An example of such a line is: 197 // 220 plan9.bell-labs.com ESMTP 198 // 199 // If the prefix of the status does not match the digits in expectCode, 200 // ReadCodeLine returns with err set to &Error{code, message}. 201 // For example, if expectCode is 31, an error will be returned if 202 // the status is not in the range [310,319]. 203 // 204 // If the response is multi-line, ReadCodeLine returns an error. 205 // 206 // An expectCode <= 0 disables the check of the status code. 207 // 208 func (r *Reader) ReadCodeLine(expectCode int) (code int, message string, err error) { 209 code, continued, message, err := r.readCodeLine(expectCode) 210 if err == nil && continued { 211 err = ProtocolError("unexpected multi-line response: " + message) 212 } 213 return 214 } 215 216 // ReadResponse reads a multi-line response of the form: 217 // 218 // code-message line 1 219 // code-message line 2 220 // ... 221 // code message line n 222 // 223 // where code is a 3-digit status code. The first line starts with the 224 // code and a hyphen. The response is terminated by a line that starts 225 // with the same code followed by a space. Each line in message is 226 // separated by a newline (\n). 227 // 228 // See page 36 of RFC 959 (http://www.ietf.org/rfc/rfc959.txt) for 229 // details. 230 // 231 // If the prefix of the status does not match the digits in expectCode, 232 // ReadResponse returns with err set to &Error{code, message}. 233 // For example, if expectCode is 31, an error will be returned if 234 // the status is not in the range [310,319]. 235 // 236 // An expectCode <= 0 disables the check of the status code. 237 // 238 func (r *Reader) ReadResponse(expectCode int) (code int, message string, err error) { 239 code, continued, message, err := r.readCodeLine(expectCode) 240 for err == nil && continued { 241 line, err := r.ReadLine() 242 if err != nil { 243 return 0, "", err 244 } 245 246 var code2 int 247 var moreMessage string 248 code2, continued, moreMessage, err = parseCodeLine(line, expectCode) 249 if err != nil || code2 != code { 250 message += "\n" + strings.TrimRight(line, "\r\n") 251 continued = true 252 continue 253 } 254 message += "\n" + moreMessage 255 } 256 return 257 } 258 259 // DotReader returns a new Reader that satisfies Reads using the 260 // decoded text of a dot-encoded block read from r. 261 // The returned Reader is only valid until the next call 262 // to a method on r. 263 // 264 // Dot encoding is a common framing used for data blocks 265 // in text protocols such as SMTP. The data consists of a sequence 266 // of lines, each of which ends in "\r\n". The sequence itself 267 // ends at a line containing just a dot: ".\r\n". Lines beginning 268 // with a dot are escaped with an additional dot to avoid 269 // looking like the end of the sequence. 270 // 271 // The decoded form returned by the Reader's Read method 272 // rewrites the "\r\n" line endings into the simpler "\n", 273 // removes leading dot escapes if present, and stops with error io.EOF 274 // after consuming (and discarding) the end-of-sequence line. 275 func (r *Reader) DotReader() io.Reader { 276 r.closeDot() 277 r.dot = &dotReader{r: r} 278 return r.dot 279 } 280 281 type dotReader struct { 282 r *Reader 283 state int 284 } 285 286 // Read satisfies reads by decoding dot-encoded data read from d.r. 287 func (d *dotReader) Read(b []byte) (n int, err error) { 288 // Run data through a simple state machine to 289 // elide leading dots, rewrite trailing \r\n into \n, 290 // and detect ending .\r\n line. 291 const ( 292 stateBeginLine = iota // beginning of line; initial state; must be zero 293 stateDot // read . at beginning of line 294 stateDotCR // read .\r at beginning of line 295 stateCR // read \r (possibly at end of line) 296 stateData // reading data in middle of line 297 stateEOF // reached .\r\n end marker line 298 ) 299 br := d.r.R 300 for n < len(b) && d.state != stateEOF { 301 var c byte 302 c, err = br.ReadByte() 303 if err != nil { 304 if err == io.EOF { 305 err = io.ErrUnexpectedEOF 306 } 307 break 308 } 309 switch d.state { 310 case stateBeginLine: 311 if c == '.' { 312 d.state = stateDot 313 continue 314 } 315 if c == '\r' { 316 d.state = stateCR 317 continue 318 } 319 d.state = stateData 320 321 case stateDot: 322 if c == '\r' { 323 d.state = stateDotCR 324 continue 325 } 326 if c == '\n' { 327 d.state = stateEOF 328 continue 329 } 330 d.state = stateData 331 332 case stateDotCR: 333 if c == '\n' { 334 d.state = stateEOF 335 continue 336 } 337 // Not part of .\r\n. 338 // Consume leading dot and emit saved \r. 339 br.UnreadByte() 340 c = '\r' 341 d.state = stateData 342 343 case stateCR: 344 if c == '\n' { 345 d.state = stateBeginLine 346 break 347 } 348 // Not part of \r\n. Emit saved \r 349 br.UnreadByte() 350 c = '\r' 351 d.state = stateData 352 353 case stateData: 354 if c == '\r' { 355 d.state = stateCR 356 continue 357 } 358 if c == '\n' { 359 d.state = stateBeginLine 360 } 361 } 362 b[n] = c 363 n++ 364 } 365 if err == nil && d.state == stateEOF { 366 err = io.EOF 367 } 368 if err != nil && d.r.dot == d { 369 d.r.dot = nil 370 } 371 return 372 } 373 374 // closeDot drains the current DotReader if any, 375 // making sure that it reads until the ending dot line. 376 func (r *Reader) closeDot() { 377 if r.dot == nil { 378 return 379 } 380 buf := make([]byte, 128) 381 for r.dot != nil { 382 // When Read reaches EOF or an error, 383 // it will set r.dot == nil. 384 r.dot.Read(buf) 385 } 386 } 387 388 // ReadDotBytes reads a dot-encoding and returns the decoded data. 389 // 390 // See the documentation for the DotReader method for details about dot-encoding. 391 func (r *Reader) ReadDotBytes() ([]byte, error) { 392 return ioutil.ReadAll(r.DotReader()) 393 } 394 395 // ReadDotLines reads a dot-encoding and returns a slice 396 // containing the decoded lines, with the final \r\n or \n elided from each. 397 // 398 // See the documentation for the DotReader method for details about dot-encoding. 399 func (r *Reader) ReadDotLines() ([]string, error) { 400 // We could use ReadDotBytes and then Split it, 401 // but reading a line at a time avoids needing a 402 // large contiguous block of memory and is simpler. 403 var v []string 404 var err error 405 for { 406 var line string 407 line, err = r.ReadLine() 408 if err != nil { 409 if err == io.EOF { 410 err = io.ErrUnexpectedEOF 411 } 412 break 413 } 414 415 // Dot by itself marks end; otherwise cut one dot. 416 if len(line) > 0 && line[0] == '.' { 417 if len(line) == 1 { 418 break 419 } 420 line = line[1:] 421 } 422 v = append(v, line) 423 } 424 return v, err 425 } 426 427 // ReadMIMEHeader reads a MIME-style header from r. 428 // The header is a sequence of possibly continued Key: Value lines 429 // ending in a blank line. 430 // The returned map m maps CanonicalMIMEHeaderKey(key) to a 431 // sequence of values in the same order encountered in the input. 432 // 433 // For example, consider this input: 434 // 435 // My-Key: Value 1 436 // Long-Key: Even 437 // Longer Value 438 // My-Key: Value 2 439 // 440 // Given that input, ReadMIMEHeader returns the map: 441 // 442 // map[string][]string{ 443 // "My-Key": {"Value 1", "Value 2"}, 444 // "Long-Key": {"Even Longer Value"}, 445 // } 446 // 447 func (r *Reader) ReadMIMEHeader() (MIMEHeader, error) { 448 m := make(MIMEHeader) 449 for { 450 kv, err := r.readContinuedLineSlice() 451 if len(kv) == 0 { 452 return m, err 453 } 454 455 // Key ends at first colon; must not have spaces. 456 i := bytes.IndexByte(kv, ':') 457 if i < 0 { 458 return m, ProtocolError("malformed MIME header line: " + string(kv)) 459 } 460 key := string(kv[0:i]) 461 if strings.Index(key, " ") >= 0 { 462 key = strings.TrimRight(key, " ") 463 } 464 key = CanonicalMIMEHeaderKey(key) 465 466 // Skip initial spaces in value. 467 i++ // skip colon 468 for i < len(kv) && (kv[i] == ' ' || kv[i] == '\t') { 469 i++ 470 } 471 value := string(kv[i:]) 472 473 m[key] = append(m[key], value) 474 475 if err != nil { 476 return m, err 477 } 478 } 479 panic("unreachable") 480 } 481 482 // CanonicalMIMEHeaderKey returns the canonical format of the 483 // MIME header key s. The canonicalization converts the first 484 // letter and any letter following a hyphen to upper case; 485 // the rest are converted to lowercase. For example, the 486 // canonical key for "accept-encoding" is "Accept-Encoding". 487 func CanonicalMIMEHeaderKey(s string) string { 488 // Quick check for canonical encoding. 489 needUpper := true 490 for i := 0; i < len(s); i++ { 491 c := s[i] 492 if needUpper && 'a' <= c && c <= 'z' { 493 goto MustRewrite 494 } 495 if !needUpper && 'A' <= c && c <= 'Z' { 496 goto MustRewrite 497 } 498 needUpper = c == '-' 499 } 500 return s 501 502 MustRewrite: 503 // Canonicalize: first letter upper case 504 // and upper case after each dash. 505 // (Host, User-Agent, If-Modified-Since). 506 // MIME headers are ASCII only, so no Unicode issues. 507 a := []byte(s) 508 upper := true 509 for i, v := range a { 510 if v == ' ' { 511 a[i] = '-' 512 upper = true 513 continue 514 } 515 if upper && 'a' <= v && v <= 'z' { 516 a[i] = v + 'A' - 'a' 517 } 518 if !upper && 'A' <= v && v <= 'Z' { 519 a[i] = v + 'a' - 'A' 520 } 521 upper = v == '-' 522 } 523 return string(a) 524 }