Source file src/pkg/net/textproto/reader.go
1 // Copyright 2010 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 package textproto
6
7 import (
8 "bufio"
9 "bytes"
10 "io"
11 "io/ioutil"
12 "strconv"
13 "strings"
14 )
15
16 // BUG(rsc): To let callers manage exposure to denial of service
17 // attacks, Reader should allow them to set and reset a limit on
18 // the number of bytes read from the connection.
19
// A Reader implements convenience methods for reading requests
// or responses from a text protocol network connection.
type Reader struct {
	R   *bufio.Reader // buffered source that all methods read from
	dot *dotReader    // DotReader currently handed out, if any; closeDot drains it before other reads
	buf []byte        // a re-usable buffer for readContinuedLineSlice
}
27
28 // NewReader returns a new Reader reading from r.
29 func NewReader(r *bufio.Reader) *Reader {
30 return &Reader{R: r}
31 }
32
33 // ReadLine reads a single line from r,
34 // eliding the final \n or \r\n from the returned string.
35 func (r *Reader) ReadLine() (string, error) {
36 line, err := r.readLineSlice()
37 return string(line), err
38 }
39
40 // ReadLineBytes is like ReadLine but returns a []byte instead of a string.
41 func (r *Reader) ReadLineBytes() ([]byte, error) {
42 line, err := r.readLineSlice()
43 if line != nil {
44 buf := make([]byte, len(line))
45 copy(buf, line)
46 line = buf
47 }
48 return line, err
49 }
50
51 func (r *Reader) readLineSlice() ([]byte, error) {
52 r.closeDot()
53 var line []byte
54 for {
55 l, more, err := r.R.ReadLine()
56 if err != nil {
57 return nil, err
58 }
59 // Avoid the copy if the first call produced a full line.
60 if line == nil && !more {
61 return l, nil
62 }
63 line = append(line, l...)
64 if !more {
65 break
66 }
67 }
68 return line, nil
69 }
70
71 // ReadContinuedLine reads a possibly continued line from r,
72 // eliding the final trailing ASCII white space.
73 // Lines after the first are considered continuations if they
74 // begin with a space or tab character. In the returned data,
75 // continuation lines are separated from the previous line
76 // only by a single space: the newline and leading white space
77 // are removed.
78 //
79 // For example, consider this input:
80 //
81 // Line 1
82 // continued...
83 // Line 2
84 //
85 // The first call to ReadContinuedLine will return "Line 1 continued..."
86 // and the second will return "Line 2".
87 //
88 // A line consisting of only white space is never continued.
89 //
90 func (r *Reader) ReadContinuedLine() (string, error) {
91 line, err := r.readContinuedLineSlice()
92 return string(line), err
93 }
94
// trim returns the sub-slice of s that remains after dropping leading
// and trailing spaces and tabs. It works on raw bytes and makes no
// assumption about Unicode or UTF-8. The result aliases s.
func trim(s []byte) []byte {
	isBlank := func(c byte) bool { return c == ' ' || c == '\t' }

	start, end := 0, len(s)
	for start < end && isBlank(s[start]) {
		start++
	}
	for end > start && isBlank(s[end-1]) {
		end--
	}
	return s[start:end]
}
108
109 // ReadContinuedLineBytes is like ReadContinuedLine but
110 // returns a []byte instead of a string.
111 func (r *Reader) ReadContinuedLineBytes() ([]byte, error) {
112 line, err := r.readContinuedLineSlice()
113 if line != nil {
114 buf := make([]byte, len(line))
115 copy(buf, line)
116 line = buf
117 }
118 return line, err
119 }
120
121 func (r *Reader) readContinuedLineSlice() ([]byte, error) {
122 // Read the first line.
123 line, err := r.readLineSlice()
124 if err != nil {
125 return nil, err
126 }
127 if len(line) == 0 { // blank line - no continuation
128 return line, nil
129 }
130
131 // ReadByte or the next readLineSlice will flush the read buffer;
132 // copy the slice into buf.
133 r.buf = append(r.buf[:0], trim(line)...)
134
135 // Read continuation lines.
136 for r.skipSpace() > 0 {
137 line, err := r.readLineSlice()
138 if err != nil {
139 break
140 }
141 r.buf = append(r.buf, ' ')
142 r.buf = append(r.buf, line...)
143 }
144 return r.buf, nil
145 }
146
147 // skipSpace skips R over all spaces and returns the number of bytes skipped.
148 func (r *Reader) skipSpace() int {
149 n := 0
150 for {
151 c, err := r.R.ReadByte()
152 if err != nil {
153 // Bufio will keep err until next read.
154 break
155 }
156 if c != ' ' && c != '\t' {
157 r.R.UnreadByte()
158 break
159 }
160 n++
161 }
162 return n
163 }
164
165 func (r *Reader) readCodeLine(expectCode int) (code int, continued bool, message string, err error) {
166 line, err := r.ReadLine()
167 if err != nil {
168 return
169 }
170 return parseCodeLine(line, expectCode)
171 }
172
173 func parseCodeLine(line string, expectCode int) (code int, continued bool, message string, err error) {
174 if len(line) < 4 || line[3] != ' ' && line[3] != '-' {
175 err = ProtocolError("short response: " + line)
176 return
177 }
178 continued = line[3] == '-'
179 code, err = strconv.Atoi(line[0:3])
180 if err != nil || code < 100 {
181 err = ProtocolError("invalid response code: " + line)
182 return
183 }
184 message = line[4:]
185 if 1 <= expectCode && expectCode < 10 && code/100 != expectCode ||
186 10 <= expectCode && expectCode < 100 && code/10 != expectCode ||
187 100 <= expectCode && expectCode < 1000 && code != expectCode {
188 err = &Error{code, message}
189 }
190 return
191 }
192
193 // ReadCodeLine reads a response code line of the form
194 // code message
195 // where code is a 3-digit status code and the message
196 // extends to the rest of the line. An example of such a line is:
197 // 220 plan9.bell-labs.com ESMTP
198 //
199 // If the prefix of the status does not match the digits in expectCode,
200 // ReadCodeLine returns with err set to &Error{code, message}.
201 // For example, if expectCode is 31, an error will be returned if
202 // the status is not in the range [310,319].
203 //
204 // If the response is multi-line, ReadCodeLine returns an error.
205 //
206 // An expectCode <= 0 disables the check of the status code.
207 //
208 func (r *Reader) ReadCodeLine(expectCode int) (code int, message string, err error) {
209 code, continued, message, err := r.readCodeLine(expectCode)
210 if err == nil && continued {
211 err = ProtocolError("unexpected multi-line response: " + message)
212 }
213 return
214 }
215
216 // ReadResponse reads a multi-line response of the form:
217 //
218 // code-message line 1
219 // code-message line 2
220 // ...
221 // code message line n
222 //
223 // where code is a 3-digit status code. The first line starts with the
224 // code and a hyphen. The response is terminated by a line that starts
225 // with the same code followed by a space. Each line in message is
226 // separated by a newline (\n).
227 //
228 // See page 36 of RFC 959 (http://www.ietf.org/rfc/rfc959.txt) for
229 // details.
230 //
231 // If the prefix of the status does not match the digits in expectCode,
232 // ReadResponse returns with err set to &Error{code, message}.
233 // For example, if expectCode is 31, an error will be returned if
234 // the status is not in the range [310,319].
235 //
236 // An expectCode <= 0 disables the check of the status code.
237 //
238 func (r *Reader) ReadResponse(expectCode int) (code int, message string, err error) {
239 code, continued, message, err := r.readCodeLine(expectCode)
240 for err == nil && continued {
241 line, err := r.ReadLine()
242 if err != nil {
243 return 0, "", err
244 }
245
246 var code2 int
247 var moreMessage string
248 code2, continued, moreMessage, err = parseCodeLine(line, expectCode)
249 if err != nil || code2 != code {
250 message += "\n" + strings.TrimRight(line, "\r\n")
251 continued = true
252 continue
253 }
254 message += "\n" + moreMessage
255 }
256 return
257 }
258
259 // DotReader returns a new Reader that satisfies Reads using the
260 // decoded text of a dot-encoded block read from r.
261 // The returned Reader is only valid until the next call
262 // to a method on r.
263 //
264 // Dot encoding is a common framing used for data blocks
265 // in text protocols such as SMTP. The data consists of a sequence
266 // of lines, each of which ends in "\r\n". The sequence itself
267 // ends at a line containing just a dot: ".\r\n". Lines beginning
268 // with a dot are escaped with an additional dot to avoid
269 // looking like the end of the sequence.
270 //
271 // The decoded form returned by the Reader's Read method
272 // rewrites the "\r\n" line endings into the simpler "\n",
273 // removes leading dot escapes if present, and stops with error io.EOF
274 // after consuming (and discarding) the end-of-sequence line.
275 func (r *Reader) DotReader() io.Reader {
276 r.closeDot()
277 r.dot = &dotReader{r: r}
278 return r.dot
279 }
280
// dotReader is the io.Reader handed out by Reader.DotReader. It decodes
// a dot-encoded block on the fly while reading from its parent Reader.
type dotReader struct {
	r     *Reader // parent Reader; bytes are read from r.R
	state int     // current state of the decoder in Read; zero means beginning of line
}
285
286 // Read satisfies reads by decoding dot-encoded data read from d.r.
287 func (d *dotReader) Read(b []byte) (n int, err error) {
288 // Run data through a simple state machine to
289 // elide leading dots, rewrite trailing \r\n into \n,
290 // and detect ending .\r\n line.
291 const (
292 stateBeginLine = iota // beginning of line; initial state; must be zero
293 stateDot // read . at beginning of line
294 stateDotCR // read .\r at beginning of line
295 stateCR // read \r (possibly at end of line)
296 stateData // reading data in middle of line
297 stateEOF // reached .\r\n end marker line
298 )
299 br := d.r.R
300 for n < len(b) && d.state != stateEOF {
301 var c byte
302 c, err = br.ReadByte()
303 if err != nil {
304 if err == io.EOF {
305 err = io.ErrUnexpectedEOF
306 }
307 break
308 }
309 switch d.state {
310 case stateBeginLine:
311 if c == '.' {
312 d.state = stateDot
313 continue
314 }
315 if c == '\r' {
316 d.state = stateCR
317 continue
318 }
319 d.state = stateData
320
321 case stateDot:
322 if c == '\r' {
323 d.state = stateDotCR
324 continue
325 }
326 if c == '\n' {
327 d.state = stateEOF
328 continue
329 }
330 d.state = stateData
331
332 case stateDotCR:
333 if c == '\n' {
334 d.state = stateEOF
335 continue
336 }
337 // Not part of .\r\n.
338 // Consume leading dot and emit saved \r.
339 br.UnreadByte()
340 c = '\r'
341 d.state = stateData
342
343 case stateCR:
344 if c == '\n' {
345 d.state = stateBeginLine
346 break
347 }
348 // Not part of \r\n. Emit saved \r
349 br.UnreadByte()
350 c = '\r'
351 d.state = stateData
352
353 case stateData:
354 if c == '\r' {
355 d.state = stateCR
356 continue
357 }
358 if c == '\n' {
359 d.state = stateBeginLine
360 }
361 }
362 b[n] = c
363 n++
364 }
365 if err == nil && d.state == stateEOF {
366 err = io.EOF
367 }
368 if err != nil && d.r.dot == d {
369 d.r.dot = nil
370 }
371 return
372 }
373
374 // closeDot drains the current DotReader if any,
375 // making sure that it reads until the ending dot line.
376 func (r *Reader) closeDot() {
377 if r.dot == nil {
378 return
379 }
380 buf := make([]byte, 128)
381 for r.dot != nil {
382 // When Read reaches EOF or an error,
383 // it will set r.dot == nil.
384 r.dot.Read(buf)
385 }
386 }
387
388 // ReadDotBytes reads a dot-encoding and returns the decoded data.
389 //
390 // See the documentation for the DotReader method for details about dot-encoding.
391 func (r *Reader) ReadDotBytes() ([]byte, error) {
392 return ioutil.ReadAll(r.DotReader())
393 }
394
395 // ReadDotLines reads a dot-encoding and returns a slice
396 // containing the decoded lines, with the final \r\n or \n elided from each.
397 //
398 // See the documentation for the DotReader method for details about dot-encoding.
399 func (r *Reader) ReadDotLines() ([]string, error) {
400 // We could use ReadDotBytes and then Split it,
401 // but reading a line at a time avoids needing a
402 // large contiguous block of memory and is simpler.
403 var v []string
404 var err error
405 for {
406 var line string
407 line, err = r.ReadLine()
408 if err != nil {
409 if err == io.EOF {
410 err = io.ErrUnexpectedEOF
411 }
412 break
413 }
414
415 // Dot by itself marks end; otherwise cut one dot.
416 if len(line) > 0 && line[0] == '.' {
417 if len(line) == 1 {
418 break
419 }
420 line = line[1:]
421 }
422 v = append(v, line)
423 }
424 return v, err
425 }
426
427 // ReadMIMEHeader reads a MIME-style header from r.
428 // The header is a sequence of possibly continued Key: Value lines
429 // ending in a blank line.
430 // The returned map m maps CanonicalMIMEHeaderKey(key) to a
431 // sequence of values in the same order encountered in the input.
432 //
433 // For example, consider this input:
434 //
435 // My-Key: Value 1
436 // Long-Key: Even
437 // Longer Value
438 // My-Key: Value 2
439 //
440 // Given that input, ReadMIMEHeader returns the map:
441 //
442 // map[string][]string{
443 // "My-Key": {"Value 1", "Value 2"},
444 // "Long-Key": {"Even Longer Value"},
445 // }
446 //
447 func (r *Reader) ReadMIMEHeader() (MIMEHeader, error) {
448 m := make(MIMEHeader)
449 for {
450 kv, err := r.readContinuedLineSlice()
451 if len(kv) == 0 {
452 return m, err
453 }
454
455 // Key ends at first colon; must not have spaces.
456 i := bytes.IndexByte(kv, ':')
457 if i < 0 {
458 return m, ProtocolError("malformed MIME header line: " + string(kv))
459 }
460 key := string(kv[0:i])
461 if strings.Index(key, " ") >= 0 {
462 key = strings.TrimRight(key, " ")
463 }
464 key = CanonicalMIMEHeaderKey(key)
465
466 // Skip initial spaces in value.
467 i++ // skip colon
468 for i < len(kv) && (kv[i] == ' ' || kv[i] == '\t') {
469 i++
470 }
471 value := string(kv[i:])
472
473 m[key] = append(m[key], value)
474
475 if err != nil {
476 return m, err
477 }
478 }
479 panic("unreachable")
480 }
481
// CanonicalMIMEHeaderKey returns the canonical format of the
// MIME header key s. The canonicalization converts the first
// letter and any letter following a hyphen to upper case;
// the rest are converted to lowercase. For example, the
// canonical key for "accept-encoding" is "Accept-Encoding".
//
// A space in s is replaced by a hyphen and therefore also starts a
// new word. If s is already canonical it is returned unchanged,
// without allocating.
func CanonicalMIMEHeaderKey(s string) string {
	const caseDelta = 'a' - 'A'

	// Fast path: find the first byte that is not already in canonical
	// form. A space counts as non-canonical, so the result does not
	// depend on whether some other byte also needs rewriting.
	i, upper := 0, true
	for ; i < len(s); i++ {
		c := s[i]
		if c == ' ' ||
			(upper && 'a' <= c && c <= 'z') ||
			(!upper && 'A' <= c && c <= 'Z') {
			break
		}
		upper = c == '-'
	}
	if i == len(s) {
		return s
	}

	// Canonicalize: first letter upper case
	// and upper case after each dash.
	// (Host, User-Agent, If-Modified-Since).
	// MIME headers are ASCII only, so no Unicode issues.
	// Bytes before i are already canonical, so resume from i with the
	// state the scan above left behind.
	a := []byte(s)
	for ; i < len(a); i++ {
		c := a[i]
		switch {
		case c == ' ':
			a[i] = '-'
			upper = true
			continue
		case upper && 'a' <= c && c <= 'z':
			a[i] = c - caseDelta
		case !upper && 'A' <= c && c <= 'Z':
			a[i] = c + caseDelta
		}
		upper = c == '-'
	}
	return string(a)
}