Source file src/pkg/mime/mediatype.go
1 // Copyright 2010 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 package mime
6
import (
	"bytes"
	"errors"
	"fmt"
	"sort"
	"strings"
	"unicode"
)
14
15 // FormatMediaType serializes mediatype t and the parameters
16 // param as a media type conforming to RFC 2045 and RFC 2616.
17 // The type and parameter names are written in lower-case.
18 // When any of the arguments result in a standard violation then
19 // FormatMediaType returns the empty string.
20 func FormatMediaType(t string, param map[string]string) string {
21 slash := strings.Index(t, "/")
22 if slash == -1 {
23 return ""
24 }
25 major, sub := t[:slash], t[slash+1:]
26 if !isToken(major) || !isToken(sub) {
27 return ""
28 }
29 var b bytes.Buffer
30 b.WriteString(strings.ToLower(major))
31 b.WriteByte('/')
32 b.WriteString(strings.ToLower(sub))
33
34 for attribute, value := range param {
35 b.WriteByte(';')
36 b.WriteByte(' ')
37 if !isToken(attribute) {
38 return ""
39 }
40 b.WriteString(strings.ToLower(attribute))
41 b.WriteByte('=')
42 if isToken(value) {
43 b.WriteString(value)
44 continue
45 }
46
47 b.WriteByte('"')
48 offset := 0
49 for index, character := range value {
50 if character == '"' || character == '\r' {
51 b.WriteString(value[offset:index])
52 offset = index
53 b.WriteByte('\\')
54 }
55 if character&0x80 != 0 {
56 return ""
57 }
58 }
59 b.WriteString(value[offset:])
60 b.WriteByte('"')
61 }
62 return b.String()
63 }
64
65 func checkMediaTypeDisposition(s string) error {
66 typ, rest := consumeToken(s)
67 if typ == "" {
68 return errors.New("mime: no media type")
69 }
70 if rest == "" {
71 return nil
72 }
73 if !strings.HasPrefix(rest, "/") {
74 return errors.New("mime: expected slash after first token")
75 }
76 subtype, rest := consumeToken(rest[1:])
77 if subtype == "" {
78 return errors.New("mime: expected token after slash")
79 }
80 if rest != "" {
81 return errors.New("mime: unexpected content after media subtype")
82 }
83 return nil
84 }
85
86 // ParseMediaType parses a media type value and any optional
87 // parameters, per RFC 1521. Media types are the values in
88 // Content-Type and Content-Disposition headers (RFC 2183).
89 // On success, ParseMediaType returns the media type converted
90 // to lowercase and trimmed of white space and a non-nil map.
91 // The returned map, params, maps from the lowercase
92 // attribute to the attribute value with its case preserved.
93 func ParseMediaType(v string) (mediatype string, params map[string]string, err error) {
94 i := strings.Index(v, ";")
95 if i == -1 {
96 i = len(v)
97 }
98 mediatype = strings.TrimSpace(strings.ToLower(v[0:i]))
99
100 err = checkMediaTypeDisposition(mediatype)
101 if err != nil {
102 return "", nil, err
103 }
104
105 params = make(map[string]string)
106
107 // Map of base parameter name -> parameter name -> value
108 // for parameters containing a '*' character.
109 // Lazily initialized.
110 var continuation map[string]map[string]string
111
112 v = v[i:]
113 for len(v) > 0 {
114 v = strings.TrimLeftFunc(v, unicode.IsSpace)
115 if len(v) == 0 {
116 break
117 }
118 key, value, rest := consumeMediaParam(v)
119 if key == "" {
120 if strings.TrimSpace(rest) == ";" {
121 // Ignore trailing semicolons.
122 // Not an error.
123 return
124 }
125 // Parse error.
126 return "", nil, errors.New("mime: invalid media parameter")
127 }
128
129 pmap := params
130 if idx := strings.Index(key, "*"); idx != -1 {
131 baseName := key[:idx]
132 if continuation == nil {
133 continuation = make(map[string]map[string]string)
134 }
135 var ok bool
136 if pmap, ok = continuation[baseName]; !ok {
137 continuation[baseName] = make(map[string]string)
138 pmap = continuation[baseName]
139 }
140 }
141 if _, exists := pmap[key]; exists {
142 // Duplicate parameter name is bogus.
143 return "", nil, errors.New("mime: duplicate parameter name")
144 }
145 pmap[key] = value
146 v = rest
147 }
148
149 // Stitch together any continuations or things with stars
150 // (i.e. RFC 2231 things with stars: "foo*0" or "foo*")
151 var buf bytes.Buffer
152 for key, pieceMap := range continuation {
153 singlePartKey := key + "*"
154 if v, ok := pieceMap[singlePartKey]; ok {
155 decv := decode2231Enc(v)
156 params[key] = decv
157 continue
158 }
159
160 buf.Reset()
161 valid := false
162 for n := 0; ; n++ {
163 simplePart := fmt.Sprintf("%s*%d", key, n)
164 if v, ok := pieceMap[simplePart]; ok {
165 valid = true
166 buf.WriteString(v)
167 continue
168 }
169 encodedPart := simplePart + "*"
170 if v, ok := pieceMap[encodedPart]; ok {
171 valid = true
172 if n == 0 {
173 buf.WriteString(decode2231Enc(v))
174 } else {
175 decv, _ := percentHexUnescape(v)
176 buf.WriteString(decv)
177 }
178 } else {
179 break
180 }
181 }
182 if valid {
183 params[key] = buf.String()
184 }
185 }
186
187 return
188 }
189
190 func decode2231Enc(v string) string {
191 sv := strings.SplitN(v, "'", 3)
192 if len(sv) != 3 {
193 return ""
194 }
195 // TODO: ignoring lang in sv[1] for now. If anybody needs it we'll
196 // need to decide how to expose it in the API. But I'm not sure
197 // anybody uses it in practice.
198 charset := strings.ToLower(sv[0])
199 if charset != "us-ascii" && charset != "utf-8" {
200 // TODO: unsupported encoding
201 return ""
202 }
203 encv, _ := percentHexUnescape(sv[2])
204 return encv
205 }
206
207 func isNotTokenChar(r rune) bool {
208 return !isTokenChar(r)
209 }
210
211 // consumeToken consumes a token from the beginning of provided
212 // string, per RFC 2045 section 5.1 (referenced from 2183), and return
213 // the token consumed and the rest of the string. Returns ("", v) on
214 // failure to consume at least one character.
215 func consumeToken(v string) (token, rest string) {
216 notPos := strings.IndexFunc(v, isNotTokenChar)
217 if notPos == -1 {
218 return v, ""
219 }
220 if notPos == 0 {
221 return "", v
222 }
223 return v[0:notPos], v[notPos:]
224 }
225
226 // consumeValue consumes a "value" per RFC 2045, where a value is
227 // either a 'token' or a 'quoted-string'. On success, consumeValue
228 // returns the value consumed (and de-quoted/escaped, if a
229 // quoted-string) and the rest of the string. On failure, returns
230 // ("", v).
231 func consumeValue(v string) (value, rest string) {
232 if !strings.HasPrefix(v, `"`) && !strings.HasPrefix(v, `'`) {
233 return consumeToken(v)
234 }
235
236 leadQuote := rune(v[0])
237
238 // parse a quoted-string
239 rest = v[1:] // consume the leading quote
240 buffer := new(bytes.Buffer)
241 var idx int
242 var r rune
243 var nextIsLiteral bool
244 for idx, r = range rest {
245 switch {
246 case nextIsLiteral:
247 buffer.WriteRune(r)
248 nextIsLiteral = false
249 case r == leadQuote:
250 return buffer.String(), rest[idx+1:]
251 case r == '\\':
252 nextIsLiteral = true
253 case r != '\r' && r != '\n':
254 buffer.WriteRune(r)
255 default:
256 return "", v
257 }
258 }
259 return "", v
260 }
261
262 func consumeMediaParam(v string) (param, value, rest string) {
263 rest = strings.TrimLeftFunc(v, unicode.IsSpace)
264 if !strings.HasPrefix(rest, ";") {
265 return "", "", v
266 }
267
268 rest = rest[1:] // consume semicolon
269 rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
270 param, rest = consumeToken(rest)
271 param = strings.ToLower(param)
272 if param == "" {
273 return "", "", v
274 }
275
276 rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
277 if !strings.HasPrefix(rest, "=") {
278 return "", "", v
279 }
280 rest = rest[1:] // consume equals sign
281 rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
282 value, rest = consumeValue(rest)
283 if value == "" {
284 return "", "", v
285 }
286 return param, value, rest
287 }
288
289 func percentHexUnescape(s string) (string, error) {
290 // Count %, check that they're well-formed.
291 percents := 0
292 for i := 0; i < len(s); {
293 if s[i] != '%' {
294 i++
295 continue
296 }
297 percents++
298 if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) {
299 s = s[i:]
300 if len(s) > 3 {
301 s = s[0:3]
302 }
303 return "", fmt.Errorf("mime: bogus characters after %%: %q", s)
304 }
305 i += 3
306 }
307 if percents == 0 {
308 return s, nil
309 }
310
311 t := make([]byte, len(s)-2*percents)
312 j := 0
313 for i := 0; i < len(s); {
314 switch s[i] {
315 case '%':
316 t[j] = unhex(s[i+1])<<4 | unhex(s[i+2])
317 j++
318 i += 3
319 default:
320 t[j] = s[i]
321 j++
322 i++
323 }
324 }
325 return string(t), nil
326 }
327
// ishex reports whether c is an ASCII hexadecimal digit
// ('0'-'9', 'a'-'f' or 'A'-'F').
func ishex(c byte) bool {
	return ('0' <= c && c <= '9') ||
		('a' <= c && c <= 'f') ||
		('A' <= c && c <= 'F')
}
339
// unhex returns the numeric value (0-15) of the ASCII hexadecimal
// digit c.  It returns 0 for any byte that is not a hex digit.
func unhex(c byte) byte {
	if c >= '0' && c <= '9' {
		return c - '0'
	}
	if c >= 'a' && c <= 'f' {
		return c - 'a' + 10
	}
	if c >= 'A' && c <= 'F' {
		return c - 'A' + 10
	}
	return 0
}