src/pkg/mime/mediatype.go - The Go Programming Language

Golang

Source file src/pkg/mime/mediatype.go

     1	// Copyright 2010 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package mime
     6	
     7	import (
     8		"bytes"
     9		"errors"
    10		"fmt"
    11		"strings"
    12		"unicode"
    13	)
    14	
    15	// FormatMediaType serializes mediatype t and the parameters
    16	// param as a media type conforming to RFC 2045 and RFC 2616.
    17	// The type and parameter names are written in lower-case.
    18	// When any of the arguments result in a standard violation then
    19	// FormatMediaType returns the empty string.
    20	func FormatMediaType(t string, param map[string]string) string {
    21		slash := strings.Index(t, "/")
    22		if slash == -1 {
    23			return ""
    24		}
    25		major, sub := t[:slash], t[slash+1:]
    26		if !isToken(major) || !isToken(sub) {
    27			return ""
    28		}
    29		var b bytes.Buffer
    30		b.WriteString(strings.ToLower(major))
    31		b.WriteByte('/')
    32		b.WriteString(strings.ToLower(sub))
    33	
    34		for attribute, value := range param {
    35			b.WriteByte(';')
    36			b.WriteByte(' ')
    37			if !isToken(attribute) {
    38				return ""
    39			}
    40			b.WriteString(strings.ToLower(attribute))
    41			b.WriteByte('=')
    42			if isToken(value) {
    43				b.WriteString(value)
    44				continue
    45			}
    46	
    47			b.WriteByte('"')
    48			offset := 0
    49			for index, character := range value {
    50				if character == '"' || character == '\r' {
    51					b.WriteString(value[offset:index])
    52					offset = index
    53					b.WriteByte('\\')
    54				}
    55				if character&0x80 != 0 {
    56					return ""
    57				}
    58			}
    59			b.WriteString(value[offset:])
    60			b.WriteByte('"')
    61		}
    62		return b.String()
    63	}
    64	
    65	func checkMediaTypeDisposition(s string) error {
    66		typ, rest := consumeToken(s)
    67		if typ == "" {
    68			return errors.New("mime: no media type")
    69		}
    70		if rest == "" {
    71			return nil
    72		}
    73		if !strings.HasPrefix(rest, "/") {
    74			return errors.New("mime: expected slash after first token")
    75		}
    76		subtype, rest := consumeToken(rest[1:])
    77		if subtype == "" {
    78			return errors.New("mime: expected token after slash")
    79		}
    80		if rest != "" {
    81			return errors.New("mime: unexpected content after media subtype")
    82		}
    83		return nil
    84	}
    85	
    86	// ParseMediaType parses a media type value and any optional
    87	// parameters, per RFC 1521.  Media types are the values in
    88	// Content-Type and Content-Disposition headers (RFC 2183).
    89	// On success, ParseMediaType returns the media type converted
    90	// to lowercase and trimmed of white space and a non-nil map.
    91	// The returned map, params, maps from the lowercase
    92	// attribute to the attribute value with its case preserved.
    93	func ParseMediaType(v string) (mediatype string, params map[string]string, err error) {
    94		i := strings.Index(v, ";")
    95		if i == -1 {
    96			i = len(v)
    97		}
    98		mediatype = strings.TrimSpace(strings.ToLower(v[0:i]))
    99	
   100		err = checkMediaTypeDisposition(mediatype)
   101		if err != nil {
   102			return "", nil, err
   103		}
   104	
   105		params = make(map[string]string)
   106	
   107		// Map of base parameter name -> parameter name -> value
   108		// for parameters containing a '*' character.
   109		// Lazily initialized.
   110		var continuation map[string]map[string]string
   111	
   112		v = v[i:]
   113		for len(v) > 0 {
   114			v = strings.TrimLeftFunc(v, unicode.IsSpace)
   115			if len(v) == 0 {
   116				break
   117			}
   118			key, value, rest := consumeMediaParam(v)
   119			if key == "" {
   120				if strings.TrimSpace(rest) == ";" {
   121					// Ignore trailing semicolons.
   122					// Not an error.
   123					return
   124				}
   125				// Parse error.
   126				return "", nil, errors.New("mime: invalid media parameter")
   127			}
   128	
   129			pmap := params
   130			if idx := strings.Index(key, "*"); idx != -1 {
   131				baseName := key[:idx]
   132				if continuation == nil {
   133					continuation = make(map[string]map[string]string)
   134				}
   135				var ok bool
   136				if pmap, ok = continuation[baseName]; !ok {
   137					continuation[baseName] = make(map[string]string)
   138					pmap = continuation[baseName]
   139				}
   140			}
   141			if _, exists := pmap[key]; exists {
   142				// Duplicate parameter name is bogus.
   143				return "", nil, errors.New("mime: duplicate parameter name")
   144			}
   145			pmap[key] = value
   146			v = rest
   147		}
   148	
   149		// Stitch together any continuations or things with stars
   150		// (i.e. RFC 2231 things with stars: "foo*0" or "foo*")
   151		var buf bytes.Buffer
   152		for key, pieceMap := range continuation {
   153			singlePartKey := key + "*"
   154			if v, ok := pieceMap[singlePartKey]; ok {
   155				decv := decode2231Enc(v)
   156				params[key] = decv
   157				continue
   158			}
   159	
   160			buf.Reset()
   161			valid := false
   162			for n := 0; ; n++ {
   163				simplePart := fmt.Sprintf("%s*%d", key, n)
   164				if v, ok := pieceMap[simplePart]; ok {
   165					valid = true
   166					buf.WriteString(v)
   167					continue
   168				}
   169				encodedPart := simplePart + "*"
   170				if v, ok := pieceMap[encodedPart]; ok {
   171					valid = true
   172					if n == 0 {
   173						buf.WriteString(decode2231Enc(v))
   174					} else {
   175						decv, _ := percentHexUnescape(v)
   176						buf.WriteString(decv)
   177					}
   178				} else {
   179					break
   180				}
   181			}
   182			if valid {
   183				params[key] = buf.String()
   184			}
   185		}
   186	
   187		return
   188	}
   189	
   190	func decode2231Enc(v string) string {
   191		sv := strings.SplitN(v, "'", 3)
   192		if len(sv) != 3 {
   193			return ""
   194		}
   195		// TODO: ignoring lang in sv[1] for now. If anybody needs it we'll
   196		// need to decide how to expose it in the API. But I'm not sure
   197		// anybody uses it in practice.
   198		charset := strings.ToLower(sv[0])
   199		if charset != "us-ascii" && charset != "utf-8" {
   200			// TODO: unsupported encoding
   201			return ""
   202		}
   203		encv, _ := percentHexUnescape(sv[2])
   204		return encv
   205	}
   206	
   207	func isNotTokenChar(r rune) bool {
   208		return !isTokenChar(r)
   209	}
   210	
   211	// consumeToken consumes a token from the beginning of provided
   212	// string, per RFC 2045 section 5.1 (referenced from 2183), and return
   213	// the token consumed and the rest of the string.  Returns ("", v) on
   214	// failure to consume at least one character.
   215	func consumeToken(v string) (token, rest string) {
   216		notPos := strings.IndexFunc(v, isNotTokenChar)
   217		if notPos == -1 {
   218			return v, ""
   219		}
   220		if notPos == 0 {
   221			return "", v
   222		}
   223		return v[0:notPos], v[notPos:]
   224	}
   225	
   226	// consumeValue consumes a "value" per RFC 2045, where a value is
   227	// either a 'token' or a 'quoted-string'.  On success, consumeValue
   228	// returns the value consumed (and de-quoted/escaped, if a
   229	// quoted-string) and the rest of the string.  On failure, returns
   230	// ("", v).
   231	func consumeValue(v string) (value, rest string) {
   232		if !strings.HasPrefix(v, `"`) && !strings.HasPrefix(v, `'`) {
   233			return consumeToken(v)
   234		}
   235	
   236		leadQuote := rune(v[0])
   237	
   238		// parse a quoted-string
   239		rest = v[1:] // consume the leading quote
   240		buffer := new(bytes.Buffer)
   241		var idx int
   242		var r rune
   243		var nextIsLiteral bool
   244		for idx, r = range rest {
   245			switch {
   246			case nextIsLiteral:
   247				buffer.WriteRune(r)
   248				nextIsLiteral = false
   249			case r == leadQuote:
   250				return buffer.String(), rest[idx+1:]
   251			case r == '\\':
   252				nextIsLiteral = true
   253			case r != '\r' && r != '\n':
   254				buffer.WriteRune(r)
   255			default:
   256				return "", v
   257			}
   258		}
   259		return "", v
   260	}
   261	
   262	func consumeMediaParam(v string) (param, value, rest string) {
   263		rest = strings.TrimLeftFunc(v, unicode.IsSpace)
   264		if !strings.HasPrefix(rest, ";") {
   265			return "", "", v
   266		}
   267	
   268		rest = rest[1:] // consume semicolon
   269		rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
   270		param, rest = consumeToken(rest)
   271		param = strings.ToLower(param)
   272		if param == "" {
   273			return "", "", v
   274		}
   275	
   276		rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
   277		if !strings.HasPrefix(rest, "=") {
   278			return "", "", v
   279		}
   280		rest = rest[1:] // consume equals sign
   281		rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
   282		value, rest = consumeValue(rest)
   283		if value == "" {
   284			return "", "", v
   285		}
   286		return param, value, rest
   287	}
   288	
   289	func percentHexUnescape(s string) (string, error) {
   290		// Count %, check that they're well-formed.
   291		percents := 0
   292		for i := 0; i < len(s); {
   293			if s[i] != '%' {
   294				i++
   295				continue
   296			}
   297			percents++
   298			if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) {
   299				s = s[i:]
   300				if len(s) > 3 {
   301					s = s[0:3]
   302				}
   303				return "", fmt.Errorf("mime: bogus characters after %%: %q", s)
   304			}
   305			i += 3
   306		}
   307		if percents == 0 {
   308			return s, nil
   309		}
   310	
   311		t := make([]byte, len(s)-2*percents)
   312		j := 0
   313		for i := 0; i < len(s); {
   314			switch s[i] {
   315			case '%':
   316				t[j] = unhex(s[i+1])<<4 | unhex(s[i+2])
   317				j++
   318				i += 3
   319			default:
   320				t[j] = s[i]
   321				j++
   322				i++
   323			}
   324		}
   325		return string(t), nil
   326	}
   327	
   328	func ishex(c byte) bool {
   329		switch {
   330		case '0' <= c && c <= '9':
   331			return true
   332		case 'a' <= c && c <= 'f':
   333			return true
   334		case 'A' <= c && c <= 'F':
   335			return true
   336		}
   337		return false
   338	}
   339	
   340	func unhex(c byte) byte {
   341		switch {
   342		case '0' <= c && c <= '9':
   343			return c - '0'
   344		case 'a' <= c && c <= 'f':
   345			return c - 'a' + 10
   346		case 'A' <= c && c <= 'F':
   347			return c - 'A' + 10
   348		}
   349		return 0
   350	}