src/pkg/net/url/url.go - The Go Programming Language

Golang

Source file src/pkg/net/url/url.go

     1	// Copyright 2009 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// Package url parses URLs and implements query escaping.
     6	// See RFC 3986.
     7	package url
     8	
     9	import (
    10		"errors"
    11		"strconv"
    12		"strings"
    13	)
    14	
    15	// Error reports an error and the operation and URL that caused it.
    16	type Error struct {
    17		Op  string
    18		URL string
    19		Err error
    20	}
    21	
    22	func (e *Error) Error() string { return e.Op + " " + e.URL + ": " + e.Err.Error() }
    23	
    24	func ishex(c byte) bool {
    25		switch {
    26		case '0' <= c && c <= '9':
    27			return true
    28		case 'a' <= c && c <= 'f':
    29			return true
    30		case 'A' <= c && c <= 'F':
    31			return true
    32		}
    33		return false
    34	}
    35	
    36	func unhex(c byte) byte {
    37		switch {
    38		case '0' <= c && c <= '9':
    39			return c - '0'
    40		case 'a' <= c && c <= 'f':
    41			return c - 'a' + 10
    42		case 'A' <= c && c <= 'F':
    43			return c - 'A' + 10
    44		}
    45		return 0
    46	}
    47	
    48	type encoding int
    49	
    50	const (
    51		encodePath encoding = 1 + iota
    52		encodeUserPassword
    53		encodeQueryComponent
    54		encodeFragment
    55	)
    56	
    57	type EscapeError string
    58	
    59	func (e EscapeError) Error() string {
    60		return "invalid URL escape " + strconv.Quote(string(e))
    61	}
    62	
    63	// Return true if the specified character should be escaped when
    64	// appearing in a URL string, according to RFC 3986.
    65	// When 'all' is true the full range of reserved characters are matched.
    66	func shouldEscape(c byte, mode encoding) bool {
    67		// §2.3 Unreserved characters (alphanum)
    68		if 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' {
    69			return false
    70		}
    71	
    72		switch c {
    73		case '-', '_', '.', '~': // §2.3 Unreserved characters (mark)
    74			return false
    75	
    76		case '$', '&', '+', ',', '/', ':', ';', '=', '?', '@': // §2.2 Reserved characters (reserved)
    77			// Different sections of the URL allow a few of
    78			// the reserved characters to appear unescaped.
    79			switch mode {
    80			case encodePath: // §3.3
    81				// The RFC allows : @ & = + $ but saves / ; , for assigning
    82				// meaning to individual path segments. This package
    83				// only manipulates the path as a whole, so we allow those
    84				// last two as well. That leaves only ? to escape.
    85				return c == '?'
    86	
    87			case encodeUserPassword: // §3.2.2
    88				// The RFC allows ; : & = + $ , in userinfo, so we must escape only @ and /.
    89				// The parsing of userinfo treats : as special so we must escape that too.
    90				return c == '@' || c == '/' || c == ':'
    91	
    92			case encodeQueryComponent: // §3.4
    93				// The RFC reserves (so we must escape) everything.
    94				return true
    95	
    96			case encodeFragment: // §4.1
    97				// The RFC text is silent but the grammar allows
    98				// everything, so escape nothing.
    99				return false
   100			}
   101		}
   102	
   103		// Everything else must be escaped.
   104		return true
   105	}
   106	
   107	// QueryUnescape does the inverse transformation of QueryEscape, converting
   108	// %AB into the byte 0xAB and '+' into ' ' (space). It returns an error if
   109	// any % is not followed by two hexadecimal digits.
   110	func QueryUnescape(s string) (string, error) {
   111		return unescape(s, encodeQueryComponent)
   112	}
   113	
   114	// unescape unescapes a string; the mode specifies
   115	// which section of the URL string is being unescaped.
   116	func unescape(s string, mode encoding) (string, error) {
   117		// Count %, check that they're well-formed.
   118		n := 0
   119		hasPlus := false
   120		for i := 0; i < len(s); {
   121			switch s[i] {
   122			case '%':
   123				n++
   124				if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) {
   125					s = s[i:]
   126					if len(s) > 3 {
   127						s = s[0:3]
   128					}
   129					return "", EscapeError(s)
   130				}
   131				i += 3
   132			case '+':
   133				hasPlus = mode == encodeQueryComponent
   134				i++
   135			default:
   136				i++
   137			}
   138		}
   139	
   140		if n == 0 && !hasPlus {
   141			return s, nil
   142		}
   143	
   144		t := make([]byte, len(s)-2*n)
   145		j := 0
   146		for i := 0; i < len(s); {
   147			switch s[i] {
   148			case '%':
   149				t[j] = unhex(s[i+1])<<4 | unhex(s[i+2])
   150				j++
   151				i += 3
   152			case '+':
   153				if mode == encodeQueryComponent {
   154					t[j] = ' '
   155				} else {
   156					t[j] = '+'
   157				}
   158				j++
   159				i++
   160			default:
   161				t[j] = s[i]
   162				j++
   163				i++
   164			}
   165		}
   166		return string(t), nil
   167	}
   168	
   169	// QueryEscape escapes the string so it can be safely placed
   170	// inside a URL query.
   171	func QueryEscape(s string) string {
   172		return escape(s, encodeQueryComponent)
   173	}
   174	
   175	func escape(s string, mode encoding) string {
   176		spaceCount, hexCount := 0, 0
   177		for i := 0; i < len(s); i++ {
   178			c := s[i]
   179			if shouldEscape(c, mode) {
   180				if c == ' ' && mode == encodeQueryComponent {
   181					spaceCount++
   182				} else {
   183					hexCount++
   184				}
   185			}
   186		}
   187	
   188		if spaceCount == 0 && hexCount == 0 {
   189			return s
   190		}
   191	
   192		t := make([]byte, len(s)+2*hexCount)
   193		j := 0
   194		for i := 0; i < len(s); i++ {
   195			switch c := s[i]; {
   196			case c == ' ' && mode == encodeQueryComponent:
   197				t[j] = '+'
   198				j++
   199			case shouldEscape(c, mode):
   200				t[j] = '%'
   201				t[j+1] = "0123456789ABCDEF"[c>>4]
   202				t[j+2] = "0123456789ABCDEF"[c&15]
   203				j += 3
   204			default:
   205				t[j] = s[i]
   206				j++
   207			}
   208		}
   209		return string(t)
   210	}
   211	
   212	// A URL represents a parsed URL (technically, a URI reference).
   213	// The general form represented is:
   214	//
   215	//	scheme://[userinfo@]host/path[?query][#fragment]
   216	//
   217	// URLs that do not start with a slash after the scheme are interpreted as:
   218	//
   219	//	scheme:opaque[?query][#fragment]
   220	//
   221	type URL struct {
   222		Scheme   string
   223		Opaque   string    // encoded opaque data
   224		User     *Userinfo // username and password information
   225		Host     string
   226		Path     string
   227		RawQuery string // encoded query values, without '?'
   228		Fragment string // fragment for references, without '#'
   229	}
   230	
   231	// User returns a Userinfo containing the provided username
   232	// and no password set.
   233	func User(username string) *Userinfo {
   234		return &Userinfo{username, "", false}
   235	}
   236	
   237	// UserPassword returns a Userinfo containing the provided username
   238	// and password.
   239	// This functionality should only be used with legacy web sites.
   240	// RFC 2396 warns that interpreting Userinfo this way
   241	// ``is NOT RECOMMENDED, because the passing of authentication
   242	// information in clear text (such as URI) has proven to be a
   243	// security risk in almost every case where it has been used.''
   244	func UserPassword(username, password string) *Userinfo {
   245		return &Userinfo{username, password, true}
   246	}
   247	
   248	// The Userinfo type is an immutable encapsulation of username and
   249	// password details for a URL. An existing Userinfo value is guaranteed
   250	// to have a username set (potentially empty, as allowed by RFC 2396),
   251	// and optionally a password.
   252	type Userinfo struct {
   253		username    string
   254		password    string
   255		passwordSet bool
   256	}
   257	
   258	// Username returns the username.
   259	func (u *Userinfo) Username() string {
   260		return u.username
   261	}
   262	
   263	// Password returns the password in case it is set, and whether it is set.
   264	func (u *Userinfo) Password() (string, bool) {
   265		if u.passwordSet {
   266			return u.password, true
   267		}
   268		return "", false
   269	}
   270	
   271	// String returns the encoded userinfo information in the standard form
   272	// of "username[:password]".
   273	func (u *Userinfo) String() string {
   274		s := escape(u.username, encodeUserPassword)
   275		if u.passwordSet {
   276			s += ":" + escape(u.password, encodeUserPassword)
   277		}
   278		return s
   279	}
   280	
   281	// Maybe rawurl is of the form scheme:path.
   282	// (Scheme must be [a-zA-Z][a-zA-Z0-9+-.]*)
   283	// If so, return scheme, path; else return "", rawurl.
   284	func getscheme(rawurl string) (scheme, path string, err error) {
   285		for i := 0; i < len(rawurl); i++ {
   286			c := rawurl[i]
   287			switch {
   288			case 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z':
   289			// do nothing
   290			case '0' <= c && c <= '9' || c == '+' || c == '-' || c == '.':
   291				if i == 0 {
   292					return "", rawurl, nil
   293				}
   294			case c == ':':
   295				if i == 0 {
   296					return "", "", errors.New("missing protocol scheme")
   297				}
   298				return rawurl[0:i], rawurl[i+1:], nil
   299			default:
   300				// we have encountered an invalid character,
   301				// so there is no valid scheme
   302				return "", rawurl, nil
   303			}
   304		}
   305		return "", rawurl, nil
   306	}
   307	
   308	// Maybe s is of the form t c u.
   309	// If so, return t, c u (or t, u if cutc == true).
   310	// If not, return s, "".
   311	func split(s string, c byte, cutc bool) (string, string) {
   312		for i := 0; i < len(s); i++ {
   313			if s[i] == c {
   314				if cutc {
   315					return s[0:i], s[i+1:]
   316				}
   317				return s[0:i], s[i:]
   318			}
   319		}
   320		return s, ""
   321	}
   322	
   323	// Parse parses rawurl into a URL structure.
   324	// The rawurl may be relative or absolute.
   325	func Parse(rawurl string) (url *URL, err error) {
   326		// Cut off #frag
   327		u, frag := split(rawurl, '#', true)
   328		if url, err = parse(u, false); err != nil {
   329			return nil, err
   330		}
   331		if frag == "" {
   332			return url, nil
   333		}
   334		if url.Fragment, err = unescape(frag, encodeFragment); err != nil {
   335			return nil, &Error{"parse", rawurl, err}
   336		}
   337		return url, nil
   338	}
   339	
   340	// ParseRequestURI parses rawurl into a URL structure.  It assumes that
   341	// rawurl was received in an HTTP request, so the rawurl is interpreted
   342	// only as an absolute URI or an absolute path.
   343	// The string rawurl is assumed not to have a #fragment suffix.
   344	// (Web browsers strip #fragment before sending the URL to a web server.)
   345	func ParseRequestURI(rawurl string) (url *URL, err error) {
   346		return parse(rawurl, true)
   347	}
   348	
   349	// parse parses a URL from a string in one of two contexts.  If
   350	// viaRequest is true, the URL is assumed to have arrived via an HTTP request,
   351	// in which case only absolute URLs or path-absolute relative URLs are allowed.
   352	// If viaRequest is false, all forms of relative URLs are allowed.
   353	func parse(rawurl string, viaRequest bool) (url *URL, err error) {
   354		var rest string
   355	
   356		if rawurl == "" {
   357			err = errors.New("empty url")
   358			goto Error
   359		}
   360		url = new(URL)
   361	
   362		// Split off possible leading "http:", "mailto:", etc.
   363		// Cannot contain escaped characters.
   364		if url.Scheme, rest, err = getscheme(rawurl); err != nil {
   365			goto Error
   366		}
   367	
   368		rest, url.RawQuery = split(rest, '?', true)
   369	
   370		if !strings.HasPrefix(rest, "/") {
   371			if url.Scheme != "" {
   372				// We consider rootless paths per RFC 3986 as opaque.
   373				url.Opaque = rest
   374				return url, nil
   375			}
   376			if viaRequest {
   377				err = errors.New("invalid URI for request")
   378				goto Error
   379			}
   380		}
   381	
   382		if (url.Scheme != "" || !viaRequest) && strings.HasPrefix(rest, "//") && !strings.HasPrefix(rest, "///") {
   383			var authority string
   384			authority, rest = split(rest[2:], '/', false)
   385			url.User, url.Host, err = parseAuthority(authority)
   386			if err != nil {
   387				goto Error
   388			}
   389			if strings.Contains(url.Host, "%") {
   390				err = errors.New("hexadecimal escape in host")
   391				goto Error
   392			}
   393		}
   394		if url.Path, err = unescape(rest, encodePath); err != nil {
   395			goto Error
   396		}
   397		return url, nil
   398	
   399	Error:
   400		return nil, &Error{"parse", rawurl, err}
   401	}
   402	
   403	func parseAuthority(authority string) (user *Userinfo, host string, err error) {
   404		if strings.Index(authority, "@") < 0 {
   405			host = authority
   406			return
   407		}
   408		userinfo, host := split(authority, '@', true)
   409		if strings.Index(userinfo, ":") < 0 {
   410			if userinfo, err = unescape(userinfo, encodeUserPassword); err != nil {
   411				return
   412			}
   413			user = User(userinfo)
   414		} else {
   415			username, password := split(userinfo, ':', true)
   416			if username, err = unescape(username, encodeUserPassword); err != nil {
   417				return
   418			}
   419			if password, err = unescape(password, encodeUserPassword); err != nil {
   420				return
   421			}
   422			user = UserPassword(username, password)
   423		}
   424		return
   425	}
   426	
   427	// String reassembles the URL into a valid URL string.
   428	func (u *URL) String() string {
   429		// TODO: Rewrite to use bytes.Buffer
   430		result := ""
   431		if u.Scheme != "" {
   432			result += u.Scheme + ":"
   433		}
   434		if u.Opaque != "" {
   435			result += u.Opaque
   436		} else {
   437			if u.Host != "" || u.User != nil {
   438				result += "//"
   439				if u := u.User; u != nil {
   440					result += u.String() + "@"
   441				}
   442				result += u.Host
   443			}
   444			result += escape(u.Path, encodePath)
   445		}
   446		if u.RawQuery != "" {
   447			result += "?" + u.RawQuery
   448		}
   449		if u.Fragment != "" {
   450			result += "#" + escape(u.Fragment, encodeFragment)
   451		}
   452		return result
   453	}
   454	
   455	// Values maps a string key to a list of values.
   456	// It is typically used for query parameters and form values.
   457	// Unlike in the http.Header map, the keys in a Values map
   458	// are case-sensitive.
   459	type Values map[string][]string
   460	
   461	// Get gets the first value associated with the given key.
   462	// If there are no values associated with the key, Get returns
   463	// the empty string. To access multiple values, use the map
   464	// directly.
   465	func (v Values) Get(key string) string {
   466		if v == nil {
   467			return ""
   468		}
   469		vs, ok := v[key]
   470		if !ok || len(vs) == 0 {
   471			return ""
   472		}
   473		return vs[0]
   474	}
   475	
   476	// Set sets the key to value. It replaces any existing
   477	// values.
   478	func (v Values) Set(key, value string) {
   479		v[key] = []string{value}
   480	}
   481	
   482	// Add adds the key to value. It appends to any existing
   483	// values associated with key.
   484	func (v Values) Add(key, value string) {
   485		v[key] = append(v[key], value)
   486	}
   487	
   488	// Del deletes the values associated with key.
   489	func (v Values) Del(key string) {
   490		delete(v, key)
   491	}
   492	
   493	// ParseQuery parses the URL-encoded query string and returns
   494	// a map listing the values specified for each key.
   495	// ParseQuery always returns a non-nil map containing all the
   496	// valid query parameters found; err describes the first decoding error
   497	// encountered, if any.
   498	func ParseQuery(query string) (m Values, err error) {
   499		m = make(Values)
   500		err = parseQuery(m, query)
   501		return
   502	}
   503	
   504	func parseQuery(m Values, query string) (err error) {
   505		for query != "" {
   506			key := query
   507			if i := strings.IndexAny(key, "&;"); i >= 0 {
   508				key, query = key[:i], key[i+1:]
   509			} else {
   510				query = ""
   511			}
   512			if key == "" {
   513				continue
   514			}
   515			value := ""
   516			if i := strings.Index(key, "="); i >= 0 {
   517				key, value = key[:i], key[i+1:]
   518			}
   519			key, err1 := QueryUnescape(key)
   520			if err1 != nil {
   521				err = err1
   522				continue
   523			}
   524			value, err1 = QueryUnescape(value)
   525			if err1 != nil {
   526				err = err1
   527				continue
   528			}
   529			m[key] = append(m[key], value)
   530		}
   531		return err
   532	}
   533	
   534	// Encode encodes the values into ``URL encoded'' form.
   535	// e.g. "foo=bar&bar=baz"
   536	func (v Values) Encode() string {
   537		if v == nil {
   538			return ""
   539		}
   540		parts := make([]string, 0, len(v)) // will be large enough for most uses
   541		for k, vs := range v {
   542			prefix := QueryEscape(k) + "="
   543			for _, v := range vs {
   544				parts = append(parts, prefix+QueryEscape(v))
   545			}
   546		}
   547		return strings.Join(parts, "&")
   548	}
   549	
   550	// resolvePath applies special path segments from refs and applies
   551	// them to base, per RFC 2396.
   552	func resolvePath(basepath string, refpath string) string {
   553		base := strings.Split(basepath, "/")
   554		refs := strings.Split(refpath, "/")
   555		if len(base) == 0 {
   556			base = []string{""}
   557		}
   558		for idx, ref := range refs {
   559			switch {
   560			case ref == ".":
   561				base[len(base)-1] = ""
   562			case ref == "..":
   563				newLen := len(base) - 1
   564				if newLen < 1 {
   565					newLen = 1
   566				}
   567				base = base[0:newLen]
   568				base[len(base)-1] = ""
   569			default:
   570				if idx == 0 || base[len(base)-1] == "" {
   571					base[len(base)-1] = ref
   572				} else {
   573					base = append(base, ref)
   574				}
   575			}
   576		}
   577		return strings.Join(base, "/")
   578	}
   579	
   580	// IsAbs returns true if the URL is absolute.
   581	func (u *URL) IsAbs() bool {
   582		return u.Scheme != ""
   583	}
   584	
   585	// Parse parses a URL in the context of the receiver.  The provided URL
   586	// may be relative or absolute.  Parse returns nil, err on parse
   587	// failure, otherwise its return value is the same as ResolveReference.
   588	func (u *URL) Parse(ref string) (*URL, error) {
   589		refurl, err := Parse(ref)
   590		if err != nil {
   591			return nil, err
   592		}
   593		return u.ResolveReference(refurl), nil
   594	}
   595	
   596	// ResolveReference resolves a URI reference to an absolute URI from
   597	// an absolute base URI, per RFC 2396 Section 5.2.  The URI reference
   598	// may be relative or absolute.  ResolveReference always returns a new
   599	// URL instance, even if the returned URL is identical to either the
   600	// base or reference. If ref is an absolute URL, then ResolveReference
   601	// ignores base and returns a copy of ref.
   602	func (u *URL) ResolveReference(ref *URL) *URL {
   603		if ref.IsAbs() {
   604			url := *ref
   605			return &url
   606		}
   607		// relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
   608		url := *u
   609		url.RawQuery = ref.RawQuery
   610		url.Fragment = ref.Fragment
   611		if ref.Opaque != "" {
   612			url.Opaque = ref.Opaque
   613			url.User = nil
   614			url.Host = ""
   615			url.Path = ""
   616			return &url
   617		}
   618		if ref.Host != "" || ref.User != nil {
   619			// The "net_path" case.
   620			url.Host = ref.Host
   621			url.User = ref.User
   622		}
   623		if strings.HasPrefix(ref.Path, "/") {
   624			// The "abs_path" case.
   625			url.Path = ref.Path
   626		} else {
   627			// The "rel_path" case.
   628			path := resolvePath(u.Path, ref.Path)
   629			if !strings.HasPrefix(path, "/") {
   630				path = "/" + path
   631			}
   632			url.Path = path
   633		}
   634		return &url
   635	}
   636	
   637	// Query parses RawQuery and returns the corresponding values.
   638	func (u *URL) Query() Values {
   639		v, _ := ParseQuery(u.RawQuery)
   640		return v
   641	}
   642	
   643	// RequestURI returns the encoded path?query or opaque?query
   644	// string that would be used in an HTTP request for u.
   645	func (u *URL) RequestURI() string {
   646		result := u.Opaque
   647		if result == "" {
   648			result = escape(u.Path, encodePath)
   649			if result == "" {
   650				result = "/"
   651			}
   652		}
   653		if u.RawQuery != "" {
   654			result += "?" + u.RawQuery
   655		}
   656		return result
   657	}