Source file src/pkg/html/template/url.go
1 // Copyright 2011 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 package template
6
7 import (
8 "bytes"
9 "fmt"
10 "strings"
11 )
12
13 // urlFilter returns its input unless it contains an unsafe protocol in which
14 // case it defangs the entire URL.
15 func urlFilter(args ...interface{}) string {
16 s, t := stringify(args...)
17 if t == contentTypeURL {
18 return s
19 }
20 if i := strings.IndexRune(s, ':'); i >= 0 && strings.IndexRune(s[:i], '/') < 0 {
21 protocol := strings.ToLower(s[:i])
22 if protocol != "http" && protocol != "https" && protocol != "mailto" {
23 return "#" + filterFailsafe
24 }
25 }
26 return s
27 }
28
29 // urlEscaper produces an output that can be embedded in a URL query.
30 // The output can be embedded in an HTML attribute without further escaping.
31 func urlEscaper(args ...interface{}) string {
32 return urlProcessor(false, args...)
33 }
34
35 // urlEscaper normalizes URL content so it can be embedded in a quote-delimited
36 // string or parenthesis delimited url(...).
37 // The normalizer does not encode all HTML specials. Specifically, it does not
38 // encode '&' so correct embedding in an HTML attribute requires escaping of
39 // '&' to '&'.
40 func urlNormalizer(args ...interface{}) string {
41 return urlProcessor(true, args...)
42 }
43
44 // urlProcessor normalizes (when norm is true) or escapes its input to produce
45 // a valid hierarchical or opaque URL part.
46 func urlProcessor(norm bool, args ...interface{}) string {
47 s, t := stringify(args...)
48 if t == contentTypeURL {
49 norm = true
50 }
51 var b bytes.Buffer
52 written := 0
53 // The byte loop below assumes that all URLs use UTF-8 as the
54 // content-encoding. This is similar to the URI to IRI encoding scheme
55 // defined in section 3.1 of RFC 3987, and behaves the same as the
56 // EcmaScript builtin encodeURIComponent.
57 // It should not cause any misencoding of URLs in pages with
58 // Content-type: text/html;charset=UTF-8.
59 for i, n := 0, len(s); i < n; i++ {
60 c := s[i]
61 switch c {
62 // Single quote and parens are sub-delims in RFC 3986, but we
63 // escape them so the output can be embedded in in single
64 // quoted attributes and unquoted CSS url(...) constructs.
65 // Single quotes are reserved in URLs, but are only used in
66 // the obsolete "mark" rule in an appendix in RFC 3986
67 // so can be safely encoded.
68 case '!', '#', '$', '&', '*', '+', ',', '/', ':', ';', '=', '?', '@', '[', ']':
69 if norm {
70 continue
71 }
72 // Unreserved according to RFC 3986 sec 2.3
73 // "For consistency, percent-encoded octets in the ranges of
74 // ALPHA (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D),
75 // period (%2E), underscore (%5F), or tilde (%7E) should not be
76 // created by URI producers
77 case '-', '.', '_', '~':
78 continue
79 case '%':
80 // When normalizing do not re-encode valid escapes.
81 if norm && i+2 < len(s) && isHex(s[i+1]) && isHex(s[i+2]) {
82 continue
83 }
84 default:
85 // Unreserved according to RFC 3986 sec 2.3
86 if 'a' <= c && c <= 'z' {
87 continue
88 }
89 if 'A' <= c && c <= 'Z' {
90 continue
91 }
92 if '0' <= c && c <= '9' {
93 continue
94 }
95 }
96 b.WriteString(s[written:i])
97 fmt.Fprintf(&b, "%%%02x", c)
98 written = i + 1
99 }
100 if written == 0 {
101 return s
102 }
103 b.WriteString(s[written:])
104 return b.String()
105 }