src/pkg/html/template/html.go - The Go Programming Language

Golang

previous page next page
Source file src/pkg/html/template/html.go

     1	// Copyright 2011 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package template
     6	
     7	import (
     8		"bytes"
     9		"fmt"
    10		"strings"
    11		"unicode/utf8"
    12	)
    13	
    14	// htmlNospaceEscaper escapes for inclusion in unquoted attribute values.
    15	func htmlNospaceEscaper(args ...interface{}) string {
    16		s, t := stringify(args...)
    17		if t == contentTypeHTML {
    18			return htmlReplacer(stripTags(s), htmlNospaceNormReplacementTable, false)
    19		}
    20		return htmlReplacer(s, htmlNospaceReplacementTable, false)
    21	}
    22	
    23	// attrEscaper escapes for inclusion in quoted attribute values.
    24	func attrEscaper(args ...interface{}) string {
    25		s, t := stringify(args...)
    26		if t == contentTypeHTML {
    27			return htmlReplacer(stripTags(s), htmlNormReplacementTable, true)
    28		}
    29		return htmlReplacer(s, htmlReplacementTable, true)
    30	}
    31	
    32	// rcdataEscaper escapes for inclusion in an RCDATA element body.
    33	func rcdataEscaper(args ...interface{}) string {
    34		s, t := stringify(args...)
    35		if t == contentTypeHTML {
    36			return htmlReplacer(s, htmlNormReplacementTable, true)
    37		}
    38		return htmlReplacer(s, htmlReplacementTable, true)
    39	}
    40	
    41	// htmlEscaper escapes for inclusion in HTML text.
    42	func htmlEscaper(args ...interface{}) string {
    43		s, t := stringify(args...)
    44		if t == contentTypeHTML {
    45			return s
    46		}
    47		return htmlReplacer(s, htmlReplacementTable, true)
    48	}
    49	
    50	// htmlReplacementTable contains the runes that need to be escaped
    51	// inside a quoted attribute value or in a text node.
    52	var htmlReplacementTable = []string{
    53		// http://www.w3.org/TR/html5/tokenization.html#attribute-value-unquoted-state: "
    54		// U+0000 NULL Parse error. Append a U+FFFD REPLACEMENT
    55		// CHARACTER character to the current attribute's value.
    56		// "
    57		// and similarly
    58		// http://www.w3.org/TR/html5/tokenization.html#before-attribute-value-state
    59		0:    "\uFFFD",
    60		'"':  "&#34;",
    61		'&':  "&amp;",
    62		'\'': "&#39;",
    63		'+':  "&#43;",
    64		'<':  "&lt;",
    65		'>':  "&gt;",
    66	}
    67	
    68	// htmlNormReplacementTable is like htmlReplacementTable but without '&' to
    69	// avoid over-encoding existing entities.
    70	var htmlNormReplacementTable = []string{
    71		0:    "\uFFFD",
    72		'"':  "&#34;",
    73		'\'': "&#39;",
    74		'+':  "&#43;",
    75		'<':  "&lt;",
    76		'>':  "&gt;",
    77	}
    78	
    79	// htmlNospaceReplacementTable contains the runes that need to be escaped
    80	// inside an unquoted attribute value.
    81	// The set of runes escaped is the union of the HTML specials and
    82	// those determined by running the JS below in browsers:
    83	// <div id=d></div>
    84	// <script>(function () {
    85	// var a = [], d = document.getElementById("d"), i, c, s;
    86	// for (i = 0; i < 0x10000; ++i) {
    87	//   c = String.fromCharCode(i);
    88	//   d.innerHTML = "<span title=" + c + "lt" + c + "></span>"
    89	//   s = d.getElementsByTagName("SPAN")[0];
    90	//   if (!s || s.title !== c + "lt" + c) { a.push(i.toString(16)); }
    91	// }
    92	// document.write(a.join(", "));
    93	// })()</script>
    94	var htmlNospaceReplacementTable = []string{
    95		0:    "&#xfffd;",
    96		'\t': "&#9;",
    97		'\n': "&#10;",
    98		'\v': "&#11;",
    99		'\f': "&#12;",
   100		'\r': "&#13;",
   101		' ':  "&#32;",
   102		'"':  "&#34;",
   103		'&':  "&amp;",
   104		'\'': "&#39;",
   105		'+':  "&#43;",
   106		'<':  "&lt;",
   107		'=':  "&#61;",
   108		'>':  "&gt;",
   109		// A parse error in the attribute value (unquoted) and 
   110		// before attribute value states.
   111		// Treated as a quoting character by IE.
   112		'`': "&#96;",
   113	}
   114	
   115	// htmlNospaceNormReplacementTable is like htmlNospaceReplacementTable but
   116	// without '&' to avoid over-encoding existing entities.
   117	var htmlNospaceNormReplacementTable = []string{
   118		0:    "&#xfffd;",
   119		'\t': "&#9;",
   120		'\n': "&#10;",
   121		'\v': "&#11;",
   122		'\f': "&#12;",
   123		'\r': "&#13;",
   124		' ':  "&#32;",
   125		'"':  "&#34;",
   126		'\'': "&#39;",
   127		'+':  "&#43;",
   128		'<':  "&lt;",
   129		'=':  "&#61;",
   130		'>':  "&gt;",
   131		// A parse error in the attribute value (unquoted) and 
   132		// before attribute value states.
   133		// Treated as a quoting character by IE.
   134		'`': "&#96;",
   135	}
   136	
   137	// htmlReplacer returns s with runes replaced according to replacementTable
   138	// and when badRunes is true, certain bad runes are allowed through unescaped.
   139	func htmlReplacer(s string, replacementTable []string, badRunes bool) string {
   140		written, b := 0, new(bytes.Buffer)
   141		for i, r := range s {
   142			if int(r) < len(replacementTable) {
   143				if repl := replacementTable[r]; len(repl) != 0 {
   144					b.WriteString(s[written:i])
   145					b.WriteString(repl)
   146					// Valid as long as replacementTable doesn't 
   147					// include anything above 0x7f.
   148					written = i + utf8.RuneLen(r)
   149				}
   150			} else if badRunes {
   151				// No-op.
   152				// IE does not allow these ranges in unquoted attrs.
   153			} else if 0xfdd0 <= r && r <= 0xfdef || 0xfff0 <= r && r <= 0xffff {
   154				fmt.Fprintf(b, "%s&#x%x;", s[written:i], r)
   155				written = i + utf8.RuneLen(r)
   156			}
   157		}
   158		if written == 0 {
   159			return s
   160		}
   161		b.WriteString(s[written:])
   162		return b.String()
   163	}
   164	
   165	// stripTags takes a snippet of HTML and returns only the text content.
   166	// For example, `<b>&iexcl;Hi!</b> <script>...</script>` -> `&iexcl;Hi! `.
   167	func stripTags(html string) string {
   168		var b bytes.Buffer
   169		s, c, i, allText := []byte(html), context{}, 0, true
   170		// Using the transition funcs helps us avoid mangling
   171		// `<div title="1>2">` or `I <3 Ponies!`.
   172		for i != len(s) {
   173			if c.delim == delimNone {
   174				st := c.state
   175				// Use RCDATA instead of parsing into JS or CSS styles.
   176				if c.element != elementNone && !isInTag(st) {
   177					st = stateRCDATA
   178				}
   179				d, nread := transitionFunc[st](c, s[i:])
   180				i1 := i + nread
   181				if c.state == stateText || c.state == stateRCDATA {
   182					// Emit text up to the start of the tag or comment.
   183					j := i1
   184					if d.state != c.state {
   185						for j1 := j - 1; j1 >= i; j1-- {
   186							if s[j1] == '<' {
   187								j = j1
   188								break
   189							}
   190						}
   191					}
   192					b.Write(s[i:j])
   193				} else {
   194					allText = false
   195				}
   196				c, i = d, i1
   197				continue
   198			}
   199			i1 := i + bytes.IndexAny(s[i:], delimEnds[c.delim])
   200			if i1 < i {
   201				break
   202			}
   203			if c.delim != delimSpaceOrTagEnd {
   204				// Consume any quote.
   205				i1++
   206			}
   207			c, i = context{state: stateTag, element: c.element}, i1
   208		}
   209		if allText {
   210			return html
   211		} else if c.state == stateText || c.state == stateRCDATA {
   212			b.Write(s[i:])
   213		}
   214		return b.String()
   215	}
   216	
   217	// htmlNameFilter accepts valid parts of an HTML attribute or tag name or
   218	// a known-safe HTML attribute.
   219	func htmlNameFilter(args ...interface{}) string {
   220		s, t := stringify(args...)
   221		if t == contentTypeHTMLAttr {
   222			return s
   223		}
   224		if len(s) == 0 {
   225			// Avoid violation of structure preservation.
   226			// <input checked {{.K}}={{.V}}>.
   227			// Without this, if .K is empty then .V is the value of
   228			// checked, but otherwise .V is the value of the attribute
   229			// named .K.
   230			return filterFailsafe
   231		}
   232		s = strings.ToLower(s)
   233		if t := attrType(s); t != contentTypePlain {
   234			// TODO: Split attr and element name part filters so we can whitelist
   235			// attributes.
   236			return filterFailsafe
   237		}
   238		for _, r := range s {
   239			switch {
   240			case '0' <= r && r <= '9':
   241			case 'a' <= r && r <= 'z':
   242			default:
   243				return filterFailsafe
   244			}
   245		}
   246		return s
   247	}
   248	
   249	// commentEscaper returns the empty string regardless of input.
   250	// Comment content does not correspond to any parsed structure or
   251	// human-readable content, so the simplest and most secure policy is to drop
   252	// content interpolated into comments.
   253	// This approach is equally valid whether or not static comment content is
   254	// removed from the template.
   255	func commentEscaper(args ...interface{}) string {
   256		return ""
   257	}
previous page start next page