src/pkg/html/template/transition.go - The Go Programming Language

Golang

Source file src/pkg/html/template/transition.go

     1	// Copyright 2011 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package template
     6	
     7	import (
     8		"bytes"
     9		"strings"
    10	)
    11	
// transitionFunc is the array of context transition functions for text nodes,
// indexed by state.
// A transition function takes a context and template text input, and returns
// the updated context and the number of bytes consumed from the front of the
// input.
// Several states share one function (for example the JS string and regexp
// states all use tJSDelimited); those functions switch on the state of the
// context they receive to tell the cases apart.
var transitionFunc = [...]func(context, []byte) (context, int){
	stateText:        tText,
	stateTag:         tTag,
	stateAttrName:    tAttrName,
	stateAfterName:   tAfterName,
	stateBeforeValue: tBeforeValue,
	stateHTMLCmt:     tHTMLCmt,
	stateRCDATA:      tSpecialTagEnd,
	stateAttr:        tAttr,
	stateURL:         tURL,
	stateJS:          tJS,
	stateJSDqStr:     tJSDelimited,
	stateJSSqStr:     tJSDelimited,
	stateJSRegexp:    tJSDelimited,
	stateJSBlockCmt:  tBlockCmt,
	stateJSLineCmt:   tLineCmt,
	stateCSS:         tCSS,
	stateCSSDqStr:    tCSSStr,
	stateCSSSqStr:    tCSSStr,
	stateCSSDqURL:    tCSSStr,
	stateCSSSqURL:    tCSSStr,
	stateCSSURL:      tCSSStr,
	stateCSSBlockCmt: tBlockCmt,
	stateCSSLineCmt:  tLineCmt,
	stateError:       tError,
}
    42	
    43	var commentStart = []byte("<!--")
    44	var commentEnd = []byte("-->")
    45	
    46	// tText is the context transition function for the text state.
    47	func tText(c context, s []byte) (context, int) {
    48		k := 0
    49		for {
    50			i := k + bytes.IndexByte(s[k:], '<')
    51			if i < k || i+1 == len(s) {
    52				return c, len(s)
    53			} else if i+4 <= len(s) && bytes.Equal(commentStart, s[i:i+4]) {
    54				return context{state: stateHTMLCmt}, i + 4
    55			}
    56			i++
    57			end := false
    58			if s[i] == '/' {
    59				if i+1 == len(s) {
    60					return c, len(s)
    61				}
    62				end, i = true, i+1
    63			}
    64			j, e := eatTagName(s, i)
    65			if j != i {
    66				if end {
    67					e = elementNone
    68				}
    69				// We've found an HTML tag.
    70				return context{state: stateTag, element: e}, j
    71			}
    72			k = j
    73		}
    74		panic("unreachable")
    75	}
    76	
// elementContentType maps an element type to the state that tTag switches to
// once that element's tag has been closed with '>'.
var elementContentType = [...]state{
	elementNone:     stateText,
	elementScript:   stateJS,
	elementStyle:    stateCSS,
	elementTextarea: stateRCDATA,
	elementTitle:    stateRCDATA,
}
    84	
    85	// tTag is the context transition function for the tag state.
    86	func tTag(c context, s []byte) (context, int) {
    87		// Find the attribute name.
    88		i := eatWhiteSpace(s, 0)
    89		if i == len(s) {
    90			return c, len(s)
    91		}
    92		if s[i] == '>' {
    93			return context{
    94				state:   elementContentType[c.element],
    95				element: c.element,
    96			}, i + 1
    97		}
    98		j, err := eatAttrName(s, i)
    99		if err != nil {
   100			return context{state: stateError, err: err}, len(s)
   101		}
   102		state, attr := stateTag, attrNone
   103		if i == j {
   104			return context{
   105				state: stateError,
   106				err:   errorf(ErrBadHTML, 0, "expected space, attr name, or end of tag, but got %q", s[i:]),
   107			}, len(s)
   108		}
   109		switch attrType(string(s[i:j])) {
   110		case contentTypeURL:
   111			attr = attrURL
   112		case contentTypeCSS:
   113			attr = attrStyle
   114		case contentTypeJS:
   115			attr = attrScript
   116		}
   117		if j == len(s) {
   118			state = stateAttrName
   119		} else {
   120			state = stateAfterName
   121		}
   122		return context{state: state, element: c.element, attr: attr}, j
   123	}
   124	
   125	// tAttrName is the context transition function for stateAttrName.
   126	func tAttrName(c context, s []byte) (context, int) {
   127		i, err := eatAttrName(s, 0)
   128		if err != nil {
   129			return context{state: stateError, err: err}, len(s)
   130		} else if i != len(s) {
   131			c.state = stateAfterName
   132		}
   133		return c, i
   134	}
   135	
   136	// tAfterName is the context transition function for stateAfterName.
   137	func tAfterName(c context, s []byte) (context, int) {
   138		// Look for the start of the value.
   139		i := eatWhiteSpace(s, 0)
   140		if i == len(s) {
   141			return c, len(s)
   142		} else if s[i] != '=' {
   143			// Occurs due to tag ending '>', and valueless attribute.
   144			c.state = stateTag
   145			return c, i
   146		}
   147		c.state = stateBeforeValue
   148		// Consume the "=".
   149		return c, i + 1
   150	}
   151	
// attrStartStates maps an attribute type to the state that tBeforeValue
// enters at the start of that attribute's value.
var attrStartStates = [...]state{
	attrNone:   stateAttr,
	attrScript: stateJS,
	attrStyle:  stateCSS,
	attrURL:    stateURL,
}
   158	
   159	// tBeforeValue is the context transition function for stateBeforeValue.
   160	func tBeforeValue(c context, s []byte) (context, int) {
   161		i := eatWhiteSpace(s, 0)
   162		if i == len(s) {
   163			return c, len(s)
   164		}
   165		// Find the attribute delimiter.
   166		delim := delimSpaceOrTagEnd
   167		switch s[i] {
   168		case '\'':
   169			delim, i = delimSingleQuote, i+1
   170		case '"':
   171			delim, i = delimDoubleQuote, i+1
   172		}
   173		c.state, c.delim, c.attr = attrStartStates[c.attr], delim, attrNone
   174		return c, i
   175	}
   176	
   177	// tHTMLCmt is the context transition function for stateHTMLCmt.
   178	func tHTMLCmt(c context, s []byte) (context, int) {
   179		if i := bytes.Index(s, commentEnd); i != -1 {
   180			return context{}, i + 3
   181		}
   182		return c, len(s)
   183	}
   184	
// specialTagEndMarkers maps element types to the character sequence that
// case-insensitively signals the end of the special tag body.
// The markers are written in lower case and stop before any '>' or
// attribute text; there is no explicit entry for elementNone.
var specialTagEndMarkers = [...]string{
	elementScript:   "</script",
	elementStyle:    "</style",
	elementTextarea: "</textarea",
	elementTitle:    "</title",
}
   193	
   194	// tSpecialTagEnd is the context transition function for raw text and RCDATA
   195	// element states.
   196	func tSpecialTagEnd(c context, s []byte) (context, int) {
   197		if c.element != elementNone {
   198			if i := strings.Index(strings.ToLower(string(s)), specialTagEndMarkers[c.element]); i != -1 {
   199				return context{}, i
   200			}
   201		}
   202		return c, len(s)
   203	}
   204	
   205	// tAttr is the context transition function for the attribute state.
   206	func tAttr(c context, s []byte) (context, int) {
   207		return c, len(s)
   208	}
   209	
   210	// tURL is the context transition function for the URL state.
   211	func tURL(c context, s []byte) (context, int) {
   212		if bytes.IndexAny(s, "#?") >= 0 {
   213			c.urlPart = urlPartQueryOrFrag
   214		} else if len(s) != eatWhiteSpace(s, 0) && c.urlPart == urlPartNone {
   215			// HTML5 uses "Valid URL potentially surrounded by spaces" for
   216			// attrs: http://www.w3.org/TR/html5/index.html#attributes-1
   217			c.urlPart = urlPartPreQuery
   218		}
   219		return c, len(s)
   220	}
   221	
   222	// tJS is the context transition function for the JS state.
   223	func tJS(c context, s []byte) (context, int) {
   224		i := bytes.IndexAny(s, `"'/`)
   225		if i == -1 {
   226			// Entire input is non string, comment, regexp tokens.
   227			c.jsCtx = nextJSCtx(s, c.jsCtx)
   228			return c, len(s)
   229		}
   230		c.jsCtx = nextJSCtx(s[:i], c.jsCtx)
   231		switch s[i] {
   232		case '"':
   233			c.state, c.jsCtx = stateJSDqStr, jsCtxRegexp
   234		case '\'':
   235			c.state, c.jsCtx = stateJSSqStr, jsCtxRegexp
   236		case '/':
   237			switch {
   238			case i+1 < len(s) && s[i+1] == '/':
   239				c.state, i = stateJSLineCmt, i+1
   240			case i+1 < len(s) && s[i+1] == '*':
   241				c.state, i = stateJSBlockCmt, i+1
   242			case c.jsCtx == jsCtxRegexp:
   243				c.state = stateJSRegexp
   244			case c.jsCtx == jsCtxDivOp:
   245				c.jsCtx = jsCtxRegexp
   246			default:
   247				return context{
   248					state: stateError,
   249					err:   errorf(ErrSlashAmbig, 0, "'/' could start a division or regexp: %.32q", s[i:]),
   250				}, len(s)
   251			}
   252		default:
   253			panic("unreachable")
   254		}
   255		return c, i + 1
   256	}
   257	
   258	// tJSDelimited is the context transition function for the JS string and regexp
   259	// states.
   260	func tJSDelimited(c context, s []byte) (context, int) {
   261		specials := `\"`
   262		switch c.state {
   263		case stateJSSqStr:
   264			specials = `\'`
   265		case stateJSRegexp:
   266			specials = `\/[]`
   267		}
   268	
   269		k, inCharset := 0, false
   270		for {
   271			i := k + bytes.IndexAny(s[k:], specials)
   272			if i < k {
   273				break
   274			}
   275			switch s[i] {
   276			case '\\':
   277				i++
   278				if i == len(s) {
   279					return context{
   280						state: stateError,
   281						err:   errorf(ErrPartialEscape, 0, "unfinished escape sequence in JS string: %q", s),
   282					}, len(s)
   283				}
   284			case '[':
   285				inCharset = true
   286			case ']':
   287				inCharset = false
   288			default:
   289				// end delimiter
   290				if !inCharset {
   291					c.state, c.jsCtx = stateJS, jsCtxDivOp
   292					return c, i + 1
   293				}
   294			}
   295			k = i + 1
   296		}
   297	
   298		if inCharset {
   299			// This can be fixed by making context richer if interpolation
   300			// into charsets is desired.
   301			return context{
   302				state: stateError,
   303				err:   errorf(ErrPartialCharset, 0, "unfinished JS regexp charset: %q", s),
   304			}, len(s)
   305		}
   306	
   307		return c, len(s)
   308	}
   309	
   310	var blockCommentEnd = []byte("*/")
   311	
   312	// tBlockCmt is the context transition function for /*comment*/ states.
   313	func tBlockCmt(c context, s []byte) (context, int) {
   314		i := bytes.Index(s, blockCommentEnd)
   315		if i == -1 {
   316			return c, len(s)
   317		}
   318		switch c.state {
   319		case stateJSBlockCmt:
   320			c.state = stateJS
   321		case stateCSSBlockCmt:
   322			c.state = stateCSS
   323		default:
   324			panic(c.state.String())
   325		}
   326		return c, i + 2
   327	}
   328	
   329	// tLineCmt is the context transition function for //comment states.
   330	func tLineCmt(c context, s []byte) (context, int) {
   331		var lineTerminators string
   332		var endState state
   333		switch c.state {
   334		case stateJSLineCmt:
   335			lineTerminators, endState = "\n\r\u2028\u2029", stateJS
   336		case stateCSSLineCmt:
   337			lineTerminators, endState = "\n\f\r", stateCSS
   338			// Line comments are not part of any published CSS standard but
   339			// are supported by the 4 major browsers.
   340			// This defines line comments as
   341			//     LINECOMMENT ::= "//" [^\n\f\d]*
   342			// since http://www.w3.org/TR/css3-syntax/#SUBTOK-nl defines
   343			// newlines:
   344			//     nl ::= #xA | #xD #xA | #xD | #xC
   345		default:
   346			panic(c.state.String())
   347		}
   348	
   349		i := bytes.IndexAny(s, lineTerminators)
   350		if i == -1 {
   351			return c, len(s)
   352		}
   353		c.state = endState
   354		// Per section 7.4 of EcmaScript 5 : http://es5.github.com/#x7.4
   355		// "However, the LineTerminator at the end of the line is not
   356		// considered to be part of the single-line comment; it is
   357		// recognized separately by the lexical grammar and becomes part
   358		// of the stream of input elements for the syntactic grammar."
   359		return c, i
   360	}
   361	
   362	// tCSS is the context transition function for the CSS state.
   363	func tCSS(c context, s []byte) (context, int) {
   364		// CSS quoted strings are almost never used except for:
   365		// (1) URLs as in background: "/foo.png"
   366		// (2) Multiword font-names as in font-family: "Times New Roman"
   367		// (3) List separators in content values as in inline-lists:
   368		//    <style>
   369		//    ul.inlineList { list-style: none; padding:0 }
   370		//    ul.inlineList > li { display: inline }
   371		//    ul.inlineList > li:before { content: ", " }
   372		//    ul.inlineList > li:first-child:before { content: "" }
   373		//    </style>
   374		//    <ul class=inlineList><li>One<li>Two<li>Three</ul>
   375		// (4) Attribute value selectors as in a[href="http://example.com/"]
   376		//
   377		// We conservatively treat all strings as URLs, but make some
   378		// allowances to avoid confusion.
   379		//
   380		// In (1), our conservative assumption is justified.
   381		// In (2), valid font names do not contain ':', '?', or '#', so our
   382		// conservative assumption is fine since we will never transition past
   383		// urlPartPreQuery.
   384		// In (3), our protocol heuristic should not be tripped, and there
   385		// should not be non-space content after a '?' or '#', so as long as
   386		// we only %-encode RFC 3986 reserved characters we are ok.
   387		// In (4), we should URL escape for URL attributes, and for others we
   388		// have the attribute name available if our conservative assumption
   389		// proves problematic for real code.
   390	
   391		k := 0
   392		for {
   393			i := k + bytes.IndexAny(s[k:], `("'/`)
   394			if i < k {
   395				return c, len(s)
   396			}
   397			switch s[i] {
   398			case '(':
   399				// Look for url to the left.
   400				p := bytes.TrimRight(s[:i], "\t\n\f\r ")
   401				if endsWithCSSKeyword(p, "url") {
   402					j := len(s) - len(bytes.TrimLeft(s[i+1:], "\t\n\f\r "))
   403					switch {
   404					case j != len(s) && s[j] == '"':
   405						c.state, j = stateCSSDqURL, j+1
   406					case j != len(s) && s[j] == '\'':
   407						c.state, j = stateCSSSqURL, j+1
   408					default:
   409						c.state = stateCSSURL
   410					}
   411					return c, j
   412				}
   413			case '/':
   414				if i+1 < len(s) {
   415					switch s[i+1] {
   416					case '/':
   417						c.state = stateCSSLineCmt
   418						return c, i + 2
   419					case '*':
   420						c.state = stateCSSBlockCmt
   421						return c, i + 2
   422					}
   423				}
   424			case '"':
   425				c.state = stateCSSDqStr
   426				return c, i + 1
   427			case '\'':
   428				c.state = stateCSSSqStr
   429				return c, i + 1
   430			}
   431			k = i + 1
   432		}
   433		panic("unreachable")
   434	}
   435	
   436	// tCSSStr is the context transition function for the CSS string and URL states.
   437	func tCSSStr(c context, s []byte) (context, int) {
   438		var endAndEsc string
   439		switch c.state {
   440		case stateCSSDqStr, stateCSSDqURL:
   441			endAndEsc = `\"`
   442		case stateCSSSqStr, stateCSSSqURL:
   443			endAndEsc = `\'`
   444		case stateCSSURL:
   445			// Unquoted URLs end with a newline or close parenthesis.
   446			// The below includes the wc (whitespace character) and nl.
   447			endAndEsc = "\\\t\n\f\r )"
   448		default:
   449			panic(c.state.String())
   450		}
   451	
   452		k := 0
   453		for {
   454			i := k + bytes.IndexAny(s[k:], endAndEsc)
   455			if i < k {
   456				c, nread := tURL(c, decodeCSS(s[k:]))
   457				return c, k + nread
   458			}
   459			if s[i] == '\\' {
   460				i++
   461				if i == len(s) {
   462					return context{
   463						state: stateError,
   464						err:   errorf(ErrPartialEscape, 0, "unfinished escape sequence in CSS string: %q", s),
   465					}, len(s)
   466				}
   467			} else {
   468				c.state = stateCSS
   469				return c, i + 1
   470			}
   471			c, _ = tURL(c, decodeCSS(s[:i+1]))
   472			k = i + 1
   473		}
   474		panic("unreachable")
   475	}
   476	
   477	// tError is the context transition function for the error state.
   478	func tError(c context, s []byte) (context, int) {
   479		return c, len(s)
   480	}
   481	
   482	// eatAttrName returns the largest j such that s[i:j] is an attribute name.
   483	// It returns an error if s[i:] does not look like it begins with an
   484	// attribute name, such as encountering a quote mark without a preceding
   485	// equals sign.
   486	func eatAttrName(s []byte, i int) (int, *Error) {
   487		for j := i; j < len(s); j++ {
   488			switch s[j] {
   489			case ' ', '\t', '\n', '\f', '\r', '=', '>':
   490				return j, nil
   491			case '\'', '"', '<':
   492				// These result in a parse warning in HTML5 and are
   493				// indicative of serious problems if seen in an attr
   494				// name in a template.
   495				return -1, errorf(ErrBadHTML, 0, "%q in attribute name: %.32q", s[j:j+1], s)
   496			default:
   497				// No-op.
   498			}
   499		}
   500		return len(s), nil
   501	}
   502	
// elementNameMap maps lower-case tag names to their element types.
// eatTagName lower-cases a tag name before looking it up here; a name that
// is not in the map yields the zero element value.
var elementNameMap = map[string]element{
	"script":   elementScript,
	"style":    elementStyle,
	"textarea": elementTextarea,
	"title":    elementTitle,
}
   509	
   510	// asciiAlpha returns whether c is an ASCII letter.
   511	func asciiAlpha(c byte) bool {
   512		return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z'
   513	}
   514	
   515	// asciiAlphaNum returns whether c is an ASCII letter or digit.
   516	func asciiAlphaNum(c byte) bool {
   517		return asciiAlpha(c) || '0' <= c && c <= '9'
   518	}
   519	
   520	// eatTagName returns the largest j such that s[i:j] is a tag name and the tag type.
   521	func eatTagName(s []byte, i int) (int, element) {
   522		if i == len(s) || !asciiAlpha(s[i]) {
   523			return i, elementNone
   524		}
   525		j := i + 1
   526		for j < len(s) {
   527			x := s[j]
   528			if asciiAlphaNum(x) {
   529				j++
   530				continue
   531			}
   532			// Allow "x-y" or "x:y" but not "x-", "-y", or "x--y".
   533			if (x == ':' || x == '-') && j+1 < len(s) && asciiAlphaNum(s[j+1]) {
   534				j += 2
   535				continue
   536			}
   537			break
   538		}
   539		return j, elementNameMap[strings.ToLower(string(s[i:j]))]
   540	}
   541	
   542	// eatWhiteSpace returns the largest j such that s[i:j] is white space.
   543	func eatWhiteSpace(s []byte, i int) int {
   544		for j := i; j < len(s); j++ {
   545			switch s[j] {
   546			case ' ', '\t', '\n', '\f', '\r':
   547				// No-op.
   548			default:
   549				return j
   550			}
   551		}
   552		return len(s)
   553	}